Bug 1467516 [wpt PR 11380] - Update html5lib and six, a=testonly

Automatic update from web-platform-tests

Fix #7200: Update vendored html5lib to 1.0.1

This adds webencodings as another vendored package, and moves both to
third_party (part of #10922).

--
Fix #10922: move six into third_party and update to 1.11

--

wpt-commits: efdb898172298b29a50c2e39cd40ac191ee8b383, 7cd0b885a529734ef71afd3254df48f57f255512
wpt-pr: 11380

--HG--
rename : testing/web-platform/tests/tools/html5lib/.gitmodules => testing/web-platform/tests/tools/third_party/html5lib/.gitmodules
rename : testing/web-platform/tests/tools/html5lib/CONTRIBUTING.rst => testing/web-platform/tests/tools/third_party/html5lib/CONTRIBUTING.rst
rename : testing/web-platform/tests/tools/html5lib/LICENSE => testing/web-platform/tests/tools/third_party/html5lib/LICENSE
rename : testing/web-platform/tests/tools/html5lib/doc/Makefile => testing/web-platform/tests/tools/third_party/html5lib/doc/Makefile
rename : testing/web-platform/tests/tools/html5lib/doc/changes.rst => testing/web-platform/tests/tools/third_party/html5lib/doc/changes.rst
rename : testing/web-platform/tests/tools/html5lib/doc/license.rst => testing/web-platform/tests/tools/third_party/html5lib/doc/license.rst
rename : testing/web-platform/tests/tools/html5lib/doc/make.bat => testing/web-platform/tests/tools/third_party/html5lib/doc/make.bat
rename : testing/web-platform/tests/tools/html5lib/doc/modules.rst => testing/web-platform/tests/tools/third_party/html5lib/doc/modules.rst
rename : testing/web-platform/tests/tools/html5lib/html5lib/trie/datrie.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/datrie.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/trie/py.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/py.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/filters/_base.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/filters/base.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/__init__.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/__init__.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/tokenizertotree.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/tokenizertotree.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/us-ascii.html => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/us-ascii.html
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/utf-8-bom.html => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/utf-8-bom.html
rename : testing/web-platform/tests/tools/six/MANIFEST.in => testing/web-platform/tests/tools/third_party/six/MANIFEST.in
rename : testing/web-platform/tests/tools/six/documentation/Makefile => testing/web-platform/tests/tools/third_party/six/documentation/Makefile
rename : testing/web-platform/tests/tools/html5lib/requirements.txt => testing/web-platform/tests/tools/third_party/six/six.egg-info/top_level.txt
This commit is contained in:
Geoffrey Sneddon 2018-06-26 02:42:25 +00:00
Parent afbb50e29c
Commit 02df1786d1
176 changed files: 14187 additions and 4487 deletions


@@ -405432,7 +405432,7 @@
"support"
],
"./.gitmodules": [
"6a203e28d43909d7513daf8761281b351d2b2bd7",
"9e008399bdce736c7c03f7db0c3e8d624083c6b9",
"support"
],
"./.pyup.yml": [

testing/web-platform/tests/.gitmodules (vendored, 3 changed lines)

@@ -1,6 +1,3 @@
[submodule "tools/html5lib/html5lib/tests/testdata"]
path = tools/html5lib/html5lib/tests/testdata
url = https://github.com/html5lib/html5lib-tests.git
[submodule "resources/webidl2/test/widlproc"]
path = resources/webidl2/test/widlproc
url = https://github.com/dontcallmedom/widlproc.git


@@ -1,20 +0,0 @@
# Because we never want compiled Python
__pycache__/
*.pyc
# Ignore stuff produced by distutils
/build/
/dist/
/MANIFEST
# Generated by parse.py -p
stats.prof
# From cover (esp. in combination with nose)
.coverage
# Because tox's data is inherently local
/.tox/
# We have no interest in built Sphinx files
/doc/_build


@@ -1,37 +0,0 @@
language: python
python:
  - "2.6"
  - "2.7"
  - "3.2"
  - "3.3"
  - "3.4"
  - "pypy"

env:
  - USE_OPTIONAL=true
  - USE_OPTIONAL=false

matrix:
  exclude:
    - python: "2.7"
      env: USE_OPTIONAL=false
    - python: "3.4"
      env: USE_OPTIONAL=false
  include:
    - python: "2.7"
      env: USE_OPTIONAL=false FLAKE=true
    - python: "3.4"
      env: USE_OPTIONAL=false FLAKE=true

before_install:
  - git submodule update --init --recursive

install:
  - bash requirements-install.sh

script:
  - nosetests
  - bash flake8-run.sh

after_script:
  - python debug-info.py


@@ -1,171 +0,0 @@
Change Log
----------

0.9999
~~~~~~

Released on XXX, 2014

* XXX

0.999
~~~~~

Released on December 23, 2013

* Fix #127: add work-around for CPython issue #20007: .read(0) on
  http.client.HTTPResponse drops the rest of the content.

* Fix #115: lxml treewalker can now deal with fragments containing, at
  their root level, text nodes with non-ASCII characters on Python 2.

0.99
~~~~

Released on September 10, 2013

* No library changes from 1.0b3; released as 0.99 as pip has changed
  behaviour from 1.4 to avoid installing pre-release versions per
  PEP 440.

1.0b3
~~~~~

Released on July 24, 2013

* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
  implementation using it should be moved to
  ``NonRecursiveTreeWalker``, as everything bundled with html5lib has
  been for years.

* Fix #67 so that ``BufferedStream`` correctly returns a bytes
  object, thereby fixing any case where html5lib is passed a
  non-seekable RawIOBase-like object.

1.0b2
~~~~~

Released on June 27, 2013

* Removed reordering of attributes within the serializer. There is now
  an ``alphabetical_attributes`` option which preserves the previous
  behaviour through a new filter. This allows attribute order to be
  preserved through html5lib if the tree builder preserves order.

* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
  ``treeadapters.sax.to_sax`` which is generic and supports any
  treewalker; it also resolves all known bugs with ``dom2sax``.

* Fix treewalker assertions on hitting bytes strings on
  Python 2. Previous to 1.0b1, treewalkers coped with mixed
  bytes/unicode data on Python 2; this reintroduces this prior
  behaviour on Python 2. Behaviour is unchanged on Python 3.

1.0b1
~~~~~

Released on May 17, 2013

* Implementation updated to implement the `HTML specification
  <http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
  2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).

* Python 3.2+ supported in a single codebase using the ``six`` library.

* Removed support for Python 2.5 and older.

* Removed the deprecated Beautiful Soup 3 treebuilder.
  ``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
  since it doesn't support namespaces, foreign content like SVG and
  MathML is parsed incorrectly.

* Removed ``simpletree`` from the package. The default tree builder is
  now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
  available, and ``xml.etree.ElementTree`` otherwise).

* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
  output was well-formed XML, and hence provided little of use.

* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
  longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
  return the default DOM treebuilder, which uses ``xml.dom.minidom``.

* Optional heuristic character encoding detection now based on
  ``charade`` for Python 2.6 - 3.3 compatibility.

* Optional ``Genshi`` treewalker support fixed.

* Many bugfixes, including:

  * #33: null in attribute value breaks XML AttValue;

  * #4: nested, indirect descendant, <button> causes infinite loop;

  * `Google Code 215
    <http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
    detect seekable streams;

  * `Google Code 206
    <http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
    support for <video preload=...>, <audio preload=...>;

  * `Google Code 205
    <http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
    support for <video poster=...>;

  * `Google Code 202
    <http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
    file breaks InputStream.

* Source code is now mostly PEP 8 compliant.

* Test harness has been improved and now depends on ``nose``.

* Documentation updated and moved to http://html5lib.readthedocs.org/.

0.95
~~~~

Released on February 11, 2012

0.90
~~~~

Released on January 17, 2010

0.11.1
~~~~~~

Released on June 12, 2008

0.11
~~~~

Released on June 10, 2008

0.10
~~~~

Released on October 7, 2007

0.9
~~~

Released on March 11, 2007

0.2
~~~

Released on January 8, 2007

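The ``alphabetical_attributes`` option introduced in 1.0b2 above is easiest to see end to end. A minimal sketch against the pre-1.0 module layout this diff removes (output shape illustrative, not normative):

    import html5lib
    from html5lib.serializer.htmlserializer import HTMLSerializer

    # Parse a fragment, then re-serialize it with attributes reordered
    # alphabetically by the filter behind this option.
    frag = html5lib.parseFragment('<p id="x" class="y">hi</p>')
    walker = html5lib.getTreeWalker("etree")
    print(HTMLSerializer(alphabetical_attributes=True).render(walker(frag)))
    # class="y" now serializes before id="x"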

@@ -1,77 +0,0 @@
html5lib Package
================

:mod:`html5lib` Package
-----------------------

.. automodule:: html5lib.__init__
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`constants` Module
-----------------------

.. automodule:: html5lib.constants
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`html5parser` Module
-------------------------

.. automodule:: html5lib.html5parser
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`ihatexml` Module
----------------------

.. automodule:: html5lib.ihatexml
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`inputstream` Module
-------------------------

.. automodule:: html5lib.inputstream
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`sanitizer` Module
-----------------------

.. automodule:: html5lib.sanitizer
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`tokenizer` Module
-----------------------

.. automodule:: html5lib.tokenizer
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`utils` Module
-------------------

.. automodule:: html5lib.utils
    :members:
    :undoc-members:
    :show-inheritance:

Subpackages
-----------

.. toctree::

    html5lib.filters
    html5lib.serializer
    html5lib.treebuilders
    html5lib.treewalkers


@@ -1,19 +0,0 @@
serializer Package
==================

:mod:`serializer` Package
-------------------------

.. automodule:: html5lib.serializer
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`htmlserializer` Module
----------------------------

.. automodule:: html5lib.serializer.htmlserializer
    :members:
    :undoc-members:
    :show-inheritance:


@@ -1,59 +0,0 @@
treewalkers Package
===================

:mod:`treewalkers` Package
--------------------------

.. automodule:: html5lib.treewalkers
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`_base` Module
-------------------

.. automodule:: html5lib.treewalkers._base
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`dom` Module
-----------------

.. automodule:: html5lib.treewalkers.dom
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`etree` Module
-------------------

.. automodule:: html5lib.treewalkers.etree
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`genshistream` Module
--------------------------

.. automodule:: html5lib.treewalkers.genshistream
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`lxmletree` Module
-----------------------

.. automodule:: html5lib.treewalkers.lxmletree
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`pulldom` Module
---------------------

.. automodule:: html5lib.treewalkers.pulldom
    :members:
    :undoc-members:
    :show-inheritance:


@@ -1,14 +0,0 @@
#!/bin/bash -e

if [[ ! -x $(which flake8) ]]; then
    echo "fatal: flake8 not found on $PATH. Exiting."
    exit 1
fi

if [[ $TRAVIS != "true" || $FLAKE == "true" ]]; then
    find html5lib/ -name '*.py' -and -not -name 'constants.py' -print0 | xargs -0 flake8 --ignore=E501
    flake1=$?
    flake8 --max-line-length=99 --ignore=E126 html5lib/constants.py
    flake2=$?
    exit $[$flake1 || $flake2]
fi


@@ -1,23 +0,0 @@
"""
HTML parsing library based on the WHATWG "HTML5"
specification. The parser is designed to be compatible with existing
HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage:
import html5lib
f = open("my_document.html")
tree = html5lib.parse(f)
"""
from __future__ import absolute_import, division, unicode_literals
from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
__version__ = "0.9999-dev"

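The docstring's example is the whole public surface in miniature; a short sketch of the two parsing entry points exported above (input markup arbitrary):

    import html5lib

    # parse() accepts markup strings or file-like objects and returns a
    # tree from the default etree treebuilder; parseFragment() is the
    # fragment-parsing counterpart.
    doc = html5lib.parse("<!DOCTYPE html><html><body><p>hi</p></body></html>")
    frag = html5lib.parseFragment("<p>hi</p>")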

@@ -1,20 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base

try:
    from collections import OrderedDict
except ImportError:
    from ordereddict import OrderedDict


class Filter(_base.Filter):
    def __iter__(self):
        for token in _base.Filter.__iter__(self):
            if token["type"] in ("StartTag", "EmptyTag"):
                attrs = OrderedDict()
                for name, value in sorted(token["data"].items(),
                                          key=lambda x: x[0]):
                    attrs[name] = value
                token["data"] = attrs
            yield token


@@ -1,93 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

from gettext import gettext
_ = gettext

from . import _base
from ..constants import cdataElements, rcdataElements, voidElements

from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)


class LintError(Exception):
    pass


class Filter(_base.Filter):
    def __iter__(self):
        open_elements = []
        contentModelFlag = "PCDATA"
        for token in _base.Filter.__iter__(self):
            type = token["type"]
            if type in ("StartTag", "EmptyTag"):
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
                if not isinstance(name, str):
                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                if not name:
                    raise LintError(_("Empty tag name"))
                if type == "StartTag" and name in voidElements:
                    raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
                elif type == "EmptyTag" and name not in voidElements:
                    raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
                if type == "StartTag":
                    open_elements.append(name)
                for name, value in token["data"]:
                    if not isinstance(name, str):
                        raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
                    if not name:
                        raise LintError(_("Empty attribute name"))
                    if not isinstance(value, str):
                        raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
                if name in cdataElements:
                    contentModelFlag = "CDATA"
                elif name in rcdataElements:
                    contentModelFlag = "RCDATA"
                elif name == "plaintext":
                    contentModelFlag = "PLAINTEXT"

            elif type == "EndTag":
                name = token["name"]
                if not isinstance(name, str):
                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                if not name:
                    raise LintError(_("Empty tag name"))
                if name in voidElements:
                    raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
                start_name = open_elements.pop()
                if start_name != name:
                    raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
                contentModelFlag = "PCDATA"

            elif type == "Comment":
                if contentModelFlag != "PCDATA":
                    raise LintError(_("Comment not in PCDATA content model flag"))

            elif type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                if not isinstance(data, str):
                    raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
                if not data:
                    raise LintError(_("%(type)s token with empty data") % {"type": type})
                if type == "SpaceCharacters":
                    data = data.strip(spaceCharacters)
                    if data:
                        raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})

            elif type == "Doctype":
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
                if not isinstance(name, str):
                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                # XXX: what to do with token["data"] ?

            elif type in ("ParseError", "SerializeError"):
                pass

            else:
                raise LintError(_("Unknown token type: %(type)s") % {"type": type})

            yield token


@@ -1,12 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from ..sanitizer import HTMLSanitizerMixin


class Filter(_base.Filter, HTMLSanitizerMixin):
    def __iter__(self):
        for token in _base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token


@@ -1,271 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import re
from xml.sax.saxutils import escape, unescape
from .tokenizer import HTMLTokenizer
from .constants import tokenTypes
class HTMLSanitizerMixin(object):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
'munderover', 'none']
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
'width', 'wrap', 'xml:lang']
mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
'xlink:type', 'xmlns', 'xmlns:xlink']
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
'arabic-form', 'ascent', 'attributeName', 'attributeType',
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
'fill-opacity', 'fill-rule', 'font-family', 'font-size',
'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
'opacity', 'orient', 'origin', 'overline-position',
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
'transform', 'type', 'u1', 'u2', 'underline-position',
'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan']
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
'xlink:href', 'xml:base']
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
'mask', 'stroke']
svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
'set', 'use']
acceptable_css_properties = ['azimuth', 'background-color',
'border-bottom-color', 'border-collapse', 'border-color',
'border-left-color', 'border-right-color', 'border-top-color', 'clear',
'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
'white-space', 'width']
acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
'transparent', 'underline', 'white', 'yellow']
acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
'stroke-opacity']
acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
'ssh', 'sftp', 'rtsp', 'afs']
# subclasses may define their own versions of these constants
allowed_elements = acceptable_elements + mathml_elements + svg_elements
allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
allowed_css_properties = acceptable_css_properties
allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
# attributes are parsed, and a restricted set, # specified by
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
# in ALLOWED_PROTOCOLS are allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
# accommodate filters which use token_type differently
token_type = token["type"]
if token_type in list(tokenTypes.keys()):
token_type = tokenTypes[token_type]
if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]):
if token["name"] in self.allowed_elements:
return self.allowed_token(token, token_type)
else:
return self.disallowed_token(token, token_type)
elif token_type == tokenTypes["Comment"]:
pass
else:
return token
def allowed_token(self, token, token_type):
if "data" in token:
attrs = dict([(name, val) for name, val in
token["data"][::-1]
if name in self.allowed_attributes])
for attr in self.attr_val_is_uri:
if attr not in attrs:
continue
val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
(val_unescaped.split(':')[0] not in
self.allowed_protocols)):
del attrs[attr]
for attr in self.svg_attr_val_allows_ref:
if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
' ',
unescape(attrs[attr]))
if (token["name"] in self.svg_allow_local_href and
'xlink:href' in attrs and re.search('^\s*[^#\s].*',
attrs['xlink:href'])):
del attrs['xlink:href']
if 'style' in attrs:
attrs['style'] = self.sanitize_css(attrs['style'])
token["data"] = [[name, val] for name, val in list(attrs.items())]
return token
def disallowed_token(self, token, token_type):
if token_type == tokenTypes["EndTag"]:
token["data"] = "</%s>" % token["name"]
elif token["data"]:
attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
token["data"] = "<%s%s>" % (token["name"], attrs)
else:
token["data"] = "<%s>" % token["name"]
if token.get("selfClosing"):
token["data"] = token["data"][:-1] + "/>"
if token["type"] in list(tokenTypes.keys()):
token["type"] = "Characters"
else:
token["type"] = tokenTypes["Characters"]
del token["name"]
return token
def sanitize_css(self, style):
# disallow urls
style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
# gauntlet
if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
return ''
if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
return ''
clean = []
for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
if not value:
continue
if prop.lower() in self.allowed_css_properties:
clean.append(prop + ': ' + value + ';')
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
'padding']:
for keyword in value.split():
if keyword not in self.acceptable_css_keywords and \
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
break
else:
clean.append(prop + ': ' + value + ';')
elif prop.lower() in self.allowed_svg_properties:
clean.append(prop + ': ' + value + ';')
return ' '.join(clean)
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
lowercaseElementName=False, lowercaseAttrName=False, parser=None):
# Change case matching defaults as we only output lowercase html anyway
# This solution doesn't seem ideal...
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
lowercaseElementName, lowercaseAttrName, parser=parser)
def __iter__(self):
for token in HTMLTokenizer.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token

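The comment block in the middle of this file describes the sanitizer in pseudocode; in this vendored version the mixin is exercised by passing ``HTMLSanitizer`` as the parser's tokenizer, exactly as the test file later in this diff does. A minimal sketch:

    from html5lib import html5parser, sanitizer

    parser = html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
    frag = parser.parseFragment('<script>do_nasty_stuff()</script>')
    # Disallowed elements are escaped into text rather than dropped, so
    # the script tag survives only as &lt;script&gt; character data.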

@@ -1,16 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

from .. import treewalkers

from .htmlserializer import HTMLSerializer


def serialize(input, tree="etree", format="html", encoding=None,
              **serializer_opts):
    # XXX: Should we cache this?
    walker = treewalkers.getTreeWalker(tree)
    if format == "html":
        s = HTMLSerializer(**serializer_opts)
    else:
        raise ValueError("type must be html")
    return s.render(walker(input), encoding)

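For reference, a sketch of calling the convenience function above; any extra keyword arguments flow through ``**serializer_opts`` into ``HTMLSerializer``:

    import html5lib
    from html5lib.serializer import serialize

    tree = html5lib.parse("<!DOCTYPE html><title>t</title><p>x")
    # tree= names the treewalker family matching the parsed tree.
    print(serialize(tree, tree="etree", omit_optional_tags=False))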

@@ -1,320 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
import gettext
_ = gettext.gettext
try:
from functools import reduce
except ImportError:
pass
from ..constants import voidElements, booleanAttributes, spaceCharacters
from ..constants import rcdataElements, entities, xmlEntities
from .. import utils
from xml.sax.saxutils import escape
spaceCharacters = "".join(spaceCharacters)
try:
from codecs import register_error, xmlcharrefreplace_errors
except ImportError:
unicode_encode_errors = "strict"
else:
unicode_encode_errors = "htmlentityreplace"
encode_entity_map = {}
is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
# skip multi-character entities
if ((is_ucs4 and len(v) > 1) or
(not is_ucs4 and len(v) > 2)):
continue
if v != "&":
if len(v) == 2:
v = utils.surrogatePairToCodepoint(v)
else:
v = ord(v)
if v not in encode_entity_map or k.islower():
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
encode_entity_map[v] = k
def htmlentityreplace_errors(exc):
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
res = []
codepoints = []
skip = False
for i, c in enumerate(exc.object[exc.start:exc.end]):
if skip:
skip = False
continue
index = i + exc.start
if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
skip = True
else:
codepoint = ord(c)
codepoints.append(codepoint)
for cp in codepoints:
e = encode_entity_map.get(cp)
if e:
res.append("&")
res.append(e)
if not e.endswith(";"):
res.append(";")
else:
res.append("&#x%s;" % (hex(cp)[2:]))
return ("".join(res), exc.end)
else:
return xmlcharrefreplace_errors(exc)
register_error(unicode_encode_errors, htmlentityreplace_errors)
del register_error
class HTMLSerializer(object):
# attribute quoting options
quote_attr_values = False
quote_char = '"'
use_best_quote_char = True
# tag syntax options
omit_optional_tags = True
minimize_boolean_attributes = True
use_trailing_solidus = False
space_before_trailing_solidus = True
# escaping options
escape_lt_in_attrs = False
escape_rcdata = False
resolve_entities = True
# miscellaneous options
alphabetical_attributes = False
inject_meta_charset = True
strip_whitespace = False
sanitize = False
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
"omit_optional_tags", "minimize_boolean_attributes",
"use_trailing_solidus", "space_before_trailing_solidus",
"escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
"alphabetical_attributes", "inject_meta_charset",
"strip_whitespace", "sanitize")
def __init__(self, **kwargs):
"""Initialize HTMLSerializer.
Keyword options (default given first unless specified) include:
inject_meta_charset=True|False
Whether it insert a meta element to define the character set of the
document.
quote_attr_values=True|False
Whether to quote attribute values that don't require quoting
per HTML5 parsing rules.
quote_char=u'"'|u"'"
Use given quote character for attribute quoting. Default is to
use double quote unless attribute value contains a double quote,
in which case single quotes are used instead.
escape_lt_in_attrs=False|True
Whether to escape < in attribute values.
escape_rcdata=False|True
Whether to escape characters that need to be escaped within normal
elements within rcdata elements such as style.
resolve_entities=True|False
Whether to resolve named character entities that appear in the
source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
are unaffected by this setting.
strip_whitespace=False|True
Whether to remove semantically meaningless whitespace. (This
compresses all whitespace to a single space except within pre.)
minimize_boolean_attributes=True|False
Shortens boolean attributes to give just the attribute value,
for example <input disabled="disabled"> becomes <input disabled>.
use_trailing_solidus=False|True
Includes a close-tag slash at the end of the start tag of void
elements (empty elements whose end tag is forbidden). E.g. <hr/>.
space_before_trailing_solidus=True|False
Places a space immediately before the closing slash in a tag
using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
sanitize=False|True
Strip all unsafe or unknown constructs from output.
See `html5lib user documentation`_
omit_optional_tags=True|False
Omit start/end tags that are optional.
alphabetical_attributes=False|True
Reorder attributes to be in alphabetical order.
.. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
"""
if 'quote_char' in kwargs:
self.use_best_quote_char = False
for attr in self.options:
setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
self.errors = []
self.strict = False
def encode(self, string):
assert(isinstance(string, text_type))
if self.encoding:
return string.encode(self.encoding, unicode_encode_errors)
else:
return string
def encodeStrict(self, string):
assert(isinstance(string, text_type))
if self.encoding:
return string.encode(self.encoding, "strict")
else:
return string
def serialize(self, treewalker, encoding=None):
self.encoding = encoding
in_cdata = False
self.errors = []
if encoding and self.inject_meta_charset:
from ..filters.inject_meta_charset import Filter
treewalker = Filter(treewalker, encoding)
# WhitespaceFilter should be used before OptionalTagFilter
# for maximum efficiency of the latter filter
if self.strip_whitespace:
from ..filters.whitespace import Filter
treewalker = Filter(treewalker)
if self.sanitize:
from ..filters.sanitizer import Filter
treewalker = Filter(treewalker)
if self.omit_optional_tags:
from ..filters.optionaltags import Filter
treewalker = Filter(treewalker)
# Alphabetical attributes must be last, as other filters
# could add attributes and alter the order
if self.alphabetical_attributes:
from ..filters.alphabeticalattributes import Filter
treewalker = Filter(treewalker)
for token in treewalker:
type = token["type"]
if type == "Doctype":
doctype = "<!DOCTYPE %s" % token["name"]
if token["publicId"]:
doctype += ' PUBLIC "%s"' % token["publicId"]
elif token["systemId"]:
doctype += " SYSTEM"
if token["systemId"]:
if token["systemId"].find('"') >= 0:
if token["systemId"].find("'") >= 0:
self.serializeError(_("System identifer contains both single and double quote characters"))
quote_char = "'"
else:
quote_char = '"'
doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
doctype += ">"
yield self.encodeStrict(doctype)
elif type in ("Characters", "SpaceCharacters"):
if type == "SpaceCharacters" or in_cdata:
if in_cdata and token["data"].find("</") >= 0:
self.serializeError(_("Unexpected </ in CDATA"))
yield self.encode(token["data"])
else:
yield self.encode(escape(token["data"]))
elif type in ("StartTag", "EmptyTag"):
name = token["name"]
yield self.encodeStrict("<%s" % name)
if name in rcdataElements and not self.escape_rcdata:
in_cdata = True
elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element"))
for (attr_namespace, attr_name), attr_value in token["data"].items():
# TODO: Add namespace support here
k = attr_name
v = attr_value
yield self.encodeStrict(' ')
yield self.encodeStrict(k)
if not self.minimize_boolean_attributes or \
(k not in booleanAttributes.get(name, tuple())
and k not in booleanAttributes.get("", tuple())):
yield self.encodeStrict("=")
if self.quote_attr_values or not v:
quote_attr = True
else:
quote_attr = reduce(lambda x, y: x or (y in v),
spaceCharacters + ">\"'=", False)
v = v.replace("&", "&amp;")
if self.escape_lt_in_attrs:
v = v.replace("<", "&lt;")
if quote_attr:
quote_char = self.quote_char
if self.use_best_quote_char:
if "'" in v and '"' not in v:
quote_char = '"'
elif '"' in v and "'" not in v:
quote_char = "'"
if quote_char == "'":
v = v.replace("'", "&#39;")
else:
v = v.replace('"', "&quot;")
yield self.encodeStrict(quote_char)
yield self.encode(v)
yield self.encodeStrict(quote_char)
else:
yield self.encode(v)
if name in voidElements and self.use_trailing_solidus:
if self.space_before_trailing_solidus:
yield self.encodeStrict(" /")
else:
yield self.encodeStrict("/")
yield self.encode(">")
elif type == "EndTag":
name = token["name"]
if name in rcdataElements:
in_cdata = False
elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element"))
yield self.encodeStrict("</%s>" % name)
elif type == "Comment":
data = token["data"]
if data.find("--") >= 0:
self.serializeError(_("Comment contains --"))
yield self.encodeStrict("<!--%s-->" % token["data"])
elif type == "Entity":
name = token["name"]
key = name + ";"
if key not in entities:
self.serializeError(_("Entity %s not recognized" % name))
if self.resolve_entities and key not in xmlEntities:
data = entities[key]
else:
data = "&%s;" % name
yield self.encodeStrict(data)
else:
self.serializeError(token["data"])
def render(self, treewalker, encoding=None):
if encoding:
return b"".join(list(self.serialize(treewalker, encoding)))
else:
return "".join(list(self.serialize(treewalker)))
def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
# XXX The idea is to make data mandatory.
self.errors.append(data)
if self.strict:
raise SerializeError
class SerializeError(Exception):
    """Error in serialized tree"""
    pass

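The long ``__init__`` docstring above is the canonical list of serializer options; a sketch of a few of them combining, with the output shape following the documented semantics (attribute order depends on the treebuilder):

    from html5lib import parseFragment, getTreeWalker
    from html5lib.serializer.htmlserializer import HTMLSerializer

    frag = parseFragment('<input disabled="disabled" type="text">')
    s = HTMLSerializer(quote_attr_values=True,
                       minimize_boolean_attributes=False,
                       use_trailing_solidus=True)
    print(s.render(getTreeWalker("etree")(frag)))
    # e.g. <input disabled="disabled" type="text" />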

@@ -1 +0,0 @@
Each testcase file can be run through nose (using ``nosetests``).


@@ -1,41 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

import sys
import os

if __name__ == '__main__':
    # Allow us to import from the src directory
    os.chdir(os.path.split(os.path.abspath(__file__))[0])
    sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))

from html5lib.tokenizer import HTMLTokenizer


class HTMLParser(object):
    """ Fake parser to test tokenizer output """
    def parse(self, stream, output=True):
        tokenizer = HTMLTokenizer(stream)
        for token in tokenizer:
            if output:
                print(token)

if __name__ == "__main__":
    x = HTMLParser()
    if len(sys.argv) > 1:
        if len(sys.argv) > 2:
            import hotshot
            import hotshot.stats
            prof = hotshot.Profile('stats.prof')
            prof.runcall(x.parse, sys.argv[1], False)
            prof.close()
            stats = hotshot.stats.load('stats.prof')
            stats.strip_dirs()
            stats.sort_stats('time')
            stats.print_stats()
        else:
            x.parse(sys.argv[1])
    else:
        print("""Usage: python mockParser.py filename [stats]

If stats is specified the hotshot profiler will run and output the
stats instead.
""")


@@ -1,36 +0,0 @@
from __future__ import absolute_import, division, unicode_literals


def f1():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x += y + z


def f2():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x = x + y + z


def f3():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x = "".join((x, y, z))


def f4():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x = "%s%s%s" % (x, y, z)

import timeit

for x in range(4):
    # Time an actual call; timing the bare name "f1" would only
    # measure a global name lookup.
    statement = "f%s()" % (x + 1)
    setup = "from __main__ import f%s" % (x + 1)
    t = timeit.Timer(statement, setup)
    r = t.repeat(3, 1000000)
    print(r, min(r))


@@ -1,67 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

import os
import unittest

try:
    unittest.TestCase.assertEqual
except AttributeError:
    unittest.TestCase.assertEqual = unittest.TestCase.assertEquals

from .support import get_data_files, TestData, test_dir, errorMessage
from html5lib import HTMLParser, inputstream


class Html5EncodingTestCase(unittest.TestCase):
    def test_codec_name_a(self):
        self.assertEqual(inputstream.codecName("utf-8"), "utf-8")

    def test_codec_name_b(self):
        self.assertEqual(inputstream.codecName("utf8"), "utf-8")

    def test_codec_name_c(self):
        self.assertEqual(inputstream.codecName(" utf8 "), "utf-8")

    def test_codec_name_d(self):
        self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")


def runParserEncodingTest(data, encoding):
    p = HTMLParser()
    assert p.documentEncoding is None
    p.parse(data, useChardet=False)
    encoding = encoding.lower().decode("ascii")

    assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)


def runPreScanEncodingTest(data, encoding):
    stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
    encoding = encoding.lower().decode("ascii")

    # Very crude way to ignore irrelevant tests
    if len(data) > stream.numBytesMeta:
        return

    assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0])


def test_encoding():
    for filename in get_data_files("encoding"):
        tests = TestData(filename, b"data", encoding=None)
        for idx, test in enumerate(tests):
            yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
            yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])

try:
    try:
        import charade  # flake8: noqa
    except ImportError:
        import chardet  # flake8: noqa
except ImportError:
    print("charade/chardet not found, skipping chardet tests")
else:
    def test_chardet():
        with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
            encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
            assert encoding[0].lower() == "big5"


@@ -1,96 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import os
import sys
import traceback
import warnings
import re
warnings.simplefilter("error")
from .support import get_data_files
from .support import TestData, convert, convertExpected, treeTypes
from html5lib import html5parser, constants
# Run the parse error checks
checkParseErrors = False
# XXX - There should just be one function here but for some reason the testcase
# format differs from the treedump format by a single space character
def convertTreeDump(data):
return "\n".join(convert(3)(data).split("\n")[1:])
namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
def runParserTest(innerHTML, input, expected, errors, treeClass,
namespaceHTMLElements):
with warnings.catch_warnings(record=True) as caughtWarnings:
warnings.simplefilter("always")
p = html5parser.HTMLParser(tree=treeClass,
namespaceHTMLElements=namespaceHTMLElements)
try:
if innerHTML:
document = p.parseFragment(input, innerHTML)
else:
document = p.parse(input)
except:
errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
"\nTraceback:", traceback.format_exc()])
assert False, errorMsg
otherWarnings = [x for x in caughtWarnings
if not issubclass(x.category, constants.DataLossWarning)]
assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings]
if len(caughtWarnings):
return
output = convertTreeDump(p.tree.testSerializer(document))
expected = convertExpected(expected)
if namespaceHTMLElements:
expected = namespaceExpected(r"\1<html \2>", expected)
errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
"\nReceived:", output])
assert expected == output, errorMsg
errStr = []
for (line, col), errorcode, datavars in p.errors:
assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
errStr.append("Line: %i Col: %i %s" % (line, col,
constants.E[errorcode] % datavars))
errorMsg2 = "\n".join(["\n\nInput:", input,
"\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
"\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
if checkParseErrors:
assert len(p.errors) == len(errors), errorMsg2
def test_parser():
sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n")
files = get_data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat", "")
if testName in ("template",):
continue
tests = TestData(filename, "data")
for index, test in enumerate(tests):
input, errors, innerHTML, expected = [test[key] for key in
('data', 'errors',
'document-fragment',
'document')]
if errors:
errors = errors.split("\n")
for treeName, treeCls in treeTypes.items():
for namespaceHTMLElements in (True, False):
yield (runParserTest, innerHTML, input, expected, errors, treeCls,
namespaceHTMLElements)


@@ -1,64 +0,0 @@
from __future__ import absolute_import, division, unicode_literals

import io

from . import support  # flake8: noqa
from html5lib import html5parser
from html5lib.constants import namespaces
from html5lib import treebuilders

import unittest

# tests that aren't autogenerated from text files


class MoreParserTests(unittest.TestCase):

    def setUp(self):
        self.dom_tree = treebuilders.getTreeBuilder("dom")

    def test_assertDoctypeCloneable(self):
        parser = html5parser.HTMLParser(tree=self.dom_tree)
        doc = parser.parse('<!DOCTYPE HTML>')
        self.assertTrue(doc.cloneNode(True))

    def test_line_counter(self):
        # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
        parser = html5parser.HTMLParser(tree=self.dom_tree)
        parser.parse("<pre>\nx\n&gt;\n</pre>")

    def test_namespace_html_elements_0_dom(self):
        parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=True)
        doc = parser.parse("<html></html>")
        self.assertTrue(doc.childNodes[0].namespaceURI == namespaces["html"])

    def test_namespace_html_elements_1_dom(self):
        parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=False)
        doc = parser.parse("<html></html>")
        self.assertTrue(doc.childNodes[0].namespaceURI is None)

    def test_namespace_html_elements_0_etree(self):
        parser = html5parser.HTMLParser(namespaceHTMLElements=True)
        doc = parser.parse("<html></html>")
        self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],))

    def test_namespace_html_elements_1_etree(self):
        parser = html5parser.HTMLParser(namespaceHTMLElements=False)
        doc = parser.parse("<html></html>")
        self.assertTrue(list(doc)[0].tag == "html")

    def test_unicode_file(self):
        parser = html5parser.HTMLParser()
        parser.parse(io.StringIO("a"))


def buildTestSuite():
    return unittest.defaultTestLoader.loadTestsFromName(__name__)


def main():
    buildTestSuite()
    unittest.main()

if __name__ == '__main__':
    main()


@@ -1,105 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
try:
import json
except ImportError:
import simplejson as json
from html5lib import html5parser, sanitizer, constants, treebuilders
def toxmlFactory():
tree = treebuilders.getTreeBuilder("etree")
def toxml(element):
# encode/decode roundtrip required for Python 2.6 compatibility
result_bytes = tree.implementation.tostring(element, encoding="utf-8")
return result_bytes.decode("utf-8")
return toxml
def runSanitizerTest(name, expected, input, toxml=None):
if toxml is None:
toxml = toxmlFactory()
expected = ''.join([toxml(token) for token in html5parser.HTMLParser().
parseFragment(expected)])
expected = json.loads(json.dumps(expected))
assert expected == sanitize_html(input)
def sanitize_html(stream, toxml=None):
if toxml is None:
toxml = toxmlFactory()
return ''.join([toxml(token) for token in
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
parseFragment(stream)])
def test_should_handle_astral_plane_characters():
assert '<html:p xmlns:html="http://www.w3.org/1999/xhtml">\U0001d4b5 \U0001d538</html:p>' == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
def test_sanitizer():
toxml = toxmlFactory()
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
continue # TODO
if tag_name != tag_name.lower():
continue # TODO
if tag_name == 'image':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
toxml)
elif tag_name == 'br':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
toxml)
elif tag_name in constants.voidElements:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
toxml)
else:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
toxml)
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
tag_name = tag_name.upper()
yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name, tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
toxml)
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
if attribute_name != attribute_name.lower():
continue # TODO
if attribute_name == 'style':
continue
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name,
toxml)
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
attribute_name = attribute_name.upper()
yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name,
toxml)
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
"<a href=\"%s\">foo</a>" % protocol,
"""<a href="%s">foo</a>""" % protocol,
toxml)
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
"<a href=\"%s\">foo</a>" % protocol,
"""<a href="%s">foo</a>""" % protocol,
toxml)


@@ -1,178 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import json
import unittest
from .support import get_data_files
try:
unittest.TestCase.assertEqual
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
import html5lib
from html5lib import constants
from html5lib.serializer import HTMLSerializer, serialize
from html5lib.treewalkers._base import TreeWalker
optionals_loaded = []
try:
from lxml import etree
optionals_loaded.append("lxml")
except ImportError:
pass
default_namespace = constants.namespaces["html"]
class JsonWalker(TreeWalker):
def __iter__(self):
for token in self.tree:
type = token[0]
if type == "StartTag":
if len(token) == 4:
namespace, name, attrib = token[1:4]
else:
namespace = default_namespace
name, attrib = token[1:3]
yield self.startTag(namespace, name, self._convertAttrib(attrib))
elif type == "EndTag":
if len(token) == 3:
namespace, name = token[1:3]
else:
namespace = default_namespace
name = token[1]
yield self.endTag(namespace, name)
elif type == "EmptyTag":
if len(token) == 4:
namespace, name, attrib = token[1:]
else:
namespace = default_namespace
name, attrib = token[1:]
for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
yield token
elif type == "Comment":
yield self.comment(token[1])
elif type in ("Characters", "SpaceCharacters"):
for token in self.text(token[1]):
yield token
elif type == "Doctype":
if len(token) == 4:
yield self.doctype(token[1], token[2], token[3])
elif len(token) == 3:
yield self.doctype(token[1], token[2])
else:
yield self.doctype(token[1])
else:
raise ValueError("Unknown token type: " + type)
def _convertAttrib(self, attribs):
"""html5lib tree-walkers use a dict of (namespace, name): value for
attributes, but JSON cannot represent this. Convert from the format
in the serializer tests (a list of dicts with "namespace", "name",
and "value" as keys) to html5lib's tree-walker format."""
attrs = {}
for attrib in attribs:
name = (attrib["namespace"], attrib["name"])
assert(name not in attrs)
attrs[name] = attrib["value"]
return attrs
def serialize_html(input, options):
options = dict([(str(k), v) for k, v in options.items()])
stream = JsonWalker(input)
serializer = HTMLSerializer(alphabetical_attributes=True, **options)
return serializer.render(stream, options.get("encoding", None))
def runSerializerTest(input, expected, options):
encoding = options.get("encoding", None)
if encoding:
encode = lambda x: x.encode(encoding)
expected = list(map(encode, expected))
result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)
class EncodingTestCase(unittest.TestCase):
def throwsWithLatin1(self, input):
self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"})
def testDoctypeName(self):
self.throwsWithLatin1([["Doctype", "\u0101"]])
def testDoctypePublicId(self):
self.throwsWithLatin1([["Doctype", "potato", "\u0101"]])
def testDoctypeSystemId(self):
self.throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]])
def testCdataCharacters(self):
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
["<style>&amacr;"], {"encoding": "iso-8859-1"})
def testCharacters(self):
runSerializerTest([["Characters", "\u0101"]],
["&amacr;"], {"encoding": "iso-8859-1"})
def testStartTagName(self):
self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])
def testEmptyTagName(self):
self.throwsWithLatin1([["EmptyTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])
def testAttributeName(self):
self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]])
def testAttributeValue(self):
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
def testEndTagName(self):
self.throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]])
def testComment(self):
self.throwsWithLatin1([["Comment", "\u0101"]])
if "lxml" in optionals_loaded:
class LxmlTestCase(unittest.TestCase):
def setUp(self):
self.parser = etree.XMLParser(resolve_entities=False)
self.treewalker = html5lib.getTreeWalker("lxml")
self.serializer = HTMLSerializer()
def testEntityReplacement(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
tree = etree.fromstring(doc, parser=self.parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False)
self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
def testEntityXML(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
tree = etree.fromstring(doc, parser=self.parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False)
self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
def testEntityNoResolve(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
tree = etree.fromstring(doc, parser=self.parser).getroottree()
result = serialize(tree, tree="lxml", omit_optional_tags=False,
resolve_entities=False)
self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
def test_serializer():
for filename in get_data_files('serializer', '*.test'):
with open(filename) as fp:
tests = json.load(fp)
for index, test in enumerate(tests['tests']):
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
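(Aside, for orientation rather than part of this change: the fixtures above drive html5lib's serializer, whose public API consumes a tree-walker token stream. A minimal sketch, assuming html5lib 1.x is importable:)

import html5lib
from html5lib import treewalkers
from html5lib.serializer import HTMLSerializer

document = html5lib.parse("<p class='x'>Hi")
walker = treewalkers.getTreeWalker("etree")
# quote_attr_values="always" corresponds to the old boolean True setting.
serializer = HTMLSerializer(quote_attr_values="always", omit_optional_tags=False)
print(serializer.render(walker(document)))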

@@ -1,183 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from . import support # flake8: noqa
import unittest
import codecs
from io import BytesIO
from six.moves import http_client
from html5lib.inputstream import (BufferedStream, HTMLInputStream,
HTMLUnicodeInputStream, HTMLBinaryInputStream)
class BufferedStreamTest(unittest.TestCase):
def test_basic(self):
s = b"abc"
fp = BufferedStream(BytesIO(s))
read = fp.read(10)
assert read == s
def test_read_length(self):
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
read2 = fp.read(2)
assert read2 == b"bc"
read3 = fp.read(3)
assert read3 == b"def"
read4 = fp.read(4)
assert read4 == b""
def test_tell(self):
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert fp.tell() == 1
read2 = fp.read(2)
assert fp.tell() == 3
read3 = fp.read(3)
assert fp.tell() == 6
read4 = fp.read(4)
assert fp.tell() == 6
def test_seek(self):
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert read1 == b"a"
fp.seek(0)
read2 = fp.read(1)
assert read2 == b"a"
read3 = fp.read(2)
assert read3 == b"bc"
fp.seek(2)
read4 = fp.read(2)
assert read4 == b"cd"
fp.seek(4)
read5 = fp.read(2)
assert read5 == b"ef"
def test_seek_tell(self):
fp = BufferedStream(BytesIO(b"abcdef"))
read1 = fp.read(1)
assert fp.tell() == 1
fp.seek(0)
read2 = fp.read(1)
assert fp.tell() == 1
read3 = fp.read(2)
assert fp.tell() == 3
fp.seek(2)
read4 = fp.read(2)
assert fp.tell() == 4
fp.seek(4)
read5 = fp.read(2)
assert fp.tell() == 6
class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
_defaultChunkSize = 2
class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
_defaultChunkSize = 2
class HTMLInputStreamTest(unittest.TestCase):
def test_char_ascii(self):
stream = HTMLInputStream(b"'", encoding='ascii')
self.assertEqual(stream.charEncoding[0], 'ascii')
self.assertEqual(stream.char(), "'")
def test_char_utf8(self):
stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8')
self.assertEqual(stream.charEncoding[0], 'utf-8')
self.assertEqual(stream.char(), '\u2018')
def test_char_win1252(self):
stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
self.assertEqual(stream.charEncoding[0], 'windows-1252')
self.assertEqual(stream.char(), "\xa9")
self.assertEqual(stream.char(), "\xf1")
self.assertEqual(stream.char(), "\u2019")
def test_bom(self):
stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
self.assertEqual(stream.charEncoding[0], 'utf-8')
self.assertEqual(stream.char(), "'")
def test_utf_16(self):
stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
self.assertTrue(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
self.assertEqual(len(stream.charsUntil(' ', True)), 1025)
def test_newlines(self):
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
self.assertEqual(stream.position(), (1, 0))
self.assertEqual(stream.charsUntil('c'), "a\nbb\n")
self.assertEqual(stream.position(), (3, 0))
self.assertEqual(stream.charsUntil('x'), "ccc\ndddd")
self.assertEqual(stream.position(), (4, 4))
self.assertEqual(stream.charsUntil('e'), "x")
self.assertEqual(stream.position(), (4, 5))
def test_newlines2(self):
size = HTMLUnicodeInputStream._defaultChunkSize
stream = HTMLInputStream("\r" * size + "\n")
self.assertEqual(stream.charsUntil('x'), "\n" * size)
def test_position(self):
stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
self.assertEqual(stream.position(), (1, 0))
self.assertEqual(stream.charsUntil('c'), "a\nbb\n")
self.assertEqual(stream.position(), (3, 0))
stream.unget("\n")
self.assertEqual(stream.position(), (2, 2))
self.assertEqual(stream.charsUntil('c'), "\n")
self.assertEqual(stream.position(), (3, 0))
stream.unget("\n")
self.assertEqual(stream.position(), (2, 2))
self.assertEqual(stream.char(), "\n")
self.assertEqual(stream.position(), (3, 0))
self.assertEqual(stream.charsUntil('e'), "ccc\nddd")
self.assertEqual(stream.position(), (4, 3))
self.assertEqual(stream.charsUntil('h'), "e\nf\ng")
self.assertEqual(stream.position(), (6, 1))
def test_position2(self):
stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
self.assertEqual(stream.position(), (1, 0))
self.assertEqual(stream.char(), "a")
self.assertEqual(stream.position(), (1, 1))
self.assertEqual(stream.char(), "b")
self.assertEqual(stream.position(), (1, 2))
self.assertEqual(stream.char(), "c")
self.assertEqual(stream.position(), (1, 3))
self.assertEqual(stream.char(), "\n")
self.assertEqual(stream.position(), (2, 0))
self.assertEqual(stream.char(), "d")
self.assertEqual(stream.position(), (2, 1))
def test_python_issue_20007(self):
"""
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
class FakeSocket(object):
def makefile(self, _mode, _bufsize=None):
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
source = http_client.HTTPResponse(FakeSocket())
source.begin()
stream = HTMLInputStream(source)
self.assertEqual(stream.charsUntil(" "), "Text")
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == '__main__':
main()

@@ -1,353 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import os
import sys
import unittest
import warnings
from difflib import unified_diff
try:
unittest.TestCase.assertEqual
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
from .support import get_data_files, TestData, convertExpected
from html5lib import html5parser, treewalkers, treebuilders, constants
def PullDOMAdapter(node):
from xml.dom import Node
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS
if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")
elif node.nodeType == Node.COMMENT_NODE:
yield COMMENT, node
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
yield CHARACTERS, node
elif node.nodeType == Node.ELEMENT_NODE:
yield START_ELEMENT, node
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
yield END_ELEMENT, node
else:
raise NotImplementedError("Node type not supported: " + str(node.nodeType))
treeTypes = {
"DOM": {"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")},
"PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
"adapter": PullDOMAdapter,
"walker": treewalkers.getTreeWalker("pulldom")},
}
# Try whichever etree implementations are available, from a list that is
# "supposed" to work
try:
import xml.etree.ElementTree as ElementTree
except ImportError:
pass
else:
treeTypes['ElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
try:
import xml.etree.cElementTree as ElementTree
except ImportError:
pass
else:
treeTypes['cElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
try:
import lxml.etree as ElementTree # flake8: noqa
except ImportError:
pass
else:
treeTypes['lxml_native'] = \
{"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")}
try:
from genshi.core import QName, Attrs
from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
except ImportError:
pass
else:
def GenshiAdapter(tree):
text = None
for token in treewalkers.getTreeWalker("dom")(tree):
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if text is None:
text = token["data"]
else:
text += token["data"]
elif text is not None:
yield TEXT, text, (None, -1, -1)
text = None
if type in ("StartTag", "EmptyTag"):
if token["namespace"]:
name = "{%s}%s" % (token["namespace"], token["name"])
else:
name = token["name"]
attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
for attr, value in token["data"].items()])
yield (START, (QName(name), attrs), (None, -1, -1))
if type == "EmptyTag":
type = "EndTag"
if type == "EndTag":
if token["namespace"]:
name = "{%s}%s" % (token["namespace"], token["name"])
else:
name = token["name"]
yield END, QName(name), (None, -1, -1)
elif type == "Comment":
yield COMMENT, token["data"], (None, -1, -1)
elif type == "Doctype":
yield DOCTYPE, (token["name"], token["publicId"],
token["systemId"]), (None, -1, -1)
else:
pass # FIXME: What to do?
if text is not None:
yield TEXT, text, (None, -1, -1)
treeTypes["genshi"] = \
{"builder": treebuilders.getTreeBuilder("dom"),
"adapter": GenshiAdapter,
"walker": treewalkers.getTreeWalker("genshi")}
def concatenateCharacterTokens(tokens):
charactersToken = None
for token in tokens:
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if charactersToken is None:
charactersToken = {"type": "Characters", "data": token["data"]}
else:
charactersToken["data"] += token["data"]
else:
if charactersToken is not None:
yield charactersToken
charactersToken = None
yield token
if charactersToken is not None:
yield charactersToken
def convertTokens(tokens):
output = []
indent = 0
for token in concatenateCharacterTokens(tokens):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
if (token["namespace"] and
token["namespace"] != constants.namespaces["html"]):
if token["namespace"] in constants.prefixes:
name = constants.prefixes[token["namespace"]]
else:
name = token["namespace"]
name += " " + token["name"]
else:
name = token["name"]
output.append("%s<%s>" % (" " * indent, name))
indent += 2
attrs = token["data"]
if attrs:
# TODO: Remove this if statement, attrs should always exist
for (namespace, name), value in sorted(attrs.items()):
if namespace:
if namespace in constants.prefixes:
outputname = constants.prefixes[namespace]
else:
outputname = namespace
outputname += " " + name
else:
outputname = name
output.append("%s%s=\"%s\"" % (" " * indent, outputname, value))
if type == "EmptyTag":
indent -= 2
elif type == "EndTag":
indent -= 2
elif type == "Comment":
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
elif type == "Doctype":
if token["name"]:
if token["publicId"]:
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
(" " * indent, token["name"],
token["publicId"],
token["systemId"] and token["systemId"] or ""))
elif token["systemId"]:
output.append("""%s<!DOCTYPE %s "" "%s">""" %
(" " * indent, token["name"],
token["systemId"]))
else:
output.append("%s<!DOCTYPE %s>" % (" " * indent,
token["name"]))
else:
output.append("%s<!DOCTYPE >" % (" " * indent,))
elif type in ("Characters", "SpaceCharacters"):
output.append("%s\"%s\"" % (" " * indent, token["data"]))
else:
pass # TODO: what to do with errors?
return "\n".join(output)
import re
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
def sortattrs(x):
lines = x.group(0).split("\n")
lines.sort()
return "\n".join(lines)
class TokenTestCase(unittest.TestCase):
def test_all_tokens(self):
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': 'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'b', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'c', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
]
for treeName, treeCls in treeTypes.items():
p = html5parser.HTMLParser(tree=treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = treeCls["walker"](document)
for expectedToken, outputToken in zip(expected, output):
self.assertEqual(expectedToken, outputToken)
def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
warnings.resetwarnings()
warnings.simplefilter("error")
try:
p = html5parser.HTMLParser(tree=treeClass["builder"])
if innerHTML:
document = p.parseFragment(input, innerHTML)
else:
document = p.parse(input)
except constants.DataLossWarning:
# Ignore testcases we know we don't pass
return
document = treeClass.get("adapter", lambda x: x)(document)
try:
output = convertTokens(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
expected = attrlist.sub(sortattrs, convertExpected(expected))
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
[line + "\n" for line in output.splitlines()],
"Expected", "Received"))
assert expected == output, "\n".join([
"", "Input:", input,
"", "Expected:", expected,
"", "Received:", output,
"", "Diff:", diff,
])
except NotImplementedError:
pass # Amnesty for those that confess...
def test_treewalker():
sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")
for treeName, treeCls in treeTypes.items():
files = get_data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat", "")
if testName in ("template",):
continue
tests = TestData(filename, "data")
for index, test in enumerate(tests):
(input, errors,
innerHTML, expected) = [test[key] for key in ("data", "errors",
"document-fragment",
"document")]
errors = errors.split("\n")
yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
def set_attribute_on_first_child(docfrag, name, value, treeName):
"""naively sets an attribute on the first child of the document
fragment passed in"""
setter = {'ElementTree': lambda d: d[0].set,
'DOM': lambda d: d.firstChild.setAttribute}
setter['cElementTree'] = setter['ElementTree']
try:
setter.get(treeName, setter['DOM'])(docfrag)(name, value)
except AttributeError:
setter['ElementTree'](docfrag)(name, value)
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
set_attribute_on_first_child(document, nom, val, treeName)
document = treeClass.get("adapter", lambda x: x)(document)
output = convertTokens(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
def test_treewalker_six_mix():
"""Str/Unicode mix. If str attrs added to tree"""
# On Python 2.x string literals are of type str. Unless, like this
# file, the programmer imports unicode_literals from __future__.
# In that case, string literals become objects of type unicode.
# This test simulates a Py2 user, modifying attributes on a document
# fragment but not using the u'' syntax nor importing unicode_literals
sm_tests = [
('<a href="http://example.com">Example</a>',
[(str('class'), str('test123'))],
'<a>\n class="test123"\n href="http://example.com"\n "Example"'),
('<link href="http://example.com/cow">',
[(str('rel'), str('alternate'))],
'<link>\n href="http://example.com/cow"\n rel="alternate"\n "Example"')
]
for tree in treeTypes.items():
for intext, attrs, expected in sm_tests:
yield runTreewalkerEditTest, intext, expected, attrs, tree

@@ -1,133 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import unittest
from html5lib.filters.whitespace import Filter
from html5lib.constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
try:
unittest.TestCase.assertEqual
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
class TestCase(unittest.TestCase):
def runTest(self, input, expected):
output = list(Filter(input))
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
self.assertEqual(output, expected, errorMsg)
def runTestUnmodifiedOutput(self, input):
self.runTest(input, input)
def testPhrasingElements(self):
self.runTestUnmodifiedOutput(
[{"type": "Characters", "data": "This is a "},
{"type": "StartTag", "name": "span", "data": []},
{"type": "Characters", "data": "phrase"},
{"type": "EndTag", "name": "span", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "with"},
{"type": "SpaceCharacters", "data": " "},
{"type": "StartTag", "name": "em", "data": []},
{"type": "Characters", "data": "emphasised text"},
{"type": "EndTag", "name": "em", "data": []},
{"type": "Characters", "data": " and an "},
{"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
{"type": "Characters", "data": "."}])
def testLeadingWhitespace(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceAsCharacters(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": " foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespace(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": " "},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespaceAsCharacters(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo "},
{"type": "EndTag", "name": "p", "data": []}])
def testWhitespace(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo bar"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testLeadingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "pre", "data": []}])
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()

@@ -1,76 +0,0 @@
"""A collection of modules for building different kinds of tree from
HTML documents.
To create a treebuilder for a new type of tree, you need to
implement several things:
1) A set of classes for various types of elements: Document, Doctype,
Comment, Element. These must implement the interface of
treebuilders._base.Node (although comment nodes have a different
signature for their constructor, see treebuilders.etree.Comment)
Textual content may also be implemented as another node type, or not, as
your tree implementation requires.
2) A treebuilder object (called TreeBuilder by convention) that
inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
documentClass - the class to use for the bottommost node of a document
elementClass - the class to use for HTML Elements
commentClass - the class to use for comments
doctypeClass - the class to use for doctypes
It also has one required method:
getDocument - Returns the root node of the complete document tree
3) If you wish to run the unit tests, you must also create a
testSerializer method on your treebuilder which accepts a node and
returns a string containing the node and its children serialized according
to the format used in the unit tests
"""
from __future__ import absolute_import, division, unicode_literals
from ..utils import default_etree
treeBuilderCache = {}
def getTreeBuilder(treeType, implementation=None, **kwargs):
"""Get a TreeBuilder class for various types of tree with built-in support
treeType - the name of the tree type required (case-insensitive). Supported
values are:
"dom" - A generic builder for DOM implementations, defaulting to
an xml.dom.minidom-based implementation.
"etree" - A generic builder for tree implementations exposing an
ElementTree-like interface, defaulting to
xml.etree.cElementTree if available and
xml.etree.ElementTree if not.
"lxml" - An etree-based builder for lxml.etree, handling
limitations of lxml's implementation.
implementation - (Currently applies to the "etree" and "dom" tree types). A
module implementing the tree type e.g.
xml.etree.ElementTree or xml.etree.cElementTree."""
treeType = treeType.lower()
if treeType not in treeBuilderCache:
if treeType == "dom":
from . import dom
# Come up with a sane default (pref. from the stdlib)
if implementation is None:
from xml.dom import minidom
implementation = minidom
# NEVER cache here, caching is done in the dom submodule
return dom.getDomModule(implementation, **kwargs).TreeBuilder
elif treeType == "lxml":
from . import etree_lxml
treeBuilderCache[treeType] = etree_lxml.TreeBuilder
elif treeType == "etree":
from . import etree
if implementation is None:
implementation = default_etree
# NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeBuilder
else:
raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
return treeBuilderCache.get(treeType)
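(Illustration, not part of the diff: the selection logic above pairs getTreeBuilder with HTMLParser. A minimal sketch, assuming html5lib is installed:)

import html5lib
from html5lib import treebuilders

# "etree" resolves to xml.etree.cElementTree when available, else ElementTree.
TreeBuilder = treebuilders.getTreeBuilder("etree")
parser = html5lib.HTMLParser(tree=TreeBuilder)
document = parser.parse("<p>Hello World!")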

@@ -1,57 +0,0 @@
"""A collection of modules for iterating through different kinds of
tree, generating tokens identical to those produced by the tokenizer
module.
To create a tree walker for a new type of tree, you need to
implement a tree walker object (called TreeWalker by convention) that
implements a 'serialize' method taking a tree as sole argument and
returning an iterator generating tokens.
"""
from __future__ import absolute_import, division, unicode_literals
import sys
from ..utils import default_etree
treeWalkerCache = {}
def getTreeWalker(treeType, implementation=None, **kwargs):
"""Get a TreeWalker class for various types of tree with built-in support
treeType - the name of the tree type required (case-insensitive). Supported
values are:
"dom" - The xml.dom.minidom DOM implementation
"pulldom" - The xml.dom.pulldom event stream
"etree" - A generic walker for tree implementations exposing an
elementtree-like interface (known to work with
ElementTree, cElementTree and lxml.etree).
"lxml" - Optimized walker for lxml.etree
"genshi" - a Genshi stream
implementation - (Currently applies to the "etree" tree type only). A module
implementing the tree type e.g. xml.etree.ElementTree or
cElementTree."""
treeType = treeType.lower()
if treeType not in treeWalkerCache:
if treeType in ("dom", "pulldom"):
name = "%s.%s" % (__name__, treeType)
__import__(name)
mod = sys.modules[name]
treeWalkerCache[treeType] = mod.TreeWalker
elif treeType == "genshi":
from . import genshistream
treeWalkerCache[treeType] = genshistream.TreeWalker
elif treeType == "lxml":
from . import lxmletree
treeWalkerCache[treeType] = lxmletree.TreeWalker
elif treeType == "etree":
from . import etree
if implementation is None:
implementation = default_etree
# XXX: NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeWalker
return treeWalkerCache.get(treeType)
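(Illustration, not part of the diff: a walker obtained from getTreeWalker yields token dicts when called on a parsed tree. A minimal sketch, assuming html5lib is installed:)

import html5lib
from html5lib import treewalkers

document = html5lib.parse("<p>Hello <em>World</em>!")
walker = treewalkers.getTreeWalker("etree")
for token in walker(document):
    # Token types include StartTag, EndTag, Characters and SpaceCharacters.
    print(token["type"], token.get("name", token.get("data", "")))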

@@ -1,63 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
from . import _base
from ..constants import voidElements
class TreeWalker(_base.TreeWalker):
def __iter__(self):
ignore_until = None
previous = None
for event in self.tree:
if previous is not None and \
(ignore_until is None or previous[1] is ignore_until):
if previous[1] is ignore_until:
ignore_until = None
for token in self.tokens(previous, event):
yield token
if token["type"] == "EmptyTag":
ignore_until = previous[1]
previous = event
if ignore_until is None or previous[1] is ignore_until:
for token in self.tokens(previous, None):
yield token
elif ignore_until is not None:
raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
def tokens(self, event, next):
type, node = event
if type == START_ELEMENT:
name = node.nodeName
namespace = node.namespaceURI
attrs = {}
for attr in list(node.attributes.keys()):
attr = node.getAttributeNode(attr)
attrs[(attr.namespaceURI, attr.localName)] = attr.value
if name in voidElements:
for token in self.emptyTag(namespace,
name,
attrs,
not next or next[1] is not node):
yield token
else:
yield self.startTag(namespace, name, attrs)
elif type == END_ELEMENT:
name = node.nodeName
namespace = node.namespaceURI
if name not in voidElements:
yield self.endTag(namespace, name)
elif type == COMMENT:
yield self.comment(node.nodeValue)
elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
for token in self.text(node.nodeValue):
yield token
else:
yield self.unknown(type)

@@ -1,16 +0,0 @@
#!/bin/bash -e
if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then
echo "fatal: \$USE_OPTIONAL not set to true or false. Exiting."
exit 1
fi
pip install -r requirements-test.txt
if [[ $USE_OPTIONAL == "true" && $TRAVIS_PYTHON_VERSION != "pypy" ]]; then
if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then
pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-2.6.txt
else
pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-cpython.txt
fi
fi

@@ -1,5 +0,0 @@
-r requirements-optional-cpython.txt
# Can be used to force attributes to be serialized in alphabetical
# order.
ordereddict

@@ -1,5 +0,0 @@
-r requirements-optional.txt
# lxml is supported with its own treebuilder ("lxml") and otherwise
# uses the standard ElementTree support
lxml

@@ -1,13 +0,0 @@
-r requirements.txt
# We support a Genshi treewalker that can be used to serialize Genshi
# streams.
genshi
# DATrie can be used in place of our Python trie implementation for
# slightly better parsing performance.
datrie
# charade can be used as a fallback in case we are unable to determine
# the encoding of a document.
charade

@@ -1,5 +0,0 @@
-r requirements.txt
flake8
nose
ordereddict # Python 2.6

@@ -1,44 +0,0 @@
from distutils.core import setup
import os
import codecs
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Text Processing :: Markup :: HTML'
]
packages = ['html5lib'] + ['html5lib.'+name
for name in os.listdir(os.path.join('html5lib'))
if os.path.isdir(os.path.join('html5lib', name)) and
not name.startswith('.') and name != 'tests']
current_dir = os.path.dirname(__file__)
with codecs.open(os.path.join(current_dir, 'README.rst'), 'r', 'utf8') as readme_file:
with codecs.open(os.path.join(current_dir, 'CHANGES.rst'), 'r', 'utf8') as changes_file:
long_description = readme_file.read() + '\n' + changes_file.read()
setup(name='html5lib',
version='0.9999-dev',
url='https://github.com/html5lib/html5lib-python',
license="MIT License",
description='HTML parser based on the WHATWG HTML specification',
long_description=long_description,
classifiers=classifiers,
maintainer='James Graham',
maintainer_email='james@hoppipolla.co.uk',
packages=packages,
install_requires=[
'six',
],
)

@@ -1,30 +0,0 @@
[tox]
envlist = py26,py27,py32,py33,py34,pypy
[testenv]
deps =
-r{toxinidir}/requirements-optional-cpython.txt
flake8
nose
commands =
{envbindir}/nosetests -q
{toxinidir}/flake8-run.sh
install_command =
pip install {opts} {packages}
[testenv:pypy]
# lxml doesn't work and datrie doesn't make sense
# (it's slower than the pure-python version)
deps =
charade
flake8
Genshi
nose
six
[testenv:py26]
basepython = python2.6
deps =
-r{toxinidir}/requirements-optional-2.6.txt
flake8
nose

@@ -1,24 +0,0 @@
#!/usr/bin/env python
import sys
import urllib.request, urllib.error, urllib.parse
import codecs
def main():
encodings = []
f = urllib.request.urlopen(sys.argv[1])
for line in f:
line = line.decode("ascii")  # urlopen on Python 3 yields bytes, not str
if line.startswith("Name: ") or line.startswith("Alias: "):
enc = line.split()[1]
try:
codecs.lookup(enc)
if enc.lower() not in encodings:
encodings.append(enc.lower())
except LookupError:
pass
sys.stdout.write("encodings = frozenset((\n")
for enc in encodings:
sys.stdout.write(' "%s",\n'%enc)
sys.stdout.write(' ))')
if __name__ == "__main__":
main()

@@ -1,122 +0,0 @@
#!/usr/bin/env python
"""Spider to try and find bugs in the parser. Requires httplib2 and elementtree
usage:
import spider
s = spider.Spider()
s.run("http://www.google.com", maxURLs=100)
"""
import urllib.request, urllib.error, urllib.parse
import urllib.robotparser
import hashlib  # the Python 2-only md5 module is gone; hashlib works on both
import httplib2
import html5lib
from html5lib.treebuilders import etree
class Spider(object):
def __init__(self):
self.unvisitedURLs = set()
self.visitedURLs = set()
self.buggyURLs = set()
self.robotParser = urllib.robotparser.RobotFileParser()
self.contentDigest = {}
self.http = httplib2.Http(".cache")
def run(self, initialURL, maxURLs=1000):
urlNumber = 0
self.visitedURLs.add(initialURL)
content = self.loadURL(initialURL)
while maxURLs is None or urlNumber < maxURLs:
if content is not None:
self.parse(content)
urlNumber += 1
if not self.unvisitedURLs:
break
content = self.loadURL(self.unvisitedURLs.pop())
def parse(self, content):
failed = False
p = html5lib.HTMLParser(tree=etree.TreeBuilder)
try:
tree = p.parse(content)
except Exception:
self.buggyURLs.add(self.currentURL)
failed = True
print("BUGGY:", self.currentURL)
self.visitedURLs.add(self.currentURL)
if not failed:
self.updateURLs(tree)
def loadURL(self, url):
resp, content = self.http.request(url, "GET")
self.currentURL = url
digest = hashlib.md5(content).hexdigest()
if digest in self.contentDigest:
content = None
self.visitedURLs.add(url)
else:
self.contentDigest[digest] = url
if resp['status'] != "200":
content = None
return content
def updateURLs(self, tree):
"""Take all the links in the current document, extract the URLs and
update the list of visited and unvisited URLs according to whether we
have seen them before or not"""
urls = set()
#Collect all links we have not already visited
for link in tree.findall(".//a"):
try:
url = urllib.parse.urldefrag(link.attrib['href'])[0]
if (url and url not in self.unvisitedURLs and url
not in self.visitedURLs):
urls.add(url)
except KeyError:
pass
#Remove all non-http URLs and add a suitable base URL where that is
#missing
newUrls = set()
for url in urls:
splitURL = list(urllib.parse.urlsplit(url))
if splitURL[0] != "http":
continue
if splitURL[1] == "":
splitURL[1] = urllib.parse.urlsplit(self.currentURL)[1]
newUrls.add(urllib.parse.urlunsplit(splitURL))
urls = newUrls
responseHeaders = {}
#Now we want to find the content types of the links we haven't visited
for url in urls:
try:
resp, content = self.http.request(url, "HEAD")
responseHeaders[url] = resp
except (AttributeError, KeyError):
#Don't know why this happens
pass
#Remove links not of content-type html or pages not found
#XXX - need to deal with other status codes?
toVisit = set([url for url in urls if url in responseHeaders and
"html" in responseHeaders[url]['content-type'] and
responseHeaders[url]['status'] == "200"])
#Now check we are allowed to spider the page
for url in list(toVisit):  #iterate over a copy, since we remove from toVisit below
robotURL = list(urllib.parse.urlsplit(url)[:2])
robotURL.extend(["robots.txt", "", ""])
robotURL = urllib.parse.urlunsplit(robotURL)
self.robotParser.set_url(robotURL)
if not self.robotParser.can_fetch("*", url):
toVisit.remove(url)
self.visitedURLs.update(urls)
self.unvisitedURLs.update(toVisit)

@@ -5,14 +5,15 @@ here = os.path.abspath(os.path.split(__file__)[0])
repo_root = os.path.abspath(os.path.join(here, os.pardir))
sys.path.insert(0, os.path.join(here))
sys.path.insert(0, os.path.join(here, "six"))
sys.path.insert(0, os.path.join(here, "html5lib"))
sys.path.insert(0, os.path.join(here, "wptserve"))
sys.path.insert(0, os.path.join(here, "pywebsocket"))
sys.path.insert(0, os.path.join(here, "third_party", "attrs", "src"))
sys.path.insert(0, os.path.join(here, "third_party", "funcsigs"))
sys.path.insert(0, os.path.join(here, "third_party", "html5lib"))
sys.path.insert(0, os.path.join(here, "third_party", "pluggy"))
sys.path.insert(0, os.path.join(here, "third_party", "py"))
sys.path.insert(0, os.path.join(here, "third_party", "pytest"))
sys.path.insert(0, os.path.join(here, "third_party", "six"))
sys.path.insert(0, os.path.join(here, "third_party", "webencodings"))
sys.path.insert(0, os.path.join(here, "webdriver"))
sys.path.insert(0, os.path.join(here, "wptrunner"))

@@ -1,9 +0,0 @@
*#
*.py[co]
*.sw[po]
*~
MANIFEST
documentation/_build
\#*
.tox
six.egg-info

@@ -1,8 +0,0 @@
syntax: glob
*.pyc
dist
MANIFEST
documentation/_build
.tox
.gitignore
six.egg-info

@@ -1,22 +0,0 @@
The primary author and maintainer of six is Benjamin Peterson. He would like to
acknowledge the following people who submitted bug reports, pull requests, and
otherwise worked to improve six:
Marc Abramowitz
Alexander Artemenko
Aymeric Augustin
Ned Batchelder
Jason R. Coombs
Julien Danjou
Ben Darnell
Ben Davis
Joshua Harlow
Anselm Kruis
Alexander Lukanin
James Mills
Sridhar Ratnakumar
Erik Rose
Peter Ruibal
Miroslav Shubernetskiy
If you think you belong on this list, please let me know! --Benjamin

@@ -1,16 +0,0 @@
Six is a Python 2 and 3 compatibility library. It provides utility functions
for smoothing over the differences between the Python versions with the goal of
writing Python code that is compatible on both Python versions. See the
documentation for more information on what is provided.
Six supports every Python version since 2.5. It is contained in only one Python
file, so it can be easily copied into your project. (The copyright and license
notice must be retained.)
Online documentation is at http://pythonhosted.org/six/.
Bugs can be reported to https://bitbucket.org/gutworth/six. The code can also
be found there.
For questions about six or porting in general, email the python-porting mailing
list: http://mail.python.org/mailman/listinfo/python-porting
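(Illustration, not part of the diff: the kind of shim six provides. A minimal sketch, assuming six is installed:)

import six
from six.moves.urllib.parse import urlparse  # shim over a py2/py3 stdlib rename

# six.text_type is unicode on Python 2 and str on Python 3.
def to_text(value, encoding="utf-8"):
    if isinstance(value, bytes):
        return value.decode(encoding)
    return six.text_type(value)

print(to_text(b"caf\xc3\xa9"))                     # cafe with accented e
print(urlparse("http://example.com/path").netloc)  # example.com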

@@ -1,2 +0,0 @@
[wheel]
universal = 1

@@ -1,32 +0,0 @@
from __future__ import with_statement
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
import six
six_classifiers = [
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Topic :: Software Development :: Libraries",
"Topic :: Utilities",
]
with open("README", "r") as fp:
six_long_description = fp.read()
setup(name="six",
version=six.__version__,
author="Benjamin Peterson",
author_email="benjamin@python.org",
url="http://pypi.python.org/pypi/six/",
py_modules=["six"],
description="Python 2 and 3 compatibility utilities",
long_description=six_long_description,
license="MIT",
classifiers=six_classifiers
)

@@ -1,12 +0,0 @@
[tox]
envlist=py25,py26,py27,py31,py32,py33,py34,pypy
indexserver=
default = http://pypi.python.org/simple
testrun = http://pypi.testrun.org
[testenv]
deps=pytest
commands= py.test -rfsxX {posargs}
[pytest]
minversion=2.2.0

@@ -0,0 +1,31 @@
# To activate, change the Appveyor settings to use `.appveyor.yml`.
environment:
global:
PATH: "C:\\Python27\\Scripts\\;%PATH%"
PYTEST_COMMAND: "coverage run -m pytest"
matrix:
- TOXENV: py27-base
- TOXENV: py27-optional
- TOXENV: py33-base
- TOXENV: py33-optional
- TOXENV: py34-base
- TOXENV: py34-optional
- TOXENV: py35-base
- TOXENV: py35-optional
- TOXENV: py36-base
- TOXENV: py36-optional
install:
- git submodule update --init --recursive
- python -m pip install tox codecov
build: off
test_script:
- tox
after_test:
- python debug-info.py
on_success:
- codecov

@@ -0,0 +1,8 @@
[run]
branch = True
source = html5lib
[paths]
source =
html5lib
.tox/*/lib/python*/site-packages/html5lib

@@ -0,0 +1,85 @@
# Copyright (c) 2014 GitHub, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
doc/_build/
# PyBuilder
target/
# Generated by parse.py -p
stats.prof
# IDE
.idea

@@ -0,0 +1,21 @@
strictness: veryhigh
doc-warnings: false
test-warnings: false
max-line-length: 139
requirements:
- requirements.txt
- requirements-test.txt
- requirements-optional.txt
ignore-paths:
- parse.py
- utils/
python-targets:
- 2
- 3
mccabe:
run: false

testing/web-platform/tests/tools/third_party/html5lib/.pylintrc (vendored, new file)

@@ -0,0 +1,10 @@
[MASTER]
ignore=tests
[MESSAGES CONTROL]
# messages up to fixme should probably be fixed somehow
disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda
[FORMAT]
max-line-length=139
single-line-if-stmt=no

(Diff for one large file not shown because of its size.)

@@ -0,0 +1,32 @@
language: python
python:
- "pypy"
- "3.6"
- "3.5"
- "3.4"
- "3.3"
- "2.7"
sudo: false
cache: pip
env:
global:
- PYTEST_COMMAND="coverage run -m pytest"
matrix:
- TOXENV=optional
- TOXENV=base
- TOXENV=six19-optional
install:
- pip install tox codecov
script:
- tox
after_script:
- python debug-info.py
after_success:
- codecov

@@ -6,6 +6,7 @@ Credits
- James Graham
- Geoffrey Sneddon
- Łukasz Langa
- Will Kahn-Greene
Patches and suggestions
@@ -16,19 +17,50 @@ Patches and suggestions
- Lachlan Hunt
- lantis63
- Sam Ruby
- Tim Fletcher
- Thomas Broyer
- Tim Fletcher
- Mark Pilgrim
- Philip Taylor
- Ryan King
- Philip Taylor
- Edward Z. Yang
- fantasai
- Philip Jägenstedt
- Ms2ger
- Mohammad Taha Jahangir
- Andy Wingo
- Andreas Madsack
- Karim Valiev
- Mohammad Taha Jahangir
- Juan Carlos Garcia Segovia
- Mike West
- Marc DM
- Simon Sapin
- Michael[tm] Smith
- Ritwik Gupta
- Marc Abramowitz
- Tony Lopes
- lilbludevil
- Kevin
- Drew Hubl
- Austin Kumbera
- Jim Baker
- Jon Dufresne
- Donald Stufft
- Alex Gaynor
- Nik Nyby
- Jakub Wilk
- Sigmund Cherem
- Gabi Davar
- Florian Mounier
- neumond
- Vitalik Verhovodov
- Kovid Goyal
- Adam Chainz
- John Vandenberg
- Eric Amorde
- Benedikt Morbach
- Jonathan Vanasco
- Tom Most
- Ville Skyttä
- Hugo van Kemenade
- Mark Vasilkov

@@ -0,0 +1,335 @@
Change Log
----------
1.0.1
~~~~~
Released on December 7, 2017
Breaking changes:
* Drop support for Python 2.6. (#330) (Thank you, Hugo, Will Kahn-Greene!)
* Remove ``utils/spider.py`` (#353) (Thank you, Jon Dufresne!)
Features:
* Improve documentation. (#300, #307) (Thank you, Jon Dufresne, Tom Most,
Will Kahn-Greene!)
* Add iframe seamless boolean attribute. (Thank you, Ritwik Gupta!)
* Add itemscope as a boolean attribute. (#194) (Thank you, Jonathan Vanasco!)
* Support Python 3.6. (#333) (Thank you, Jon Dufresne!)
* Add CI support for Windows using AppVeyor. (Thank you, John Vandenberg!)
* Improve testing and CI and add code coverage (#323, #334), (Thank you, Jon
Dufresne, John Vandenberg, Geoffrey Sneddon, Will Kahn-Greene!)
* Semver-compliant version number.
Bug fixes:
* Add support for setuptools < 18.5 to support environment markers. (Thank you,
John Vandenberg!)
* Add explicit dependency for six >= 1.9. (Thank you, Eric Amorde!)
* Fix regexes to work with Python 3.7 regex adjustments. (#318, #379) (Thank
you, Benedikt Morbach, Ville Skyttä, Mark Vasilkov!)
* Fix alphabeticalattributes filter namespace bug. (#324) (Thank you, Will
Kahn-Greene!)
* Include license file in generated wheel package. (#350) (Thank you, Jon
Dufresne!)
* Fix annotation-xml typo. (#339) (Thank you, Will Kahn-Greene!)
* Allow uppercase hex characters in CSS colour check. (#377) (Thank you,
Komal Dembla, Hugo!)
1.0
~~~
Released and unreleased on December 7, 2017. Badly packaged release.
0.999999999/1.0b10
~~~~~~~~~~~~~~~~~~
Released on July 15, 2016
* Fix attribute order going to the tree builder to be document order
instead of reverse document order(!).
0.99999999/1.0b9
~~~~~~~~~~~~~~~~
Released on July 14, 2016
* **Added ordereddict as a mandatory dependency on Python 2.6.**
* Added ``lxml``, ``genshi``, ``datrie``, ``charade``, and ``all``
extras that will do the right thing based on the specific
interpreter implementation.
* Now requires the ``mock`` package for the testsuite.
* Cease supporting DATrie under PyPy.
* **Remove PullDOM support, as this hasn't ever been properly
tested, doesn't entirely work, and as far as I can tell is
completely unused by anyone.**
* Move testsuite to ``py.test``.
* **Fix #124: move to webencodings for decoding the input byte stream;
this makes html5lib compliant with the Encoding Standard, and
introduces a required dependency on webencodings.**
* **Cease supporting Python 3.2 (in both CPython and PyPy forms).**
* **Fix comments containing double-dash with lxml 3.5 and above.**
* **Use scripting disabled by default (as we don't implement
scripting).**
* **Fix #11, avoiding the XSS bug potentially caused by serializer
allowing attribute values to be escaped out of in old browser versions,
changing the quote_attr_values option on serializer to take one of
three values, "always" (the old True value), "legacy" (the new option,
and the new default), and "spec" (the old False value, and the old
default).**
* **Fix #72 by rewriting the sanitizer to apply only to treewalkers
(instead of the tokenizer); as such, this will require amending all
callers of it to use it via the treewalker API.**
* **Drop support of charade, now that chardet is supported once more.**
* **Replace the charset keyword argument on parse and related methods
with a set of keyword arguments: override_encoding, transport_encoding,
same_origin_parent_encoding, likely_encoding, and default_encoding.**
* **Move filters._base, treebuilder._base, and treewalkers._base to .base
to clarify their status as public.**
* **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
sanitizer.htmlsanitizer module and move that to sanitizer. This means
anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
code changes.**
* **Rename treewalkers.lxmletree to .etree_lxml and
treewalkers.genshistream to .genshi to have a consistent API.**
* Move a whole load of stuff (inputstream, ihatexml, trie, tokenizer,
utils) to be underscore prefixed to clarify their status as private.
0.9999999/1.0b8
~~~~~~~~~~~~~~~
Released on September 10, 2015
* Fix #195: fix the sanitizer to drop broken URLs (it threw an
exception between 0.9999 and 0.999999).
0.999999/1.0b7
~~~~~~~~~~~~~~
Released on July 7, 2015
* Fix #189: fix the sanitizer to allow relative URLs again (as it did
prior to 0.9999/1.0b5).
0.99999/1.0b6
~~~~~~~~~~~~~
Released on April 30, 2015
* Fix #188: fix the sanitizer to not throw an exception when sanitizing
bogus data URLs.
0.9999/1.0b5
~~~~~~~~~~~~
Released on April 29, 2015
* Fix #153: Sanitizer fails to treat some attributes as URLs. Despite how
this sounds, this has no known security implications. No known version
of IE (5.5 to current), Firefox (3 to current), Safari (6 to current),
Chrome (1 to current), or Opera (12 to current) will run any script
provided in these attributes.
* Pass error message to the ParseError exception in strict parsing mode.
* Allow data URIs in the sanitizer, with a whitelist of content-types.
* Add support for Python implementations that don't support lone
surrogates (read: Jython). Fixes #2.
* Remove localization of error messages. This functionality was totally
unused (and untested that everything was localizable), so we may as
well follow numerous browsers in not supporting translating technical
strings.
* Expose treewalkers.pprint as a public API.
* Add a documentEncoding property to HTML5Parser, fix #121.
0.999
~~~~~
Released on December 23, 2013
* Fix #127: add work-around for CPython issue #20007: .read(0) on
http.client.HTTPResponse drops the rest of the content.
* Fix #115: lxml treewalker can now deal with fragments containing, at
their root level, text nodes with non-ASCII characters on Python 2.
0.99
~~~~
Released on September 10, 2013
* No library changes from 1.0b3; released as 0.99 as pip has changed
behaviour from 1.4 to avoid installing pre-release versions per
PEP 440.
1.0b3
~~~~~
Released on July 24, 2013
* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
implementation using it should be moved to
``NonRecursiveTreeWalker``, as everything bundled with html5lib has
for years.
* Fix #67 so that ``BufferedStream`` correctly returns a bytes
object, thereby fixing any case where html5lib is passed a
non-seekable RawIOBase-like object.
1.0b2
~~~~~
Released on June 27, 2013
* Removed reordering of attributes within the serializer. There is now
an ``alphabetical_attributes`` option which preserves the previous
behaviour through a new filter. This allows attribute order to be
preserved through html5lib if the tree builder preserves order.
* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
``treeadapters.sax.to_sax`` which is generic and supports any
treewalker; it also resolves all known bugs with ``dom2sax``.
* Fix treewalker assertions on hitting bytes strings on
Python 2. Previous to 1.0b1, treewalkers coped with mixed
bytes/unicode data on Python 2; this reintroduces this prior
behaviour on Python 2. Behaviour is unchanged on Python 3.
1.0b1
~~~~~
Released on May 17, 2013
* Implementation updated to implement the `HTML specification
<http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).
* Python 3.2+ supported in a single codebase using the ``six`` library.
* Removed support for Python 2.5 and older.
* Removed the deprecated Beautiful Soup 3 treebuilder.
``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
since it doesn't support namespaces, foreign content like SVG and
MathML is parsed incorrectly.
* Removed ``simpletree`` from the package. The default tree builder is
now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
available, and ``xml.etree.ElementTree`` otherwise).
* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
output was well-formed XML, and hence provided little of use.
* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
return the default DOM treebuilder, which uses ``xml.dom.minidom``.
* Optional heuristic character encoding detection now based on
``charade`` for Python 2.6 - 3.3 compatibility.
* Optional ``Genshi`` treewalker support fixed.
* Many bugfixes, including:
* #33: null in attribute value breaks XML AttValue;
* #4: nested, indirect descendant, <button> causes infinite loop;
* `Google Code 215
<http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
detect seekable streams;
* `Google Code 206
<http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
support for <video preload=...>, <audio preload=...>;
* `Google Code 205
<http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
support for <video poster=...>;
* `Google Code 202
<http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
file breaks InputStream.
* Source code is now mostly PEP 8 compliant.
* Test harness has been improved and now depends on ``nose``.
* Documentation updated and moved to https://html5lib.readthedocs.io/.
0.95
~~~~
Released on February 11, 2012
0.90
~~~~
Released on January 17, 2010
0.11.1
~~~~~~
Released on June 12, 2008
0.11
~~~~
Released on June 10, 2008
0.10
~~~~
Released on October 7, 2007
0.9
~~~
Released on March 11, 2007
0.2
~~~
Released on January 8, 2007

@@ -1,6 +1,10 @@
include LICENSE
include AUTHORS.rst
include CHANGES.rst
include README.rst
include requirements*.txt
include .pytest.expect
include tox.ini
include pytest.ini
graft html5lib/tests/testdata
recursive-include html5lib/tests *.py

@@ -51,7 +51,7 @@ pass into html5lib as follows:
import html5lib
with closing(urlopen("http://example.com/")) as f:
document = html5lib.parse(f, encoding=f.info().getparam("charset"))
document = html5lib.parse(f, transport_encoding=f.info().getparam("charset"))
When using with ``urllib.request`` (Python 3), the charset from HTTP
should be passed into html5lib as follows:
@@ -62,7 +62,7 @@ should be passed into html5lib as follows:
import html5lib
with urlopen("http://example.com/") as f:
document = html5lib.parse(f, encoding=f.info().get_content_charset())
document = html5lib.parse(f, transport_encoding=f.info().get_content_charset())
To have more control over the parser, create a parser object explicitly.
For instance, to make the parser raise exceptions on parse errors, use:
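(The README's code sample falls outside this hunk's context; as an illustrative sketch of the strict mode referred to above, assuming html5lib's documented API:)

import html5lib
from html5lib.html5parser import ParseError

parser = html5lib.HTMLParser(strict=True)
try:
    parser.parse("<p>unclosed <b>markup")
except ParseError as err:
    print("first parse error:", err)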
@@ -84,13 +84,13 @@ format:
parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
minidom_document = parser.parse("<p>Hello World!")
More documentation is available at http://html5lib.readthedocs.org/.
More documentation is available at https://html5lib.readthedocs.io/.
Installation
------------
html5lib works on CPython 2.6+, CPython 3.2+ and PyPy. To install it,
html5lib works on CPython 2.7+, CPython 3.3+ and PyPy. To install it,
use:
.. code-block:: bash
@@ -104,8 +104,8 @@ Optional Dependencies
The following third-party libraries may be used for additional
functionality:
- ``datrie`` can be used to improve parsing performance (though in
almost all cases the improvement is marginal);
- ``datrie`` can be used under CPython to improve parsing performance
(though in almost all cases the improvement is marginal);
- ``lxml`` is supported as a tree format (for both building and
walking) under CPython (but *not* PyPy where it is known to cause
@@ -113,13 +113,8 @@ functionality:
- ``genshi`` has a treewalker (but not builder); and
- ``charade`` can be used as a fallback when character encoding cannot
be determined; ``chardet``, from which it was forked, can also be used
on Python 2.
- ``ordereddict`` can be used under Python 2.6
(``collections.OrderedDict`` is used instead on later versions) to
serialize attributes in alphabetical order.
- ``chardet`` can be used as a fallback when character encoding cannot
be determined.
Bugs
@@ -132,9 +127,8 @@ Please report any bugs on the `issue tracker
Tests
-----
Unit tests require the ``nose`` library and can be run using the
``nosetests`` command in the root directory; ``ordereddict`` is
required under Python 2.6. All should pass.
Unit tests require the ``pytest`` and ``mock`` libraries and can be
run using the ``py.test`` command in the root directory.
Test data are contained in a separate `html5lib-tests
<https://github.com/html5lib/html5lib-tests>`_ repository and included

@@ -12,7 +12,7 @@ info = {
"maxsize": sys.maxsize
}
search_modules = ["charade", "chardet", "datrie", "genshi", "html5lib", "lxml", "six"]
search_modules = ["chardet", "datrie", "genshi", "html5lib", "lxml", "six"]
found_modules = []
for m in search_modules:

@@ -126,7 +126,7 @@ html_theme = 'default'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
#html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.

@@ -1,59 +1,58 @@
filters Package
===============
:mod:`_base` Module
:mod:`base` Module
-------------------
.. automodule:: html5lib.filters._base
.. automodule:: html5lib.filters.base
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`alphabeticalattributes` Module
------------------------------------
.. automodule:: html5lib.filters.alphabeticalattributes
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`inject_meta_charset` Module
---------------------------------
.. automodule:: html5lib.filters.inject_meta_charset
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`lint` Module
------------------
.. automodule:: html5lib.filters.lint
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`optionaltags` Module
--------------------------
.. automodule:: html5lib.filters.optionaltags
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`sanitizer` Module
-----------------------
.. automodule:: html5lib.filters.sanitizer
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`whitespace` Module
------------------------
.. automodule:: html5lib.filters.whitespace
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
@ -0,0 +1,38 @@
html5lib Package
================
.. automodule:: html5lib
:members: __version__
:mod:`constants` Module
-----------------------
.. automodule:: html5lib.constants
:members:
:show-inheritance:
:mod:`html5parser` Module
-------------------------
.. automodule:: html5lib.html5parser
:members:
:show-inheritance:
:special-members: __init__
:mod:`serializer` Module
------------------------
.. automodule:: html5lib.serializer
:members:
:show-inheritance:
:special-members: __init__
Subpackages
-----------
.. toctree::
html5lib.filters
html5lib.treebuilders
html5lib.treewalkers
html5lib.treeadapters
@ -0,0 +1,20 @@
treeadapters Package
====================
:mod:`~html5lib.treeadapters` Package
-------------------------------------
.. automodule:: html5lib.treeadapters
:members:
:show-inheritance:
:special-members: __init__
.. automodule:: html5lib.treeadapters.genshi
:members:
:show-inheritance:
:special-members: __init__
.. automodule:: html5lib.treeadapters.sax
:members:
:show-inheritance:
:special-members: __init__
@ -6,38 +6,37 @@ treebuilders Package
.. automodule:: html5lib.treebuilders
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`_base` Module
:mod:`base` Module
-------------------
.. automodule:: html5lib.treebuilders._base
.. automodule:: html5lib.treebuilders.base
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treebuilders.dom
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treebuilders.etree
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
:mod:`etree_lxml` Module
------------------------
.. automodule:: html5lib.treebuilders.etree_lxml
:members:
:undoc-members:
:show-inheritance:
:special-members: __init__
@ -0,0 +1,50 @@
treewalkers Package
===================
:mod:`treewalkers` Package
--------------------------
.. automodule:: html5lib.treewalkers
:members:
:show-inheritance:
:special-members: __init__
:mod:`base` Module
------------------
.. automodule:: html5lib.treewalkers.base
:members:
:show-inheritance:
:special-members: __init__
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treewalkers.dom
:members:
:show-inheritance:
:special-members: __init__
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treewalkers.etree
:members:
:show-inheritance:
:special-members: __init__
:mod:`etree_lxml` Module
------------------------
.. automodule:: html5lib.treewalkers.etree_lxml
:members:
:show-inheritance:
:special-members: __init__
:mod:`genshi` Module
--------------------
.. automodule:: html5lib.treewalkers.genshi
:members:
:show-inheritance:
:special-members: __init__
@ -8,6 +8,7 @@ Overview
:maxdepth: 2
movingparts
modules
changes
License <license>
@ -4,22 +4,25 @@ The moving parts
html5lib consists of a number of components, which are responsible for
handling its features.
Parsing uses a *tree builder* to generate a *tree*, the in-memory representation of the document.
Several tree representations are supported, as are translations to other formats via *tree adapters*.
The tree may be translated to a token stream with a *tree walker*, from which :class:`~html5lib.serializer.HTMLSerializer` produces a stream of bytes.
The token stream may also be transformed by use of *filters* to accomplish tasks like sanitization.
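A minimal end-to-end sketch of that pipeline (builder to tree, tree to token stream, token stream to output), using only names html5lib re-exports:

.. code-block:: python

    import html5lib
    from html5lib.serializer import HTMLSerializer

    tree = html5lib.parse("<p>Hello World!", treebuilder="dom")  # tree builder
    walker = html5lib.getTreeWalker("dom")                       # tree walker
    tokens = walker(tree)                                        # token stream
    print(HTMLSerializer().render(tokens))                       # serialized markup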
Tree builders
-------------
The parser reads HTML by tokenizing the content and building a tree that
the user can later access. There are three main types of trees that
html5lib can build:
the user can later access. html5lib can build three types of trees:
* ``etree`` - this is the default; builds a tree based on ``xml.etree``,
* ``etree`` - this is the default; builds a tree based on :mod:`xml.etree`,
which can be found in the standard library. Whenever possible, the
accelerated ``ElementTree`` implementation (i.e.
``xml.etree.cElementTree`` on Python 2.x) is used.
* ``dom`` - builds a tree based on ``xml.dom.minidom``.
* ``dom`` - builds a tree based on :mod:`xml.dom.minidom`.
* ``lxml.etree`` - uses lxml's implementation of the ``ElementTree``
* ``lxml`` - uses the :mod:`lxml.etree` implementation of the ``ElementTree``
API. The performance gains are relatively small compared to using the
accelerated ``ElementTree`` module.
@ -31,21 +34,15 @@ You can specify the builder by name when using the shorthand API:
with open("mydocument.html", "rb") as f:
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
When instantiating a parser object, you have to pass a tree builder
class in the ``tree`` keyword attribute:
To get a builder class by name, use the :func:`~html5lib.treebuilders.getTreeBuilder` function.
When instantiating a :class:`~html5lib.html5parser.HTMLParser` object, you must pass a tree builder class via the ``tree`` keyword attribute:
.. code-block:: python
import html5lib
parser = html5lib.HTMLParser(tree=SomeTreeBuilder)
document = parser.parse("<p>Hello World!")
To get a builder class by name, use the ``getTreeBuilder`` function:
.. code-block:: python
import html5lib
parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
TreeBuilder = html5lib.getTreeBuilder("dom")
parser = html5lib.HTMLParser(tree=TreeBuilder)
minidom_document = parser.parse("<p>Hello World!")
The implementation of builders can be found in `html5lib/treebuilders/
@ -55,17 +52,13 @@ The implementation of builders can be found in `html5lib/treebuilders/
Tree walkers
------------
Once a tree is ready, you can work on it either manually, or using
a tree walker, which provides a streaming view of the tree. html5lib
provides walkers for all three supported types of trees (``etree``,
``dom`` and ``lxml``).
In addition to manipulating a tree directly, you can use a tree walker to generate a streaming view of it.
html5lib provides walkers for ``etree``, ``dom``, and ``lxml`` trees, as well as ``genshi`` `markup streams <https://genshi.edgewall.org/wiki/Documentation/streams.html>`_.
The implementation of walkers can be found in `html5lib/treewalkers/
<https://github.com/html5lib/html5lib-python/tree/master/html5lib/treewalkers>`_.
Walkers make consuming HTML easier. html5lib uses them to provide you
with has a couple of handy tools.
html5lib provides :class:`~html5lib.serializer.HTMLSerializer` for generating a stream of bytes from a token stream, and several filters which manipulate the stream.
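Iterating a walker yields token dicts; a rough sketch:

.. code-block:: python

    import html5lib

    tree = html5lib.parse("<p>Hello <em>there")
    walker = html5lib.getTreeWalker("etree")
    for token in walker(tree):
        # e.g. ('StartTag', 'p'), ('Characters', None), ...
        print(token["type"], token.get("name"))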
HTMLSerializer
~~~~~~~~~~~~~~
@ -90,15 +83,14 @@ The serializer lets you write HTML back as a stream of bytes.
'>'
'Witam wszystkich'
You can customize the serializer behaviour in a variety of ways, consult
the :class:`~html5lib.serializer.htmlserializer.HTMLSerializer`
documentation.
You can customize the serializer behaviour in a variety of ways. Consult
the :class:`~html5lib.serializer.HTMLSerializer` documentation.
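For instance (option names per the 1.0 serializer documentation; the values here are illustrative):

.. code-block:: python

    from html5lib.serializer import HTMLSerializer

    serializer = HTMLSerializer(omit_optional_tags=False,
                                quote_attr_values="always")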
Filters
~~~~~~~
You can alter the stream content with filters provided by html5lib:
html5lib provides several filters:
* :class:`alphabeticalattributes.Filter
<html5lib.filters.alphabeticalattributes.Filter>` sorts attributes on
@ -110,11 +102,11 @@ You can alter the stream content with filters provided by html5lib:
the document
* :class:`lint.Filter <html5lib.filters.lint.Filter>` raises
``LintError`` exceptions on invalid tag and attribute names, invalid
:exc:`AssertionError` exceptions on invalid tag and attribute names, invalid
PCDATA, etc.
* :class:`optionaltags.Filter <html5lib.filters.optionaltags.Filter>`
removes tags from the stream which are not necessary to produce valid
removes tags from the token stream which are not necessary to produce valid
HTML
* :class:`sanitizer.Filter <html5lib.filters.sanitizer.Filter>` removes
@ -125,9 +117,9 @@ You can alter the stream content with filters provided by html5lib:
* :class:`whitespace.Filter <html5lib.filters.whitespace.Filter>`
collapses all whitespace characters to single spaces unless they're in
``<pre/>`` or ``textarea`` tags.
``<pre/>`` or ``<textarea/>`` tags.
To use a filter, simply wrap it around a stream:
To use a filter, simply wrap it around a token stream:
.. code-block:: python
@ -136,15 +128,17 @@ To use a filter, simply wrap it around a stream:
>>> dom = html5lib.parse("<p><script>alert('Boo!')", treebuilder="dom")
>>> walker = html5lib.getTreeWalker("dom")
>>> stream = walker(dom)
>>> sane_stream = sanitizer.Filter(stream)
>>> clean_stream = sanitizer.Filter(stream)
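The filtered stream can then go straight to the serializer; the sanitizer escapes the script markup rather than executing-style passthrough (sketch):

.. code-block:: python

    >>> from html5lib.serializer import HTMLSerializer
    >>> HTMLSerializer().render(clean_stream)  # the <script> markup comes out escaped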
Tree adapters
-------------
Used to translate one type of tree to another. More documentation
pending, sorry.
Tree adapters can be used to translate between tree formats.
Two adapters are provided by html5lib:
* :func:`html5lib.treeadapters.genshi.to_genshi()` generates a `Genshi markup stream <https://genshi.edgewall.org/wiki/Documentation/streams.html>`_.
* :func:`html5lib.treeadapters.sax.to_sax()` calls a SAX handler based on the tree.
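A small sketch of the SAX adapter, using the stdlib's no-op handler base class:

.. code-block:: python

    from xml.sax.handler import ContentHandler

    import html5lib
    from html5lib.treeadapters import sax

    tree = html5lib.parse("<p>Hello")
    walker = html5lib.getTreeWalker("etree")
    sax.to_sax(walker(tree), ContentHandler())  # fires SAX events on the handler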
Encoding discovery
------------------
@ -156,54 +150,16 @@ the following way:
* The encoding may be explicitly specified by passing the name of the
encoding as the encoding parameter to the
:meth:`~html5lib.html5parser.HTMLParser.parse` method on
``HTMLParser`` objects.
:class:`~html5lib.html5parser.HTMLParser` objects.
* If no encoding is specified, the parser will attempt to detect the
encoding from a ``<meta>`` element in the first 512 bytes of the
document (this is only a partial implementation of the current HTML
5 specification).
specification).
* If no encoding can be found and the chardet library is available, an
* If no encoding can be found and the :mod:`chardet` library is available, an
attempt will be made to sniff the encoding from the byte pattern.
* If all else fails, the default encoding will be used. This is usually
`Windows-1252 <http://en.wikipedia.org/wiki/Windows-1252>`_, which is
a common fallback used by Web browsers.
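In the 1.0 API these steps surface as keyword arguments such as ``transport_encoding`` (shown in the README hunk above); a sketch:

.. code-block:: python

    import html5lib

    with open("mydocument.html", "rb") as f:
        # e.g. a charset taken from an HTTP Content-Type header
        document = html5lib.parse(f, transport_encoding="utf-8")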
Tokenizers
----------
The part of the parser responsible for translating a raw input stream
into meaningful tokens is the tokenizer. Currently html5lib provides
two.
To set up a tokenizer, simply pass it when instantiating
a :class:`~html5lib.html5parser.HTMLParser`:
.. code-block:: python
import html5lib
from html5lib import sanitizer
p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
p.parse("<p>Surprise!<script>alert('Boo!');</script>")
HTMLTokenizer
~~~~~~~~~~~~~
This is the default tokenizer, the heart of html5lib. The implementation
can be found in `html5lib/tokenizer.py
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/tokenizer.py>`_.
HTMLSanitizer
~~~~~~~~~~~~~
This is a tokenizer that removes unsafe markup and CSS styles from the
input. Elements that are known to be safe are passed through and the
rest is converted to visible text. The default configuration of the
sanitizer follows the `WHATWG Sanitization Rules
<http://wiki.whatwg.org/wiki/Sanitization_rules>`_.
The implementation can be found in `html5lib/sanitizer.py
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/sanitizer.py>`_.
testing/web-platform/tests/tools/third_party/html5lib/flake8-run.sh (vendored, executable file)
@ -0,0 +1,9 @@
#!/bin/bash -e
if [[ ! -x $(which flake8) ]]; then
echo "fatal: flake8 not found on $PATH. Exiting."
exit 1
fi
flake8 `dirname $0`
exit $?
@ -0,0 +1,35 @@
"""
HTML parsing library based on the `WHATWG HTML specification
<https://whatwg.org/html>`_. The parser is designed to be compatible with
existing HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage::
import html5lib
with open("my_document.html", "rb") as f:
tree = html5lib.parse(f)
For convenience, this module re-exports the following names:
* :func:`~.html5parser.parse`
* :func:`~.html5parser.parseFragment`
* :class:`~.html5parser.HTMLParser`
* :func:`~.treebuilders.getTreeBuilder`
* :func:`~.treewalkers.getTreeWalker`
* :func:`~.serializer.serialize`
"""
from __future__ import absolute_import, division, unicode_literals
from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
# this has to be at the top level, see how setup.py parses this
#: Distribution version number.
__version__ = "1.0.1"
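Of the re-exported names, ``parseFragment`` is the only one not shown in the docstring example; a quick sketch:

.. code-block:: python

    import html5lib

    # Parses markup without the implied <html>/<body> document wrapper.
    fragment = html5lib.parseFragment("<b>bold</b> text")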
@ -175,18 +175,18 @@ def escapeRegexp(string):
return string
# output from the above
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
# Simpler things
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
class InfosetFilter(object):
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
def __init__(self, replaceChars=None,
def __init__(self,
dropXmlnsLocalName=False,
dropXmlnsAttrNs=False,
preventDoubleDashComments=False,
@ -217,7 +217,7 @@ class InfosetFilter(object):
else:
return self.toXmlName(name)
def coerceElement(self, name, namespace=None):
def coerceElement(self, name):
return self.toXmlName(name)
def coerceComment(self, data):
@ -225,11 +225,14 @@ class InfosetFilter(object):
while "--" in data:
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -")
if data.endswith("-"):
warnings.warn("Comments cannot end in a dash", DataLossWarning)
data += " "
return data
def coerceCharacters(self, data):
if self.replaceFormFeedCharacters:
for i in range(data.count("\x0C")):
for _ in range(data.count("\x0C")):
warnings.warn("Text cannot contain U+000C", DataLossWarning)
data = data.replace("\x0C", " ")
# Other non-xml characters
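The new trailing-dash branch exists because XML comments may not end in ``-``. A sketch of the coercion (module and flag names as in this vendored copy):

.. code-block:: python

    from html5lib._ihatexml import InfosetFilter

    f = InfosetFilter(preventDoubleDashComments=True)
    f.coerceComment("a--b-")  # -> "a- -b- ", emitting DataLossWarning twice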
@ -1,13 +1,16 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from six.moves import http_client
from six import text_type, binary_type
from six.moves import http_client, urllib
import codecs
import re
import webencodings
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import encodings, ReparseException
from . import utils
from .constants import _ReparseException
from . import _utils
from io import StringIO
@ -16,19 +19,26 @@ try:
except ImportError:
BytesIO = StringIO
try:
from io import BufferedIOBase
except ImportError:
class BufferedIOBase(object):
pass
# Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
# surrogates.
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
"]")
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
@ -38,7 +48,7 @@ non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
0x10FFFE, 0x10FFFF])
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
# Cache for charsUntil()
charsUntilRegEx = {}
@ -118,10 +128,13 @@ class BufferedStream(object):
return b"".join(rv)
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
if isinstance(source, http_client.HTTPResponse):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
def HTMLInputStream(source, **kwargs):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
if (isinstance(source, http_client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
isinstance(source.fp, http_client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
@ -129,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
isUnicode = isinstance(source, text_type)
if isUnicode:
if encoding is not None:
raise TypeError("Cannot explicitly set an encoding with a unicode string")
encodings = [x for x in kwargs if x.endswith("_encoding")]
if encodings:
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
return HTMLUnicodeInputStream(source)
return HTMLUnicodeInputStream(source, **kwargs)
else:
return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
return HTMLBinaryInputStream(source, **kwargs)
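After this change the dispatch behaves roughly as follows (a sketch against the now-private module):

.. code-block:: python

    from html5lib._inputstream import HTMLInputStream

    HTMLInputStream(b"<p>hi", transport_encoding="utf-8")  # bytes: encodings allowed
    HTMLInputStream(u"<p>hi")                              # text: fine without them
    HTMLInputStream(u"<p>hi", transport_encoding="utf-8")  # text + encoding: TypeError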
class HTMLUnicodeInputStream(object):
@ -160,22 +174,21 @@ class HTMLUnicodeInputStream(object):
regardless of any BOM or later declaration (such as in a meta
element)
parseMeta - Look for a <meta> element containing encoding information
"""
# Craziness
if len("\U0010FFFF") == 1:
if not _utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
elif len("\U0010FFFF") == 1:
self.reportCharacterErrors = self.characterErrorsUCS4
self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
else:
self.reportCharacterErrors = self.characterErrorsUCS2
self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
# List of where new lines occur
self.newLines = [0]
self.charEncoding = ("utf-8", "certain")
self.charEncoding = (lookupEncoding("utf-8"), "certain")
self.dataStream = self.openStream(source)
self.reset()
@ -265,12 +278,10 @@ class HTMLUnicodeInputStream(object):
self._bufferedCharacter = data[-1]
data = data[:-1]
self.reportCharacterErrors(data)
if self.reportCharacterErrors:
self.reportCharacterErrors(data)
# Replace invalid characters
# Note U+0000 is dealt with in the tokenizer
data = self.replaceCharactersRegexp.sub("\ufffd", data)
data = data.replace("\r\n", "\n")
data = data.replace("\r", "\n")
@ -280,7 +291,7 @@ class HTMLUnicodeInputStream(object):
return True
def characterErrorsUCS4(self, data):
for i in range(len(invalid_unicode_re.findall(data))):
for _ in range(len(invalid_unicode_re.findall(data))):
self.errors.append("invalid-codepoint")
def characterErrorsUCS2(self, data):
@ -293,9 +304,9 @@ class HTMLUnicodeInputStream(object):
codepoint = ord(match.group())
pos = match.start()
# Pretty sure there should be endianness issues here
if utils.isSurrogatePair(data[pos:pos + 2]):
if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair!
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint")
skip = True
@ -378,7 +389,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
"""
def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
def __init__(self, source, override_encoding=None, transport_encoding=None,
same_origin_parent_encoding=None, likely_encoding=None,
default_encoding="windows-1252", useChardet=True):
"""Initialises the HTMLInputStream.
HTMLInputStream(source, [encoding]) -> Normalized stream from source
@ -391,8 +404,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
regardless of any BOM or later declaration (such as in a meta
element)
parseMeta - Look for a <meta> element containing encoding information
"""
# Raw Stream - for unicode objects this will encode to utf-8 and set
# self.charEncoding as appropriate
@ -400,27 +411,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
HTMLUnicodeInputStream.__init__(self, self.rawStream)
self.charEncoding = (codecName(encoding), "certain")
# Encoding Information
# Number of bytes to use when looking for a meta element with
# encoding information
self.numBytesMeta = 512
self.numBytesMeta = 1024
# Number of bytes to use when using detecting encoding using chardet
self.numBytesChardet = 100
# Encoding to use if no other information can be found
self.defaultEncoding = "windows-1252"
# Things from args
self.override_encoding = override_encoding
self.transport_encoding = transport_encoding
self.same_origin_parent_encoding = same_origin_parent_encoding
self.likely_encoding = likely_encoding
self.default_encoding = default_encoding
# Detect encoding iff no explicit "transport level" encoding is supplied
if (self.charEncoding[0] is None):
self.charEncoding = self.detectEncoding(parseMeta, chardet)
# Determine encoding
self.charEncoding = self.determineEncoding(useChardet)
assert self.charEncoding[0] is not None
# Call superclass
self.reset()
def reset(self):
self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
'replace')
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
HTMLUnicodeInputStream.reset(self)
def openStream(self, source):
@ -437,29 +449,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
try:
stream.seek(stream.tell())
except:
except: # pylint:disable=bare-except
stream = BufferedStream(stream)
return stream
def detectEncoding(self, parseMeta=True, chardet=True):
# First look for a BOM
def determineEncoding(self, chardet=True):
# BOMs take precedence over everything
# This will also read past the BOM if present
encoding = self.detectBOM()
confidence = "certain"
# If there is no BOM need to look for meta elements with encoding
# information
if encoding is None and parseMeta:
encoding = self.detectEncodingMeta()
confidence = "tentative"
# Guess with chardet, if avaliable
if encoding is None and chardet:
confidence = "tentative"
charEncoding = self.detectBOM(), "certain"
if charEncoding[0] is not None:
return charEncoding
# If we've been overriden, we've been overriden
charEncoding = lookupEncoding(self.override_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
# Now check the transport layer
charEncoding = lookupEncoding(self.transport_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
# Look for meta elements with encoding information
charEncoding = self.detectEncodingMeta(), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Parent document encoding
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
return charEncoding
# "likely" encoding
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Guess with chardet, if available
if chardet:
try:
try:
from charade.universaldetector import UniversalDetector
except ImportError:
from chardet.universaldetector import UniversalDetector
from chardet.universaldetector import UniversalDetector
except ImportError:
pass
else:
buffers = []
detector = UniversalDetector()
while not detector.done:
@ -470,37 +503,34 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
buffers.append(buffer)
detector.feed(buffer)
detector.close()
encoding = detector.result['encoding']
encoding = lookupEncoding(detector.result['encoding'])
self.rawStream.seek(0)
except ImportError:
pass
# If all else fails use the default encoding
if encoding is None:
confidence = "tentative"
encoding = self.defaultEncoding
if encoding is not None:
return encoding, "tentative"
# Substitute for equivalent encodings:
encodingSub = {"iso-8859-1": "windows-1252"}
# Try the default encoding
charEncoding = lookupEncoding(self.default_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
if encoding.lower() in encodingSub:
encoding = encodingSub[encoding.lower()]
return encoding, confidence
# Fallback to html5lib's default if even that hasn't worked
return lookupEncoding("windows-1252"), "tentative"
def changeEncoding(self, newEncoding):
assert self.charEncoding[1] != "certain"
newEncoding = codecName(newEncoding)
if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
newEncoding = "utf-8"
newEncoding = lookupEncoding(newEncoding)
if newEncoding is None:
return
if newEncoding.name in ("utf-16be", "utf-16le"):
newEncoding = lookupEncoding("utf-8")
assert newEncoding is not None
elif newEncoding == self.charEncoding[0]:
self.charEncoding = (self.charEncoding[0], "certain")
else:
self.rawStream.seek(0)
self.reset()
self.charEncoding = (newEncoding, "certain")
raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
self.reset()
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
def detectBOM(self):
"""Attempts to detect at BOM at the start of the stream. If
@ -508,8 +538,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
encoding otherwise return None"""
bomDict = {
codecs.BOM_UTF8: 'utf-8',
codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
}
# Go to beginning of file and read in 4 bytes
@ -529,9 +559,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
# Set the read position past the BOM if one was found, otherwise
# set it to the start of the stream
self.rawStream.seek(encoding and seek or 0)
return encoding
if encoding:
self.rawStream.seek(seek)
return lookupEncoding(encoding)
else:
self.rawStream.seek(0)
return None
def detectEncodingMeta(self):
"""Report the encoding declared by the meta element
@ -542,8 +575,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
self.rawStream.seek(0)
encoding = parser.getEncoding()
if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
encoding = "utf-8"
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
encoding = lookupEncoding("utf-8")
return encoding
@ -557,6 +590,7 @@ class EncodingBytes(bytes):
return bytes.__new__(self, value.lower())
def __init__(self, value):
# pylint:disable=unused-argument
self._position = -1
def __iter__(self):
@ -667,7 +701,7 @@ class EncodingParser(object):
(b"<!", self.handleOther),
(b"<?", self.handleOther),
(b"<", self.handlePossibleStartTag))
for byte in self.data:
for _ in self.data:
keepParsing = True
for key, method in methodDispatch:
if self.data.matchBytes(key):
@ -706,7 +740,7 @@ class EncodingParser(object):
return False
elif attr[0] == b"charset":
tentativeEncoding = attr[1]
codec = codecName(tentativeEncoding)
codec = lookupEncoding(tentativeEncoding)
if codec is not None:
self.encoding = codec
return False
@ -714,7 +748,7 @@ class EncodingParser(object):
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
tentativeEncoding = contentParser.parse()
if tentativeEncoding is not None:
codec = codecName(tentativeEncoding)
codec = lookupEncoding(tentativeEncoding)
if codec is not None:
if hasPragma:
self.encoding = codec
@ -871,16 +905,19 @@ class ContentAttrParser(object):
return None
def codecName(encoding):
def lookupEncoding(encoding):
"""Return the python codec name corresponding to an encoding or None if the
string doesn't correspond to a valid encoding."""
if isinstance(encoding, bytes):
if isinstance(encoding, binary_type):
try:
encoding = encoding.decode("ascii")
except UnicodeDecodeError:
return None
if encoding:
canonicalName = ascii_punctuation_re.sub("", encoding).lower()
return encodings.get(canonicalName, None)
if encoding is not None:
try:
return webencodings.lookup(encoding)
except AttributeError:
return None
else:
return None
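For reference, ``webencodings.lookup`` normalizes labels per the WHATWG Encoding Standard:

.. code-block:: python

    import webencodings

    webencodings.lookup("  UTF8  ").name  # 'utf-8' (case and whitespace ignored)
    webencodings.lookup("latin1").name    # 'windows-1252', per the Encoding Standard
    webencodings.lookup("bogus")          # None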
@ -1,9 +1,6 @@
from __future__ import absolute_import, division, unicode_literals
try:
chr = unichr # flake8: noqa
except NameError:
pass
from six import unichr as chr
from collections import deque
@ -14,9 +11,9 @@ from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters
from .inputstream import HTMLInputStream
from ._inputstream import HTMLInputStream
from .trie import Trie
from ._trie import Trie
entitiesTrie = Trie(entities)
@ -34,16 +31,11 @@ class HTMLTokenizer(object):
Points to HTMLInputStream object.
"""
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
lowercaseElementName=True, lowercaseAttrName=True, parser=None):
def __init__(self, stream, parser=None, **kwargs):
self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
self.stream = HTMLInputStream(stream, **kwargs)
self.parser = parser
# Perform case conversions?
self.lowercaseElementName = lowercaseElementName
self.lowercaseAttrName = lowercaseAttrName
# Setup the initial tokenizer state
self.escapeFlag = False
self.lastFourChars = []
@ -147,8 +139,8 @@ class HTMLTokenizer(object):
output = "&"
charStack = [self.stream.char()]
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
or (allowedChar is not None and allowedChar == charStack[0])):
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
(allowedChar is not None and allowedChar == charStack[0])):
self.stream.unget(charStack[0])
elif charStack[0] == "#":
@ -235,8 +227,7 @@ class HTMLTokenizer(object):
token = self.currentToken
# Add token to the queue to be yielded
if (token["type"] in tagTokenTypes):
if self.lowercaseElementName:
token["name"] = token["name"].translate(asciiUpper2Lower)
token["name"] = token["name"].translate(asciiUpper2Lower)
if token["type"] == tokenTypes["EndTag"]:
if token["data"]:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
@ -921,10 +912,9 @@ class HTMLTokenizer(object):
# Attributes are not dropped at this stage. That happens when the
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
if self.lowercaseAttrName:
self.currentToken["data"][-1][0] = (
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
for name, value in self.currentToken["data"][:-1]:
self.currentToken["data"][-1][0] = (
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
for name, _ in self.currentToken["data"][:-1]:
if self.currentToken["data"][-1][0] == name:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"duplicate-attribute"})
@ -1716,11 +1706,11 @@ class HTMLTokenizer(object):
else:
data.append(char)
data = "".join(data)
data = "".join(data) # pylint:disable=redefined-variable-type
# Deal with null here rather than in the parser
nullCount = data.count("\u0000")
if nullCount > 0:
for i in range(nullCount):
for _ in range(nullCount):
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
data = data.replace("\u0000", "\uFFFD")
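With the encoding options now forwarded via ``**kwargs``, driving the tokenizer directly looks roughly like this:

.. code-block:: python

    from html5lib._tokenizer import HTMLTokenizer

    for token in HTMLTokenizer("<p class=x>hi"):
        print(token)  # dicts whose integer "type" codes come from constants.tokenTypes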
@ -4,9 +4,11 @@ from .py import Trie as PyTrie
Trie = PyTrie
# pylint:disable=wrong-import-position
try:
from .datrie import Trie as DATrie
except ImportError:
pass
else:
Trie = DATrie
# pylint:enable=wrong-import-position
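Either implementation exposes the same mapping interface; a sketch:

.. code-block:: python

    from html5lib._trie import Trie

    t = Trie({"ab": 1, "ac": 2})
    t.keys("a")                  # {'ab', 'ac'}
    t.has_keys_with_prefix("a")  # True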
@ -7,13 +7,13 @@ class Trie(Mapping):
"""Abstract base class for tries"""
def keys(self, prefix=None):
keys = super().keys()
# pylint:disable=arguments-differ
keys = super(Trie, self).keys()
if prefix is None:
return set(keys)
# Python 2.6: no set comprehensions
return set([x for x in keys if x.startswith(prefix)])
return {x for x in keys if x.startswith(prefix)}
def has_keys_with_prefix(self, prefix):
for key in self.keys():
@ -2,6 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
from types import ModuleType
from six import text_type
try:
import xml.etree.cElementTree as default_etree
except ImportError:
@ -9,7 +11,26 @@ except ImportError:
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
"surrogatePairToCodepoint", "moduleFactoryFactory"]
"surrogatePairToCodepoint", "moduleFactoryFactory",
"supports_lone_surrogates"]
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
_x = eval('"\\uD800"') # pylint:disable=eval-used
if not isinstance(_x, text_type):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
except: # pylint:disable=bare-except
supports_lone_surrogates = False
else:
supports_lone_surrogates = True
class MethodDispatcher(dict):
@ -31,19 +52,20 @@ class MethodDispatcher(dict):
# anything here.
_dictEntries = []
for name, value in items:
if type(name) in (list, tuple, frozenset, set):
if isinstance(name, (list, tuple, frozenset, set)):
for item in name:
_dictEntries.append((item, value))
else:
_dictEntries.append((name, value))
dict.__init__(self, _dictEntries)
assert len(self) == len(_dictEntries)
self.default = None
def __getitem__(self, key):
return dict.get(self, key, self.default)
# Some utility functions to dal with weirdness around UCS2 vs UCS4
# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds
def isSurrogatePair(data):
@ -70,13 +92,33 @@ def moduleFactoryFactory(factory):
else:
name = b"_%s_factory" % baseModule.__name__
if name in moduleCache:
return moduleCache[name]
else:
kwargs_tuple = tuple(kwargs.items())
try:
return moduleCache[name][args][kwargs_tuple]
except KeyError:
mod = ModuleType(name)
objs = factory(baseModule, *args, **kwargs)
mod.__dict__.update(objs)
moduleCache[name] = mod
if "name" not in moduleCache:
moduleCache[name] = {}
if "args" not in moduleCache[name]:
moduleCache[name][args] = {}
if "kwargs" not in moduleCache[name][args]:
moduleCache[name][args][kwargs_tuple] = {}
moduleCache[name][args][kwargs_tuple] = mod
return mod
return moduleFactory
def memoize(func):
cache = {}
def wrapped(*args, **kwargs):
key = (tuple(args), tuple(kwargs.items()))
if key not in cache:
cache[key] = func(*args, **kwargs)
return cache[key]
return wrapped
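The new ``memoize`` helper caches on the argument tuple; usage sketch:

.. code-block:: python

    from html5lib._utils import memoize

    @memoize
    def square(x):
        return x * x

    square(4)  # computed
    square(4)  # served from the cache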
@ -1,292 +1,296 @@
from __future__ import absolute_import, division, unicode_literals
import string
import gettext
_ = gettext.gettext
EOF = None
E = {
"null-character":
_("Null character in input stream, replaced with U+FFFD."),
"Null character in input stream, replaced with U+FFFD.",
"invalid-codepoint":
_("Invalid codepoint in stream."),
"Invalid codepoint in stream.",
"incorrectly-placed-solidus":
_("Solidus (/) incorrectly placed in tag."),
"Solidus (/) incorrectly placed in tag.",
"incorrect-cr-newline-entity":
_("Incorrect CR newline entity, replaced with LF."),
"Incorrect CR newline entity, replaced with LF.",
"illegal-windows-1252-entity":
_("Entity used with illegal number (windows-1252 reference)."),
"Entity used with illegal number (windows-1252 reference).",
"cant-convert-numeric-entity":
_("Numeric entity couldn't be converted to character "
"(codepoint U+%(charAsInt)08x)."),
"Numeric entity couldn't be converted to character "
"(codepoint U+%(charAsInt)08x).",
"illegal-codepoint-for-numeric-entity":
_("Numeric entity represents an illegal codepoint: "
"U+%(charAsInt)08x."),
"Numeric entity represents an illegal codepoint: "
"U+%(charAsInt)08x.",
"numeric-entity-without-semicolon":
_("Numeric entity didn't end with ';'."),
"Numeric entity didn't end with ';'.",
"expected-numeric-entity-but-got-eof":
_("Numeric entity expected. Got end of file instead."),
"Numeric entity expected. Got end of file instead.",
"expected-numeric-entity":
_("Numeric entity expected but none found."),
"Numeric entity expected but none found.",
"named-entity-without-semicolon":
_("Named entity didn't end with ';'."),
"Named entity didn't end with ';'.",
"expected-named-entity":
_("Named entity expected. Got none."),
"Named entity expected. Got none.",
"attributes-in-end-tag":
_("End tag contains unexpected attributes."),
"End tag contains unexpected attributes.",
'self-closing-flag-on-end-tag':
_("End tag contains unexpected self-closing flag."),
"End tag contains unexpected self-closing flag.",
"expected-tag-name-but-got-right-bracket":
_("Expected tag name. Got '>' instead."),
"Expected tag name. Got '>' instead.",
"expected-tag-name-but-got-question-mark":
_("Expected tag name. Got '?' instead. (HTML doesn't "
"support processing instructions.)"),
"Expected tag name. Got '?' instead. (HTML doesn't "
"support processing instructions.)",
"expected-tag-name":
_("Expected tag name. Got something else instead"),
"Expected tag name. Got something else instead",
"expected-closing-tag-but-got-right-bracket":
_("Expected closing tag. Got '>' instead. Ignoring '</>'."),
"Expected closing tag. Got '>' instead. Ignoring '</>'.",
"expected-closing-tag-but-got-eof":
_("Expected closing tag. Unexpected end of file."),
"Expected closing tag. Unexpected end of file.",
"expected-closing-tag-but-got-char":
_("Expected closing tag. Unexpected character '%(data)s' found."),
"Expected closing tag. Unexpected character '%(data)s' found.",
"eof-in-tag-name":
_("Unexpected end of file in the tag name."),
"Unexpected end of file in the tag name.",
"expected-attribute-name-but-got-eof":
_("Unexpected end of file. Expected attribute name instead."),
"Unexpected end of file. Expected attribute name instead.",
"eof-in-attribute-name":
_("Unexpected end of file in attribute name."),
"Unexpected end of file in attribute name.",
"invalid-character-in-attribute-name":
_("Invalid character in attribute name"),
"Invalid character in attribute name",
"duplicate-attribute":
_("Dropped duplicate attribute on tag."),
"Dropped duplicate attribute on tag.",
"expected-end-of-tag-name-but-got-eof":
_("Unexpected end of file. Expected = or end of tag."),
"Unexpected end of file. Expected = or end of tag.",
"expected-attribute-value-but-got-eof":
_("Unexpected end of file. Expected attribute value."),
"Unexpected end of file. Expected attribute value.",
"expected-attribute-value-but-got-right-bracket":
_("Expected attribute value. Got '>' instead."),
"Expected attribute value. Got '>' instead.",
'equals-in-unquoted-attribute-value':
_("Unexpected = in unquoted attribute"),
"Unexpected = in unquoted attribute",
'unexpected-character-in-unquoted-attribute-value':
_("Unexpected character in unquoted attribute"),
"Unexpected character in unquoted attribute",
"invalid-character-after-attribute-name":
_("Unexpected character after attribute name."),
"Unexpected character after attribute name.",
"unexpected-character-after-attribute-value":
_("Unexpected character after attribute value."),
"Unexpected character after attribute value.",
"eof-in-attribute-value-double-quote":
_("Unexpected end of file in attribute value (\")."),
"Unexpected end of file in attribute value (\").",
"eof-in-attribute-value-single-quote":
_("Unexpected end of file in attribute value (')."),
"Unexpected end of file in attribute value (').",
"eof-in-attribute-value-no-quotes":
_("Unexpected end of file in attribute value."),
"Unexpected end of file in attribute value.",
"unexpected-EOF-after-solidus-in-tag":
_("Unexpected end of file in tag. Expected >"),
"Unexpected end of file in tag. Expected >",
"unexpected-character-after-solidus-in-tag":
_("Unexpected character after / in tag. Expected >"),
"Unexpected character after / in tag. Expected >",
"expected-dashes-or-doctype":
_("Expected '--' or 'DOCTYPE'. Not found."),
"Expected '--' or 'DOCTYPE'. Not found.",
"unexpected-bang-after-double-dash-in-comment":
_("Unexpected ! after -- in comment"),
"Unexpected ! after -- in comment",
"unexpected-space-after-double-dash-in-comment":
_("Unexpected space after -- in comment"),
"Unexpected space after -- in comment",
"incorrect-comment":
_("Incorrect comment."),
"Incorrect comment.",
"eof-in-comment":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"eof-in-comment-end-dash":
_("Unexpected end of file in comment (-)"),
"Unexpected end of file in comment (-)",
"unexpected-dash-after-double-dash-in-comment":
_("Unexpected '-' after '--' found in comment."),
"Unexpected '-' after '--' found in comment.",
"eof-in-comment-double-dash":
_("Unexpected end of file in comment (--)."),
"Unexpected end of file in comment (--).",
"eof-in-comment-end-space-state":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"eof-in-comment-end-bang-state":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"unexpected-char-in-comment":
_("Unexpected character in comment found."),
"Unexpected character in comment found.",
"need-space-after-doctype":
_("No space after literal string 'DOCTYPE'."),
"No space after literal string 'DOCTYPE'.",
"expected-doctype-name-but-got-right-bracket":
_("Unexpected > character. Expected DOCTYPE name."),
"Unexpected > character. Expected DOCTYPE name.",
"expected-doctype-name-but-got-eof":
_("Unexpected end of file. Expected DOCTYPE name."),
"Unexpected end of file. Expected DOCTYPE name.",
"eof-in-doctype-name":
_("Unexpected end of file in DOCTYPE name."),
"Unexpected end of file in DOCTYPE name.",
"eof-in-doctype":
_("Unexpected end of file in DOCTYPE."),
"Unexpected end of file in DOCTYPE.",
"expected-space-or-right-bracket-in-doctype":
_("Expected space or '>'. Got '%(data)s'"),
"Expected space or '>'. Got '%(data)s'",
"unexpected-end-of-doctype":
_("Unexpected end of DOCTYPE."),
"Unexpected end of DOCTYPE.",
"unexpected-char-in-doctype":
_("Unexpected character in DOCTYPE."),
"Unexpected character in DOCTYPE.",
"eof-in-innerhtml":
_("XXX innerHTML EOF"),
"XXX innerHTML EOF",
"unexpected-doctype":
_("Unexpected DOCTYPE. Ignored."),
"Unexpected DOCTYPE. Ignored.",
"non-html-root":
_("html needs to be the first start tag."),
"html needs to be the first start tag.",
"expected-doctype-but-got-eof":
_("Unexpected End of file. Expected DOCTYPE."),
"Unexpected End of file. Expected DOCTYPE.",
"unknown-doctype":
_("Erroneous DOCTYPE."),
"Erroneous DOCTYPE.",
"expected-doctype-but-got-chars":
_("Unexpected non-space characters. Expected DOCTYPE."),
"Unexpected non-space characters. Expected DOCTYPE.",
"expected-doctype-but-got-start-tag":
_("Unexpected start tag (%(name)s). Expected DOCTYPE."),
"Unexpected start tag (%(name)s). Expected DOCTYPE.",
"expected-doctype-but-got-end-tag":
_("Unexpected end tag (%(name)s). Expected DOCTYPE."),
"Unexpected end tag (%(name)s). Expected DOCTYPE.",
"end-tag-after-implied-root":
_("Unexpected end tag (%(name)s) after the (implied) root element."),
"Unexpected end tag (%(name)s) after the (implied) root element.",
"expected-named-closing-tag-but-got-eof":
_("Unexpected end of file. Expected end tag (%(name)s)."),
"Unexpected end of file. Expected end tag (%(name)s).",
"two-heads-are-not-better-than-one":
_("Unexpected start tag head in existing head. Ignored."),
"Unexpected start tag head in existing head. Ignored.",
"unexpected-end-tag":
_("Unexpected end tag (%(name)s). Ignored."),
"Unexpected end tag (%(name)s). Ignored.",
"unexpected-start-tag-out-of-my-head":
_("Unexpected start tag (%(name)s) that can be in head. Moved."),
"Unexpected start tag (%(name)s) that can be in head. Moved.",
"unexpected-start-tag":
_("Unexpected start tag (%(name)s)."),
"Unexpected start tag (%(name)s).",
"missing-end-tag":
_("Missing end tag (%(name)s)."),
"Missing end tag (%(name)s).",
"missing-end-tags":
_("Missing end tags (%(name)s)."),
"Missing end tags (%(name)s).",
"unexpected-start-tag-implies-end-tag":
_("Unexpected start tag (%(startName)s) "
"implies end tag (%(endName)s)."),
"Unexpected start tag (%(startName)s) "
"implies end tag (%(endName)s).",
"unexpected-start-tag-treated-as":
_("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
"Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
"deprecated-tag":
_("Unexpected start tag %(name)s. Don't use it!"),
"Unexpected start tag %(name)s. Don't use it!",
"unexpected-start-tag-ignored":
_("Unexpected start tag %(name)s. Ignored."),
"Unexpected start tag %(name)s. Ignored.",
"expected-one-end-tag-but-got-another":
_("Unexpected end tag (%(gotName)s). "
"Missing end tag (%(expectedName)s)."),
"Unexpected end tag (%(gotName)s). "
"Missing end tag (%(expectedName)s).",
"end-tag-too-early":
_("End tag (%(name)s) seen too early. Expected other end tag."),
"End tag (%(name)s) seen too early. Expected other end tag.",
"end-tag-too-early-named":
_("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
"end-tag-too-early-ignored":
_("End tag (%(name)s) seen too early. Ignored."),
"End tag (%(name)s) seen too early. Ignored.",
"adoption-agency-1.1":
_("End tag (%(name)s) violates step 1, "
"paragraph 1 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 1 of the adoption agency algorithm.",
"adoption-agency-1.2":
_("End tag (%(name)s) violates step 1, "
"paragraph 2 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 2 of the adoption agency algorithm.",
"adoption-agency-1.3":
_("End tag (%(name)s) violates step 1, "
"paragraph 3 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 3 of the adoption agency algorithm.",
"adoption-agency-4.4":
_("End tag (%(name)s) violates step 4, "
"paragraph 4 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 4, "
"paragraph 4 of the adoption agency algorithm.",
"unexpected-end-tag-treated-as":
_("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
"Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
"no-end-tag":
_("This element (%(name)s) has no end tag."),
"This element (%(name)s) has no end tag.",
"unexpected-implied-end-tag-in-table":
_("Unexpected implied end tag (%(name)s) in the table phase."),
"Unexpected implied end tag (%(name)s) in the table phase.",
"unexpected-implied-end-tag-in-table-body":
_("Unexpected implied end tag (%(name)s) in the table body phase."),
"Unexpected implied end tag (%(name)s) in the table body phase.",
"unexpected-char-implies-table-voodoo":
_("Unexpected non-space characters in "
"table context caused voodoo mode."),
"Unexpected non-space characters in "
"table context caused voodoo mode.",
"unexpected-hidden-input-in-table":
_("Unexpected input with type hidden in table context."),
"Unexpected input with type hidden in table context.",
"unexpected-form-in-table":
_("Unexpected form in table context."),
"Unexpected form in table context.",
"unexpected-start-tag-implies-table-voodoo":
_("Unexpected start tag (%(name)s) in "
"table context caused voodoo mode."),
"Unexpected start tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-end-tag-implies-table-voodoo":
_("Unexpected end tag (%(name)s) in "
"table context caused voodoo mode."),
"Unexpected end tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-cell-in-table-body":
_("Unexpected table cell start tag (%(name)s) "
"in the table body phase."),
"Unexpected table cell start tag (%(name)s) "
"in the table body phase.",
"unexpected-cell-end-tag":
_("Got table cell end tag (%(name)s) "
"while required end tags are missing."),
"Got table cell end tag (%(name)s) "
"while required end tags are missing.",
"unexpected-end-tag-in-table-body":
_("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
"Unexpected end tag (%(name)s) in the table body phase. Ignored.",
"unexpected-implied-end-tag-in-table-row":
_("Unexpected implied end tag (%(name)s) in the table row phase."),
"Unexpected implied end tag (%(name)s) in the table row phase.",
"unexpected-end-tag-in-table-row":
_("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
"Unexpected end tag (%(name)s) in the table row phase. Ignored.",
"unexpected-select-in-select":
_("Unexpected select start tag in the select phase "
"treated as select end tag."),
"Unexpected select start tag in the select phase "
"treated as select end tag.",
"unexpected-input-in-select":
_("Unexpected input start tag in the select phase."),
"Unexpected input start tag in the select phase.",
"unexpected-start-tag-in-select":
_("Unexpected start tag token (%(name)s in the select phase. "
"Ignored."),
"Unexpected start tag token (%(name)s in the select phase. "
"Ignored.",
"unexpected-end-tag-in-select":
_("Unexpected end tag (%(name)s) in the select phase. Ignored."),
"Unexpected end tag (%(name)s) in the select phase. Ignored.",
"unexpected-table-element-start-tag-in-select-in-table":
_("Unexpected table element start tag (%(name)s) in the select in table phase."),
"Unexpected table element start tag (%(name)s) in the select in table phase.",
"unexpected-table-element-end-tag-in-select-in-table":
_("Unexpected table element end tag (%(name)s) in the select in table phase."),
"Unexpected table element end tag (%(name)s) in the select in table phase.",
"unexpected-char-after-body":
_("Unexpected non-space characters in the after body phase."),
"Unexpected non-space characters in the after body phase.",
"unexpected-start-tag-after-body":
_("Unexpected start tag token (%(name)s)"
" in the after body phase."),
"Unexpected start tag token (%(name)s)"
" in the after body phase.",
"unexpected-end-tag-after-body":
_("Unexpected end tag token (%(name)s)"
" in the after body phase."),
"Unexpected end tag token (%(name)s)"
" in the after body phase.",
"unexpected-char-in-frameset":
_("Unexpected characters in the frameset phase. Characters ignored."),
"Unexpected characters in the frameset phase. Characters ignored.",
"unexpected-start-tag-in-frameset":
_("Unexpected start tag token (%(name)s)"
" in the frameset phase. Ignored."),
"Unexpected start tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-frameset-in-frameset-innerhtml":
_("Unexpected end tag token (frameset) "
"in the frameset phase (innerHTML)."),
"Unexpected end tag token (frameset) "
"in the frameset phase (innerHTML).",
"unexpected-end-tag-in-frameset":
_("Unexpected end tag token (%(name)s)"
" in the frameset phase. Ignored."),
"Unexpected end tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-char-after-frameset":
_("Unexpected non-space characters in the "
"after frameset phase. Ignored."),
"Unexpected non-space characters in the "
"after frameset phase. Ignored.",
"unexpected-start-tag-after-frameset":
_("Unexpected start tag (%(name)s)"
" in the after frameset phase. Ignored."),
"Unexpected start tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-frameset":
_("Unexpected end tag (%(name)s)"
" in the after frameset phase. Ignored."),
"Unexpected end tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-body-innerhtml":
_("Unexpected end tag after body(innerHtml)"),
"Unexpected end tag after body(innerHtml)",
"expected-eof-but-got-char":
_("Unexpected non-space characters. Expected end of file."),
"Unexpected non-space characters. Expected end of file.",
"expected-eof-but-got-start-tag":
_("Unexpected start tag (%(name)s)"
". Expected end of file."),
"Unexpected start tag (%(name)s)"
". Expected end of file.",
"expected-eof-but-got-end-tag":
_("Unexpected end tag (%(name)s)"
". Expected end of file."),
"Unexpected end tag (%(name)s)"
". Expected end of file.",
"eof-in-table":
_("Unexpected end of file. Expected table content."),
"Unexpected end of file. Expected table content.",
"eof-in-select":
_("Unexpected end of file. Expected select content."),
"Unexpected end of file. Expected select content.",
"eof-in-frameset":
_("Unexpected end of file. Expected frameset content."),
"Unexpected end of file. Expected frameset content.",
"eof-in-script-in-script":
_("Unexpected end of file. Expected script content."),
"Unexpected end of file. Expected script content.",
"eof-in-foreign-lands":
_("Unexpected end of file. Expected foreign content"),
"Unexpected end of file. Expected foreign content",
"non-void-element-with-trailing-solidus":
_("Trailing solidus not allowed on element %(name)s"),
"Trailing solidus not allowed on element %(name)s",
"unexpected-html-element-in-foreign-content":
_("Element %(name)s not allowed in a non-html context"),
"Element %(name)s not allowed in a non-html context",
"unexpected-end-tag-before-html":
_("Unexpected end tag (%(name)s) before html."),
"Unexpected end tag (%(name)s) before html.",
"unexpected-inhead-noscript-tag":
"Element %(name)s not allowed in a inhead-noscript context",
"eof-in-head-noscript":
"Unexpected end of file. Expected inhead-noscript content",
"char-in-head-noscript":
"Unexpected non-space character. Expected inhead-noscript content",
"XXX-undefined-error":
_("Undefined error (this sucks and should be fixed)"),
"Undefined error (this sucks and should be fixed)",
}
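# Usage sketch (illustrative, not part of this change): the parser selects a
# message by its short code and fills in token details with %-formatting.
# Assuming the table above is bound to the name E, as in html5lib.constants:
#
#     E["expected-doctype-but-got-start-tag"] % {"name": "div"}
#     # -> 'Unexpected start tag (div). Expected DOCTYPE.'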
namespaces = {
@ -298,7 +302,7 @@ namespaces = {
"xmlns": "http://www.w3.org/2000/xmlns/"
}
scopingElements = frozenset((
scopingElements = frozenset([
(namespaces["html"], "applet"),
(namespaces["html"], "caption"),
(namespaces["html"], "html"),
@ -316,9 +320,9 @@ scopingElements = frozenset((
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title"),
))
])
formattingElements = frozenset((
formattingElements = frozenset([
(namespaces["html"], "a"),
(namespaces["html"], "b"),
(namespaces["html"], "big"),
@ -333,9 +337,9 @@ formattingElements = frozenset((
(namespaces["html"], "strong"),
(namespaces["html"], "tt"),
(namespaces["html"], "u")
))
])
specialElements = frozenset((
specialElements = frozenset([
(namespaces["html"], "address"),
(namespaces["html"], "applet"),
(namespaces["html"], "area"),
@ -416,22 +420,89 @@ specialElements = frozenset((
(namespaces["html"], "wbr"),
(namespaces["html"], "xmp"),
(namespaces["svg"], "foreignObject")
))
])
htmlIntegrationPointElements = frozenset((
(namespaces["mathml"], "annotaion-xml"),
htmlIntegrationPointElements = frozenset([
(namespaces["mathml"], "annotation-xml"),
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title")
))
])
mathmlTextIntegrationPointElements = frozenset((
mathmlTextIntegrationPointElements = frozenset([
(namespaces["mathml"], "mi"),
(namespaces["mathml"], "mo"),
(namespaces["mathml"], "mn"),
(namespaces["mathml"], "ms"),
(namespaces["mathml"], "mtext")
))
])
adjustSVGAttributes = {
"attributename": "attributeName",
"attributetype": "attributeType",
"basefrequency": "baseFrequency",
"baseprofile": "baseProfile",
"calcmode": "calcMode",
"clippathunits": "clipPathUnits",
"contentscripttype": "contentScriptType",
"contentstyletype": "contentStyleType",
"diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits",
"glyphref": "glyphRef",
"gradienttransform": "gradientTransform",
"gradientunits": "gradientUnits",
"kernelmatrix": "kernelMatrix",
"kernelunitlength": "kernelUnitLength",
"keypoints": "keyPoints",
"keysplines": "keySplines",
"keytimes": "keyTimes",
"lengthadjust": "lengthAdjust",
"limitingconeangle": "limitingConeAngle",
"markerheight": "markerHeight",
"markerunits": "markerUnits",
"markerwidth": "markerWidth",
"maskcontentunits": "maskContentUnits",
"maskunits": "maskUnits",
"numoctaves": "numOctaves",
"pathlength": "pathLength",
"patterncontentunits": "patternContentUnits",
"patterntransform": "patternTransform",
"patternunits": "patternUnits",
"pointsatx": "pointsAtX",
"pointsaty": "pointsAtY",
"pointsatz": "pointsAtZ",
"preservealpha": "preserveAlpha",
"preserveaspectratio": "preserveAspectRatio",
"primitiveunits": "primitiveUnits",
"refx": "refX",
"refy": "refY",
"repeatcount": "repeatCount",
"repeatdur": "repeatDur",
"requiredextensions": "requiredExtensions",
"requiredfeatures": "requiredFeatures",
"specularconstant": "specularConstant",
"specularexponent": "specularExponent",
"spreadmethod": "spreadMethod",
"startoffset": "startOffset",
"stddeviation": "stdDeviation",
"stitchtiles": "stitchTiles",
"surfacescale": "surfaceScale",
"systemlanguage": "systemLanguage",
"tablevalues": "tableValues",
"targetx": "targetX",
"targety": "targetY",
"textlength": "textLength",
"viewbox": "viewBox",
"viewtarget": "viewTarget",
"xchannelselector": "xChannelSelector",
"ychannelselector": "yChannelSelector",
"zoomandpan": "zoomAndPan"
}
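# Sketch of the intended use of this map (illustrative): the tokenizer
# lowercases attribute names, and the tree builder re-cases them for SVG
# content before setting them on the element, e.g.
#
#     adjustSVGAttributes.get("viewbox", "viewbox")  # -> "viewBox"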
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
adjustForeignAttributes = {
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
@ -451,21 +522,21 @@ adjustForeignAttributes = {
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
adjustForeignAttributes.items()])
spaceCharacters = frozenset((
spaceCharacters = frozenset([
"\t",
"\n",
"\u000C",
" ",
"\r"
))
])
tableInsertModeElements = frozenset((
tableInsertModeElements = frozenset([
"table",
"tbody",
"tfoot",
"thead",
"tr"
))
])
asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
@ -486,7 +557,7 @@ headingElements = (
"h6"
)
voidElements = frozenset((
voidElements = frozenset([
"base",
"command",
"event-source",
@ -502,11 +573,11 @@ voidElements = frozenset((
"input",
"source",
"track"
))
])
cdataElements = frozenset(('title', 'textarea'))
cdataElements = frozenset(['title', 'textarea'])
rcdataElements = frozenset((
rcdataElements = frozenset([
'style',
'script',
'xmp',
@ -514,27 +585,28 @@ rcdataElements = frozenset((
'noembed',
'noframes',
'noscript'
))
])
booleanAttributes = {
"": frozenset(("irrelevant",)),
"style": frozenset(("scoped",)),
"img": frozenset(("ismap",)),
"audio": frozenset(("autoplay", "controls")),
"video": frozenset(("autoplay", "controls")),
"script": frozenset(("defer", "async")),
"details": frozenset(("open",)),
"datagrid": frozenset(("multiple", "disabled")),
"command": frozenset(("hidden", "disabled", "checked", "default")),
"hr": frozenset(("noshade")),
"menu": frozenset(("autosubmit",)),
"fieldset": frozenset(("disabled", "readonly")),
"option": frozenset(("disabled", "readonly", "selected")),
"optgroup": frozenset(("disabled", "readonly")),
"button": frozenset(("disabled", "autofocus")),
"input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
"select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
"output": frozenset(("disabled", "readonly")),
"": frozenset(["irrelevant", "itemscope"]),
"style": frozenset(["scoped"]),
"img": frozenset(["ismap"]),
"audio": frozenset(["autoplay", "controls"]),
"video": frozenset(["autoplay", "controls"]),
"script": frozenset(["defer", "async"]),
"details": frozenset(["open"]),
"datagrid": frozenset(["multiple", "disabled"]),
"command": frozenset(["hidden", "disabled", "checked", "default"]),
"hr": frozenset(["noshade"]),
"menu": frozenset(["autosubmit"]),
"fieldset": frozenset(["disabled", "readonly"]),
"option": frozenset(["disabled", "readonly", "selected"]),
"optgroup": frozenset(["disabled", "readonly"]),
"button": frozenset(["disabled", "autofocus"]),
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
"output": frozenset(["disabled", "readonly"]),
"iframe": frozenset(["seamless"]),
}
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
@ -574,7 +646,7 @@ entitiesWindows1252 = (
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
)
xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
entities = {
"AElig": "\xc6",
@ -2815,7 +2887,6 @@ replacementCharacters = {
0x0d: "\u000D",
0x80: "\u20AC",
0x81: "\u0081",
0x81: "\u0081",
0x82: "\u201A",
0x83: "\u0192",
0x84: "\u201E",
@ -2848,235 +2919,6 @@ replacementCharacters = {
0x9F: "\u0178",
}
encodings = {
'437': 'cp437',
'850': 'cp850',
'852': 'cp852',
'855': 'cp855',
'857': 'cp857',
'860': 'cp860',
'861': 'cp861',
'862': 'cp862',
'863': 'cp863',
'865': 'cp865',
'866': 'cp866',
'869': 'cp869',
'ansix341968': 'ascii',
'ansix341986': 'ascii',
'arabic': 'iso8859-6',
'ascii': 'ascii',
'asmo708': 'iso8859-6',
'big5': 'big5',
'big5hkscs': 'big5hkscs',
'chinese': 'gbk',
'cp037': 'cp037',
'cp1026': 'cp1026',
'cp154': 'ptcp154',
'cp367': 'ascii',
'cp424': 'cp424',
'cp437': 'cp437',
'cp500': 'cp500',
'cp775': 'cp775',
'cp819': 'windows-1252',
'cp850': 'cp850',
'cp852': 'cp852',
'cp855': 'cp855',
'cp857': 'cp857',
'cp860': 'cp860',
'cp861': 'cp861',
'cp862': 'cp862',
'cp863': 'cp863',
'cp864': 'cp864',
'cp865': 'cp865',
'cp866': 'cp866',
'cp869': 'cp869',
'cp936': 'gbk',
'cpgr': 'cp869',
'cpis': 'cp861',
'csascii': 'ascii',
'csbig5': 'big5',
'cseuckr': 'cp949',
'cseucpkdfmtjapanese': 'euc_jp',
'csgb2312': 'gbk',
'cshproman8': 'hp-roman8',
'csibm037': 'cp037',
'csibm1026': 'cp1026',
'csibm424': 'cp424',
'csibm500': 'cp500',
'csibm855': 'cp855',
'csibm857': 'cp857',
'csibm860': 'cp860',
'csibm861': 'cp861',
'csibm863': 'cp863',
'csibm864': 'cp864',
'csibm865': 'cp865',
'csibm866': 'cp866',
'csibm869': 'cp869',
'csiso2022jp': 'iso2022_jp',
'csiso2022jp2': 'iso2022_jp_2',
'csiso2022kr': 'iso2022_kr',
'csiso58gb231280': 'gbk',
'csisolatin1': 'windows-1252',
'csisolatin2': 'iso8859-2',
'csisolatin3': 'iso8859-3',
'csisolatin4': 'iso8859-4',
'csisolatin5': 'windows-1254',
'csisolatin6': 'iso8859-10',
'csisolatinarabic': 'iso8859-6',
'csisolatincyrillic': 'iso8859-5',
'csisolatingreek': 'iso8859-7',
'csisolatinhebrew': 'iso8859-8',
'cskoi8r': 'koi8-r',
'csksc56011987': 'cp949',
'cspc775baltic': 'cp775',
'cspc850multilingual': 'cp850',
'cspc862latinhebrew': 'cp862',
'cspc8codepage437': 'cp437',
'cspcp852': 'cp852',
'csptcp154': 'ptcp154',
'csshiftjis': 'shift_jis',
'csunicode11utf7': 'utf-7',
'cyrillic': 'iso8859-5',
'cyrillicasian': 'ptcp154',
'ebcdiccpbe': 'cp500',
'ebcdiccpca': 'cp037',
'ebcdiccpch': 'cp500',
'ebcdiccphe': 'cp424',
'ebcdiccpnl': 'cp037',
'ebcdiccpus': 'cp037',
'ebcdiccpwt': 'cp037',
'ecma114': 'iso8859-6',
'ecma118': 'iso8859-7',
'elot928': 'iso8859-7',
'eucjp': 'euc_jp',
'euckr': 'cp949',
'extendedunixcodepackedformatforjapanese': 'euc_jp',
'gb18030': 'gb18030',
'gb2312': 'gbk',
'gb231280': 'gbk',
'gbk': 'gbk',
'greek': 'iso8859-7',
'greek8': 'iso8859-7',
'hebrew': 'iso8859-8',
'hproman8': 'hp-roman8',
'hzgb2312': 'hz',
'ibm037': 'cp037',
'ibm1026': 'cp1026',
'ibm367': 'ascii',
'ibm424': 'cp424',
'ibm437': 'cp437',
'ibm500': 'cp500',
'ibm775': 'cp775',
'ibm819': 'windows-1252',
'ibm850': 'cp850',
'ibm852': 'cp852',
'ibm855': 'cp855',
'ibm857': 'cp857',
'ibm860': 'cp860',
'ibm861': 'cp861',
'ibm862': 'cp862',
'ibm863': 'cp863',
'ibm864': 'cp864',
'ibm865': 'cp865',
'ibm866': 'cp866',
'ibm869': 'cp869',
'iso2022jp': 'iso2022_jp',
'iso2022jp2': 'iso2022_jp_2',
'iso2022kr': 'iso2022_kr',
'iso646irv1991': 'ascii',
'iso646us': 'ascii',
'iso88591': 'windows-1252',
'iso885910': 'iso8859-10',
'iso8859101992': 'iso8859-10',
'iso885911987': 'windows-1252',
'iso885913': 'iso8859-13',
'iso885914': 'iso8859-14',
'iso8859141998': 'iso8859-14',
'iso885915': 'iso8859-15',
'iso885916': 'iso8859-16',
'iso8859162001': 'iso8859-16',
'iso88592': 'iso8859-2',
'iso885921987': 'iso8859-2',
'iso88593': 'iso8859-3',
'iso885931988': 'iso8859-3',
'iso88594': 'iso8859-4',
'iso885941988': 'iso8859-4',
'iso88595': 'iso8859-5',
'iso885951988': 'iso8859-5',
'iso88596': 'iso8859-6',
'iso885961987': 'iso8859-6',
'iso88597': 'iso8859-7',
'iso885971987': 'iso8859-7',
'iso88598': 'iso8859-8',
'iso885981988': 'iso8859-8',
'iso88599': 'windows-1254',
'iso885991989': 'windows-1254',
'isoceltic': 'iso8859-14',
'isoir100': 'windows-1252',
'isoir101': 'iso8859-2',
'isoir109': 'iso8859-3',
'isoir110': 'iso8859-4',
'isoir126': 'iso8859-7',
'isoir127': 'iso8859-6',
'isoir138': 'iso8859-8',
'isoir144': 'iso8859-5',
'isoir148': 'windows-1254',
'isoir149': 'cp949',
'isoir157': 'iso8859-10',
'isoir199': 'iso8859-14',
'isoir226': 'iso8859-16',
'isoir58': 'gbk',
'isoir6': 'ascii',
'koi8r': 'koi8-r',
'koi8u': 'koi8-u',
'korean': 'cp949',
'ksc5601': 'cp949',
'ksc56011987': 'cp949',
'ksc56011989': 'cp949',
'l1': 'windows-1252',
'l10': 'iso8859-16',
'l2': 'iso8859-2',
'l3': 'iso8859-3',
'l4': 'iso8859-4',
'l5': 'windows-1254',
'l6': 'iso8859-10',
'l8': 'iso8859-14',
'latin1': 'windows-1252',
'latin10': 'iso8859-16',
'latin2': 'iso8859-2',
'latin3': 'iso8859-3',
'latin4': 'iso8859-4',
'latin5': 'windows-1254',
'latin6': 'iso8859-10',
'latin8': 'iso8859-14',
'latin9': 'iso8859-15',
'ms936': 'gbk',
'mskanji': 'shift_jis',
'pt154': 'ptcp154',
'ptcp154': 'ptcp154',
'r8': 'hp-roman8',
'roman8': 'hp-roman8',
'shiftjis': 'shift_jis',
'tis620': 'cp874',
'unicode11utf7': 'utf-7',
'us': 'ascii',
'usascii': 'ascii',
'utf16': 'utf-16',
'utf16be': 'utf-16-be',
'utf16le': 'utf-16-le',
'utf8': 'utf-8',
'windows1250': 'cp1250',
'windows1251': 'cp1251',
'windows1252': 'cp1252',
'windows1253': 'cp1253',
'windows1254': 'cp1254',
'windows1255': 'cp1255',
'windows1256': 'cp1256',
'windows1257': 'cp1257',
'windows1258': 'cp1258',
'windows936': 'gbk',
'x-x-big5': 'big5'}
tokenTypes = {
"Doctype": 0,
"Characters": 1,
@ -3088,8 +2930,8 @@ tokenTypes = {
"ParseError": 7
}
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]))
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]])
prefixes = dict([(v, k) for k, v in namespaces.items()])
@ -3097,8 +2939,9 @@ prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
class DataLossWarning(UserWarning):
"""Raised when the current tree is unable to represent the input data"""
pass
class ReparseException(Exception):
class _ReparseException(Exception):
pass


@ -0,0 +1,29 @@
from __future__ import absolute_import, division, unicode_literals
from . import base
from collections import OrderedDict
def _attr_key(attr):
"""Return an appropriate key for an attribute for sorting
Attributes have a namespace that can be either ``None`` or a string. We
can't compare the two because they're different types, so we convert
``None`` to an empty string first.
"""
return (attr[0][0] or ''), attr[0][1]
class Filter(base.Filter):
"""Alphabetizes attributes for elements"""
def __iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
key=_attr_key):
attrs[name] = value
token["data"] = attrs
yield token
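
A minimal usage sketch for the filter above (illustrative wiring; in normal use the serializer applies it when alphabetical_attributes=True):

    import html5lib
    from html5lib.filters.alphabeticalattributes import Filter

    doc = html5lib.parse('<img src="a.png" alt="a" id="x">')
    walker = html5lib.getTreeWalker("etree")
    # Wrap the token stream so attributes come out sorted by (namespace, name).
    for token in Filter(walker(doc)):
        if token["type"] == "EmptyTag":
            print(list(token["data"].keys()))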


@ -1,11 +1,19 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
from . import base
class Filter(_base.Filter):
class Filter(base.Filter):
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
def __init__(self, source, encoding):
_base.Filter.__init__(self, source)
"""Creates a Filter
:arg source: the source token stream
:arg encoding: the encoding to set
"""
base.Filter.__init__(self, source)
self.encoding = encoding
def __iter__(self):
@ -13,7 +21,7 @@ class Filter(_base.Filter):
meta_found = (self.encoding is None)
pending = []
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == "head":


@ -0,0 +1,93 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from . import base
from ..constants import namespaces, voidElements
from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
class Filter(base.Filter):
"""Lints the token stream for errors
If it finds any errors, it'll raise an ``AssertionError``.
"""
def __init__(self, source, require_matching_tags=True):
"""Creates a Filter
:arg source: the source token stream
:arg require_matching_tags: whether or not to require matching tags
"""
super(Filter, self).__init__(source)
self.require_matching_tags = require_matching_tags
def __iter__(self):
open_elements = []
for token in base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert type == "EmptyTag"
else:
assert type == "StartTag"
if type == "StartTag" and self.require_matching_tags:
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(value, text_type)
elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
elif self.require_matching_tags:
start = open_elements.pop()
assert start == (namespace, name)
elif type == "Comment":
data = token["data"]
assert isinstance(data, text_type)
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
assert isinstance(data, text_type)
assert data != ""
if type == "SpaceCharacters":
assert data.strip(spaceCharacters) == ""
elif type == "Doctype":
name = token["name"]
assert name is None or isinstance(name, text_type)
assert token["publicId"] is None or isinstance(name, text_type)
assert token["systemId"] is None or isinstance(name, text_type)
elif type == "Entity":
assert isinstance(token["name"], text_type)
elif type == "SerializerError":
assert isinstance(token["data"], text_type)
else:
assert False, "Unknown token type: %(type)s" % {"type": type}
yield token
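
A sketch of running the lint filter standalone (it re-yields tokens unchanged and raises AssertionError on a malformed one):

    import html5lib
    from html5lib.filters.lint import Filter as LintFilter

    doc = html5lib.parse("<p>text</p>")
    walker = html5lib.getTreeWalker("etree")
    for token in LintFilter(walker(doc)):
        pass  # well-formed streams pass through without tripping an assert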


@ -1,9 +1,10 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
from . import base
class Filter(_base.Filter):
class Filter(base.Filter):
"""Removes optional tags from the token stream"""
def slider(self):
previous1 = previous2 = None
for token in self.source:
@ -11,7 +12,8 @@ class Filter(_base.Filter):
yield previous2, previous1, token
previous2 = previous1
previous1 = token
yield previous2, previous1, None
if previous1 is not None:
yield previous2, previous1, None
def __iter__(self):
for previous, token, next in self.slider():
@ -58,7 +60,7 @@ class Filter(_base.Filter):
elif tagname == 'colgroup':
# A colgroup element's start tag may be omitted if the first thing
# inside the colgroup element is a col element, and if the element
# is not immediately preceeded by another colgroup element whose
# is not immediately preceded by another colgroup element whose
# end tag has been omitted.
if type in ("StartTag", "EmptyTag"):
# XXX: we do not look at the preceding event, so instead we never
@ -70,7 +72,7 @@ class Filter(_base.Filter):
elif tagname == 'tbody':
# A tbody element's start tag may be omitted if the first thing
# inside the tbody element is a tr element, and if the element is
# not immediately preceeded by a tbody, thead, or tfoot element
# not immediately preceded by a tbody, thead, or tfoot element
# whose end tag has been omitted.
if type == "StartTag":
# omit the thead and tfoot elements' end tag when they are
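
The slider() change above is easiest to see on an empty token stream: previously it still yielded one (None, None, None) triple, now it yields nothing. A quick check (sketch, not part of this diff):

    from html5lib.filters.optionaltags import Filter

    empty = Filter(iter([]))
    print(list(empty.slider()))  # [] -- no spurious triple for an empty stream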


@ -0,0 +1,896 @@
from __future__ import absolute_import, division, unicode_literals
import re
from xml.sax.saxutils import escape, unescape
from six.moves import urllib_parse as urlparse
from . import base
from ..constants import namespaces, prefixes
__all__ = ["Filter"]
allowed_elements = frozenset((
(namespaces['html'], 'a'),
(namespaces['html'], 'abbr'),
(namespaces['html'], 'acronym'),
(namespaces['html'], 'address'),
(namespaces['html'], 'area'),
(namespaces['html'], 'article'),
(namespaces['html'], 'aside'),
(namespaces['html'], 'audio'),
(namespaces['html'], 'b'),
(namespaces['html'], 'big'),
(namespaces['html'], 'blockquote'),
(namespaces['html'], 'br'),
(namespaces['html'], 'button'),
(namespaces['html'], 'canvas'),
(namespaces['html'], 'caption'),
(namespaces['html'], 'center'),
(namespaces['html'], 'cite'),
(namespaces['html'], 'code'),
(namespaces['html'], 'col'),
(namespaces['html'], 'colgroup'),
(namespaces['html'], 'command'),
(namespaces['html'], 'datagrid'),
(namespaces['html'], 'datalist'),
(namespaces['html'], 'dd'),
(namespaces['html'], 'del'),
(namespaces['html'], 'details'),
(namespaces['html'], 'dfn'),
(namespaces['html'], 'dialog'),
(namespaces['html'], 'dir'),
(namespaces['html'], 'div'),
(namespaces['html'], 'dl'),
(namespaces['html'], 'dt'),
(namespaces['html'], 'em'),
(namespaces['html'], 'event-source'),
(namespaces['html'], 'fieldset'),
(namespaces['html'], 'figcaption'),
(namespaces['html'], 'figure'),
(namespaces['html'], 'footer'),
(namespaces['html'], 'font'),
(namespaces['html'], 'form'),
(namespaces['html'], 'header'),
(namespaces['html'], 'h1'),
(namespaces['html'], 'h2'),
(namespaces['html'], 'h3'),
(namespaces['html'], 'h4'),
(namespaces['html'], 'h5'),
(namespaces['html'], 'h6'),
(namespaces['html'], 'hr'),
(namespaces['html'], 'i'),
(namespaces['html'], 'img'),
(namespaces['html'], 'input'),
(namespaces['html'], 'ins'),
(namespaces['html'], 'keygen'),
(namespaces['html'], 'kbd'),
(namespaces['html'], 'label'),
(namespaces['html'], 'legend'),
(namespaces['html'], 'li'),
(namespaces['html'], 'm'),
(namespaces['html'], 'map'),
(namespaces['html'], 'menu'),
(namespaces['html'], 'meter'),
(namespaces['html'], 'multicol'),
(namespaces['html'], 'nav'),
(namespaces['html'], 'nextid'),
(namespaces['html'], 'ol'),
(namespaces['html'], 'output'),
(namespaces['html'], 'optgroup'),
(namespaces['html'], 'option'),
(namespaces['html'], 'p'),
(namespaces['html'], 'pre'),
(namespaces['html'], 'progress'),
(namespaces['html'], 'q'),
(namespaces['html'], 's'),
(namespaces['html'], 'samp'),
(namespaces['html'], 'section'),
(namespaces['html'], 'select'),
(namespaces['html'], 'small'),
(namespaces['html'], 'sound'),
(namespaces['html'], 'source'),
(namespaces['html'], 'spacer'),
(namespaces['html'], 'span'),
(namespaces['html'], 'strike'),
(namespaces['html'], 'strong'),
(namespaces['html'], 'sub'),
(namespaces['html'], 'sup'),
(namespaces['html'], 'table'),
(namespaces['html'], 'tbody'),
(namespaces['html'], 'td'),
(namespaces['html'], 'textarea'),
(namespaces['html'], 'time'),
(namespaces['html'], 'tfoot'),
(namespaces['html'], 'th'),
(namespaces['html'], 'thead'),
(namespaces['html'], 'tr'),
(namespaces['html'], 'tt'),
(namespaces['html'], 'u'),
(namespaces['html'], 'ul'),
(namespaces['html'], 'var'),
(namespaces['html'], 'video'),
(namespaces['mathml'], 'maction'),
(namespaces['mathml'], 'math'),
(namespaces['mathml'], 'merror'),
(namespaces['mathml'], 'mfrac'),
(namespaces['mathml'], 'mi'),
(namespaces['mathml'], 'mmultiscripts'),
(namespaces['mathml'], 'mn'),
(namespaces['mathml'], 'mo'),
(namespaces['mathml'], 'mover'),
(namespaces['mathml'], 'mpadded'),
(namespaces['mathml'], 'mphantom'),
(namespaces['mathml'], 'mprescripts'),
(namespaces['mathml'], 'mroot'),
(namespaces['mathml'], 'mrow'),
(namespaces['mathml'], 'mspace'),
(namespaces['mathml'], 'msqrt'),
(namespaces['mathml'], 'mstyle'),
(namespaces['mathml'], 'msub'),
(namespaces['mathml'], 'msubsup'),
(namespaces['mathml'], 'msup'),
(namespaces['mathml'], 'mtable'),
(namespaces['mathml'], 'mtd'),
(namespaces['mathml'], 'mtext'),
(namespaces['mathml'], 'mtr'),
(namespaces['mathml'], 'munder'),
(namespaces['mathml'], 'munderover'),
(namespaces['mathml'], 'none'),
(namespaces['svg'], 'a'),
(namespaces['svg'], 'animate'),
(namespaces['svg'], 'animateColor'),
(namespaces['svg'], 'animateMotion'),
(namespaces['svg'], 'animateTransform'),
(namespaces['svg'], 'clipPath'),
(namespaces['svg'], 'circle'),
(namespaces['svg'], 'defs'),
(namespaces['svg'], 'desc'),
(namespaces['svg'], 'ellipse'),
(namespaces['svg'], 'font-face'),
(namespaces['svg'], 'font-face-name'),
(namespaces['svg'], 'font-face-src'),
(namespaces['svg'], 'g'),
(namespaces['svg'], 'glyph'),
(namespaces['svg'], 'hkern'),
(namespaces['svg'], 'linearGradient'),
(namespaces['svg'], 'line'),
(namespaces['svg'], 'marker'),
(namespaces['svg'], 'metadata'),
(namespaces['svg'], 'missing-glyph'),
(namespaces['svg'], 'mpath'),
(namespaces['svg'], 'path'),
(namespaces['svg'], 'polygon'),
(namespaces['svg'], 'polyline'),
(namespaces['svg'], 'radialGradient'),
(namespaces['svg'], 'rect'),
(namespaces['svg'], 'set'),
(namespaces['svg'], 'stop'),
(namespaces['svg'], 'svg'),
(namespaces['svg'], 'switch'),
(namespaces['svg'], 'text'),
(namespaces['svg'], 'title'),
(namespaces['svg'], 'tspan'),
(namespaces['svg'], 'use'),
))
allowed_attributes = frozenset((
# HTML attributes
(None, 'abbr'),
(None, 'accept'),
(None, 'accept-charset'),
(None, 'accesskey'),
(None, 'action'),
(None, 'align'),
(None, 'alt'),
(None, 'autocomplete'),
(None, 'autofocus'),
(None, 'axis'),
(None, 'background'),
(None, 'balance'),
(None, 'bgcolor'),
(None, 'bgproperties'),
(None, 'border'),
(None, 'bordercolor'),
(None, 'bordercolordark'),
(None, 'bordercolorlight'),
(None, 'bottompadding'),
(None, 'cellpadding'),
(None, 'cellspacing'),
(None, 'ch'),
(None, 'challenge'),
(None, 'char'),
(None, 'charoff'),
(None, 'choff'),
(None, 'charset'),
(None, 'checked'),
(None, 'cite'),
(None, 'class'),
(None, 'clear'),
(None, 'color'),
(None, 'cols'),
(None, 'colspan'),
(None, 'compact'),
(None, 'contenteditable'),
(None, 'controls'),
(None, 'coords'),
(None, 'data'),
(None, 'datafld'),
(None, 'datapagesize'),
(None, 'datasrc'),
(None, 'datetime'),
(None, 'default'),
(None, 'delay'),
(None, 'dir'),
(None, 'disabled'),
(None, 'draggable'),
(None, 'dynsrc'),
(None, 'enctype'),
(None, 'end'),
(None, 'face'),
(None, 'for'),
(None, 'form'),
(None, 'frame'),
(None, 'galleryimg'),
(None, 'gutter'),
(None, 'headers'),
(None, 'height'),
(None, 'hidefocus'),
(None, 'hidden'),
(None, 'high'),
(None, 'href'),
(None, 'hreflang'),
(None, 'hspace'),
(None, 'icon'),
(None, 'id'),
(None, 'inputmode'),
(None, 'ismap'),
(None, 'keytype'),
(None, 'label'),
(None, 'leftspacing'),
(None, 'lang'),
(None, 'list'),
(None, 'longdesc'),
(None, 'loop'),
(None, 'loopcount'),
(None, 'loopend'),
(None, 'loopstart'),
(None, 'low'),
(None, 'lowsrc'),
(None, 'max'),
(None, 'maxlength'),
(None, 'media'),
(None, 'method'),
(None, 'min'),
(None, 'multiple'),
(None, 'name'),
(None, 'nohref'),
(None, 'noshade'),
(None, 'nowrap'),
(None, 'open'),
(None, 'optimum'),
(None, 'pattern'),
(None, 'ping'),
(None, 'point-size'),
(None, 'poster'),
(None, 'pqg'),
(None, 'preload'),
(None, 'prompt'),
(None, 'radiogroup'),
(None, 'readonly'),
(None, 'rel'),
(None, 'repeat-max'),
(None, 'repeat-min'),
(None, 'replace'),
(None, 'required'),
(None, 'rev'),
(None, 'rightspacing'),
(None, 'rows'),
(None, 'rowspan'),
(None, 'rules'),
(None, 'scope'),
(None, 'selected'),
(None, 'shape'),
(None, 'size'),
(None, 'span'),
(None, 'src'),
(None, 'start'),
(None, 'step'),
(None, 'style'),
(None, 'summary'),
(None, 'suppress'),
(None, 'tabindex'),
(None, 'target'),
(None, 'template'),
(None, 'title'),
(None, 'toppadding'),
(None, 'type'),
(None, 'unselectable'),
(None, 'usemap'),
(None, 'urn'),
(None, 'valign'),
(None, 'value'),
(None, 'variable'),
(None, 'volume'),
(None, 'vspace'),
(None, 'vrml'),
(None, 'width'),
(None, 'wrap'),
(namespaces['xml'], 'lang'),
# MathML attributes
(None, 'actiontype'),
(None, 'align'),
(None, 'columnalign'),
(None, 'columnalign'),
(None, 'columnalign'),
(None, 'columnlines'),
(None, 'columnspacing'),
(None, 'columnspan'),
(None, 'depth'),
(None, 'display'),
(None, 'displaystyle'),
(None, 'equalcolumns'),
(None, 'equalrows'),
(None, 'fence'),
(None, 'fontstyle'),
(None, 'fontweight'),
(None, 'frame'),
(None, 'height'),
(None, 'linethickness'),
(None, 'lspace'),
(None, 'mathbackground'),
(None, 'mathcolor'),
(None, 'mathvariant'),
(None, 'mathvariant'),
(None, 'maxsize'),
(None, 'minsize'),
(None, 'other'),
(None, 'rowalign'),
(None, 'rowalign'),
(None, 'rowalign'),
(None, 'rowlines'),
(None, 'rowspacing'),
(None, 'rowspan'),
(None, 'rspace'),
(None, 'scriptlevel'),
(None, 'selection'),
(None, 'separator'),
(None, 'stretchy'),
(None, 'width'),
(None, 'width'),
(namespaces['xlink'], 'href'),
(namespaces['xlink'], 'show'),
(namespaces['xlink'], 'type'),
# SVG attributes
(None, 'accent-height'),
(None, 'accumulate'),
(None, 'additive'),
(None, 'alphabetic'),
(None, 'arabic-form'),
(None, 'ascent'),
(None, 'attributeName'),
(None, 'attributeType'),
(None, 'baseProfile'),
(None, 'bbox'),
(None, 'begin'),
(None, 'by'),
(None, 'calcMode'),
(None, 'cap-height'),
(None, 'class'),
(None, 'clip-path'),
(None, 'color'),
(None, 'color-rendering'),
(None, 'content'),
(None, 'cx'),
(None, 'cy'),
(None, 'd'),
(None, 'dx'),
(None, 'dy'),
(None, 'descent'),
(None, 'display'),
(None, 'dur'),
(None, 'end'),
(None, 'fill'),
(None, 'fill-opacity'),
(None, 'fill-rule'),
(None, 'font-family'),
(None, 'font-size'),
(None, 'font-stretch'),
(None, 'font-style'),
(None, 'font-variant'),
(None, 'font-weight'),
(None, 'from'),
(None, 'fx'),
(None, 'fy'),
(None, 'g1'),
(None, 'g2'),
(None, 'glyph-name'),
(None, 'gradientUnits'),
(None, 'hanging'),
(None, 'height'),
(None, 'horiz-adv-x'),
(None, 'horiz-origin-x'),
(None, 'id'),
(None, 'ideographic'),
(None, 'k'),
(None, 'keyPoints'),
(None, 'keySplines'),
(None, 'keyTimes'),
(None, 'lang'),
(None, 'marker-end'),
(None, 'marker-mid'),
(None, 'marker-start'),
(None, 'markerHeight'),
(None, 'markerUnits'),
(None, 'markerWidth'),
(None, 'mathematical'),
(None, 'max'),
(None, 'min'),
(None, 'name'),
(None, 'offset'),
(None, 'opacity'),
(None, 'orient'),
(None, 'origin'),
(None, 'overline-position'),
(None, 'overline-thickness'),
(None, 'panose-1'),
(None, 'path'),
(None, 'pathLength'),
(None, 'points'),
(None, 'preserveAspectRatio'),
(None, 'r'),
(None, 'refX'),
(None, 'refY'),
(None, 'repeatCount'),
(None, 'repeatDur'),
(None, 'requiredExtensions'),
(None, 'requiredFeatures'),
(None, 'restart'),
(None, 'rotate'),
(None, 'rx'),
(None, 'ry'),
(None, 'slope'),
(None, 'stemh'),
(None, 'stemv'),
(None, 'stop-color'),
(None, 'stop-opacity'),
(None, 'strikethrough-position'),
(None, 'strikethrough-thickness'),
(None, 'stroke'),
(None, 'stroke-dasharray'),
(None, 'stroke-dashoffset'),
(None, 'stroke-linecap'),
(None, 'stroke-linejoin'),
(None, 'stroke-miterlimit'),
(None, 'stroke-opacity'),
(None, 'stroke-width'),
(None, 'systemLanguage'),
(None, 'target'),
(None, 'text-anchor'),
(None, 'to'),
(None, 'transform'),
(None, 'type'),
(None, 'u1'),
(None, 'u2'),
(None, 'underline-position'),
(None, 'underline-thickness'),
(None, 'unicode'),
(None, 'unicode-range'),
(None, 'units-per-em'),
(None, 'values'),
(None, 'version'),
(None, 'viewBox'),
(None, 'visibility'),
(None, 'width'),
(None, 'widths'),
(None, 'x'),
(None, 'x-height'),
(None, 'x1'),
(None, 'x2'),
(namespaces['xlink'], 'actuate'),
(namespaces['xlink'], 'arcrole'),
(namespaces['xlink'], 'href'),
(namespaces['xlink'], 'role'),
(namespaces['xlink'], 'show'),
(namespaces['xlink'], 'title'),
(namespaces['xlink'], 'type'),
(namespaces['xml'], 'base'),
(namespaces['xml'], 'lang'),
(namespaces['xml'], 'space'),
(None, 'y'),
(None, 'y1'),
(None, 'y2'),
(None, 'zoomAndPan'),
))
attr_val_is_uri = frozenset((
(None, 'href'),
(None, 'src'),
(None, 'cite'),
(None, 'action'),
(None, 'longdesc'),
(None, 'poster'),
(None, 'background'),
(None, 'datasrc'),
(None, 'dynsrc'),
(None, 'lowsrc'),
(None, 'ping'),
(namespaces['xlink'], 'href'),
(namespaces['xml'], 'base'),
))
svg_attr_val_allows_ref = frozenset((
(None, 'clip-path'),
(None, 'color-profile'),
(None, 'cursor'),
(None, 'fill'),
(None, 'filter'),
(None, 'marker'),
(None, 'marker-start'),
(None, 'marker-mid'),
(None, 'marker-end'),
(None, 'mask'),
(None, 'stroke'),
))
svg_allow_local_href = frozenset((
(None, 'altGlyph'),
(None, 'animate'),
(None, 'animateColor'),
(None, 'animateMotion'),
(None, 'animateTransform'),
(None, 'cursor'),
(None, 'feImage'),
(None, 'filter'),
(None, 'linearGradient'),
(None, 'pattern'),
(None, 'radialGradient'),
(None, 'textpath'),
(None, 'tref'),
(None, 'set'),
(None, 'use')
))
allowed_css_properties = frozenset((
'azimuth',
'background-color',
'border-bottom-color',
'border-collapse',
'border-color',
'border-left-color',
'border-right-color',
'border-top-color',
'clear',
'color',
'cursor',
'direction',
'display',
'elevation',
'float',
'font',
'font-family',
'font-size',
'font-style',
'font-variant',
'font-weight',
'height',
'letter-spacing',
'line-height',
'overflow',
'pause',
'pause-after',
'pause-before',
'pitch',
'pitch-range',
'richness',
'speak',
'speak-header',
'speak-numeral',
'speak-punctuation',
'speech-rate',
'stress',
'text-align',
'text-decoration',
'text-indent',
'unicode-bidi',
'vertical-align',
'voice-family',
'volume',
'white-space',
'width',
))
allowed_css_keywords = frozenset((
'auto',
'aqua',
'black',
'block',
'blue',
'bold',
'both',
'bottom',
'brown',
'center',
'collapse',
'dashed',
'dotted',
'fuchsia',
'gray',
'green',
'!important',
'italic',
'left',
'lime',
'maroon',
'medium',
'none',
'navy',
'normal',
'nowrap',
'olive',
'pointer',
'purple',
'red',
'right',
'solid',
'silver',
'teal',
'top',
'transparent',
'underline',
'white',
'yellow',
))
allowed_svg_properties = frozenset((
'fill',
'fill-opacity',
'fill-rule',
'stroke',
'stroke-width',
'stroke-linecap',
'stroke-linejoin',
'stroke-opacity',
))
allowed_protocols = frozenset((
'ed2k',
'ftp',
'http',
'https',
'irc',
'mailto',
'news',
'gopher',
'nntp',
'telnet',
'webcal',
'xmpp',
'callto',
'feed',
'urn',
'aim',
'rsync',
'tag',
'ssh',
'sftp',
'rtsp',
'afs',
'data',
))
allowed_content_types = frozenset((
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
'image/bmp',
'text/plain',
))
data_content_type = re.compile(r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
re.VERBOSE)
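# Illustration of what this pattern accepts (not part of the module): for a
# data: URI path such as "image/png;base64,iVBORw0KGgo=" it matches and
# captures the media type:
#
#     m = data_content_type.match("image/png;base64,iVBORw0KGgo=")
#     m.group("content_type")  # -> "image/png"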
class Filter(base.Filter):
"""Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
def __init__(self,
source,
allowed_elements=allowed_elements,
allowed_attributes=allowed_attributes,
allowed_css_properties=allowed_css_properties,
allowed_css_keywords=allowed_css_keywords,
allowed_svg_properties=allowed_svg_properties,
allowed_protocols=allowed_protocols,
allowed_content_types=allowed_content_types,
attr_val_is_uri=attr_val_is_uri,
svg_attr_val_allows_ref=svg_attr_val_allows_ref,
svg_allow_local_href=svg_allow_local_href):
"""Creates a Filter
:arg allowed_elements: set of elements to allow--everything else will
be escaped
:arg allowed_attributes: set of attributes to allow in
elements--everything else will be stripped
:arg allowed_css_properties: set of CSS properties to allow--everything
else will be stripped
:arg allowed_css_keywords: set of CSS keywords to allow--everything
else will be stripped
:arg allowed_svg_properties: set of SVG properties to allow--everything
else will be removed
:arg allowed_protocols: set of allowed protocols for URIs
:arg allowed_content_types: set of allowed content types for ``data`` URIs.
:arg attr_val_is_uri: set of attributes that have URI values--values
that have a scheme not listed in ``allowed_protocols`` are removed
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
references
:arg svg_allow_local_href: set of SVG elements that can have local
hrefs--these are removed
"""
super(Filter, self).__init__(source)
self.allowed_elements = allowed_elements
self.allowed_attributes = allowed_attributes
self.allowed_css_properties = allowed_css_properties
self.allowed_css_keywords = allowed_css_keywords
self.allowed_svg_properties = allowed_svg_properties
self.allowed_protocols = allowed_protocols
self.allowed_content_types = allowed_content_types
self.attr_val_is_uri = attr_val_is_uri
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
self.svg_allow_local_href = svg_allow_local_href
def __iter__(self):
for token in base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
# are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
# ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
# are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
# allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
# accommodate filters which use token_type differently
token_type = token["type"]
if token_type in ("StartTag", "EndTag", "EmptyTag"):
name = token["name"]
namespace = token["namespace"]
if ((namespace, name) in self.allowed_elements or
(namespace is None and
(namespaces["html"], name) in self.allowed_elements)):
return self.allowed_token(token)
else:
return self.disallowed_token(token)
elif token_type == "Comment":
pass
else:
return token
def allowed_token(self, token):
if "data" in token:
attrs = token["data"]
attr_names = set(attrs.keys())
# Remove forbidden attributes
for to_remove in (attr_names - self.allowed_attributes):
del token["data"][to_remove]
attr_names.remove(to_remove)
# Remove attributes with disallowed URL values
for attr in (attr_names & self.attr_val_is_uri):
assert attr in attrs
# I don't have a clue where this regexp comes from or why it matches those
# characters, nor why we call unescape. I just know it's always been here.
# Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
# this will do is remove *more* than it otherwise would.
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
try:
uri = urlparse.urlparse(val_unescaped)
except ValueError:
uri = None
del attrs[attr]
if uri and uri.scheme:
if uri.scheme not in self.allowed_protocols:
del attrs[attr]
if uri.scheme == 'data':
m = data_content_type.match(uri.path)
if not m:
del attrs[attr]
elif m.group('content_type') not in self.allowed_content_types:
del attrs[attr]
for attr in self.svg_attr_val_allows_ref:
if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
' ',
unescape(attrs[attr]))
if (token["name"] in self.svg_allow_local_href and
(namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
attrs[(namespaces['xlink'], 'href')])):
del attrs[(namespaces['xlink'], 'href')]
if (None, 'style') in attrs:
attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
token["data"] = attrs
return token
def disallowed_token(self, token):
token_type = token["type"]
if token_type == "EndTag":
token["data"] = "</%s>" % token["name"]
elif token["data"]:
assert token_type in ("StartTag", "EmptyTag")
attrs = []
for (ns, name), v in token["data"].items():
attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
else:
token["data"] = "<%s>" % token["name"]
if token.get("selfClosing"):
token["data"] = token["data"][:-1] + "/>"
token["type"] = "Characters"
del token["name"]
return token
def sanitize_css(self, style):
# disallow urls
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
# gauntlet
if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
return ''
if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
return ''
clean = []
for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
if not value:
continue
if prop.lower() in self.allowed_css_properties:
clean.append(prop + ': ' + value + ';')
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
'padding']:
for keyword in value.split():
if keyword not in self.allowed_css_keywords and \
not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
break
else:
clean.append(prop + ': ' + value + ';')
elif prop.lower() in self.allowed_svg_properties:
clean.append(prop + ': ' + value + ';')
return ' '.join(clean)
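
An end-to-end sketch of the sanitizer, matching the commented examples above (hedged: exact output depends on serializer defaults):

    import html5lib
    from html5lib.filters.sanitizer import Filter as SanitizerFilter
    from html5lib.serializer import HTMLSerializer

    doc = html5lib.parse('<a href="javascript:evil()">Click here for $100</a>')
    walker = html5lib.getTreeWalker("etree")
    print(HTMLSerializer().render(SanitizerFilter(walker(doc))))
    # -> '<a>Click here for $100</a>' (the javascript: href is dropped)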


@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
import re
from . import _base
from . import base
from ..constants import rcdataElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
class Filter(_base.Filter):
class Filter(base.Filter):
"""Collapses whitespace except in pre, textarea, and script elements"""
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
def __iter__(self):
preserve = 0
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag" \
and (preserve or token["name"] in self.spacePreserveElements):
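
A sketch of the filter's effect (illustrative): whitespace runs collapse to a single space except inside preserved elements such as pre:

    import html5lib
    from html5lib.filters.whitespace import Filter as WhitespaceFilter
    from html5lib.serializer import HTMLSerializer

    doc = html5lib.parse("<p>a    b</p><pre>a    b</pre>")
    walker = html5lib.getTreeWalker("etree")
    print(HTMLSerializer().render(WhitespaceFilter(walker(doc))))
    # -> '<p>a b</p><pre>a    b</pre>'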

(The diff for this file is not shown because of its large size.)


@ -0,0 +1,409 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
import re
from codecs import register_error, xmlcharrefreplace_errors
from .constants import voidElements, booleanAttributes, spaceCharacters
from .constants import rcdataElements, entities, xmlEntities
from . import treewalkers, _utils
from xml.sax.saxutils import escape
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
"\u3000]")
_encode_entity_map = {}
_is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
# skip multi-character entities
if ((_is_ucs4 and len(v) > 1) or
(not _is_ucs4 and len(v) > 2)):
continue
if v != "&":
if len(v) == 2:
v = _utils.surrogatePairToCodepoint(v)
else:
v = ord(v)
if v not in _encode_entity_map or k.islower():
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
_encode_entity_map[v] = k
def htmlentityreplace_errors(exc):
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
res = []
codepoints = []
skip = False
for i, c in enumerate(exc.object[exc.start:exc.end]):
if skip:
skip = False
continue
index = i + exc.start
if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
skip = True
else:
codepoint = ord(c)
codepoints.append(codepoint)
for cp in codepoints:
e = _encode_entity_map.get(cp)
if e:
res.append("&")
res.append(e)
if not e.endswith(";"):
res.append(";")
else:
res.append("&#x%s;" % (hex(cp)[2:]))
return ("".join(res), exc.end)
else:
return xmlcharrefreplace_errors(exc)
register_error("htmlentityreplace", htmlentityreplace_errors)
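# Illustration (not part of the module): once registered, the handler lets
# byte-oriented encodes fall back to named character references:
#
#     "\u2020 \u00a0".encode("ascii", "htmlentityreplace")
#     # -> b'&dagger; &nbsp;'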
def serialize(input, tree="etree", encoding=None, **serializer_opts):
"""Serializes the input token stream using the specified treewalker
:arg input: the token stream to serialize
:arg tree: the treewalker to use
:arg encoding: the encoding to use
:arg serializer_opts: any options to pass to the
:py:class:`html5lib.serializer.HTMLSerializer` that gets created
:returns: the tree serialized as a string
Example:
>>> from html5lib.html5parser import parse
>>> from html5lib.serializer import serialize
>>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
>>> serialize(token_stream, omit_optional_tags=False)
'<html><head></head><body><p>Hi!</p></body></html>'
"""
# XXX: Should we cache this?
walker = treewalkers.getTreeWalker(tree)
s = HTMLSerializer(**serializer_opts)
return s.render(walker(input), encoding)
class HTMLSerializer(object):
# attribute quoting options
quote_attr_values = "legacy" # be secure by default
quote_char = '"'
use_best_quote_char = True
# tag syntax options
omit_optional_tags = True
minimize_boolean_attributes = True
use_trailing_solidus = False
space_before_trailing_solidus = True
# escaping options
escape_lt_in_attrs = False
escape_rcdata = False
resolve_entities = True
# miscellaneous options
alphabetical_attributes = False
inject_meta_charset = True
strip_whitespace = False
sanitize = False
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
"omit_optional_tags", "minimize_boolean_attributes",
"use_trailing_solidus", "space_before_trailing_solidus",
"escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
"alphabetical_attributes", "inject_meta_charset",
"strip_whitespace", "sanitize")
def __init__(self, **kwargs):
"""Initialize HTMLSerializer
:arg inject_meta_charset: Whether or not to inject the meta charset.
Defaults to ``True``.
:arg quote_attr_values: Whether to quote attribute values that don't
require quoting per legacy browser behavior (``"legacy"``), when
required by the standard (``"spec"``), or always (``"always"``).
Defaults to ``"legacy"``.
:arg quote_char: Use given quote character for attribute quoting.
Defaults to ``"`` which will use double quotes unless attribute
value contains a double quote, in which case single quotes are
used.
:arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
values.
Defaults to ``False``.
:arg escape_rcdata: Whether to escape characters that need to be
escaped within normal elements within rcdata elements such as
style.
Defaults to ``False``.
:arg resolve_entities: Whether to resolve named character entities that
appear in the source tree. The XML predefined entities &lt; &gt;
&amp; &quot; &apos; are unaffected by this setting.
Defaults to ``True``.
:arg strip_whitespace: Whether to remove semantically meaningless
whitespace. (This compresses all whitespace to a single space
except within ``pre``.)
Defaults to ``False``.
:arg minimize_boolean_attributes: Shortens boolean attributes to give
just the attribute value, for example::
<input disabled="disabled">
becomes::
<input disabled>
Defaults to ``True``.
:arg use_trailing_solidus: Includes a close-tag slash at the end of the
start tag of void elements (empty elements whose end tag is
forbidden). E.g. ``<hr/>``.
Defaults to ``False``.
:arg space_before_trailing_solidus: Places a space immediately before
the closing slash in a tag using a trailing solidus. E.g.
``<hr />``. Requires ``use_trailing_solidus=True``.
Defaults to ``True``.
:arg sanitize: Strip all unsafe or unknown constructs from output.
See :py:class:`html5lib.filters.sanitizer.Filter`.
Defaults to ``False``.
:arg omit_optional_tags: Omit start/end tags that are optional.
Defaults to ``True``.
:arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
Defaults to ``False``.
"""
unexpected_args = frozenset(kwargs) - frozenset(self.options)
if len(unexpected_args) > 0:
raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
if 'quote_char' in kwargs:
self.use_best_quote_char = False
for attr in self.options:
setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
self.errors = []
self.strict = False
def encode(self, string):
assert(isinstance(string, text_type))
if self.encoding:
return string.encode(self.encoding, "htmlentityreplace")
else:
return string
def encodeStrict(self, string):
assert(isinstance(string, text_type))
if self.encoding:
return string.encode(self.encoding, "strict")
else:
return string
def serialize(self, treewalker, encoding=None):
# pylint:disable=too-many-nested-blocks
self.encoding = encoding
in_cdata = False
self.errors = []
if encoding and self.inject_meta_charset:
from .filters.inject_meta_charset import Filter
treewalker = Filter(treewalker, encoding)
# Alphabetical attributes is here under the assumption that none of
# the later filters add or change order of attributes; it needs to be
# before the sanitizer so escaped elements come out correctly
if self.alphabetical_attributes:
from .filters.alphabeticalattributes import Filter
treewalker = Filter(treewalker)
# WhitespaceFilter should be used before OptionalTagFilter
# for maximum efficiency of the latter filter
if self.strip_whitespace:
from .filters.whitespace import Filter
treewalker = Filter(treewalker)
if self.sanitize:
from .filters.sanitizer import Filter
treewalker = Filter(treewalker)
if self.omit_optional_tags:
from .filters.optionaltags import Filter
treewalker = Filter(treewalker)
for token in treewalker:
type = token["type"]
if type == "Doctype":
doctype = "<!DOCTYPE %s" % token["name"]
if token["publicId"]:
doctype += ' PUBLIC "%s"' % token["publicId"]
elif token["systemId"]:
doctype += " SYSTEM"
if token["systemId"]:
if token["systemId"].find('"') >= 0:
if token["systemId"].find("'") >= 0:
self.serializeError("System identifer contains both single and double quote characters")
quote_char = "'"
else:
quote_char = '"'
doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
doctype += ">"
yield self.encodeStrict(doctype)
elif type in ("Characters", "SpaceCharacters"):
if type == "SpaceCharacters" or in_cdata:
if in_cdata and token["data"].find("</") >= 0:
self.serializeError("Unexpected </ in CDATA")
yield self.encode(token["data"])
else:
yield self.encode(escape(token["data"]))
elif type in ("StartTag", "EmptyTag"):
name = token["name"]
yield self.encodeStrict("<%s" % name)
if name in rcdataElements and not self.escape_rcdata:
in_cdata = True
elif in_cdata:
self.serializeError("Unexpected child element of a CDATA element")
for (_, attr_name), attr_value in token["data"].items():
# TODO: Add namespace support here
k = attr_name
v = attr_value
yield self.encodeStrict(' ')
yield self.encodeStrict(k)
if not self.minimize_boolean_attributes or \
(k not in booleanAttributes.get(name, tuple()) and
k not in booleanAttributes.get("", tuple())):
yield self.encodeStrict("=")
if self.quote_attr_values == "always" or len(v) == 0:
quote_attr = True
elif self.quote_attr_values == "spec":
quote_attr = _quoteAttributeSpec.search(v) is not None
elif self.quote_attr_values == "legacy":
quote_attr = _quoteAttributeLegacy.search(v) is not None
else:
raise ValueError("quote_attr_values must be one of: "
"'always', 'spec', or 'legacy'")
v = v.replace("&", "&amp;")
if self.escape_lt_in_attrs:
v = v.replace("<", "&lt;")
if quote_attr:
quote_char = self.quote_char
if self.use_best_quote_char:
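                            # When possible, pick whichever quote character
                            # does not appear in the value, so the value
                            # itself needs no extra escaping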
if "'" in v and '"' not in v:
quote_char = '"'
elif '"' in v and "'" not in v:
quote_char = "'"
if quote_char == "'":
v = v.replace("'", "&#39;")
else:
v = v.replace('"', "&quot;")
yield self.encodeStrict(quote_char)
yield self.encode(v)
yield self.encodeStrict(quote_char)
else:
yield self.encode(v)
if name in voidElements and self.use_trailing_solidus:
if self.space_before_trailing_solidus:
yield self.encodeStrict(" /")
else:
yield self.encodeStrict("/")
yield self.encode(">")
elif type == "EndTag":
name = token["name"]
if name in rcdataElements:
in_cdata = False
elif in_cdata:
self.serializeError("Unexpected child element of a CDATA element")
yield self.encodeStrict("</%s>" % name)
elif type == "Comment":
data = token["data"]
if data.find("--") >= 0:
self.serializeError("Comment contains --")
yield self.encodeStrict("<!--%s-->" % token["data"])
elif type == "Entity":
name = token["name"]
key = name + ";"
if key not in entities:
self.serializeError("Entity %s not recognized" % name)
if self.resolve_entities and key not in xmlEntities:
data = entities[key]
else:
data = "&%s;" % name
yield self.encodeStrict(data)
else:
self.serializeError(token["data"])
def render(self, treewalker, encoding=None):
"""Serializes the stream from the treewalker into a string
:arg treewalker: the treewalker to serialize
:arg encoding: the string encoding to use
:returns: the serialized tree
Example:
>>> from html5lib import parse, getTreeWalker
>>> from html5lib.serializer import HTMLSerializer
>>> token_stream = parse('<html><body>Hi!</body></html>')
>>> walker = getTreeWalker('etree')
>>> serializer = HTMLSerializer(omit_optional_tags=False)
>>> serializer.render(walker(token_stream))
'<html><head></head><body>Hi!</body></html>'
"""
if encoding:
return b"".join(list(self.serialize(treewalker, encoding)))
else:
return "".join(list(self.serialize(treewalker)))
def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
# XXX The idea is to make data mandatory.
self.errors.append(data)
if self.strict:
raise SerializeError
class SerializeError(Exception):
"""Error in serialized tree"""
pass
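
A minimal usage sketch of the streaming interface above (an illustrative addition, not part of the vendored file; assumes html5lib and the standard ``etree`` tree builder are importable):

# Sketch: stream serialized output as UTF-8 bytes.
from html5lib import parse, getTreeWalker
from html5lib.serializer import HTMLSerializer

dom = parse("<p class=x>hi")
walker = getTreeWalker("etree")
serializer = HTMLSerializer(quote_attr_values="always")
for chunk in serializer.serialize(walker(dom), encoding="utf-8"):
    pass  # each chunk is a bytes object because an encoding was given

``render()`` simply joins these chunks, as its doctest above shows.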

View file

@@ -0,0 +1,108 @@
from __future__ import print_function
import os.path
import sys
import pkg_resources
import pytest
from .tree_construction import TreeConstructionFile
from .tokenizer import TokenizerFile
from .sanitizer import SanitizerFile
_dir = os.path.abspath(os.path.dirname(__file__))
_root = os.path.join(_dir, "..", "..")
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")
_tokenizer = os.path.join(_testdata, "tokenizer")
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
def fail_if_missing_pytest_expect():
"""Throws an exception halting pytest if pytest-expect isn't working"""
try:
from pytest_expect import expect # noqa
except ImportError:
header = '*' * 78
print(
'\n' +
header + '\n' +
'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
'installed. Please install them both before running pytest.\n' +
header + '\n',
file=sys.stderr
)
raise
fail_if_missing_pytest_expect()
def pytest_configure(config):
msgs = []
if not os.path.exists(_testdata):
msg = "testdata not available! "
if os.path.exists(os.path.join(_root, ".git")):
msg += ("Please run git submodule update --init --recursive " +
"and then run tests again.")
else:
msg += ("The testdata doesn't appear to be included with this package, " +
"so finding the right version will be hard. :(")
msgs.append(msg)
if config.option.update_xfail:
# Check for optional requirements
req_file = os.path.join(_root, "requirements-optional.txt")
if os.path.exists(req_file):
with open(req_file, "r") as fp:
for line in fp:
if (line.strip() and
not (line.startswith("-r") or
line.startswith("#"))):
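                        # A requirement line may end with a PEP 508
                        # environment marker after ";"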
if ";" in line:
spec, marker = line.strip().split(";", 1)
else:
spec, marker = line.strip(), None
req = pkg_resources.Requirement.parse(spec)
if marker and not pkg_resources.evaluate_marker(marker):
msgs.append("%s not available in this environment" % spec)
else:
try:
installed = pkg_resources.working_set.find(req)
except pkg_resources.VersionConflict:
msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
else:
if not installed:
msgs.append("Need %s" % spec)
# Check cElementTree
import xml.etree.ElementTree as ElementTree
try:
import xml.etree.cElementTree as cElementTree
except ImportError:
msgs.append("cElementTree unable to be imported")
else:
if cElementTree.Element is ElementTree.Element:
msgs.append("cElementTree is just an alias for ElementTree")
if msgs:
pytest.exit("\n".join(msgs))
def pytest_collect_file(path, parent):
dir = os.path.abspath(path.dirname)
dir_and_parents = set()
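    # Walk upward collecting this directory and every ancestor;
    # os.path.dirname is a fixed point at the filesystem root,
    # which terminates the loop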
while dir not in dir_and_parents:
dir_and_parents.add(dir)
dir = os.path.dirname(dir)
if _tree_construction in dir_and_parents:
if path.ext == ".dat":
return TreeConstructionFile(path, parent)
elif _tokenizer in dir_and_parents:
if path.ext == ".test":
return TokenizerFile(path, parent)
elif _sanitizer_testdata in dir_and_parents:
if path.ext == ".dat":
return SanitizerFile(path, parent)

View file

@@ -0,0 +1,433 @@
[
{
"name": "IE_Comments",
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
"output": ""
},
{
"name": "IE_Comments_2",
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
"output": "&lt;script&gt;alert('XSS');&lt;/script&gt;"
},
{
"name": "allow_colons_in_path_component",
"input": "<a href=\"./this:that\">foo</a>",
"output": "<a href='./this:that'>foo</a>"
},
{
"name": "background_attribute",
"input": "<div background=\"javascript:alert('XSS')\"></div>",
"output": "<div></div>"
},
{
"name": "bgsound",
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
"output": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
},
{
"name": "div_background_image_unicode_encoded",
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "div_expression",
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "double_open_angle_brackets",
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
"output": ""
},
{
"name": "double_open_angle_brackets_2",
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
"output": ""
},
{
"name": "grave_accents",
"input": "<img src=`javascript:alert('XSS')` />",
"output": "<img/>"
},
{
"name": "img_dynsrc_lowsrc",
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
"output": "<img/>"
},
{
"name": "img_vbscript",
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
"output": "<img/>"
},
{
"name": "input_image",
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
"output": "<input type='image'/>"
},
{
"name": "link_stylesheets",
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
"output": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"&gt;"
},
{
"name": "link_stylesheets_2",
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
"output": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"&gt;"
},
{
"name": "list_style_image",
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
"output": "<li style=''>foo</li>"
},
{
"name": "no_closing_script_tags",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
},
{
"name": "non_alpha_non_digit",
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
},
{
"name": "non_alpha_non_digit_2",
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
"output": "<a>foo</a>"
},
{
"name": "non_alpha_non_digit_3",
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
"output": "<img src='http://ha.ckers.org/xss.js'/>"
},
{
"name": "non_alpha_non_digit_II",
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
"output": "<a>foo</a>"
},
{
"name": "non_alpha_non_digit_III",
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
"output": "<a>foo</a>"
},
{
"name": "platypus",
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
},
{
"name": "protocol_resolution_in_script_tag",
"input": "<script src=//ha.ckers.org/.j></script>",
"output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;"
},
{
"name": "should_allow_anchors",
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
"output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
},
{
"name": "should_allow_image_alt_attribute",
"input": "<img alt='foo' onclick='bar' />",
"output": "<img alt='foo'/>"
},
{
"name": "should_allow_image_height_attribute",
"input": "<img height='foo' onclick='bar' />",
"output": "<img height='foo'/>"
},
{
"name": "should_allow_image_src_attribute",
"input": "<img src='foo' onclick='bar' />",
"output": "<img src='foo'/>"
},
{
"name": "should_allow_image_width_attribute",
"input": "<img width='foo' onclick='bar' />",
"output": "<img width='foo'/>"
},
{
"name": "should_handle_blank_text",
"input": "",
"output": ""
},
{
"name": "should_handle_malformed_image_tags",
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
"output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;"
},
{
"name": "should_handle_non_html",
"input": "abc",
"output": "abc"
},
{
"name": "should_not_fall_for_ridiculous_hack",
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_0",
"input": "<img src=\"javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_1",
"input": "<img src=javascript:alert('XSS') />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_10",
"input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_11",
"input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_12",
"input": "<img src=\" &#14; javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_13",
"input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_14",
"input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_2",
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_3",
"input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_4",
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_5",
"input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_6",
"input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_7",
"input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_8",
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_9",
"input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_sanitize_half_open_scripts",
"input": "<img src=\"javascript:alert('XSS')\"",
"output": ""
},
{
"name": "should_sanitize_invalid_script_tag",
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
"input": "<<script>alert(\"XSS\");//<</script>",
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
"output": ""
},
{
"name": "should_sanitize_tag_broken_up_by_null",
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;"
},
{
"name": "should_sanitize_unclosed_script",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo"
},
{
"name": "xml_base",
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
"output": "<div>foo</div>"
},
{
"name": "xul",
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
"output": "<p style=''>fubar</p>"
},
{
"name": "quotes_in_attributes",
"input": "<img src='foo' title='\"foo\" bar' />",
"output": "<img src='foo' title='\"foo\" bar'/>"
},
{
"name": "uri_refs_in_svg_attributes",
"input": "<svg><rect fill='url(#foo)' />",
"output": "<svg><rect fill='url(#foo)'></rect></svg>"
},
{
"name": "absolute_uri_refs_in_svg_attributes",
"input": "<svg><rect fill='url(http://bad.com/) #fff' />",
"output": "<svg><rect fill=' #fff'></rect></svg>"
},
{
"name": "uri_ref_with_space_in svg_attribute",
"input": "<svg><rect fill='url(\n#foo)' />",
"output": "<svg><rect fill='url(\n#foo)'></rect></svg>"
},
{
"name": "absolute_uri_ref_with_space_in svg_attribute",
"input": "<svg><rect fill=\"url(\nhttp://bad.com/)\" />",
"output": "<svg><rect fill=' '></rect></svg>"
},
{
"name": "allow_html5_image_tag",
"input": "<image src='foo' />",
"output": "<img src='foo'/>"
},
{
"name": "style_attr_end_with_nothing",
"input": "<div style=\"color: blue\" />",
"output": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_space",
"input": "<div style=\"color: blue \" />",
"output": "<div style='color: blue ;'></div>"
},
{
"name": "style_attr_end_with_semicolon",
"input": "<div style=\"color: blue;\" />",
"output": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_semicolon_space",
"input": "<div style=\"color: blue; \" />",
"output": "<div style='color: blue;'></div>"
},
{
"name": "attributes_with_embedded_quotes",
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
"output": "<img src='doesntexist.jpg\"&#39;onerror=\"alert(1)'/>"
},
{
"name": "attributes_with_embedded_quotes_II",
"input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
"output": "<img src='notthere.jpg\"\"onerror=\"alert(2)'/>"
}
]
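
Each record above pairs a raw input with its expected sanitized serialization. A minimal sketch of replaying one record through the sanitizer (the file path and serializer options here are assumptions; the actual harness lives in the sanitizer test module imported by the conftest above):

# Sketch: run the vendored sanitizer over one test record.
import json
import html5lib
from html5lib.serializer import serialize

with open("sanitizer-testdata/sanitizer.dat") as fp:  # assumed path
    cases = json.load(fp)

case = cases[0]
fragment = html5lib.parseFragment(case["input"])
# sanitize=True applies html5lib.filters.sanitizer.Filter during
# serialization; matching the "output" strings exactly also depends on
# the quoting options the real harness passes to the serializer.
result = serialize(fragment, sanitize=True, omit_optional_tags=False)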

Some files were not shown because too many files changed in this diff