diff --git a/testing/web-platform/meta/MANIFEST.json b/testing/web-platform/meta/MANIFEST.json
index 2b42766a8579..e885533838bf 100644
--- a/testing/web-platform/meta/MANIFEST.json
+++ b/testing/web-platform/meta/MANIFEST.json
@@ -405432,7 +405432,7 @@
"support"
],
"./.gitmodules": [
- "6a203e28d43909d7513daf8761281b351d2b2bd7",
+ "9e008399bdce736c7c03f7db0c3e8d624083c6b9",
"support"
],
"./.pyup.yml": [
diff --git a/testing/web-platform/tests/.gitmodules b/testing/web-platform/tests/.gitmodules
index 90c91af579f8..f40c77f6331e 100644
--- a/testing/web-platform/tests/.gitmodules
+++ b/testing/web-platform/tests/.gitmodules
@@ -1,6 +1,3 @@
-[submodule "tools/html5lib/html5lib/tests/testdata"]
- path = tools/html5lib/html5lib/tests/testdata
- url = https://github.com/html5lib/html5lib-tests.git
[submodule "resources/webidl2/test/widlproc"]
path = resources/webidl2/test/widlproc
url = https://github.com/dontcallmedom/widlproc.git
\ No newline at end of file
diff --git a/testing/web-platform/tests/tools/html5lib/.gitignore b/testing/web-platform/tests/tools/html5lib/.gitignore
deleted file mode 100644
index 73d97fec0733..000000000000
--- a/testing/web-platform/tests/tools/html5lib/.gitignore
+++ /dev/null
@@ -1,20 +0,0 @@
-# Because we never want compiled Python
-__pycache__/
-*.pyc
-
-# Ignore stuff produced by distutils
-/build/
-/dist/
-/MANIFEST
-
-# Generated by parse.py -p
-stats.prof
-
-# From cover (esp. in combination with nose)
-.coverage
-
-# Because tox's data is inherently local
-/.tox/
-
-# We have no interest in built Sphinx files
-/doc/_build
diff --git a/testing/web-platform/tests/tools/html5lib/.travis.yml b/testing/web-platform/tests/tools/html5lib/.travis.yml
deleted file mode 100644
index dd3130019e7a..000000000000
--- a/testing/web-platform/tests/tools/html5lib/.travis.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-language: python
-python:
- - "2.6"
- - "2.7"
- - "3.2"
- - "3.3"
- - "3.4"
- - "pypy"
-
-env:
- - USE_OPTIONAL=true
- - USE_OPTIONAL=false
-
-matrix:
- exclude:
- - python: "2.7"
- env: USE_OPTIONAL=false
- - python: "3.4"
- env: USE_OPTIONAL=false
- include:
- - python: "2.7"
- env: USE_OPTIONAL=false FLAKE=true
- - python: "3.4"
- env: USE_OPTIONAL=false FLAKE=true
-
-before_install:
- - git submodule update --init --recursive
-
-install:
- - bash requirements-install.sh
-
-script:
- - nosetests
- - bash flake8-run.sh
-
-after_script:
- - python debug-info.py
diff --git a/testing/web-platform/tests/tools/html5lib/CHANGES.rst b/testing/web-platform/tests/tools/html5lib/CHANGES.rst
deleted file mode 100644
index 1431b3c9b8fa..000000000000
--- a/testing/web-platform/tests/tools/html5lib/CHANGES.rst
+++ /dev/null
@@ -1,171 +0,0 @@
-Change Log
-----------
-
-0.9999
-~~~~~~
-
-Released on XXX, 2014
-
-* XXX
-
-
-0.999
-~~~~~
-
-Released on December 23, 2013
-
-* Fix #127: add work-around for CPython issue #20007: .read(0) on
- http.client.HTTPResponse drops the rest of the content.
-
-* Fix #115: lxml treewalker can now deal with fragments containing, at
- their root level, text nodes with non-ASCII characters on Python 2.
-
-
-0.99
-~~~~
-
-Released on September 10, 2013
-
-* No library changes from 1.0b3; released as 0.99 as pip has changed
- behaviour from 1.4 to avoid installing pre-release versions per
- PEP 440.
-
-
-1.0b3
-~~~~~
-
-Released on July 24, 2013
-
-* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
- implementation using it should be moved to
- ``NonRecursiveTreeWalker``, as everything bundled with html5lib has
- for years.
-
-* Fix #67 so that ``BufferedStream`` to correctly returns a bytes
- object, thereby fixing any case where html5lib is passed a
- non-seekable RawIOBase-like object.
-
-
-1.0b2
-~~~~~
-
-Released on June 27, 2013
-
-* Removed reordering of attributes within the serializer. There is now
- an ``alphabetical_attributes`` option which preserves the previous
- behaviour through a new filter. This allows attribute order to be
- preserved through html5lib if the tree builder preserves order.
-
-* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
- ``treeadapters.sax.to_sax`` which is generic and supports any
- treewalker; it also resolves all known bugs with ``dom2sax``.
-
-* Fix treewalker assertions on hitting bytes strings on
- Python 2. Previous to 1.0b1, treewalkers coped with mixed
- bytes/unicode data on Python 2; this reintroduces this prior
- behaviour on Python 2. Behaviour is unchanged on Python 3.
-
-
-1.0b1
-~~~~~
-
-Released on May 17, 2013
-
-* Implementation updated to implement the `HTML specification
- `_ as of 5th May
- 2013 (`SVN `_ revision r7867).
-
-* Python 3.2+ supported in a single codebase using the ``six`` library.
-
-* Removed support for Python 2.5 and older.
-
-* Removed the deprecated Beautiful Soup 3 treebuilder.
- ``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
- since it doesn't support namespaces, foreign content like SVG and
- MathML is parsed incorrectly.
-
-* Removed ``simpletree`` from the package. The default tree builder is
- now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
- available, and ``xml.etree.ElementTree`` otherwise).
-
-* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
- output was well-formed XML, and hence provided little of use.
-
-* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
- longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
- return the default DOM treebuilder, which uses ``xml.dom.minidom``.
-
-* Optional heuristic character encoding detection now based on
- ``charade`` for Python 2.6 - 3.3 compatibility.
-
-* Optional ``Genshi`` treewalker support fixed.
-
-* Many bugfixes, including:
-
- * #33: null in attribute value breaks XML AttValue;
-
- * #4: nested, indirect descendant, causes infinite loop;
-
- * `Google Code 215
- `_: Properly
- detect seekable streams;
-
- * `Google Code 206
- `_: add
- support for , ;
-
- * `Google Code 205
- `_: add
- support for ;
-
- * `Google Code 202
- `_: Unicode
- file breaks InputStream.
-
-* Source code is now mostly PEP 8 compliant.
-
-* Test harness has been improved and now depends on ``nose``.
-
-* Documentation updated and moved to http://html5lib.readthedocs.org/.
-
-
-0.95
-~~~~
-
-Released on February 11, 2012
-
-
-0.90
-~~~~
-
-Released on January 17, 2010
-
-
-0.11.1
-~~~~~~
-
-Released on June 12, 2008
-
-
-0.11
-~~~~
-
-Released on June 10, 2008
-
-
-0.10
-~~~~
-
-Released on October 7, 2007
-
-
-0.9
-~~~
-
-Released on March 11, 2007
-
-
-0.2
-~~~
-
-Released on January 8, 2007
diff --git a/testing/web-platform/tests/tools/html5lib/doc/html5lib.rst b/testing/web-platform/tests/tools/html5lib/doc/html5lib.rst
deleted file mode 100644
index d4ed12b46469..000000000000
--- a/testing/web-platform/tests/tools/html5lib/doc/html5lib.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-html5lib Package
-================
-
-:mod:`html5lib` Package
------------------------
-
-.. automodule:: html5lib.__init__
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`constants` Module
------------------------
-
-.. automodule:: html5lib.constants
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`html5parser` Module
--------------------------
-
-.. automodule:: html5lib.html5parser
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`ihatexml` Module
-----------------------
-
-.. automodule:: html5lib.ihatexml
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`inputstream` Module
--------------------------
-
-.. automodule:: html5lib.inputstream
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`sanitizer` Module
------------------------
-
-.. automodule:: html5lib.sanitizer
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`tokenizer` Module
------------------------
-
-.. automodule:: html5lib.tokenizer
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`utils` Module
--------------------
-
-.. automodule:: html5lib.utils
- :members:
- :undoc-members:
- :show-inheritance:
-
-Subpackages
------------
-
-.. toctree::
-
- html5lib.filters
- html5lib.serializer
- html5lib.treebuilders
- html5lib.treewalkers
-
diff --git a/testing/web-platform/tests/tools/html5lib/doc/html5lib.serializer.rst b/testing/web-platform/tests/tools/html5lib/doc/html5lib.serializer.rst
deleted file mode 100644
index fa9547421b13..000000000000
--- a/testing/web-platform/tests/tools/html5lib/doc/html5lib.serializer.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-serializer Package
-==================
-
-:mod:`serializer` Package
--------------------------
-
-.. automodule:: html5lib.serializer
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`htmlserializer` Module
-----------------------------
-
-.. automodule:: html5lib.serializer.htmlserializer
- :members:
- :undoc-members:
- :show-inheritance:
-
diff --git a/testing/web-platform/tests/tools/html5lib/doc/html5lib.treewalkers.rst b/testing/web-platform/tests/tools/html5lib/doc/html5lib.treewalkers.rst
deleted file mode 100644
index 80595e2d7d0e..000000000000
--- a/testing/web-platform/tests/tools/html5lib/doc/html5lib.treewalkers.rst
+++ /dev/null
@@ -1,59 +0,0 @@
-treewalkers Package
-===================
-
-:mod:`treewalkers` Package
---------------------------
-
-.. automodule:: html5lib.treewalkers
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`_base` Module
--------------------
-
-.. automodule:: html5lib.treewalkers._base
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`dom` Module
------------------
-
-.. automodule:: html5lib.treewalkers.dom
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`etree` Module
--------------------
-
-.. automodule:: html5lib.treewalkers.etree
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`genshistream` Module
---------------------------
-
-.. automodule:: html5lib.treewalkers.genshistream
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`lxmletree` Module
------------------------
-
-.. automodule:: html5lib.treewalkers.lxmletree
- :members:
- :undoc-members:
- :show-inheritance:
-
-:mod:`pulldom` Module
----------------------
-
-.. automodule:: html5lib.treewalkers.pulldom
- :members:
- :undoc-members:
- :show-inheritance:
-
diff --git a/testing/web-platform/tests/tools/html5lib/flake8-run.sh b/testing/web-platform/tests/tools/html5lib/flake8-run.sh
deleted file mode 100755
index d1a587d35067..000000000000
--- a/testing/web-platform/tests/tools/html5lib/flake8-run.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash -e
-
-if [[ ! -x $(which flake8) ]]; then
- echo "fatal: flake8 not found on $PATH. Exiting."
- exit 1
-fi
-
-if [[ $TRAVIS != "true" || $FLAKE == "true" ]]; then
- find html5lib/ -name '*.py' -and -not -name 'constants.py' -print0 | xargs -0 flake8 --ignore=E501
- flake1=$?
- flake8 --max-line-length=99 --ignore=E126 html5lib/constants.py
- flake2=$?
- exit $[$flake1 || $flake2]
-fi
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/__init__.py b/testing/web-platform/tests/tools/html5lib/html5lib/__init__.py
deleted file mode 100644
index a67a652b9990..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/__init__.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""
-HTML parsing library based on the WHATWG "HTML5"
-specification. The parser is designed to be compatible with existing
-HTML found in the wild and implements well-defined error recovery that
-is largely compatible with modern desktop web browsers.
-
-Example usage:
-
-import html5lib
-f = open("my_document.html")
-tree = html5lib.parse(f)
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-from .html5parser import HTMLParser, parse, parseFragment
-from .treebuilders import getTreeBuilder
-from .treewalkers import getTreeWalker
-from .serializer import serialize
-
-__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
- "getTreeWalker", "serialize"]
-__version__ = "0.9999-dev"
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/filters/alphabeticalattributes.py b/testing/web-platform/tests/tools/html5lib/html5lib/filters/alphabeticalattributes.py
deleted file mode 100644
index fed6996c1d9b..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/filters/alphabeticalattributes.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-
-try:
- from collections import OrderedDict
-except ImportError:
- from ordereddict import OrderedDict
-
-
-class Filter(_base.Filter):
- def __iter__(self):
- for token in _base.Filter.__iter__(self):
- if token["type"] in ("StartTag", "EmptyTag"):
- attrs = OrderedDict()
- for name, value in sorted(token["data"].items(),
- key=lambda x: x[0]):
- attrs[name] = value
- token["data"] = attrs
- yield token
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/filters/lint.py b/testing/web-platform/tests/tools/html5lib/html5lib/filters/lint.py
deleted file mode 100644
index 7cc99a4ba7c0..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/filters/lint.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from gettext import gettext
-_ = gettext
-
-from . import _base
-from ..constants import cdataElements, rcdataElements, voidElements
-
-from ..constants import spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-
-class LintError(Exception):
- pass
-
-
-class Filter(_base.Filter):
- def __iter__(self):
- open_elements = []
- contentModelFlag = "PCDATA"
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type in ("StartTag", "EmptyTag"):
- name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
- if not name:
- raise LintError(_("Empty tag name"))
- if type == "StartTag" and name in voidElements:
- raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
- elif type == "EmptyTag" and name not in voidElements:
- raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
- if type == "StartTag":
- open_elements.append(name)
- for name, value in token["data"]:
- if not isinstance(name, str):
- raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
- if not name:
- raise LintError(_("Empty attribute name"))
- if not isinstance(value, str):
- raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
- if name in cdataElements:
- contentModelFlag = "CDATA"
- elif name in rcdataElements:
- contentModelFlag = "RCDATA"
- elif name == "plaintext":
- contentModelFlag = "PLAINTEXT"
-
- elif type == "EndTag":
- name = token["name"]
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
- if not name:
- raise LintError(_("Empty tag name"))
- if name in voidElements:
- raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
- start_name = open_elements.pop()
- if start_name != name:
- raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
- contentModelFlag = "PCDATA"
-
- elif type == "Comment":
- if contentModelFlag != "PCDATA":
- raise LintError(_("Comment not in PCDATA content model flag"))
-
- elif type in ("Characters", "SpaceCharacters"):
- data = token["data"]
- if not isinstance(data, str):
- raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
- if not data:
- raise LintError(_("%(type)s token with empty data") % {"type": type})
- if type == "SpaceCharacters":
- data = data.strip(spaceCharacters)
- if data:
- raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
-
- elif type == "Doctype":
- name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
- # XXX: what to do with token["data"] ?
-
- elif type in ("ParseError", "SerializeError"):
- pass
-
- else:
- raise LintError(_("Unknown token type: %(type)s") % {"type": type})
-
- yield token
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/filters/sanitizer.py b/testing/web-platform/tests/tools/html5lib/html5lib/filters/sanitizer.py
deleted file mode 100644
index b206b54e7a74..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/filters/sanitizer.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-from ..sanitizer import HTMLSanitizerMixin
-
-
-class Filter(_base.Filter, HTMLSanitizerMixin):
- def __iter__(self):
- for token in _base.Filter.__iter__(self):
- token = self.sanitize_token(token)
- if token:
- yield token
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/sanitizer.py b/testing/web-platform/tests/tools/html5lib/html5lib/sanitizer.py
deleted file mode 100644
index 469d9b40c966..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/sanitizer.py
+++ /dev/null
@@ -1,271 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-from xml.sax.saxutils import escape, unescape
-
-from .tokenizer import HTMLTokenizer
-from .constants import tokenTypes
-
-
-class HTMLSanitizerMixin(object):
- """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
-
- acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
- 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
- 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
- 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
- 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
- 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
- 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
- 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
- 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
- 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
- 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
- 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
- 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
-
- mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
- 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
- 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
- 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
- 'munderover', 'none']
-
- svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
- 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
- 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
- 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
- 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
- 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
-
- acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
- 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
- 'background', 'balance', 'bgcolor', 'bgproperties', 'border',
- 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
- 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
- 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
- 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
- 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
- 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
- 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
- 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
- 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
- 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
- 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
- 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
- 'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
- 'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
- 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
- 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
- 'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
- 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
- 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
- 'width', 'wrap', 'xml:lang']
-
- mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
- 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
- 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
- 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
- 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
- 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
- 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
- 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
- 'xlink:type', 'xmlns', 'xmlns:xlink']
-
- svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
- 'arabic-form', 'ascent', 'attributeName', 'attributeType',
- 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
- 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
- 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
- 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
- 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
- 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
- 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
- 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
- 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
- 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
- 'opacity', 'orient', 'origin', 'overline-position',
- 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
- 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
- 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
- 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
- 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
- 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
- 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
- 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
- 'transform', 'type', 'u1', 'u2', 'underline-position',
- 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
- 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
- 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
- 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
- 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
- 'y1', 'y2', 'zoomAndPan']
-
- attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
- 'xlink:href', 'xml:base']
-
- svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
- 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
- 'mask', 'stroke']
-
- svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
- 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
- 'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
- 'set', 'use']
-
- acceptable_css_properties = ['azimuth', 'background-color',
- 'border-bottom-color', 'border-collapse', 'border-color',
- 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
- 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
- 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
- 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
- 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
- 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
- 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
- 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
- 'white-space', 'width']
-
- acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
- 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
- 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
- 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
- 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
- 'transparent', 'underline', 'white', 'yellow']
-
- acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
- 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
- 'stroke-opacity']
-
- acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
- 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
- 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
- 'ssh', 'sftp', 'rtsp', 'afs']
-
- # subclasses may define their own versions of these constants
- allowed_elements = acceptable_elements + mathml_elements + svg_elements
- allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
- allowed_css_properties = acceptable_css_properties
- allowed_css_keywords = acceptable_css_keywords
- allowed_svg_properties = acceptable_svg_properties
- allowed_protocols = acceptable_protocols
-
- # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
- # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
- # attributes are parsed, and a restricted set, # specified by
- # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
- # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
- # in ALLOWED_PROTOCOLS are allowed.
- #
- # sanitize_html('')
- # => <script> do_nasty_stuff() </script>
- # sanitize_html('Click here for $100 ')
- # => Click here for $100
- def sanitize_token(self, token):
-
- # accommodate filters which use token_type differently
- token_type = token["type"]
- if token_type in list(tokenTypes.keys()):
- token_type = tokenTypes[token_type]
-
- if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
- tokenTypes["EmptyTag"]):
- if token["name"] in self.allowed_elements:
- return self.allowed_token(token, token_type)
- else:
- return self.disallowed_token(token, token_type)
- elif token_type == tokenTypes["Comment"]:
- pass
- else:
- return token
-
- def allowed_token(self, token, token_type):
- if "data" in token:
- attrs = dict([(name, val) for name, val in
- token["data"][::-1]
- if name in self.allowed_attributes])
- for attr in self.attr_val_is_uri:
- if attr not in attrs:
- continue
- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
- unescape(attrs[attr])).lower()
- # remove replacement characters from unescaped characters
- val_unescaped = val_unescaped.replace("\ufffd", "")
- if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
- (val_unescaped.split(':')[0] not in
- self.allowed_protocols)):
- del attrs[attr]
- for attr in self.svg_attr_val_allows_ref:
- if attr in attrs:
- attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
- ' ',
- unescape(attrs[attr]))
- if (token["name"] in self.svg_allow_local_href and
- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
- attrs['xlink:href'])):
- del attrs['xlink:href']
- if 'style' in attrs:
- attrs['style'] = self.sanitize_css(attrs['style'])
- token["data"] = [[name, val] for name, val in list(attrs.items())]
- return token
-
- def disallowed_token(self, token, token_type):
- if token_type == tokenTypes["EndTag"]:
- token["data"] = "%s>" % token["name"]
- elif token["data"]:
- attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
- token["data"] = "<%s%s>" % (token["name"], attrs)
- else:
- token["data"] = "<%s>" % token["name"]
- if token.get("selfClosing"):
- token["data"] = token["data"][:-1] + "/>"
-
- if token["type"] in list(tokenTypes.keys()):
- token["type"] = "Characters"
- else:
- token["type"] = tokenTypes["Characters"]
-
- del token["name"]
- return token
-
- def sanitize_css(self, style):
- # disallow urls
- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
-
- # gauntlet
- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
- return ''
- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
- return ''
-
- clean = []
- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
- if not value:
- continue
- if prop.lower() in self.allowed_css_properties:
- clean.append(prop + ': ' + value + ';')
- elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
- 'padding']:
- for keyword in value.split():
- if keyword not in self.acceptable_css_keywords and \
- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
- break
- else:
- clean.append(prop + ': ' + value + ';')
- elif prop.lower() in self.allowed_svg_properties:
- clean.append(prop + ': ' + value + ';')
-
- return ' '.join(clean)
-
-
-class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
- def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
- lowercaseElementName=False, lowercaseAttrName=False, parser=None):
- # Change case matching defaults as we only output lowercase html anyway
- # This solution doesn't seem ideal...
- HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
- lowercaseElementName, lowercaseAttrName, parser=parser)
-
- def __iter__(self):
- for token in HTMLTokenizer.__iter__(self):
- token = self.sanitize_token(token)
- if token:
- yield token
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/serializer/__init__.py b/testing/web-platform/tests/tools/html5lib/html5lib/serializer/__init__.py
deleted file mode 100644
index 8380839a6d3e..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/serializer/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .. import treewalkers
-
-from .htmlserializer import HTMLSerializer
-
-
-def serialize(input, tree="etree", format="html", encoding=None,
- **serializer_opts):
- # XXX: Should we cache this?
- walker = treewalkers.getTreeWalker(tree)
- if format == "html":
- s = HTMLSerializer(**serializer_opts)
- else:
- raise ValueError("type must be html")
- return s.render(walker(input), encoding)
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/serializer/htmlserializer.py b/testing/web-platform/tests/tools/html5lib/html5lib/serializer/htmlserializer.py
deleted file mode 100644
index 4a891ff56c4e..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/serializer/htmlserializer.py
+++ /dev/null
@@ -1,320 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from six import text_type
-
-import gettext
-_ = gettext.gettext
-
-try:
- from functools import reduce
-except ImportError:
- pass
-
-from ..constants import voidElements, booleanAttributes, spaceCharacters
-from ..constants import rcdataElements, entities, xmlEntities
-from .. import utils
-from xml.sax.saxutils import escape
-
-spaceCharacters = "".join(spaceCharacters)
-
-try:
- from codecs import register_error, xmlcharrefreplace_errors
-except ImportError:
- unicode_encode_errors = "strict"
-else:
- unicode_encode_errors = "htmlentityreplace"
-
- encode_entity_map = {}
- is_ucs4 = len("\U0010FFFF") == 1
- for k, v in list(entities.items()):
- # skip multi-character entities
- if ((is_ucs4 and len(v) > 1) or
- (not is_ucs4 and len(v) > 2)):
- continue
- if v != "&":
- if len(v) == 2:
- v = utils.surrogatePairToCodepoint(v)
- else:
- v = ord(v)
- if v not in encode_entity_map or k.islower():
- # prefer < over < and similarly for &, >, etc.
- encode_entity_map[v] = k
-
- def htmlentityreplace_errors(exc):
- if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
- res = []
- codepoints = []
- skip = False
- for i, c in enumerate(exc.object[exc.start:exc.end]):
- if skip:
- skip = False
- continue
- index = i + exc.start
- if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
- codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
- skip = True
- else:
- codepoint = ord(c)
- codepoints.append(codepoint)
- for cp in codepoints:
- e = encode_entity_map.get(cp)
- if e:
- res.append("&")
- res.append(e)
- if not e.endswith(";"):
- res.append(";")
- else:
- res.append("%s;" % (hex(cp)[2:]))
- return ("".join(res), exc.end)
- else:
- return xmlcharrefreplace_errors(exc)
-
- register_error(unicode_encode_errors, htmlentityreplace_errors)
-
- del register_error
-
-
-class HTMLSerializer(object):
-
- # attribute quoting options
- quote_attr_values = False
- quote_char = '"'
- use_best_quote_char = True
-
- # tag syntax options
- omit_optional_tags = True
- minimize_boolean_attributes = True
- use_trailing_solidus = False
- space_before_trailing_solidus = True
-
- # escaping options
- escape_lt_in_attrs = False
- escape_rcdata = False
- resolve_entities = True
-
- # miscellaneous options
- alphabetical_attributes = False
- inject_meta_charset = True
- strip_whitespace = False
- sanitize = False
-
- options = ("quote_attr_values", "quote_char", "use_best_quote_char",
- "omit_optional_tags", "minimize_boolean_attributes",
- "use_trailing_solidus", "space_before_trailing_solidus",
- "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
- "alphabetical_attributes", "inject_meta_charset",
- "strip_whitespace", "sanitize")
-
- def __init__(self, **kwargs):
- """Initialize HTMLSerializer.
-
- Keyword options (default given first unless specified) include:
-
- inject_meta_charset=True|False
- Whether it insert a meta element to define the character set of the
- document.
- quote_attr_values=True|False
- Whether to quote attribute values that don't require quoting
- per HTML5 parsing rules.
- quote_char=u'"'|u"'"
- Use given quote character for attribute quoting. Default is to
- use double quote unless attribute value contains a double quote,
- in which case single quotes are used instead.
- escape_lt_in_attrs=False|True
- Whether to escape < in attribute values.
- escape_rcdata=False|True
- Whether to escape characters that need to be escaped within normal
- elements within rcdata elements such as style.
- resolve_entities=True|False
- Whether to resolve named character entities that appear in the
- source tree. The XML predefined entities < > & " '
- are unaffected by this setting.
- strip_whitespace=False|True
- Whether to remove semantically meaningless whitespace. (This
- compresses all whitespace to a single space except within pre.)
- minimize_boolean_attributes=True|False
- Shortens boolean attributes to give just the attribute value,
- for example becomes .
- use_trailing_solidus=False|True
- Includes a close-tag slash at the end of the start tag of void
- elements (empty elements whose end tag is forbidden). E.g. .
- space_before_trailing_solidus=True|False
- Places a space immediately before the closing slash in a tag
- using a trailing solidus. E.g. . Requires use_trailing_solidus.
- sanitize=False|True
- Strip all unsafe or unknown constructs from output.
- See `html5lib user documentation`_
- omit_optional_tags=True|False
- Omit start/end tags that are optional.
- alphabetical_attributes=False|True
- Reorder attributes to be in alphabetical order.
-
- .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
- """
- if 'quote_char' in kwargs:
- self.use_best_quote_char = False
- for attr in self.options:
- setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
- self.errors = []
- self.strict = False
-
- def encode(self, string):
- assert(isinstance(string, text_type))
- if self.encoding:
- return string.encode(self.encoding, unicode_encode_errors)
- else:
- return string
-
- def encodeStrict(self, string):
- assert(isinstance(string, text_type))
- if self.encoding:
- return string.encode(self.encoding, "strict")
- else:
- return string
-
- def serialize(self, treewalker, encoding=None):
- self.encoding = encoding
- in_cdata = False
- self.errors = []
-
- if encoding and self.inject_meta_charset:
- from ..filters.inject_meta_charset import Filter
- treewalker = Filter(treewalker, encoding)
- # WhitespaceFilter should be used before OptionalTagFilter
- # for maximum efficiently of this latter filter
- if self.strip_whitespace:
- from ..filters.whitespace import Filter
- treewalker = Filter(treewalker)
- if self.sanitize:
- from ..filters.sanitizer import Filter
- treewalker = Filter(treewalker)
- if self.omit_optional_tags:
- from ..filters.optionaltags import Filter
- treewalker = Filter(treewalker)
- # Alphabetical attributes must be last, as other filters
- # could add attributes and alter the order
- if self.alphabetical_attributes:
- from ..filters.alphabeticalattributes import Filter
- treewalker = Filter(treewalker)
-
- for token in treewalker:
- type = token["type"]
- if type == "Doctype":
- doctype = "= 0:
- if token["systemId"].find("'") >= 0:
- self.serializeError(_("System identifer contains both single and double quote characters"))
- quote_char = "'"
- else:
- quote_char = '"'
- doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
-
- doctype += ">"
- yield self.encodeStrict(doctype)
-
- elif type in ("Characters", "SpaceCharacters"):
- if type == "SpaceCharacters" or in_cdata:
- if in_cdata and token["data"].find("") >= 0:
- self.serializeError(_("Unexpected in CDATA"))
- yield self.encode(token["data"])
- else:
- yield self.encode(escape(token["data"]))
-
- elif type in ("StartTag", "EmptyTag"):
- name = token["name"]
- yield self.encodeStrict("<%s" % name)
- if name in rcdataElements and not self.escape_rcdata:
- in_cdata = True
- elif in_cdata:
- self.serializeError(_("Unexpected child element of a CDATA element"))
- for (attr_namespace, attr_name), attr_value in token["data"].items():
- # TODO: Add namespace support here
- k = attr_name
- v = attr_value
- yield self.encodeStrict(' ')
-
- yield self.encodeStrict(k)
- if not self.minimize_boolean_attributes or \
- (k not in booleanAttributes.get(name, tuple())
- and k not in booleanAttributes.get("", tuple())):
- yield self.encodeStrict("=")
- if self.quote_attr_values or not v:
- quote_attr = True
- else:
- quote_attr = reduce(lambda x, y: x or (y in v),
- spaceCharacters + ">\"'=", False)
- v = v.replace("&", "&")
- if self.escape_lt_in_attrs:
- v = v.replace("<", "<")
- if quote_attr:
- quote_char = self.quote_char
- if self.use_best_quote_char:
- if "'" in v and '"' not in v:
- quote_char = '"'
- elif '"' in v and "'" not in v:
- quote_char = "'"
- if quote_char == "'":
- v = v.replace("'", "'")
- else:
- v = v.replace('"', """)
- yield self.encodeStrict(quote_char)
- yield self.encode(v)
- yield self.encodeStrict(quote_char)
- else:
- yield self.encode(v)
- if name in voidElements and self.use_trailing_solidus:
- if self.space_before_trailing_solidus:
- yield self.encodeStrict(" /")
- else:
- yield self.encodeStrict("/")
- yield self.encode(">")
-
- elif type == "EndTag":
- name = token["name"]
- if name in rcdataElements:
- in_cdata = False
- elif in_cdata:
- self.serializeError(_("Unexpected child element of a CDATA element"))
- yield self.encodeStrict("%s>" % name)
-
- elif type == "Comment":
- data = token["data"]
- if data.find("--") >= 0:
- self.serializeError(_("Comment contains --"))
- yield self.encodeStrict("" % token["data"])
-
- elif type == "Entity":
- name = token["name"]
- key = name + ";"
- if key not in entities:
- self.serializeError(_("Entity %s not recognized" % name))
- if self.resolve_entities and key not in xmlEntities:
- data = entities[key]
- else:
- data = "&%s;" % name
- yield self.encodeStrict(data)
-
- else:
- self.serializeError(token["data"])
-
- def render(self, treewalker, encoding=None):
- if encoding:
- return b"".join(list(self.serialize(treewalker, encoding)))
- else:
- return "".join(list(self.serialize(treewalker)))
-
- def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
- # XXX The idea is to make data mandatory.
- self.errors.append(data)
- if self.strict:
- raise SerializeError
-
-
-def SerializeError(Exception):
- """Error in serialized tree"""
- pass
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/README b/testing/web-platform/tests/tools/html5lib/html5lib/tests/README
deleted file mode 100644
index c564b6836e46..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/README
+++ /dev/null
@@ -1 +0,0 @@
-Each testcase file can be run through nose (using ``nosetests``).
\ No newline at end of file
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/mockParser.py b/testing/web-platform/tests/tools/html5lib/html5lib/tests/mockParser.py
deleted file mode 100644
index ef31527e82df..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/mockParser.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import sys
-import os
-
-if __name__ == '__main__':
- # Allow us to import from the src directory
- os.chdir(os.path.split(os.path.abspath(__file__))[0])
- sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
-
-from html5lib.tokenizer import HTMLTokenizer
-
-
-class HTMLParser(object):
- """ Fake parser to test tokenizer output """
- def parse(self, stream, output=True):
- tokenizer = HTMLTokenizer(stream)
- for token in tokenizer:
- if output:
- print(token)
-
-if __name__ == "__main__":
- x = HTMLParser()
- if len(sys.argv) > 1:
- if len(sys.argv) > 2:
- import hotshot
- import hotshot.stats
- prof = hotshot.Profile('stats.prof')
- prof.runcall(x.parse, sys.argv[1], False)
- prof.close()
- stats = hotshot.stats.load('stats.prof')
- stats.strip_dirs()
- stats.sort_stats('time')
- stats.print_stats()
- else:
- x.parse(sys.argv[1])
- else:
- print("""Usage: python mockParser.py filename [stats]
- If stats is specified the hotshots profiler will run and output the
- stats instead.
- """)
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/performance/concatenation.py b/testing/web-platform/tests/tools/html5lib/html5lib/tests/performance/concatenation.py
deleted file mode 100644
index a1465036e52b..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/performance/concatenation.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-def f1():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x += y + z
-
-
-def f2():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x = x + y + z
-
-
-def f3():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x = "".join((x, y, z))
-
-
-def f4():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x = "%s%s%s" % (x, y, z)
-
-import timeit
-for x in range(4):
- statement = "f%s" % (x + 1)
- t = timeit.Timer(statement, "from __main__ import " + statement)
- r = t.repeat(3, 1000000)
- print(r, min(r))
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_encoding.py b/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_encoding.py
deleted file mode 100644
index d774ce0f600f..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_encoding.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import os
-import unittest
-
-try:
- unittest.TestCase.assertEqual
-except AttributeError:
- unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
-
-from .support import get_data_files, TestData, test_dir, errorMessage
-from html5lib import HTMLParser, inputstream
-
-
-class Html5EncodingTestCase(unittest.TestCase):
- def test_codec_name_a(self):
- self.assertEqual(inputstream.codecName("utf-8"), "utf-8")
-
- def test_codec_name_b(self):
- self.assertEqual(inputstream.codecName("utf8"), "utf-8")
-
- def test_codec_name_c(self):
- self.assertEqual(inputstream.codecName(" utf8 "), "utf-8")
-
- def test_codec_name_d(self):
- self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
-
-
-def runParserEncodingTest(data, encoding):
- p = HTMLParser()
- assert p.documentEncoding is None
- p.parse(data, useChardet=False)
- encoding = encoding.lower().decode("ascii")
-
- assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
-
-
-def runPreScanEncodingTest(data, encoding):
- stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
- encoding = encoding.lower().decode("ascii")
-
- # Very crude way to ignore irrelevant tests
- if len(data) > stream.numBytesMeta:
- return
-
- assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0])
-
-
-def test_encoding():
- for filename in get_data_files("encoding"):
- tests = TestData(filename, b"data", encoding=None)
- for idx, test in enumerate(tests):
- yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
- yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
-
-try:
- try:
- import charade # flake8: noqa
- except ImportError:
- import chardet # flake8: noqa
-except ImportError:
- print("charade/chardet not found, skipping chardet tests")
-else:
- def test_chardet():
- with open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb") as fp:
- encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
- assert encoding[0].lower() == "big5"
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_parser.py b/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_parser.py
deleted file mode 100644
index 230cdb42d59c..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_parser.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import os
-import sys
-import traceback
-import warnings
-import re
-
-warnings.simplefilter("error")
-
-from .support import get_data_files
-from .support import TestData, convert, convertExpected, treeTypes
-from html5lib import html5parser, constants
-
-# Run the parse error checks
-checkParseErrors = False
-
-# XXX - There should just be one function here but for some reason the testcase
-# format differs from the treedump format by a single space character
-
-
-def convertTreeDump(data):
- return "\n".join(convert(3)(data).split("\n")[1:])
-
-namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
-
-
-def runParserTest(innerHTML, input, expected, errors, treeClass,
- namespaceHTMLElements):
- with warnings.catch_warnings(record=True) as caughtWarnings:
- warnings.simplefilter("always")
- p = html5parser.HTMLParser(tree=treeClass,
- namespaceHTMLElements=namespaceHTMLElements)
-
- try:
- if innerHTML:
- document = p.parseFragment(input, innerHTML)
- else:
- document = p.parse(input)
- except:
- errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
- "\nTraceback:", traceback.format_exc()])
- assert False, errorMsg
-
- otherWarnings = [x for x in caughtWarnings
- if not issubclass(x.category, constants.DataLossWarning)]
- assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings]
- if len(caughtWarnings):
- return
-
- output = convertTreeDump(p.tree.testSerializer(document))
-
- expected = convertExpected(expected)
- if namespaceHTMLElements:
- expected = namespaceExpected(r"\1", expected)
-
- errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
- "\nReceived:", output])
- assert expected == output, errorMsg
-
- errStr = []
- for (line, col), errorcode, datavars in p.errors:
- assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
- errStr.append("Line: %i Col: %i %s" % (line, col,
- constants.E[errorcode] % datavars))
-
- errorMsg2 = "\n".join(["\n\nInput:", input,
- "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
- "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
- if checkParseErrors:
- assert len(p.errors) == len(errors), errorMsg2
-
-
-def test_parser():
- sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n")
- files = get_data_files('tree-construction')
-
- for filename in files:
- testName = os.path.basename(filename).replace(".dat", "")
- if testName in ("template",):
- continue
-
- tests = TestData(filename, "data")
-
- for index, test in enumerate(tests):
- input, errors, innerHTML, expected = [test[key] for key in
- ('data', 'errors',
- 'document-fragment',
- 'document')]
- if errors:
- errors = errors.split("\n")
-
- for treeName, treeCls in treeTypes.items():
- for namespaceHTMLElements in (True, False):
- yield (runParserTest, innerHTML, input, expected, errors, treeCls,
- namespaceHTMLElements)
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_parser2.py b/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_parser2.py
deleted file mode 100644
index 20bbdf3179c9..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_parser2.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import io
-
-from . import support # flake8: noqa
-from html5lib import html5parser
-from html5lib.constants import namespaces
-from html5lib import treebuilders
-
-import unittest
-
-# tests that aren't autogenerated from text files
-
-
-class MoreParserTests(unittest.TestCase):
-
- def setUp(self):
- self.dom_tree = treebuilders.getTreeBuilder("dom")
-
- def test_assertDoctypeCloneable(self):
- parser = html5parser.HTMLParser(tree=self.dom_tree)
- doc = parser.parse('')
- self.assertTrue(doc.cloneNode(True))
-
- def test_line_counter(self):
- # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
- parser = html5parser.HTMLParser(tree=self.dom_tree)
- parser.parse("\nx\n>\n ")
-
- def test_namespace_html_elements_0_dom(self):
- parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=True)
- doc = parser.parse("")
- self.assertTrue(doc.childNodes[0].namespaceURI == namespaces["html"])
-
- def test_namespace_html_elements_1_dom(self):
- parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=False)
- doc = parser.parse("")
- self.assertTrue(doc.childNodes[0].namespaceURI is None)
-
- def test_namespace_html_elements_0_etree(self):
- parser = html5parser.HTMLParser(namespaceHTMLElements=True)
- doc = parser.parse("")
- self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],))
-
- def test_namespace_html_elements_1_etree(self):
- parser = html5parser.HTMLParser(namespaceHTMLElements=False)
- doc = parser.parse("")
- self.assertTrue(list(doc)[0].tag == "html")
-
- def test_unicode_file(self):
- parser = html5parser.HTMLParser()
- parser.parse(io.StringIO("a"))
-
-
-def buildTestSuite():
- return unittest.defaultTestLoader.loadTestsFromName(__name__)
-
-
-def main():
- buildTestSuite()
- unittest.main()
-
-if __name__ == '__main__':
- main()
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_sanitizer.py b/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_sanitizer.py
deleted file mode 100644
index 1cc687dfcad7..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_sanitizer.py
+++ /dev/null
@@ -1,105 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-try:
- import json
-except ImportError:
- import simplejson as json
-
-from html5lib import html5parser, sanitizer, constants, treebuilders
-
-
-def toxmlFactory():
- tree = treebuilders.getTreeBuilder("etree")
-
- def toxml(element):
- # encode/decode roundtrip required for Python 2.6 compatibility
- result_bytes = tree.implementation.tostring(element, encoding="utf-8")
- return result_bytes.decode("utf-8")
-
- return toxml
-
-
-def runSanitizerTest(name, expected, input, toxml=None):
- if toxml is None:
- toxml = toxmlFactory()
- expected = ''.join([toxml(token) for token in html5parser.HTMLParser().
- parseFragment(expected)])
- expected = json.loads(json.dumps(expected))
- assert expected == sanitize_html(input)
-
-
-def sanitize_html(stream, toxml=None):
- if toxml is None:
- toxml = toxmlFactory()
- return ''.join([toxml(token) for token in
- html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
- parseFragment(stream)])
-
-
-def test_should_handle_astral_plane_characters():
- assert '\U0001d4b5 \U0001d538 ' == sanitize_html("𝒵 𝔸
")
-
-
-def test_sanitizer():
- toxml = toxmlFactory()
- for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
- if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
- continue # TODO
- if tag_name != tag_name.lower():
- continue # TODO
- if tag_name == 'image':
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- " foo <bad>bar</bad> baz",
- "<%s title='1'>foo bar baz%s>" % (tag_name, tag_name),
- toxml)
- elif tag_name == 'br':
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- " foo <bad>bar</bad> baz ",
- "<%s title='1'>foo bar baz%s>" % (tag_name, tag_name),
- toxml)
- elif tag_name in constants.voidElements:
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name,
- "<%s title='1'>foo bar baz%s>" % (tag_name, tag_name),
- toxml)
- else:
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- "<%s title=\"1\">foo <bad>bar</bad> baz%s>" % (tag_name, tag_name),
- "<%s title='1'>foo bar baz%s>" % (tag_name, tag_name),
- toxml)
-
- for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
- tag_name = tag_name.upper()
- yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
- "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
- "<%s title='1'>foo bar baz%s>" % (tag_name, tag_name),
- toxml)
-
- for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
- if attribute_name != attribute_name.lower():
- continue # TODO
- if attribute_name == 'style':
- continue
- yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
- "foo <bad>bar</bad> baz
" % attribute_name,
- "foo bar baz
" % attribute_name,
- toxml)
-
- for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
- attribute_name = attribute_name.upper()
- yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
- "foo <bad>bar</bad> baz
",
- "foo bar baz
" % attribute_name,
- toxml)
-
- for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
- yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
- "foo " % protocol,
- """foo """ % protocol,
- toxml)
-
- for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
- yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
- "foo " % protocol,
- """foo """ % protocol,
- toxml)
diff --git a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_serializer.py b/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_serializer.py
deleted file mode 100644
index 3c37feff70b2..000000000000
--- a/testing/web-platform/tests/tools/html5lib/html5lib/tests/test_serializer.py
+++ /dev/null
@@ -1,178 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import json
-import unittest
-
-from .support import get_data_files
-
-try:
- unittest.TestCase.assertEqual
-except AttributeError:
- unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
-
-import html5lib
-from html5lib import constants
-from html5lib.serializer import HTMLSerializer, serialize
-from html5lib.treewalkers._base import TreeWalker
-
-optionals_loaded = []
-
-try:
- from lxml import etree
- optionals_loaded.append("lxml")
-except ImportError:
- pass
-
-default_namespace = constants.namespaces["html"]
-
-
-class JsonWalker(TreeWalker):
- def __iter__(self):
- for token in self.tree:
- type = token[0]
- if type == "StartTag":
- if len(token) == 4:
- namespace, name, attrib = token[1:4]
- else:
- namespace = default_namespace
- name, attrib = token[1:3]
- yield self.startTag(namespace, name, self._convertAttrib(attrib))
- elif type == "EndTag":
- if len(token) == 3:
- namespace, name = token[1:3]
- else:
- namespace = default_namespace
- name = token[1]
- yield self.endTag(namespace, name)
- elif type == "EmptyTag":
- if len(token) == 4:
- namespace, name, attrib = token[1:]
- else:
- namespace = default_namespace
- name, attrib = token[1:]
- for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
- yield token
- elif type == "Comment":
- yield self.comment(token[1])
- elif type in ("Characters", "SpaceCharacters"):
- for token in self.text(token[1]):
- yield token
- elif type == "Doctype":
- if len(token) == 4:
- yield self.doctype(token[1], token[2], token[3])
- elif len(token) == 3:
- yield self.doctype(token[1], token[2])
- else:
- yield self.doctype(token[1])
- else:
- raise ValueError("Unknown token type: " + type)
-
- def _convertAttrib(self, attribs):
- """html5lib tree-walkers use a dict of (namespace, name): value for
- attributes, but JSON cannot represent this. Convert from the format
- in the serializer tests (a list of dicts with "namespace", "name",
- and "value" as keys) to html5lib's tree-walker format."""
- attrs = {}
- for attrib in attribs:
- name = (attrib["namespace"], attrib["name"])
- assert(name not in attrs)
- attrs[name] = attrib["value"]
- return attrs
-
-
-def serialize_html(input, options):
- options = dict([(str(k), v) for k, v in options.items()])
- stream = JsonWalker(input)
- serializer = HTMLSerializer(alphabetical_attributes=True, **options)
- return serializer.render(stream, options.get("encoding", None))
-
-
-def runSerializerTest(input, expected, options):
- encoding = options.get("encoding", None)
-
- if encoding:
- encode = lambda x: x.encode(encoding)
- expected = list(map(encode, expected))
-
- result = serialize_html(input, options)
- if len(expected) == 1:
- assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
- elif result not in expected:
- assert False, "Expected: %s, Received: %s" % (expected, result)
-
-
-class EncodingTestCase(unittest.TestCase):
- def throwsWithLatin1(self, input):
- self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"})
-
- def testDoctypeName(self):
- self.throwsWithLatin1([["Doctype", "\u0101"]])
-
- def testDoctypePublicId(self):
- self.throwsWithLatin1([["Doctype", "potato", "\u0101"]])
-
- def testDoctypeSystemId(self):
- self.throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]])
-
- def testCdataCharacters(self):
- runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
- [""
+ ],
+ "input": [
+ [
+ "StartTag",
+ "http://www.w3.org/1999/xhtml",
+ "style",
+ {}
+ ],
+ [
+ "Characters",
+ "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
+ ],
+ [
+ "EndTag",
+ "http://www.w3.org/1999/xhtml",
+ "style"
+ ]
+ ],
+ "description": "text within