Bug 1547568 [wpt PR 16537] - Use a path segment trie for the manifest, a=testonly

Automatic update from web-platform-tests
Major new manifest version (v8): path trie edition

This essentially implements https://github.com/web-platform-tests/rfcs/pull/40.
--

wpt-commits: 31c0f5efba38b7d1d7f45ac449bcbc892e8771ce
wpt-pr: 16537
This commit is contained in:
Sam Sneddon 2020-02-04 13:19:59 +00:00 committed by moz-wptsync-bot
Parent 6dfe17d0c5
Commit 55e7330fe5
12 changed files with 621 additions and 366 deletions

Просмотреть файл

@ -111,6 +111,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.tools_unittest']
pool:
vmImage: 'macOS-10.14'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- template: tools/ci/azure/checkout.yml
- template: tools/ci/azure/tox_pytest.yml
@ -156,6 +158,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wptrunner_unittest']
pool:
vmImage: 'macOS-10.14'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- template: tools/ci/azure/checkout.yml
- template: tools/ci/azure/tox_pytest.yml
@ -201,6 +205,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wpt_integration']
pool:
vmImage: 'macOS-10.14'
variables:
HYPOTHESIS_PROFILE: ci
steps:
# full checkout required
- template: tools/ci/azure/install_chrome.yml
@ -264,6 +270,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.tools_unittest']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- task: UsePythonVersion@0
inputs:
@ -280,6 +288,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.tools_unittest']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- task: UsePythonVersion@0
inputs:
@ -314,6 +324,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wptrunner_unittest']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- task: UsePythonVersion@0
inputs:
@ -364,6 +376,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wpt_integration']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
# full checkout required
- task: UsePythonVersion@0

Просмотреть файл

@ -1,7 +1,7 @@
parameters:
directory: ''
toxenv: 'ALL'
steps:
- template: pip_install.yml
parameters:

Просмотреть файл

@ -1,3 +1,4 @@
import os.path
from inspect import isabstract
from six import iteritems, with_metaclass
from six.moves.urllib.parse import urljoin, urlparse
@ -60,6 +61,11 @@ class ManifestItem(with_metaclass(ManifestItemMeta)):
"""The item's type"""
pass
@property
def path_parts(self):
    # type: () -> Tuple[Text, ...]
    """The item's source-file path split into OS path segments."""
    return tuple(self.path.split(os.path.sep))
def key(self):
# type: () -> Hashable
"""A unique identifier for the test"""
@ -103,14 +109,14 @@ class URLManifestItem(ManifestItem):
tests_root, # type: Text
path, # type: Text
url_base, # type: Text
url, # type: Text
url, # type: Optional[Text]
**extras # type: Any
):
# type: (...) -> None
super(URLManifestItem, self).__init__(tests_root, path)
assert url_base[0] == "/"
self.url_base = url_base
assert url[0] != "/"
assert url is None or url[0] != "/"
self._url = url
self._extras = extras
@ -122,10 +128,11 @@ class URLManifestItem(ManifestItem):
@property
def url(self):
# type: () -> Text
rel_url = self._url or self.path.replace(os.path.sep, u"/")
# we can outperform urljoin, because we know we just have path relative URLs
if self.url_base == "/":
return "/" + self._url
return urljoin(self.url_base, self._url)
return "/" + rel_url
return urljoin(self.url_base, rel_url)
@property
def https(self):
@ -134,8 +141,9 @@ class URLManifestItem(ManifestItem):
return ("https" in flags or "serviceworker" in flags)
def to_json(self):
# type: () -> Tuple[Text, Dict[Any, Any]]
rv = (self._url, {}) # type: Tuple[Text, Dict[Any, Any]]
# type: () -> Tuple[Optional[Text], Dict[Any, Any]]
rel_url = None if self._url == self.path.replace(os.path.sep, u"/") else self._url
rv = (rel_url, {}) # type: Tuple[Optional[Text], Dict[Any, Any]]
return rv
@classmethod
@ -182,7 +190,7 @@ class TestharnessTest(URLManifestItem):
return self._extras.get("script_metadata")
def to_json(self):
# type: () -> Tuple[Text, Dict[Text, Any]]
# type: () -> Tuple[Optional[Text], Dict[Text, Any]]
rv = super(TestharnessTest, self).to_json()
if self.timeout is not None:
rv[-1]["timeout"] = self.timeout
@ -204,7 +212,7 @@ class RefTest(URLManifestItem):
tests_root, # type: Text
path, # type: Text
url_base, # type: Text
url, # type: Text
url, # type: Optional[Text]
references=None, # type: Optional[List[Tuple[Text, Text]]]
**extras # type: Any
):
@ -248,8 +256,9 @@ class RefTest(URLManifestItem):
return rv
def to_json(self): # type: ignore
# type: () -> Tuple[Text, List[Tuple[Text, Text]], Dict[Text, Any]]
rv = (self._url, self.references, {}) # type: Tuple[Text, List[Tuple[Text, Text]], Dict[Text, Any]]
# type: () -> Tuple[Optional[Text], List[Tuple[Text, Text]], Dict[Text, Any]]
rel_url = None if self._url == self.path else self._url
rv = (rel_url, self.references, {}) # type: Tuple[Optional[Text], List[Tuple[Text, Text]], Dict[Text, Any]]
extras = rv[-1]
if self.timeout is not None:
extras["timeout"] = self.timeout
@ -320,7 +329,7 @@ class WebDriverSpecTest(URLManifestItem):
return self._extras.get("timeout")
def to_json(self):
# type: () -> Tuple[Text, Dict[Text, Any]]
# type: () -> Tuple[Optional[Text], Dict[Text, Any]]
rv = super(WebDriverSpecTest, self).to_json()
if self.timeout is not None:
rv[-1]["timeout"] = self.timeout

Просмотреть файл

@ -1,14 +1,16 @@
import itertools
import json
import os
from collections import MutableMapping
from six import iteritems, iterkeys, itervalues, string_types, binary_type, text_type
from copy import deepcopy
from multiprocessing import Pool, cpu_count
from six import PY3, iteritems, itervalues, string_types, binary_type, text_type
from . import vcs
from .item import (ConformanceCheckerTest, ManifestItem, ManualTest, RefTest, SupportFile,
TestharnessTest, VisualTest, WebDriverSpecTest, CrashTest)
from .log import get_logger
from .sourcefile import SourceFile
from .utils import from_os_path, to_os_path
from .typedata import TypeData
MYPY = False
if MYPY:
@ -18,9 +20,8 @@ if MYPY:
from typing import Container
from typing import Dict
from typing import IO
from typing import Iterable
from typing import Iterator
from typing import List
from typing import Iterable
from typing import Optional
from typing import Set
from typing import Text
@ -34,7 +35,7 @@ try:
except ImportError:
fast_json = json # type: ignore
CURRENT_VERSION = 7
CURRENT_VERSION = 8 # type: int
class ManifestError(Exception):
@ -55,156 +56,12 @@ item_classes = {"testharness": TestharnessTest,
"support": SupportFile} # type: Dict[str, Type[ManifestItem]]
if MYPY:
TypeDataType = MutableMapping[Text, Set[ManifestItem]]
else:
TypeDataType = MutableMapping
class TypeData(TypeDataType):
    def __init__(self, manifest, type_cls):
        # type: (Manifest, Type[ManifestItem]) -> None
        """Dict-like object containing the TestItems for each test type.

        Loading an actual Item class for each test is unnecessarily
        slow, so this class allows lazy-loading of the test
        items. When the manifest is loaded we store the raw json
        corresponding to the test type, and only create an Item
        subclass when the test is accessed. In order to remain
        API-compatible with consumers that depend on getting an Item
        from iteration, we do eagerly load all items when iterating
        over the class."""
        self.manifest = manifest
        self.type_cls = type_cls
        # Raw JSON for not-yet-materialised items; None once fully loaded.
        self.json_data = {}  # type: Optional[Dict[Text, List[Any]]]
        self.tests_root = None  # type: Optional[str]
        # Materialised ManifestItem sets, keyed by OS-style relative path.
        self.data = {}  # type: Dict[Text, Set[ManifestItem]]

    def __getitem__(self, key):
        # type: (Text) -> Set[ManifestItem]
        # Lazily materialise the items for this path on first access.
        if key not in self.data and self.json_data is not None:
            self.load(key)
        return self.data[key]

    def __nonzero__(self):
        # type: () -> bool
        # Truthy if any items exist, whether materialised or still raw JSON.
        return bool(self.data) or bool(self.json_data)

    def __len__(self):
        # type: () -> int
        # NOTE(review): a path present in both data and json_data would be
        # counted twice; load() removes the JSON entry on materialisation,
        # so presumably the two never overlap — verify.
        rv = len(self.data)
        if self.json_data is not None:
            rv += len(self.json_data)
        return rv

    def __delitem__(self, key):
        # type: (Text) -> None
        if key in self.data:
            del self.data[key]
        elif self.json_data is not None:
            # JSON keys use forward slashes; convert from the OS form.
            del self.json_data[from_os_path(key)]
        else:
            raise KeyError

    def __setitem__(self, key, value):
        # type: (Text, Set[ManifestItem]) -> None
        # Drop any stale raw-JSON entry so it cannot shadow the new value.
        if self.json_data is not None:
            path = from_os_path(key)
            if path in self.json_data:
                del self.json_data[path]
        self.data[key] = value

    def __contains__(self, key):
        # type: (Any) -> bool
        # Force a full load so membership checks see every path.
        self.load_all()
        return key in self.data

    def __iter__(self):
        # type: () -> Iterator[Text]
        self.load_all()
        return self.data.__iter__()

    def itervalues(self):
        # type: () -> Iterator[Set[ManifestItem]]
        self.load_all()
        return itervalues(self.data)

    def iteritems(self):
        # type: () -> Iterator[Tuple[Text, Set[ManifestItem]]]
        self.load_all()
        return iteritems(self.data)

    def values(self):
        # type: () -> List[Set[ManifestItem]]
        return list(self.itervalues())

    def items(self):
        # type: () -> List[Tuple[Text, Set[ManifestItem]]]
        return list(self.iteritems())

    def load(self, key):
        # type: (Text) -> None
        """Load a specific Item given a path"""
        if self.json_data is not None:
            data = set()
            path = from_os_path(key)
            for test in self.json_data.get(path, []):
                manifest_item = self.type_cls.from_json(self.manifest, path, test)
                data.add(manifest_item)
            # Remove the consumed raw JSON; missing key means no items.
            try:
                del self.json_data[path]
            except KeyError:
                pass
            self.data[key] = data
        else:
            raise ValueError

    def load_all(self):
        # type: () -> None
        """Load all test items in this class"""
        if self.json_data is not None:
            for path, value in iteritems(self.json_data):
                key = to_os_path(path)
                if key in self.data:
                    continue
                data = set()
                for test in self.json_data.get(path, []):
                    manifest_item = self.type_cls.from_json(self.manifest, path, test)
                    data.add(manifest_item)
                self.data[key] = data
            # Everything is materialised; drop the raw JSON entirely.
            self.json_data = None

    def set_json(self, tests_root, data):
        # type: (str, Dict[Text, Any]) -> None
        """Install the raw JSON for this type, to be loaded lazily."""
        if not isinstance(data, dict):
            raise ValueError("Got a %s expected a dict" % (type(data)))
        self.tests_root = tests_root
        self.json_data = data

    def to_json(self):
        # type: () -> Dict[Text, Any]
        """Serialise to a JSON-compatible dict, merging materialised items
        with any still-raw JSON."""
        data = {
            from_os_path(path):
            [t for t in sorted(test.to_json() for test in tests)]
            for path, tests in iteritems(self.data)
        }
        if self.json_data is not None:
            if not data:
                # avoid copying if there's nothing here yet
                return self.json_data
            data.update(self.json_data)
        return data

    def paths(self):
        # type: () -> Set[Text]
        """Get a list of all paths containing items of this type,
        without actually constructing all the items"""
        rv = set(iterkeys(self.data))
        if self.json_data:
            rv |= {to_os_path(item) for item in iterkeys(self.json_data)}
        return rv
def compute_manifest_items(source_file):
    # type: (SourceFile) -> Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]
    """Compute the manifest-relevant data for a single source file.

    Returns (path segments, item type, manifest items, file hash) — the
    shape consumed when inserting a file into the manifest trie. Kept as
    a module-level function so it can be dispatched to a worker pool.
    """
    parts = source_file.rel_path_parts
    item_type, items = source_file.manifest_items()
    return parts, item_type, set(items), source_file.hash
if MYPY:
ManifestDataType = Dict[Any, TypeData]
@ -234,15 +91,24 @@ class ManifestData(ManifestDataType):
without actually constructing all the items"""
rv = set() # type: Set[Text]
for item_data in itervalues(self):
rv |= set(item_data.paths())
for item in item_data:
rv.add(os.path.sep.join(item))
return rv
def type_by_path(self):
    # type: () -> Dict[Tuple[Text, ...], str]
    """Map each test path (a tuple of path segments) to its item type."""
    rv = {}
    for item_type, item_data in iteritems(self):
        for item in item_data:
            rv[item] = item_type
    return rv
class Manifest(object):
def __init__(self, tests_root=None, url_base="/"):
# type: (Optional[str], Text) -> None
assert url_base is not None
self._path_hash = {} # type: Dict[Text, Tuple[Text, Text]]
self._data = ManifestData(self) # type: ManifestData
self.tests_root = tests_root # type: Optional[str]
self.url_base = url_base # type: Text
@ -254,30 +120,34 @@ class Manifest(object):
def itertypes(self, *types):
# type: (*str) -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
for item_type in (types or sorted(self._data.keys())):
for path in sorted(self._data[item_type]):
for path in self._data[item_type]:
str_path = os.sep.join(path)
tests = self._data[item_type][path]
yield item_type, path, tests
yield item_type, str_path, tests
def iterpath(self, path):
# type: (Text) -> Iterator[ManifestItem]
# type: (Text) -> Iterable[ManifestItem]
tpath = tuple(path.split(os.path.sep))
for type_tests in self._data.values():
i = type_tests.get(path, set())
i = type_tests.get(tpath, set())
assert i is not None
for test in i:
yield test
def iterdir(self, dir_name):
# type: (Text) -> Iterator[ManifestItem]
if not dir_name.endswith(os.path.sep):
dir_name = dir_name + os.path.sep
# type: (Text) -> Iterable[ManifestItem]
tpath = tuple(dir_name.split(os.path.sep))
tpath_len = len(tpath)
for type_tests in self._data.values():
for path, tests in type_tests.iteritems():
if path.startswith(dir_name):
if path[:tpath_len] == tpath:
for test in tests:
yield test
def update(self, tree):
# type: (Iterable[Tuple[Union[SourceFile, bytes], bool]]) -> bool
def update(self, tree, parallel=True):
# type: (Iterable[Tuple[Union[SourceFile, bytes], bool]], bool) -> bool
"""Update the manifest given an iterable of items that make up the updated manifest.
The iterable must either generate tuples of the form (SourceFile, True) for paths
@ -285,119 +155,136 @@ class Manifest(object):
unusual API is designed as an optimisation, meaning that SourceFile items need not be
constructed in the case we are not updating a path, but the absence of an item from
the iterator may be used to remove defunct entries from the manifest."""
seen_files = set() # type: Set[Text]
changed = False
# Create local variable references to these dicts so we avoid the
# attribute access in the hot loop below
path_hash = self._path_hash # type: Dict[Text, Tuple[Text, Text]]
data = self._data
prev_files = data.paths() # type: Set[Text]
types = data.type_by_path()
deleted = set(types)
to_update = []
for source_file, update in tree:
if not update:
assert isinstance(source_file, (binary_type, text_type))
rel_path = source_file # type: Text
seen_files.add(rel_path)
assert rel_path in path_hash
old_hash, old_type = path_hash[rel_path] # type: Tuple[Text, Text]
deleted.remove(tuple(source_file.split(os.path.sep)))
else:
assert not isinstance(source_file, bytes)
rel_path = source_file.rel_path
seen_files.add(rel_path)
rel_path_parts = source_file.rel_path_parts
assert isinstance(rel_path_parts, tuple)
file_hash = source_file.hash # type: Text
is_new = rel_path not in path_hash # type: bool
is_new = rel_path_parts not in deleted # type: bool
hash_changed = False # type: bool
if not is_new:
old_hash, old_type = path_hash[rel_path]
deleted.remove(rel_path_parts)
old_type = types[rel_path_parts]
old_hash = data[old_type].hashes[rel_path_parts]
file_hash = source_file.hash # type: Text
if old_hash != file_hash:
hash_changed = True
del data[old_type][rel_path_parts]
if is_new or hash_changed:
new_type, manifest_items = source_file.manifest_items()
data[new_type][rel_path] = set(manifest_items)
path_hash[rel_path] = (file_hash, new_type)
if hash_changed and new_type != old_type:
del data[old_type][rel_path]
changed = True
to_update.append(source_file)
if to_update:
changed = True
if parallel and len(to_update) > 25 and cpu_count() > 1:
# 25 derived experimentally (2020-01) to be approximately
# the point at which it is quicker to create Pool and
# parallelize this
pool = Pool()
# chunksize set > 1 when more than 10000 tests, because
# chunking is a net-gain once we get to very large numbers
# of items (again, experimentally, 2020-01)
results = pool.imap_unordered(compute_manifest_items,
to_update,
chunksize=max(1, len(to_update) // 10000)
) # type: Iterator[Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]]
elif PY3:
results = map(compute_manifest_items, to_update)
else:
results = itertools.imap(compute_manifest_items, to_update)
for result in results:
rel_path_parts, new_type, manifest_items, file_hash = result
data[new_type][rel_path_parts] = manifest_items
data[new_type].hashes[rel_path_parts] = file_hash
deleted = prev_files - seen_files
if deleted:
changed = True
for rel_path in deleted:
if rel_path in path_hash:
_, old_type = path_hash[rel_path]
del path_hash[rel_path]
try:
del data[old_type][rel_path]
except KeyError:
pass
else:
for test_data in itervalues(data):
if rel_path in test_data:
del test_data[rel_path]
for rel_path_parts in deleted:
for test_data in itervalues(data):
if rel_path_parts in test_data:
del test_data[rel_path_parts]
return changed
def to_json(self):
# type: () -> Dict[Text, Any]
def to_json(self, caller_owns_obj=True):
# type: (bool) -> Dict[Text, Any]
"""Dump a manifest into a object which can be serialized as JSON
If caller_owns_obj is False, then the return value remains
owned by the manifest; it is _vitally important_ that _no_
(even read) operation is done on the manifest, as otherwise
objects within the object graph rooted at the return value can
be mutated. This essentially makes this mode very dangerous
and only to be used under extreme care.
"""
out_items = {
test_type: type_paths.to_json()
for test_type, type_paths in iteritems(self._data) if type_paths
}
if caller_owns_obj:
out_items = deepcopy(out_items)
rv = {"url_base": self.url_base,
"paths": {from_os_path(k): v for k, v in iteritems(self._path_hash)},
"items": out_items,
"version": CURRENT_VERSION} # type: Dict[Text, Any]
return rv
@classmethod
def from_json(cls, tests_root, obj, types=None):
# type: (str, Dict[Text, Any], Optional[Container[Text]]) -> Manifest
def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False):
# type: (str, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest
"""Load a manifest from a JSON object
This loads a manifest for a given local test_root path from an
object obj, potentially partially loading it to only load the
types given by types.
If callee_owns_obj is True, then ownership of obj transfers
to this function when called, and the caller must never mutate
the obj or anything referred to in the object graph rooted at
obj.
"""
version = obj.get("version")
if version != CURRENT_VERSION:
raise ManifestVersionMismatch
self = cls(tests_root, url_base=obj.get("url_base", "/"))
if not hasattr(obj, "items") and hasattr(obj, "paths"):
if not hasattr(obj, "items"):
raise ManifestError
self._path_hash = {to_os_path(k): v for k, v in iteritems(obj["paths"])}
# merge reftest_node and reftest
# TODO(MANIFESTv8): remove this condition
if "reftest_node" in obj["items"]:
for path in obj["items"]["reftest_node"]:
os_path = to_os_path(path)
old_hash, old_type = self._path_hash[os_path]
self._path_hash[os_path] = (old_hash, "reftest")
for test_type, type_paths in iteritems(obj["items"]):
# merge reftest_node and reftest
# TODO(MANIFESTv8): remove this condition
if test_type in ("reftest", "reftest_node"):
if types and "reftest" not in types:
continue
if self._data["reftest"].json_data:
self._data["reftest"].json_data.update(type_paths)
else:
self._data["reftest"].set_json(tests_root, type_paths)
continue
if test_type not in item_classes:
raise ManifestError
if types and test_type not in types:
continue
self._data[test_type].set_json(tests_root, type_paths)
if not callee_owns_obj:
type_paths = deepcopy(type_paths)
self._data[test_type].set_json(type_paths)
return self
@ -434,7 +321,8 @@ def _load(logger, # type: Logger
with open(manifest, "rb") as f:
rv = Manifest.from_json(tests_root,
fast_json.load(f),
types=types)
types=types,
callee_owns_obj=True)
except IOError:
return None
except ValueError:
@ -443,7 +331,8 @@ def _load(logger, # type: Logger
else:
rv = Manifest.from_json(tests_root,
fast_json.load(manifest),
types=types)
types=types,
callee_owns_obj=True)
if allow_cached:
__load_cache[manifest_path] = rv
@ -460,7 +349,8 @@ def load_and_update(tests_root, # type: bytes
working_copy=True, # type: bool
types=None, # type: Optional[Container[Text]]
write_manifest=True, # type: bool
allow_cached=True # type: bool
allow_cached=True, # type: bool
parallel=True # type: bool
):
# type: (...) -> Manifest
logger = get_logger()
@ -488,7 +378,7 @@ def load_and_update(tests_root, # type: bytes
if rebuild or update:
tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root,
working_copy, rebuild)
changed = manifest.update(tree)
changed = manifest.update(tree, parallel)
if write_manifest and changed:
write(manifest, manifest_path)
tree.dump_caches()
@ -504,6 +394,6 @@ def write(manifest, manifest_path):
with open(manifest_path, "wb") as f:
# Use ',' instead of the default ', ' separator to prevent trailing
# spaces: https://docs.python.org/2/library/json.html#json.dump
json.dump(manifest.to_json(), f,
json.dump(manifest.to_json(caller_owns_obj=True), f,
sort_keys=True, indent=1, separators=(',', ': '))
f.write("\n")

Просмотреть файл

@ -279,6 +279,11 @@ class SourceFile(object):
file_obj = open(self.path, 'rb')
return file_obj
@cached_property
def rel_path_parts(self):
    # type: () -> Tuple[Text, ...]
    """The file's relative path split into OS path segments."""
    return tuple(self.rel_path.split(os.path.sep))
@cached_property
def path(self):
# type: () -> Union[bytes, Text]
@ -316,18 +321,17 @@ class SourceFile(object):
if self.dir_path == "":
return True
parts = self.dir_path.split(os.path.sep)
parts = self.rel_path_parts
if (parts[0] in self.root_dir_non_test or
any(item in self.dir_non_test for item in parts) or
any(parts[:len(path)] == list(path) for path in self.dir_path_non_test)):
any(parts[:len(path)] == path for path in self.dir_path_non_test)):
return True
return False
def in_conformance_checker_dir(self):
# type: () -> bool
return (self.dir_path == "conformance-checkers" or
self.dir_path.startswith("conformance-checkers" + os.path.sep))
return self.rel_path_parts[0] == "conformance-checkers"
@property
def name_is_non_test(self):
@ -395,10 +399,10 @@ class SourceFile(object):
be a webdriver spec test file"""
# wdspec tests are in subdirectories of /webdriver excluding __init__.py
# files.
rel_dir_tree = self.rel_path.split(os.path.sep)
return (((rel_dir_tree[0] == "webdriver" and len(rel_dir_tree) > 1) or
(rel_dir_tree[:2] == ["infrastructure", "webdriver"] and
len(rel_dir_tree) > 2)) and
rel_path_parts = self.rel_path_parts
return (((rel_path_parts[0] == "webdriver" and len(rel_path_parts) > 1) or
(rel_path_parts[:2] == ("infrastructure", "webdriver") and
len(rel_path_parts) > 2)) and
self.filename not in ("__init__.py", "conftest.py") and
fnmatch(self.filename, wd_pattern))
@ -785,6 +789,8 @@ class SourceFile(object):
if self.items_cache:
return self.items_cache
drop_cached = "root" not in self.__dict__
if self.name_is_non_test:
rv = "support", [
SupportFile(
@ -957,4 +963,11 @@ class SourceFile(object):
self.items_cache = rv
if drop_cached and "__cached_properties__" in self.__dict__:
cached_properties = self.__dict__["__cached_properties__"]
for key in cached_properties:
if key in self.__dict__:
del self.__dict__[key]
del self.__dict__["__cached_properties__"]
return rv

Просмотреть файл

@ -5,7 +5,7 @@ import mock
import hypothesis as h
import hypothesis.strategies as hs
import pytest
from six import iteritems
from .. import manifest, sourcefile, item, utils
@ -18,7 +18,10 @@ if MYPY:
def SourceFileWithTest(path, hash, cls, **kwargs):
# type: (str, str, Type[item.ManifestItem], **Any) -> sourcefile.SourceFile
s = mock.Mock(rel_path=path, hash=hash)
rel_path_parts = tuple(path.split(os.path.sep))
s = mock.Mock(rel_path=path,
rel_path_parts=rel_path_parts,
hash=hash)
if cls == item.SupportFile:
test = cls("/foobar", path)
else:
@ -29,24 +32,15 @@ def SourceFileWithTest(path, hash, cls, **kwargs):
def SourceFileWithTests(path, hash, cls, variants):
# type: (str, str, Type[item.URLManifestItem], **Any) -> sourcefile.SourceFile
s = mock.Mock(rel_path=path, hash=hash)
rel_path_parts = tuple(path.split(os.path.sep))
s = mock.Mock(rel_path=path,
rel_path_parts=rel_path_parts,
hash=hash)
tests = [cls("/foobar", path, "/", item[0], **item[1]) for item in variants]
s.manifest_items = mock.Mock(return_value=(cls.item_type, tests))
return s # type: ignore
@hs.composite
def rel_dir_file_path(draw):
    """Hypothesis strategy: a relative file path of 1-20 characters built
    from "a" segments joined by the OS path separator, normalised for the
    current platform."""
    length = draw(hs.integers(min_value=1, max_value=20))
    if length == 1:
        return "a"
    else:
        # First and last characters are fixed to "a" so the path never
        # starts or ends with a separator.
        remaining = length - 2
        alphabet = "a" + os.path.sep
        mid = draw(hs.text(alphabet=alphabet, min_size=remaining, max_size=remaining))
        return os.path.normcase("a" + mid + "a")
@hs.composite
def sourcefile_strategy(draw):
item_classes = [item.TestharnessTest, item.RefTest,
@ -54,26 +48,64 @@ def sourcefile_strategy(draw):
item.ConformanceCheckerTest, item.SupportFile]
cls = draw(hs.sampled_from(item_classes))
path = draw(rel_dir_file_path())
hash = draw(hs.text(alphabet="0123456789abcdef", min_size=40, max_size=40))
s = mock.Mock(rel_path=path, hash=hash)
path = u"a"
rel_path_parts = tuple(path.split(os.path.sep))
hash = draw(hs.text(alphabet=u"0123456789abcdef", min_size=40, max_size=40))
s = mock.Mock(rel_path=path,
rel_path_parts=rel_path_parts,
hash=hash)
if cls is item.RefTest:
ref_path = draw(rel_dir_file_path())
h.assume(path != ref_path)
ref_path = u"b"
ref_eq = draw(hs.sampled_from(["==", "!="]))
test = cls("/foobar", path, "/", utils.from_os_path(path), references=[(utils.from_os_path(ref_path), ref_eq)])
elif cls is item.SupportFile:
test = cls("/foobar", path)
else:
test = cls("/foobar", path, "/", utils.from_os_path(path))
test = cls("/foobar", path, "/", "foobar")
s.manifest_items = mock.Mock(return_value=(cls.item_type, [test]))
return s
@h.given(hs.lists(sourcefile_strategy(),
min_size=1, max_size=1000, unique_by=lambda x: x.rel_path))
@hs.composite
def manifest_tree(draw):
    """Hypothesis strategy: a flat list of mock source files laid out as a
    random directory tree, with RefTest references drawn from names and
    from previously generated reftest URLs."""
    # Directory-entry names exclude NUL, path separators and characters
    # invalid on common filesystems.
    names = hs.text(alphabet=hs.characters(blacklist_characters=u"\0/\\:*\"?<>|"), min_size=1)
    tree = hs.recursive(sourcefile_strategy(),
                        lambda children: hs.dictionaries(names, children, min_size=1),
                        max_leaves=10)

    generated_root = draw(tree)
    # The root must be a directory, not a bare source file.
    h.assume(isinstance(generated_root, dict))

    reftest_urls = []
    output = []
    stack = [((k,), v) for k, v in iteritems(generated_root)]
    while stack:
        path, node = stack.pop()
        if isinstance(node, dict):
            # Interior node: descend into the directory.
            stack.extend((path + (k,), v) for k, v in iteritems(node))
        else:
            # Leaf node: a mock source file; fix up its path attributes to
            # match its position in the generated tree.
            rel_path = os.path.sep.join(path)
            node.rel_path = rel_path
            node.rel_path_parts = tuple(path)
            for test_item in node.manifest_items.return_value[1]:
                test_item.path = rel_path
                if isinstance(test_item, item.RefTest):
                    # Prefer referencing an already-generated reftest URL so
                    # realistic reference chains can occur.
                    if reftest_urls:
                        possible_urls = hs.sampled_from(reftest_urls) | names
                    else:
                        possible_urls = names
                    reference = hs.tuples(hs.sampled_from([u"==", u"!="]),
                                          possible_urls)
                    references = hs.lists(reference, min_size=1, unique=True)
                    test_item.references = draw(references)
                    reftest_urls.append(test_item.url)
            output.append(node)

    return output
@h.given(manifest_tree())
@h.example([SourceFileWithTest("a", "0"*40, item.ConformanceCheckerTest)])
def test_manifest_to_json(s):
m = manifest.Manifest()
@ -87,9 +119,7 @@ def test_manifest_to_json(s):
assert loaded.to_json() == json_str
@h.given(hs.lists(sourcefile_strategy(),
min_size=1, unique_by=lambda x: x.rel_path))
@h.given(manifest_tree())
@h.example([SourceFileWithTest("a", "0"*40, item.TestharnessTest)])
@h.example([SourceFileWithTest("a", "0"*40, item.RefTest, references=[("/aa", "==")])])
def test_manifest_idempotent(s):
@ -107,63 +137,22 @@ def test_manifest_idempotent(s):
def test_manifest_to_json_forwardslash():
m = manifest.Manifest()
s = SourceFileWithTest("a/b", "0"*40, item.TestharnessTest)
s = SourceFileWithTest("a" + os.path.sep + "b", "0"*40, item.TestharnessTest)
assert m.update([(s, True)]) is True
assert m.to_json() == {
'paths': {
'a/b': ('0000000000000000000000000000000000000000', 'testharness')
},
'version': 7,
'version': 8,
'url_base': '/',
'items': {
'testharness': {
'a/b': [('a/b', {})]
}
'testharness': {'a': {'b': [
'0000000000000000000000000000000000000000',
(None, {})
]}},
}
}
@pytest.mark.skipif(os.sep != "\\", reason="backslash path")
def test_manifest_to_json_backslash():
    # On Windows, OS-style backslash paths must serialise to forward-slash
    # keys in the JSON manifest.
    m = manifest.Manifest()

    s = SourceFileWithTest("a\\b", "0"*40, item.TestharnessTest)

    assert m.update([(s, True)]) is True

    assert m.to_json() == {
        'paths': {
            'a/b': ('0000000000000000000000000000000000000000', 'testharness')
        },
        'version': 7,
        'url_base': '/',
        'items': {
            'testharness': {
                'a/b': [('a/b', {})]
            }
        }
    }
def test_manifest_from_json_backslash():
    # JSON manifest paths are always forward-slash separated, so a
    # backslash key must be rejected when loading.
    json_obj = {
        'paths': {
            'a\\b': ('0000000000000000000000000000000000000000', 'testharness')
        },
        'version': 7,
        'url_base': '/',
        'items': {
            'testharness': {
                'a\\b': [['a/b', {}]]
            }
        }
    }

    with pytest.raises(ValueError):
        manifest.Manifest.from_json("/", json_obj)
def test_reftest_computation_chain():
m = manifest.Manifest()
@ -228,9 +217,7 @@ def test_no_update_delete():
test1 = s1.manifest_items()[1][0]
s1_1 = SourceFileWithTest("test1", "1"*40, item.ManualTest)
m.update([(s1_1.rel_path, False)])
m.update([(s1.rel_path, False)])
assert list(m) == [("testharness", test1.path, {test1})]
@ -268,9 +255,10 @@ def test_update_from_json_modified():
m.update([(s2, True)])
json_str = m.to_json()
assert json_str == {
'items': {'testharness': {'test1': [('test1', {"timeout": "long"})]}},
'paths': {'test1': ('1111111111111111111111111111111111111111',
'testharness')},
'items': {'testharness': {'test1': [
"1"*40,
(None, {'timeout': 'long'})
]}},
'url_base': '/',
'version': 7
'version': 8
}

Просмотреть файл

@ -0,0 +1,329 @@
from collections import MutableMapping
from six import itervalues, iteritems
MYPY = False
if MYPY:
# MYPY is set to True when run under Mypy.
from typing import Any
from typing import Dict
from typing import Iterator
from typing import List
from typing import Optional
from typing import Set
from typing import Text
from typing import Tuple
from typing import Type
from typing import Union
# avoid actually importing these, they're only used by type comments
from . import item
from . import manifest
if MYPY:
TypeDataType = MutableMapping[Tuple[Text, ...], Set[item.ManifestItem]]
PathHashType = MutableMapping[Tuple[Text, ...], Text]
else:
TypeDataType = MutableMapping
PathHashType = MutableMapping
class TypeData(TypeDataType):
def __init__(self, m, type_cls):
    # type: (manifest.Manifest, Type[item.ManifestItem]) -> None
    """Dict-like object containing the TestItems for each test type.

    Loading an actual Item class for each test is unnecessarily
    slow, so this class allows lazy-loading of the test
    items. When the manifest is loaded we store the raw json
    corresponding to the test type, and only create an Item
    subclass when the test is accessed. In order to remain
    API-compatible with consumers that depend on getting an Item
    from iteration, we do eagerly load all items when iterating
    over the class."""
    self._manifest = m
    self._type_cls = type_cls  # type: Type[item.ManifestItem]
    # Path-segment trie of raw JSON; leaves are lists of [hash, test, ...].
    self._json_data = {}  # type: Dict[Text, Any]
    # Path-segment trie of materialised items; leaves are sets of items.
    self._data = {}  # type: Dict[Text, Any]
    # File hashes for paths that have been materialised.
    self._hashes = {}  # type: Dict[Tuple[Text, ...], Text]
    self.hashes = PathHash(self)
def _delete_node(self, data, key):
    # type: (Dict[Text, Any], Tuple[Text, ...]) -> None
    """delete a path from a Dict data with a given key"""
    # Walk down to the parent of the leaf, remembering the path taken so
    # empty ancestor dicts can be pruned afterwards.
    path = []
    node = data
    for pathseg in key[:-1]:
        path.append((node, pathseg))
        node = node[pathseg]
        if not isinstance(node, dict):
            # An intermediate segment resolved to a leaf: key is invalid.
            raise KeyError(key)

    del node[key[-1]]

    # Prune now-empty ancestor dicts so the trie stays minimal.
    while path:
        node, pathseg = path.pop()
        if len(node[pathseg]) == 0:
            del node[pathseg]
        else:
            break
def __getitem__(self, key):
    # type: (Tuple[Text, ...]) -> Set[item.ManifestItem]
    """Return the set of items at the given path, materialising them from
    raw JSON on first access and caching the result in the data trie."""
    # Fast path: already materialised in the data trie.
    node = self._data  # type: Union[Dict[Text, Any], Set[item.ManifestItem], List[Any]]
    for pathseg in key:
        if isinstance(node, dict) and pathseg in node:
            node = node[pathseg]
        else:
            break
    else:
        if isinstance(node, set):
            return node
        else:
            raise KeyError(key)

    # Slow path: walk the raw-JSON trie.
    node = self._json_data
    found = False
    for pathseg in key:
        if isinstance(node, dict) and pathseg in node:
            node = node[pathseg]
        else:
            break
    else:
        found = True

    if not found:
        raise KeyError(key)

    if not isinstance(node, list):
        # Key names an interior directory node, not a test leaf.
        raise KeyError(key)

    # Leaf format is [file_hash, test_json, ...].
    self._hashes[key] = node[0]

    data = set()
    path = "/".join(key)
    for test in node[1:]:
        manifest_item = self._type_cls.from_json(self._manifest, path, test)
        data.add(manifest_item)

    # Cache the materialised set in the data trie...
    node = self._data
    assert isinstance(node, dict)
    for pathseg in key[:-1]:
        node = node.setdefault(pathseg, {})
        assert isinstance(node, dict)
    assert key[-1] not in node
    node[key[-1]] = data

    # ...and drop the now-consumed raw-JSON leaf.
    self._delete_node(self._json_data, key)

    return data
def __setitem__(self, key, value):
# type: (Tuple[Text, ...], Set[item.ManifestItem]) -> None
try:
self._delete_node(self._json_data, key)
except KeyError:
pass
node = self._data
for i, pathseg in enumerate(key[:-1]):
node = node.setdefault(pathseg, {})
if not isinstance(node, dict):
raise KeyError("%r is a child of a test (%r)" % (key, key[:i+1]))
node[key[-1]] = value
def __delitem__(self, key):
# type: (Tuple[Text, ...]) -> None
try:
self._delete_node(self._data, key)
except KeyError:
self._delete_node(self._json_data, key)
else:
try:
del self._hashes[key]
except KeyError:
pass
def __iter__(self):
# type: () -> Iterator[Tuple[Text, ...]]
"""Iterator over keys in the TypeData in codepoint order"""
data_node = self._data # type: Optional[Dict[Text, Any]]
json_node = self._json_data # type: Optional[Dict[Text, Any]]
path = tuple() # type: Tuple[Text, ...]
stack = [(data_node, json_node, path)]
while stack:
data_node, json_node, path = stack.pop()
if isinstance(data_node, set) or isinstance(json_node, list):
assert data_node is None or json_node is None
yield path
else:
assert data_node is None or isinstance(data_node, dict)
assert json_node is None or isinstance(json_node, dict)
keys = set() # type: Set[Text]
if data_node is not None:
keys |= set(iter(data_node))
if json_node is not None:
keys |= set(iter(json_node))
for key in sorted(keys, reverse=True):
stack.append((data_node.get(key) if data_node is not None else None,
json_node.get(key) if json_node is not None else None,
path + (key,)))
def __len__(self):
# type: () -> int
count = 0
stack = [self._data]
while stack:
v = stack.pop()
if isinstance(v, set):
count += 1
else:
stack.extend(itervalues(v))
stack = [self._json_data]
while stack:
v = stack.pop()
if isinstance(v, list):
count += 1
else:
stack.extend(itervalues(v))
return count
def __nonzero__(self):
# type: () -> bool
return bool(self._data) or bool(self._json_data)
__bool__ = __nonzero__
def __contains__(self, key):
# type: (Any) -> bool
# we provide our own impl of this to avoid calling __getitem__ and generating items for
# those in self._json_data
node = self._data
for pathseg in key:
if pathseg in node:
node = node[pathseg]
else:
break
else:
return bool(isinstance(node, set))
node = self._json_data
for pathseg in key:
if pathseg in node:
node = node[pathseg]
else:
break
else:
return bool(isinstance(node, list))
return False
def clear(self):
    # type: () -> None
    """Drop all stored data, both materialised and raw JSON.

    Clearing the three dicts in place is much quicker than the
    item-by-item default defined by MutableMapping.
    """
    self._data.clear()
    self._json_data.clear()
    self._hashes.clear()
def set_json(self, json_data):
    # type: (Dict[Text, Any]) -> None
    """Adopt *json_data* as the raw JSON trie for this type.

    Ownership of the object graph passes to this object: the caller
    must not mutate any part of it afterwards, since it is stored
    without copying and modified in place as items are materialised.
    """
    if not self._json_data:
        self._json_data = json_data
    else:
        raise ValueError("set_json call when JSON data is not empty")
def to_json(self):
    # type: () -> Dict[Text, Any]
    """Serialise the current state back into manifest JSON form.

    The returned object may share structure with the internal tries
    and is only guaranteed to be valid until the next __getitem__,
    __setitem__ or __delitem__ call, so the caller must not mutate
    any part of the returned object graph.
    """
    rv = self._json_data.copy()
    # Overlay the materialised trie onto the (shallow-copied) JSON trie.
    pending = [(self._data, rv, ())]  # type: List[Tuple[Dict[Text, Any], Dict[Text, Any], Tuple[Text, ...]]]
    while pending:
        data_node, json_node, prefix = pending.pop()
        for seg, subtree in data_node.items():
            full_key = prefix + (seg,)
            if isinstance(subtree, set):
                # Leaf: [hash, *serialised tests in sorted order]; a
                # path is never in both tries at once.
                assert seg not in json_node
                json_node[seg] = [self._hashes.get(full_key)] + sorted(test.to_json() for test in subtree)
            else:
                json_node[seg] = json_node.get(seg, {}).copy()
                pending.append((subtree, json_node[seg], full_key))
    return rv
class PathHash(PathHashType):
    """Mapping from a test path (tuple of path segments) to the hash of
    its source file.

    This is a view onto a TypeData object: the hash for a path lives
    either in the TypeData's _hashes dict (once the path's items have
    been materialised) or as element 0 of the raw JSON leaf list for
    that path. Keys cannot be added or removed through this view; they
    must match the paths present in the underlying TypeData.
    """

    def __init__(self, data):
        # type: (TypeData) -> None
        self._data = data

    def __getitem__(self, k):
        # type: (Tuple[Text, ...]) -> Text
        """Return the source-file hash for path *k*."""
        if k not in self._data:
            raise KeyError(k)

        if k in self._data._hashes:
            return self._data._hashes[k]

        # Fall back to the raw JSON trie, where the hash is stored as
        # the first element of the leaf list.
        node = self._data._json_data
        for pathseg in k:
            if pathseg in node:
                node = node[pathseg]
            else:
                break
        else:
            return node[0]  # type: ignore

        # __contains__ reported the key present, so one of the lookups
        # above should have succeeded.
        assert False, "unreachable"
        raise KeyError(k)

    def __setitem__(self, k, v):
        # type: (Tuple[Text, ...], Text) -> None
        """Set the source-file hash for the existing path *k*."""
        if k not in self._data:
            raise KeyError(k)

        if k in self._data._hashes:
            self._data._hashes[k] = v
            # Materialised paths have no raw JSON left to update.
            return

        node = self._data._json_data
        for pathseg in k:
            if pathseg in node:
                node = node[pathseg]
            else:
                break
        else:
            node[0] = v  # type: ignore
            return

        self._data._hashes[k] = v

    def __delitem__(self, k):
        # type: (Tuple[Text, ...]) -> None
        # Deletion would desynchronise this view from the TypeData.
        raise ValueError("keys here must match underlying data")

    def __iter__(self):
        # type: () -> Iterator[Tuple[Text, ...]]
        return iter(self._data)

    def __len__(self):
        # type: () -> int
        return len(self._data)

Просмотреть файл

@ -26,7 +26,8 @@ def update(tests_root, # type: str
manifest_path=None, # type: Optional[str]
working_copy=True, # type: bool
cache_root=None, # type: Optional[str]
rebuild=False # type: bool
rebuild=False, # type: bool
parallel=True # type: bool
):
# type: (...) -> bool
logger.warning("Deprecated; use manifest.load_and_update instead")
@ -34,7 +35,7 @@ def update(tests_root, # type: str
tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root,
working_copy, rebuild)
return manifest.update(tree)
return manifest.update(tree, parallel)
def update_from_cli(**kwargs):
@ -51,7 +52,8 @@ def update_from_cli(**kwargs):
kwargs["url_base"],
update=True,
rebuild=kwargs["rebuild"],
cache_root=kwargs["cache_root"])
cache_root=kwargs["cache_root"],
parallel=kwargs["parallel"])
def abs_path(path):
@ -78,6 +80,9 @@ def create_parser():
parser.add_argument(
"--cache-root", action="store", default=os.path.join(wpt_root, ".wptcache"),
help="Path in which to store any caches (default <tests_root>/.wptcache/)")
parser.add_argument(
"--no-parallel", dest="parallel", action="store_false", default=True,
help="Do not parallelize building the manifest")
return parser

Просмотреть файл

@ -103,4 +103,5 @@ class cached_property(Generic[T]):
# we can unconditionally assign as next time this won't be called
assert self.name not in obj.__dict__
rv = obj.__dict__[self.name] = self.func(obj)
obj.__dict__.setdefault("__cached_properties__", set()).add(self.name)
return rv

Просмотреть файл

@ -538,23 +538,22 @@ def create_test_tree(metadata_path, test_manifest):
for test in tests:
id_test_map[intern(ensure_str(test.id))] = test_file_data
dir_path = os.path.split(test_path)[0].replace(os.path.sep, "/")
dir_path = os.path.dirname(test_path)
while True:
if dir_path:
dir_id = dir_path + "/__dir__"
else:
dir_id = "__dir__"
dir_id = intern(ensure_str((test_manifest.url_base + dir_id).lstrip("/")))
if dir_id not in id_test_map:
test_file_data = TestFileData(intern(ensure_str(test_manifest.url_base)),
None,
metadata_path,
dir_id,
[])
id_test_map[dir_id] = test_file_data
if not dir_path or dir_path in id_test_map:
dir_meta_path = os.path.join(dir_path, "__dir__")
dir_id = (test_manifest.url_base + dir_meta_path.replace(os.path.sep, "/")).lstrip("/")
if dir_id in id_test_map:
break
test_file_data = TestFileData(intern(test_manifest.url_base),
None,
metadata_path,
dir_meta_path,
[])
id_test_map[dir_id] = test_file_data
dir_path = os.path.dirname(dir_path)
if not dir_path:
break
dir_path = dir_path.rsplit("/", 1)[0] if "/" in dir_path else ""
return id_test_map

Просмотреть файл

@ -12,7 +12,7 @@ from mozlog import structuredlog, handlers, formatters
here = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(here, os.pardir, os.pardir, os.pardir))
from manifest import manifest, item as manifest_item
from manifest import manifest, item as manifest_item, utils
def rel_path_to_test_url(rel_path):
@ -21,7 +21,9 @@ def rel_path_to_test_url(rel_path):
def SourceFileWithTest(path, hash, cls, *args):
s = mock.Mock(rel_path=path, hash=hash)
path_parts = tuple(path.split("/"))
path = utils.to_os_path(path)
s = mock.Mock(rel_path=path, rel_path_parts=path_parts, hash=hash)
test = cls("/foobar", path, "/", rel_path_to_test_url(path), *args)
s.manifest_items = mock.Mock(return_value=(cls.item_type, [test]))
return s
@ -71,6 +73,7 @@ def update(tests, *logs, **kwargs):
expected_data = {}
metadata.load_expected = lambda _, __, test_path, *args: expected_data.get(test_path)
for test_path, test_ids, test_type, manifest_str in tests:
test_path = utils.to_os_path(test_path)
expected_data[test_path] = manifestupdate.compile(BytesIO(manifest_str),
test_path,
"/",

Просмотреть файл

@ -2,7 +2,8 @@ from io import BytesIO
from mock import Mock
from manifest import manifest as wptmanifest
from manifest.item import TestharnessTest
from manifest.item import TestharnessTest, RefTest
from manifest.utils import to_os_path
from .. import manifestexpected, wpttest
dir_ini_0 = b"""\
@ -199,22 +200,25 @@ def test_expect_any_subtest_status():
def test_metadata_fuzzy():
manifest_data = {
"items": {"reftest": {"a/fuzzy.html": [["a/fuzzy.html",
[["/a/fuzzy-ref.html", "=="]],
{"fuzzy": [[["/a/fuzzy.html", '/a/fuzzy-ref.html', '=='],
[[2, 3], [10, 15]]]]}]]}},
"paths": {"a/fuzzy.html": ["0"*40, "reftest"]},
"version": 7,
"url_base": "/"}
manifest = wptmanifest.Manifest.from_json(".", manifest_data)
item = RefTest(".", "a/fuzzy.html", "/", "a/fuzzy.html",
references=[["/a/fuzzy-ref.html", "=="]],
fuzzy=[[["/a/fuzzy.html", '/a/fuzzy-ref.html', '=='],
[[2, 3], [10, 15]]]])
s = Mock(rel_path="a/fuzzy.html", rel_path_parts=("a", "fuzzy.html"), hash="0"*40)
s.manifest_items = Mock(return_value=(item.item_type, [item]))
manifest = wptmanifest.Manifest()
assert manifest.update([(s, True)]) is True
test_metadata = manifestexpected.static.compile(BytesIO(test_fuzzy),
{},
data_cls_getter=manifestexpected.data_cls_getter,
test_path="a/fuzzy.html",
url_base="/")
test = next(manifest.iterpath("a/fuzzy.html"))
test = next(manifest.iterpath(to_os_path("a/fuzzy.html")))
test_obj = wpttest.from_manifest(manifest, test, [], test_metadata.get_test(test.id))
assert test_obj.fuzzy == {('/a/fuzzy.html', '/a/fuzzy-ref.html', '=='): [[2, 3], [10, 15]]}