Bug 1547568 [wpt PR 16537] - Use a path segment trie for the manifest, a=testonly

Automatic update from web-platform-tests
Major new manifest version (v8): path trie edition

This essentially implements https://github.com/web-platform-tests/rfcs/pull/40.
--

wpt-commits: 31c0f5efba38b7d1d7f45ac449bcbc892e8771ce
wpt-pr: 16537
This commit is contained in:
Sam Sneddon 2020-02-04 13:19:59 +00:00 committed by moz-wptsync-bot
Parent 6dfe17d0c5
Commit 55e7330fe5
12 changed files with 621 additions and 366 deletions

Просмотреть файл

@ -111,6 +111,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.tools_unittest']
pool:
vmImage: 'macOS-10.14'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- template: tools/ci/azure/checkout.yml
- template: tools/ci/azure/tox_pytest.yml
@ -156,6 +158,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wptrunner_unittest']
pool:
vmImage: 'macOS-10.14'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- template: tools/ci/azure/checkout.yml
- template: tools/ci/azure/tox_pytest.yml
@ -201,6 +205,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wpt_integration']
pool:
vmImage: 'macOS-10.14'
variables:
HYPOTHESIS_PROFILE: ci
steps:
# full checkout required
- template: tools/ci/azure/install_chrome.yml
@ -264,6 +270,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.tools_unittest']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- task: UsePythonVersion@0
inputs:
@ -280,6 +288,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.tools_unittest']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- task: UsePythonVersion@0
inputs:
@ -314,6 +324,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wptrunner_unittest']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
- task: UsePythonVersion@0
inputs:
@ -364,6 +376,8 @@ jobs:
condition: dependencies.decision.outputs['test_jobs.wpt_integration']
pool:
vmImage: 'windows-2019'
variables:
HYPOTHESIS_PROFILE: ci
steps:
# full checkout required
- task: UsePythonVersion@0

Просмотреть файл

@ -1,7 +1,7 @@
parameters:
directory: ''
toxenv: 'ALL'
steps:
- template: pip_install.yml
parameters:

Просмотреть файл

@ -1,3 +1,4 @@
import os.path
from inspect import isabstract
from six import iteritems, with_metaclass
from six.moves.urllib.parse import urljoin, urlparse
@ -60,6 +61,11 @@ class ManifestItem(with_metaclass(ManifestItemMeta)):
"""The item's type"""
pass
@property
def path_parts(self):
    # type: () -> Tuple[Text, ...]
    """The item's source-file path split into OS path segments."""
    return tuple(self.path.split(os.path.sep))
def key(self):
# type: () -> Hashable
"""A unique identifier for the test"""
@ -103,14 +109,14 @@ class URLManifestItem(ManifestItem):
tests_root, # type: Text
path, # type: Text
url_base, # type: Text
url, # type: Text
url, # type: Optional[Text]
**extras # type: Any
):
# type: (...) -> None
super(URLManifestItem, self).__init__(tests_root, path)
assert url_base[0] == "/"
self.url_base = url_base
assert url[0] != "/"
assert url is None or url[0] != "/"
self._url = url
self._extras = extras
@ -122,10 +128,11 @@ class URLManifestItem(ManifestItem):
@property
def url(self):
# type: () -> Text
rel_url = self._url or self.path.replace(os.path.sep, u"/")
# we can outperform urljoin, because we know we just have path relative URLs
if self.url_base == "/":
return "/" + self._url
return urljoin(self.url_base, self._url)
return "/" + rel_url
return urljoin(self.url_base, rel_url)
@property
def https(self):
@ -134,8 +141,9 @@ class URLManifestItem(ManifestItem):
return ("https" in flags or "serviceworker" in flags)
def to_json(self):
# type: () -> Tuple[Text, Dict[Any, Any]]
rv = (self._url, {}) # type: Tuple[Text, Dict[Any, Any]]
# type: () -> Tuple[Optional[Text], Dict[Any, Any]]
rel_url = None if self._url == self.path.replace(os.path.sep, u"/") else self._url
rv = (rel_url, {}) # type: Tuple[Optional[Text], Dict[Any, Any]]
return rv
@classmethod
@ -182,7 +190,7 @@ class TestharnessTest(URLManifestItem):
return self._extras.get("script_metadata")
def to_json(self):
# type: () -> Tuple[Text, Dict[Text, Any]]
# type: () -> Tuple[Optional[Text], Dict[Text, Any]]
rv = super(TestharnessTest, self).to_json()
if self.timeout is not None:
rv[-1]["timeout"] = self.timeout
@ -204,7 +212,7 @@ class RefTest(URLManifestItem):
tests_root, # type: Text
path, # type: Text
url_base, # type: Text
url, # type: Text
url, # type: Optional[Text]
references=None, # type: Optional[List[Tuple[Text, Text]]]
**extras # type: Any
):
@ -248,8 +256,9 @@ class RefTest(URLManifestItem):
return rv
def to_json(self): # type: ignore
# type: () -> Tuple[Text, List[Tuple[Text, Text]], Dict[Text, Any]]
rv = (self._url, self.references, {}) # type: Tuple[Text, List[Tuple[Text, Text]], Dict[Text, Any]]
# type: () -> Tuple[Optional[Text], List[Tuple[Text, Text]], Dict[Text, Any]]
rel_url = None if self._url == self.path else self._url
rv = (rel_url, self.references, {}) # type: Tuple[Optional[Text], List[Tuple[Text, Text]], Dict[Text, Any]]
extras = rv[-1]
if self.timeout is not None:
extras["timeout"] = self.timeout
@ -320,7 +329,7 @@ class WebDriverSpecTest(URLManifestItem):
return self._extras.get("timeout")
def to_json(self):
# type: () -> Tuple[Text, Dict[Text, Any]]
# type: () -> Tuple[Optional[Text], Dict[Text, Any]]
rv = super(WebDriverSpecTest, self).to_json()
if self.timeout is not None:
rv[-1]["timeout"] = self.timeout

Просмотреть файл

@ -1,14 +1,16 @@
import itertools
import json
import os
from collections import MutableMapping
from six import iteritems, iterkeys, itervalues, string_types, binary_type, text_type
from copy import deepcopy
from multiprocessing import Pool, cpu_count
from six import PY3, iteritems, itervalues, string_types, binary_type, text_type
from . import vcs
from .item import (ConformanceCheckerTest, ManifestItem, ManualTest, RefTest, SupportFile,
TestharnessTest, VisualTest, WebDriverSpecTest, CrashTest)
from .log import get_logger
from .sourcefile import SourceFile
from .utils import from_os_path, to_os_path
from .typedata import TypeData
MYPY = False
if MYPY:
@ -18,9 +20,8 @@ if MYPY:
from typing import Container
from typing import Dict
from typing import IO
from typing import Iterable
from typing import Iterator
from typing import List
from typing import Iterable
from typing import Optional
from typing import Set
from typing import Text
@ -34,7 +35,7 @@ try:
except ImportError:
fast_json = json # type: ignore
CURRENT_VERSION = 7
CURRENT_VERSION = 8 # type: int
class ManifestError(Exception):
@ -55,156 +56,12 @@ item_classes = {"testharness": TestharnessTest,
"support": SupportFile} # type: Dict[str, Type[ManifestItem]]
if MYPY:
TypeDataType = MutableMapping[Text, Set[ManifestItem]]
else:
TypeDataType = MutableMapping
class TypeData(TypeDataType):
    def __init__(self, manifest, type_cls):
        # type: (Manifest, Type[ManifestItem]) -> None
        """Dict-like object containing the TestItems for each test type.

        Loading an actual Item class for each test is unnecessarily
        slow, so this class allows lazy-loading of the test
        items. When the manifest is loaded we store the raw json
        corresponding to the test type, and only create an Item
        subclass when the test is accessed. In order to remain
        API-compatible with consumers that depend on getting an Item
        from iteration, we do eagerly load all items when iterating
        over the class."""
        self.manifest = manifest
        self.type_cls = type_cls
        # Raw JSON for not-yet-materialised items; None once fully loaded.
        self.json_data = {}  # type: Optional[Dict[Text, List[Any]]]
        self.tests_root = None  # type: Optional[str]
        # Materialised ManifestItem sets, keyed by OS-style relative path.
        self.data = {}  # type: Dict[Text, Set[ManifestItem]]

    def __getitem__(self, key):
        # type: (Text) -> Set[ManifestItem]
        # Lazily materialise the items for this path on first access.
        if key not in self.data and self.json_data is not None:
            self.load(key)
        return self.data[key]

    def __nonzero__(self):
        # type: () -> bool
        # Truthy if any items exist, whether materialised or still raw JSON.
        return bool(self.data) or bool(self.json_data)

    def __len__(self):
        # type: () -> int
        # NOTE(review): a path present in both data and json_data would be
        # counted twice; load() removes the JSON entry on materialisation,
        # so presumably the two never overlap — verify.
        rv = len(self.data)
        if self.json_data is not None:
            rv += len(self.json_data)
        return rv

    def __delitem__(self, key):
        # type: (Text) -> None
        if key in self.data:
            del self.data[key]
        elif self.json_data is not None:
            # JSON keys use forward slashes; convert from the OS form.
            del self.json_data[from_os_path(key)]
        else:
            raise KeyError

    def __setitem__(self, key, value):
        # type: (Text, Set[ManifestItem]) -> None
        # Drop any stale raw-JSON entry so it cannot shadow the new value.
        if self.json_data is not None:
            path = from_os_path(key)
            if path in self.json_data:
                del self.json_data[path]
        self.data[key] = value

    def __contains__(self, key):
        # type: (Any) -> bool
        # Force a full load so membership checks see every path.
        self.load_all()
        return key in self.data

    def __iter__(self):
        # type: () -> Iterator[Text]
        self.load_all()
        return self.data.__iter__()

    def itervalues(self):
        # type: () -> Iterator[Set[ManifestItem]]
        self.load_all()
        return itervalues(self.data)

    def iteritems(self):
        # type: () -> Iterator[Tuple[Text, Set[ManifestItem]]]
        self.load_all()
        return iteritems(self.data)

    def values(self):
        # type: () -> List[Set[ManifestItem]]
        return list(self.itervalues())

    def items(self):
        # type: () -> List[Tuple[Text, Set[ManifestItem]]]
        return list(self.iteritems())

    def load(self, key):
        # type: (Text) -> None
        """Load a specific Item given a path"""
        if self.json_data is not None:
            data = set()
            path = from_os_path(key)
            for test in self.json_data.get(path, []):
                manifest_item = self.type_cls.from_json(self.manifest, path, test)
                data.add(manifest_item)
            # Remove the consumed raw JSON; missing key means no items.
            try:
                del self.json_data[path]
            except KeyError:
                pass
            self.data[key] = data
        else:
            raise ValueError

    def load_all(self):
        # type: () -> None
        """Load all test items in this class"""
        if self.json_data is not None:
            for path, value in iteritems(self.json_data):
                key = to_os_path(path)
                if key in self.data:
                    continue
                data = set()
                for test in self.json_data.get(path, []):
                    manifest_item = self.type_cls.from_json(self.manifest, path, test)
                    data.add(manifest_item)
                self.data[key] = data
            # Everything is materialised; drop the raw JSON entirely.
            self.json_data = None

    def set_json(self, tests_root, data):
        # type: (str, Dict[Text, Any]) -> None
        """Install the raw JSON for this type, to be loaded lazily."""
        if not isinstance(data, dict):
            raise ValueError("Got a %s expected a dict" % (type(data)))
        self.tests_root = tests_root
        self.json_data = data

    def to_json(self):
        # type: () -> Dict[Text, Any]
        """Serialise to a JSON-compatible dict, merging materialised items
        with any still-raw JSON."""
        data = {
            from_os_path(path):
            [t for t in sorted(test.to_json() for test in tests)]
            for path, tests in iteritems(self.data)
        }
        if self.json_data is not None:
            if not data:
                # avoid copying if there's nothing here yet
                return self.json_data
            data.update(self.json_data)
        return data

    def paths(self):
        # type: () -> Set[Text]
        """Get a list of all paths containing items of this type,
        without actually constructing all the items"""
        rv = set(iterkeys(self.data))
        if self.json_data:
            rv |= {to_os_path(item) for item in iterkeys(self.json_data)}
        return rv
def compute_manifest_items(source_file):
    # type: (SourceFile) -> Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]
    """Compute the manifest-relevant data for a single source file.

    Returns (path segments, item type, manifest items, file hash) — the
    shape consumed when inserting a file into the manifest trie. Kept as
    a module-level function so it can be dispatched to a worker pool.
    """
    parts = source_file.rel_path_parts
    item_type, items = source_file.manifest_items()
    return parts, item_type, set(items), source_file.hash
if MYPY:
ManifestDataType = Dict[Any, TypeData]
@ -234,15 +91,24 @@ class ManifestData(ManifestDataType):
without actually constructing all the items"""
rv = set() # type: Set[Text]
for item_data in itervalues(self):
rv |= set(item_data.paths())
for item in item_data:
rv.add(os.path.sep.join(item))
return rv
def type_by_path(self):
    # type: () -> Dict[Tuple[Text, ...], str]
    """Map each test path (a tuple of path segments) to its item type."""
    rv = {}
    for item_type, item_data in iteritems(self):
        for item in item_data:
            rv[item] = item_type
    return rv
class Manifest(object):
def __init__(self, tests_root=None, url_base="/"):
# type: (Optional[str], Text) -> None
assert url_base is not None
self._path_hash = {} # type: Dict[Text, Tuple[Text, Text]]
self._data = ManifestData(self) # type: ManifestData
self.tests_root = tests_root # type: Optional[str]
self.url_base = url_base # type: Text
@ -254,30 +120,34 @@ class Manifest(object):
def itertypes(self, *types):
# type: (*str) -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
for item_type in (types or sorted(self._data.keys())):
for path in sorted(self._data[item_type]):
for path in self._data[item_type]:
str_path = os.sep.join(path)
tests = self._data[item_type][path]
yield item_type, path, tests
yield item_type, str_path, tests
def iterpath(self, path):
# type: (Text) -> Iterator[ManifestItem]
# type: (Text) -> Iterable[ManifestItem]
tpath = tuple(path.split(os.path.sep))
for type_tests in self._data.values():
i = type_tests.get(path, set())
i = type_tests.get(tpath, set())
assert i is not None
for test in i:
yield test
def iterdir(self, dir_name):
# type: (Text) -> Iterator[ManifestItem]
if not dir_name.endswith(os.path.sep):
dir_name = dir_name + os.path.sep
# type: (Text) -> Iterable[ManifestItem]
tpath = tuple(dir_name.split(os.path.sep))
tpath_len = len(tpath)
for type_tests in self._data.values():
for path, tests in type_tests.iteritems():
if path.startswith(dir_name):
if path[:tpath_len] == tpath:
for test in tests:
yield test
def update(self, tree):
# type: (Iterable[Tuple[Union[SourceFile, bytes], bool]]) -> bool
def update(self, tree, parallel=True):
# type: (Iterable[Tuple[Union[SourceFile, bytes], bool]], bool) -> bool
"""Update the manifest given an iterable of items that make up the updated manifest.
The iterable must either generate tuples of the form (SourceFile, True) for paths
@ -285,119 +155,136 @@ class Manifest(object):
unusual API is designed as an optimisation, meaning that SourceFile items need not be
constructed in the case we are not updating a path, but the absence of an item from
the iterator may be used to remove defunct entries from the manifest."""
seen_files = set() # type: Set[Text]
changed = False
# Create local variable references to these dicts so we avoid the
# attribute access in the hot loop below
path_hash = self._path_hash # type: Dict[Text, Tuple[Text, Text]]
data = self._data
prev_files = data.paths() # type: Set[Text]
types = data.type_by_path()
deleted = set(types)
to_update = []
for source_file, update in tree:
if not update:
assert isinstance(source_file, (binary_type, text_type))
rel_path = source_file # type: Text
seen_files.add(rel_path)
assert rel_path in path_hash
old_hash, old_type = path_hash[rel_path] # type: Tuple[Text, Text]
deleted.remove(tuple(source_file.split(os.path.sep)))
else:
assert not isinstance(source_file, bytes)
rel_path = source_file.rel_path
seen_files.add(rel_path)
rel_path_parts = source_file.rel_path_parts
assert isinstance(rel_path_parts, tuple)
file_hash = source_file.hash # type: Text
is_new = rel_path not in path_hash # type: bool
is_new = rel_path_parts not in deleted # type: bool
hash_changed = False # type: bool
if not is_new:
old_hash, old_type = path_hash[rel_path]
deleted.remove(rel_path_parts)
old_type = types[rel_path_parts]
old_hash = data[old_type].hashes[rel_path_parts]
file_hash = source_file.hash # type: Text
if old_hash != file_hash:
hash_changed = True
del data[old_type][rel_path_parts]
if is_new or hash_changed:
new_type, manifest_items = source_file.manifest_items()
data[new_type][rel_path] = set(manifest_items)
path_hash[rel_path] = (file_hash, new_type)
if hash_changed and new_type != old_type:
del data[old_type][rel_path]
changed = True
to_update.append(source_file)
if to_update:
changed = True
if parallel and len(to_update) > 25 and cpu_count() > 1:
# 25 derived experimentally (2020-01) to be approximately
# the point at which it is quicker to create Pool and
# parallelize this
pool = Pool()
# chunksize set > 1 when more than 10000 tests, because
# chunking is a net-gain once we get to very large numbers
# of items (again, experimentally, 2020-01)
results = pool.imap_unordered(compute_manifest_items,
to_update,
chunksize=max(1, len(to_update) // 10000)
) # type: Iterator[Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]]
elif PY3:
results = map(compute_manifest_items, to_update)
else:
results = itertools.imap(compute_manifest_items, to_update)
for result in results:
rel_path_parts, new_type, manifest_items, file_hash = result
data[new_type][rel_path_parts] = manifest_items
data[new_type].hashes[rel_path_parts] = file_hash
deleted = prev_files - seen_files
if deleted:
changed = True
for rel_path in deleted:
if rel_path in path_hash:
_, old_type = path_hash[rel_path]
del path_hash[rel_path]
try:
del data[old_type][rel_path]
except KeyError:
pass
else:
for test_data in itervalues(data):
if rel_path in test_data:
del test_data[rel_path]
for rel_path_parts in deleted:
for test_data in itervalues(data):
if rel_path_parts in test_data:
del test_data[rel_path_parts]
return changed
def to_json(self):
# type: () -> Dict[Text, Any]
def to_json(self, caller_owns_obj=True):
# type: (bool) -> Dict[Text, Any]
"""Dump a manifest into a object which can be serialized as JSON
If caller_owns_obj is False, then the return value remains
owned by the manifest; it is _vitally important_ that _no_
(even read) operation is done on the manifest, as otherwise
objects within the object graph rooted at the return value can
be mutated. This essentially makes this mode very dangerous
and only to be used under extreme care.
"""
out_items = {
test_type: type_paths.to_json()
for test_type, type_paths in iteritems(self._data) if type_paths
}
if caller_owns_obj:
out_items = deepcopy(out_items)
rv = {"url_base": self.url_base,
"paths": {from_os_path(k): v for k, v in iteritems(self._path_hash)},
"items": out_items,
"version": CURRENT_VERSION} # type: Dict[Text, Any]
return rv
@classmethod
def from_json(cls, tests_root, obj, types=None):
# type: (str, Dict[Text, Any], Optional[Container[Text]]) -> Manifest
def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False):
# type: (str, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest
"""Load a manifest from a JSON object
This loads a manifest for a given local test_root path from an
object obj, potentially partially loading it to only load the
types given by types.
If callee_owns_obj is True, then ownership of obj transfers
to this function when called, and the caller must never mutate
the obj or anything referred to in the object graph rooted at
obj.
"""
version = obj.get("version")
if version != CURRENT_VERSION:
raise ManifestVersionMismatch
self = cls(tests_root, url_base=obj.get("url_base", "/"))
if not hasattr(obj, "items") and hasattr(obj, "paths"):
if not hasattr(obj, "items"):
raise ManifestError
self._path_hash = {to_os_path(k): v for k, v in iteritems(obj["paths"])}
# merge reftest_node and reftest
# TODO(MANIFESTv8): remove this condition
if "reftest_node" in obj["items"]:
for path in obj["items"]["reftest_node"]:
os_path = to_os_path(path)
old_hash, old_type = self._path_hash[os_path]
self._path_hash[os_path] = (old_hash, "reftest")
for test_type, type_paths in iteritems(obj["items"]):
# merge reftest_node and reftest
# TODO(MANIFESTv8): remove this condition
if test_type in ("reftest", "reftest_node"):
if types and "reftest" not in types:
continue
if self._data["reftest"].json_data:
self._data["reftest"].json_data.update(type_paths)
else:
self._data["reftest"].set_json(tests_root, type_paths)
continue
if test_type not in item_classes:
raise ManifestError
if types and test_type not in types:
continue
self._data[test_type].set_json(tests_root, type_paths)
if not callee_owns_obj:
type_paths = deepcopy(type_paths)
self._data[test_type].set_json(type_paths)
return self
@ -434,7 +321,8 @@ def _load(logger, # type: Logger
with open(manifest, "rb") as f:
rv = Manifest.from_json(tests_root,
fast_json.load(f),
types=types)
types=types,
callee_owns_obj=True)
except IOError:
return None
except ValueError:
@ -443,7 +331,8 @@ def _load(logger, # type: Logger
else:
rv = Manifest.from_json(tests_root,
fast_json.load(manifest),
types=types)
types=types,
callee_owns_obj=True)
if allow_cached:
__load_cache[manifest_path] = rv
@ -460,7 +349,8 @@ def load_and_update(tests_root, # type: bytes
working_copy=True, # type: bool
types=None, # type: Optional[Container[Text]]
write_manifest=True, # type: bool
allow_cached=True # type: bool
allow_cached=True, # type: bool
parallel=True # type: bool
):
# type: (...) -> Manifest
logger = get_logger()
@ -488,7 +378,7 @@ def load_and_update(tests_root, # type: bytes
if rebuild or update:
tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root,
working_copy, rebuild)
changed = manifest.update(tree)
changed = manifest.update(tree, parallel)
if write_manifest and changed:
write(manifest, manifest_path)
tree.dump_caches()
@ -504,6 +394,6 @@ def write(manifest, manifest_path):
with open(manifest_path, "wb") as f:
# Use ',' instead of the default ', ' separator to prevent trailing
# spaces: https://docs.python.org/2/library/json.html#json.dump
json.dump(manifest.to_json(), f,
json.dump(manifest.to_json(caller_owns_obj=True), f,
sort_keys=True, indent=1, separators=(',', ': '))
f.write("\n")

Просмотреть файл

@ -279,6 +279,11 @@ class SourceFile(object):
file_obj = open(self.path, 'rb')
return file_obj
@cached_property
def rel_path_parts(self):
    # type: () -> Tuple[Text, ...]
    """The file's relative path split into OS path segments."""
    return tuple(self.rel_path.split(os.path.sep))
@cached_property
def path(self):
# type: () -> Union[bytes, Text]
@ -316,18 +321,17 @@ class SourceFile(object):
if self.dir_path == "":
return True
parts = self.dir_path.split(os.path.sep)
parts = self.rel_path_parts
if (parts[0] in self.root_dir_non_test or
any(item in self.dir_non_test for item in parts) or
any(parts[:len(path)] == list(path) for path in self.dir_path_non_test)):
any(parts[:len(path)] == path for path in self.dir_path_non_test)):
return True
return False
def in_conformance_checker_dir(self):
# type: () -> bool
return (self.dir_path == "conformance-checkers" or
self.dir_path.startswith("conformance-checkers" + os.path.sep))
return self.rel_path_parts[0] == "conformance-checkers"
@property
def name_is_non_test(self):
@ -395,10 +399,10 @@ class SourceFile(object):
be a webdriver spec test file"""
# wdspec tests are in subdirectories of /webdriver excluding __init__.py
# files.
rel_dir_tree = self.rel_path.split(os.path.sep)
return (((rel_dir_tree[0] == "webdriver" and len(rel_dir_tree) > 1) or
(rel_dir_tree[:2] == ["infrastructure", "webdriver"] and
len(rel_dir_tree) > 2)) and
rel_path_parts = self.rel_path_parts
return (((rel_path_parts[0] == "webdriver" and len(rel_path_parts) > 1) or
(rel_path_parts[:2] == ("infrastructure", "webdriver") and
len(rel_path_parts) > 2)) and
self.filename not in ("__init__.py", "conftest.py") and
fnmatch(self.filename, wd_pattern))
@ -785,6 +789,8 @@ class SourceFile(object):
if self.items_cache:
return self.items_cache
drop_cached = "root" not in self.__dict__
if self.name_is_non_test:
rv = "support", [
SupportFile(
@ -957,4 +963,11 @@ class SourceFile(object):
self.items_cache = rv
if drop_cached and "__cached_properties__" in self.__dict__:
cached_properties = self.__dict__["__cached_properties__"]
for key in cached_properties:
if key in self.__dict__:
del self.__dict__[key]
del self.__dict__["__cached_properties__"]
return rv

Просмотреть файл

@ -5,7 +5,7 @@ import mock
import hypothesis as h
import hypothesis.strategies as hs
import pytest
from six import iteritems
from .. import manifest, sourcefile, item, utils
@ -18,7 +18,10 @@ if MYPY:
def SourceFileWithTest(path, hash, cls, **kwargs):
# type: (str, str, Type[item.ManifestItem], **Any) -> sourcefile.SourceFile
s = mock.Mock(rel_path=path, hash=hash)
rel_path_parts = tuple(path.split(os.path.sep))
s = mock.Mock(rel_path=path,
rel_path_parts=rel_path_parts,
hash=hash)
if cls == item.SupportFile:
test = cls("/foobar", path)
else:
@ -29,24 +32,15 @@ def SourceFileWithTest(path, hash, cls, **kwargs):
def SourceFileWithTests(path, hash, cls, variants):
# type: (str, str, Type[item.URLManifestItem], **Any) -> sourcefile.SourceFile
s = mock.Mock(rel_path=path, hash=hash)
rel_path_parts = tuple(path.split(os.path.sep))
s = mock.Mock(rel_path=path,
rel_path_parts=rel_path_parts,
hash=hash)
tests = [cls("/foobar", path, "/", item[0], **item[1]) for item in variants]
s.manifest_items = mock.Mock(return_value=(cls.item_type, tests))
return s # type: ignore
@hs.composite
def rel_dir_file_path(draw):
    """Hypothesis strategy: a relative file path of 1-20 characters built
    from "a" segments joined by the OS path separator, normalised for the
    current platform."""
    length = draw(hs.integers(min_value=1, max_value=20))
    if length == 1:
        return "a"
    else:
        # First and last characters are fixed to "a" so the path never
        # starts or ends with a separator.
        remaining = length - 2
        alphabet = "a" + os.path.sep
        mid = draw(hs.text(alphabet=alphabet, min_size=remaining, max_size=remaining))
        return os.path.normcase("a" + mid + "a")
@hs.composite
def sourcefile_strategy(draw):
item_classes = [item.TestharnessTest, item.RefTest,
@ -54,26 +48,64 @@ def sourcefile_strategy(draw):
item.ConformanceCheckerTest, item.SupportFile]
cls = draw(hs.sampled_from(item_classes))
path = draw(rel_dir_file_path())
hash = draw(hs.text(alphabet="0123456789abcdef", min_size=40, max_size=40))
s = mock.Mock(rel_path=path, hash=hash)
path = u"a"
rel_path_parts = tuple(path.split(os.path.sep))
hash = draw(hs.text(alphabet=u"0123456789abcdef", min_size=40, max_size=40))
s = mock.Mock(rel_path=path,
rel_path_parts=rel_path_parts,
hash=hash)
if cls is item.RefTest:
ref_path = draw(rel_dir_file_path())
h.assume(path != ref_path)
ref_path = u"b"
ref_eq = draw(hs.sampled_from(["==", "!="]))
test = cls("/foobar", path, "/", utils.from_os_path(path), references=[(utils.from_os_path(ref_path), ref_eq)])
elif cls is item.SupportFile:
test = cls("/foobar", path)
else:
test = cls("/foobar", path, "/", utils.from_os_path(path))
test = cls("/foobar", path, "/", "foobar")
s.manifest_items = mock.Mock(return_value=(cls.item_type, [test]))
return s
@h.given(hs.lists(sourcefile_strategy(),
min_size=1, max_size=1000, unique_by=lambda x: x.rel_path))
@hs.composite
def manifest_tree(draw):
    """Hypothesis strategy: a flat list of mock source files laid out as a
    random directory tree, with RefTest references drawn from names and
    from previously generated reftest URLs."""
    # Directory-entry names exclude NUL, path separators and characters
    # invalid on common filesystems.
    names = hs.text(alphabet=hs.characters(blacklist_characters=u"\0/\\:*\"?<>|"), min_size=1)
    tree = hs.recursive(sourcefile_strategy(),
                        lambda children: hs.dictionaries(names, children, min_size=1),
                        max_leaves=10)

    generated_root = draw(tree)
    # The root must be a directory, not a bare source file.
    h.assume(isinstance(generated_root, dict))

    reftest_urls = []
    output = []
    stack = [((k,), v) for k, v in iteritems(generated_root)]
    while stack:
        path, node = stack.pop()
        if isinstance(node, dict):
            # Interior node: descend into the directory.
            stack.extend((path + (k,), v) for k, v in iteritems(node))
        else:
            # Leaf node: a mock source file; fix up its path attributes to
            # match its position in the generated tree.
            rel_path = os.path.sep.join(path)
            node.rel_path = rel_path
            node.rel_path_parts = tuple(path)
            for test_item in node.manifest_items.return_value[1]:
                test_item.path = rel_path
                if isinstance(test_item, item.RefTest):
                    # Prefer referencing an already-generated reftest URL so
                    # realistic reference chains can occur.
                    if reftest_urls:
                        possible_urls = hs.sampled_from(reftest_urls) | names
                    else:
                        possible_urls = names
                    reference = hs.tuples(hs.sampled_from([u"==", u"!="]),
                                          possible_urls)
                    references = hs.lists(reference, min_size=1, unique=True)
                    test_item.references = draw(references)
                    reftest_urls.append(test_item.url)
            output.append(node)

    return output
@h.given(manifest_tree())
@h.example([SourceFileWithTest("a", "0"*40, item.ConformanceCheckerTest)])
def test_manifest_to_json(s):
m = manifest.Manifest()
@ -87,9 +119,7 @@ def test_manifest_to_json(s):
assert loaded.to_json() == json_str
@h.given(hs.lists(sourcefile_strategy(),
min_size=1, unique_by=lambda x: x.rel_path))
@h.given(manifest_tree())
@h.example([SourceFileWithTest("a", "0"*40, item.TestharnessTest)])
@h.example([SourceFileWithTest("a", "0"*40, item.RefTest, references=[("/aa", "==")])])
def test_manifest_idempotent(s):
@ -107,63 +137,22 @@ def test_manifest_idempotent(s):
def test_manifest_to_json_forwardslash():
m = manifest.Manifest()
s = SourceFileWithTest("a/b", "0"*40, item.TestharnessTest)
s = SourceFileWithTest("a" + os.path.sep + "b", "0"*40, item.TestharnessTest)
assert m.update([(s, True)]) is True
assert m.to_json() == {
'paths': {
'a/b': ('0000000000000000000000000000000000000000', 'testharness')
},
'version': 7,
'version': 8,
'url_base': '/',
'items': {
'testharness': {
'a/b': [('a/b', {})]
}
'testharness': {'a': {'b': [
'0000000000000000000000000000000000000000',
(None, {})
]}},
}
}
@pytest.mark.skipif(os.sep != "\\", reason="backslash path")
def test_manifest_to_json_backslash():
    # On Windows, OS-style backslash paths must serialise to forward-slash
    # keys in the JSON manifest.
    m = manifest.Manifest()

    s = SourceFileWithTest("a\\b", "0"*40, item.TestharnessTest)

    assert m.update([(s, True)]) is True

    assert m.to_json() == {
        'paths': {
            'a/b': ('0000000000000000000000000000000000000000', 'testharness')
        },
        'version': 7,
        'url_base': '/',
        'items': {
            'testharness': {
                'a/b': [('a/b', {})]
            }
        }
    }
def test_manifest_from_json_backslash():
    # JSON manifest paths are always forward-slash separated, so a
    # backslash key must be rejected when loading.
    json_obj = {
        'paths': {
            'a\\b': ('0000000000000000000000000000000000000000', 'testharness')
        },
        'version': 7,
        'url_base': '/',
        'items': {
            'testharness': {
                'a\\b': [['a/b', {}]]
            }
        }
    }

    with pytest.raises(ValueError):
        manifest.Manifest.from_json("/", json_obj)
def test_reftest_computation_chain():
m = manifest.Manifest()
@ -228,9 +217,7 @@ def test_no_update_delete():
test1 = s1.manifest_items()[1][0]
s1_1 = SourceFileWithTest("test1", "1"*40, item.ManualTest)
m.update([(s1_1.rel_path, False)])
m.update([(s1.rel_path, False)])
assert list(m) == [("testharness", test1.path, {test1})]
@ -268,9 +255,10 @@ def test_update_from_json_modified():
m.update([(s2, True)])
json_str = m.to_json()
assert json_str == {
'items': {'testharness': {'test1': [('test1', {"timeout": "long"})]}},
'paths': {'test1': ('1111111111111111111111111111111111111111',
'testharness')},
'items': {'testharness': {'test1': [
"1"*40,
(None, {'timeout': 'long'})
]}},
'url_base': '/',
'version': 7
'version': 8
}

Просмотреть файл

@ -0,0 +1,329 @@
from collections import MutableMapping
from six import itervalues, iteritems
MYPY = False
if MYPY:
# MYPY is set to True when run under Mypy.
from typing import Any
from typing import Dict
from typing import Iterator
from typing import List
from typing import Optional
from typing import Set
from typing import Text
from typing import Tuple
from typing import Type
from typing import Union
# avoid actually importing these, they're only used by type comments
from . import item
from . import manifest
if MYPY:
TypeDataType = MutableMapping[Tuple[Text, ...], Set[item.ManifestItem]]
PathHashType = MutableMapping[Tuple[Text, ...], Text]
else:
TypeDataType = MutableMapping
PathHashType = MutableMapping
class TypeData(TypeDataType):
def __init__(self, m, type_cls):
    # type: (manifest.Manifest, Type[item.ManifestItem]) -> None
    """Dict-like object containing the TestItems for each test type.

    Loading an actual Item class for each test is unnecessarily
    slow, so this class allows lazy-loading of the test
    items. When the manifest is loaded we store the raw json
    corresponding to the test type, and only create an Item
    subclass when the test is accessed. In order to remain
    API-compatible with consumers that depend on getting an Item
    from iteration, we do eagerly load all items when iterating
    over the class."""
    self._manifest = m
    self._type_cls = type_cls  # type: Type[item.ManifestItem]
    # Path-segment trie of raw JSON; leaves are lists of [hash, test, ...].
    self._json_data = {}  # type: Dict[Text, Any]
    # Path-segment trie of materialised items; leaves are sets of items.
    self._data = {}  # type: Dict[Text, Any]
    # File hashes for paths that have been materialised.
    self._hashes = {}  # type: Dict[Tuple[Text, ...], Text]
    self.hashes = PathHash(self)
def _delete_node(self, data, key):
    # type: (Dict[Text, Any], Tuple[Text, ...]) -> None
    """delete a path from a Dict data with a given key"""
    # Walk down to the parent of the leaf, remembering the path taken so
    # empty ancestor dicts can be pruned afterwards.
    path = []
    node = data
    for pathseg in key[:-1]:
        path.append((node, pathseg))
        node = node[pathseg]
        if not isinstance(node, dict):
            # An intermediate segment resolved to a leaf: key is invalid.
            raise KeyError(key)

    del node[key[-1]]

    # Prune now-empty ancestor dicts so the trie stays minimal.
    while path:
        node, pathseg = path.pop()
        if len(node[pathseg]) == 0:
            del node[pathseg]
        else:
            break
def __getitem__(self, key):
    # type: (Tuple[Text, ...]) -> Set[item.ManifestItem]
    """Return the set of items at the given path, materialising them from
    raw JSON on first access and caching the result in the data trie."""
    # Fast path: already materialised in the data trie.
    node = self._data  # type: Union[Dict[Text, Any], Set[item.ManifestItem], List[Any]]
    for pathseg in key:
        if isinstance(node, dict) and pathseg in node:
            node = node[pathseg]
        else:
            break
    else:
        if isinstance(node, set):
            return node
        else:
            raise KeyError(key)

    # Slow path: walk the raw-JSON trie.
    node = self._json_data
    found = False
    for pathseg in key:
        if isinstance(node, dict) and pathseg in node:
            node = node[pathseg]
        else:
            break
    else:
        found = True

    if not found:
        raise KeyError(key)

    if not isinstance(node, list):
        # Key names an interior directory node, not a test leaf.
        raise KeyError(key)

    # Leaf format is [file_hash, test_json, ...].
    self._hashes[key] = node[0]

    data = set()
    path = "/".join(key)
    for test in node[1:]:
        manifest_item = self._type_cls.from_json(self._manifest, path, test)
        data.add(manifest_item)

    # Cache the materialised set in the data trie...
    node = self._data
    assert isinstance(node, dict)
    for pathseg in key[:-1]:
        node = node.setdefault(pathseg, {})
        assert isinstance(node, dict)
    assert key[-1] not in node
    node[key[-1]] = data

    # ...and drop the now-consumed raw-JSON leaf.
    self._delete_node(self._json_data, key)

    return data
def __setitem__(self, key, value):
# type: (Tuple[Text, ...], Set[item.ManifestItem]) -> None
try:
self._delete_node(self._json_data, key)
except KeyError:
pass
node = self._data
for i, pathseg in enumerate(key[:-1]):
node = node.setdefault(pathseg, {})
if not isinstance(node, dict):
raise KeyError("%r is a child of a test (%r)" % (key, key[:i+1]))
node[key[-1]] = value
def __delitem__(self, key):
# type: (Tuple[Text, ...]) -> None
try:
self._delete_node(self._data, key)
except KeyError:
self._delete_node(self._json_data, key)
else:
try:
del self._hashes[key]
except KeyError:
pass
def __iter__(self):
# type: () -> Iterator[Tuple[Text, ...]]
"""Iterator over keys in the TypeData in codepoint order"""
data_node = self._data # type: Optional[Dict[Text, Any]]
json_node = self._json_data # type: Optional[Dict[Text, Any]]
path = tuple() # type: Tuple[Text, ...]
stack = [(data_node, json_node, path)]
while stack:
data_node, json_node, path = stack.pop()
if isinstance(data_node, set) or isinstance(json_node, list):
assert data_node is None or json_node is None
yield path
else:
assert data_node is None or isinstance(data_node, dict)
assert json_node is None or isinstance(json_node, dict)
keys = set() # type: Set[Text]
if data_node is not None:
keys |= set(iter(data_node))
if json_node is not None:
keys |= set(iter(json_node))
for key in sorted(keys, reverse=True):
stack.append((data_node.get(key) if data_node is not None else None,
json_node.get(key) if json_node is not None else None,
path + (key,)))
def __len__(self):
# type: () -> int
count = 0
stack = [self._data]
while stack:
v = stack.pop()
if isinstance(v, set):
count += 1
else:
stack.extend(itervalues(v))
stack = [self._json_data]
while stack:
v = stack.pop()
if isinstance(v, list):
count += 1
else:
stack.extend(itervalues(v))
return count
def __nonzero__(self):
# type: () -> bool
return bool(self._data) or bool(self._json_data)
__bool__ = __nonzero__
def __contains__(self, key):
# type: (Any) -> bool
# we provide our own impl of this to avoid calling __getitem__ and generating items for
# those in self._json_data
node = self._data
for pathseg in key:
if pathseg in node:
node = node[pathseg]
else:
break
else:
return bool(isinstance(node, set))
node = self._json_data
for pathseg in key:
if pathseg in node:
node = node[pathseg]
else:
break
else:
return bool(isinstance(node, list))
return False
def clear(self):
    # type: () -> None
    """Drop all stored data, both materialised and raw JSON.

    Clearing the three dicts in place is much quicker than the
    item-by-item default defined by MutableMapping.
    """
    self._data.clear()
    self._json_data.clear()
    self._hashes.clear()
def set_json(self, json_data):
    # type: (Dict[Text, Any]) -> None
    """Adopt *json_data* as the raw JSON trie for this type.

    Ownership of the object graph passes to this object: the caller
    must not mutate any part of it afterwards, since it is stored
    without copying and modified in place as items are materialised.
    """
    if not self._json_data:
        self._json_data = json_data
    else:
        raise ValueError("set_json call when JSON data is not empty")
def to_json(self):
    # type: () -> Dict[Text, Any]
    """Serialise the current state back into manifest JSON form.

    The returned object may share structure with the internal tries
    and is only guaranteed to be valid until the next __getitem__,
    __setitem__ or __delitem__ call, so the caller must not mutate
    any part of the returned object graph.
    """
    rv = self._json_data.copy()
    # Overlay the materialised trie onto the (shallow-copied) JSON trie.
    pending = [(self._data, rv, ())]  # type: List[Tuple[Dict[Text, Any], Dict[Text, Any], Tuple[Text, ...]]]
    while pending:
        data_node, json_node, prefix = pending.pop()
        for seg, subtree in data_node.items():
            full_key = prefix + (seg,)
            if isinstance(subtree, set):
                # Leaf: [hash, *serialised tests in sorted order]; a
                # path is never in both tries at once.
                assert seg not in json_node
                json_node[seg] = [self._hashes.get(full_key)] + sorted(test.to_json() for test in subtree)
            else:
                json_node[seg] = json_node.get(seg, {}).copy()
                pending.append((subtree, json_node[seg], full_key))
    return rv
class PathHash(PathHashType):
    """Mapping from a test path (tuple of path segments) to the hash of
    its source file.

    This is a view onto a TypeData object: the hash for a path lives
    either in the TypeData's _hashes dict (once the path's items have
    been materialised) or as element 0 of the raw JSON leaf list for
    that path. Keys cannot be added or removed through this view; they
    must match the paths present in the underlying TypeData.
    """

    def __init__(self, data):
        # type: (TypeData) -> None
        self._data = data

    def __getitem__(self, k):
        # type: (Tuple[Text, ...]) -> Text
        """Return the source-file hash for path *k*."""
        if k not in self._data:
            raise KeyError(k)

        if k in self._data._hashes:
            return self._data._hashes[k]

        # Fall back to the raw JSON trie, where the hash is stored as
        # the first element of the leaf list.
        node = self._data._json_data
        for pathseg in k:
            if pathseg in node:
                node = node[pathseg]
            else:
                break
        else:
            return node[0]  # type: ignore

        # __contains__ reported the key present, so one of the lookups
        # above should have succeeded.
        assert False, "unreachable"
        raise KeyError(k)

    def __setitem__(self, k, v):
        # type: (Tuple[Text, ...], Text) -> None
        """Set the source-file hash for the existing path *k*."""
        if k not in self._data:
            raise KeyError(k)

        if k in self._data._hashes:
            self._data._hashes[k] = v
            # Materialised paths have no raw JSON left to update.
            return

        node = self._data._json_data
        for pathseg in k:
            if pathseg in node:
                node = node[pathseg]
            else:
                break
        else:
            node[0] = v  # type: ignore
            return

        self._data._hashes[k] = v

    def __delitem__(self, k):
        # type: (Tuple[Text, ...]) -> None
        # Deletion would desynchronise this view from the TypeData.
        raise ValueError("keys here must match underlying data")

    def __iter__(self):
        # type: () -> Iterator[Tuple[Text, ...]]
        return iter(self._data)

    def __len__(self):
        # type: () -> int
        return len(self._data)

Просмотреть файл

@ -26,7 +26,8 @@ def update(tests_root, # type: str
manifest_path=None, # type: Optional[str]
working_copy=True, # type: bool
cache_root=None, # type: Optional[str]
rebuild=False # type: bool
rebuild=False, # type: bool
parallel=True # type: bool
):
# type: (...) -> bool
logger.warning("Deprecated; use manifest.load_and_update instead")
@ -34,7 +35,7 @@ def update(tests_root, # type: str
tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root,
working_copy, rebuild)
return manifest.update(tree)
return manifest.update(tree, parallel)
def update_from_cli(**kwargs):
@ -51,7 +52,8 @@ def update_from_cli(**kwargs):
kwargs["url_base"],
update=True,
rebuild=kwargs["rebuild"],
cache_root=kwargs["cache_root"])
cache_root=kwargs["cache_root"],
parallel=kwargs["parallel"])
def abs_path(path):
@ -78,6 +80,9 @@ def create_parser():
parser.add_argument(
"--cache-root", action="store", default=os.path.join(wpt_root, ".wptcache"),
help="Path in which to store any caches (default <tests_root>/.wptcache/)")
parser.add_argument(
"--no-parallel", dest="parallel", action="store_false", default=True,
help="Do not parallelize building the manifest")
return parser

Просмотреть файл

@ -103,4 +103,5 @@ class cached_property(Generic[T]):
# we can unconditionally assign as next time this won't be called
assert self.name not in obj.__dict__
rv = obj.__dict__[self.name] = self.func(obj)
obj.__dict__.setdefault("__cached_properties__", set()).add(self.name)
return rv

Просмотреть файл

@ -538,23 +538,22 @@ def create_test_tree(metadata_path, test_manifest):
for test in tests:
id_test_map[intern(ensure_str(test.id))] = test_file_data
dir_path = os.path.split(test_path)[0].replace(os.path.sep, "/")
dir_path = os.path.dirname(test_path)
while True:
if dir_path:
dir_id = dir_path + "/__dir__"
else:
dir_id = "__dir__"
dir_id = intern(ensure_str((test_manifest.url_base + dir_id).lstrip("/")))
if dir_id not in id_test_map:
test_file_data = TestFileData(intern(ensure_str(test_manifest.url_base)),
None,
metadata_path,
dir_id,
[])
id_test_map[dir_id] = test_file_data
if not dir_path or dir_path in id_test_map:
dir_meta_path = os.path.join(dir_path, "__dir__")
dir_id = (test_manifest.url_base + dir_meta_path.replace(os.path.sep, "/")).lstrip("/")
if dir_id in id_test_map:
break
test_file_data = TestFileData(intern(test_manifest.url_base),
None,
metadata_path,
dir_meta_path,
[])
id_test_map[dir_id] = test_file_data
dir_path = os.path.dirname(dir_path)
if not dir_path:
break
dir_path = dir_path.rsplit("/", 1)[0] if "/" in dir_path else ""
return id_test_map

Просмотреть файл

@ -12,7 +12,7 @@ from mozlog import structuredlog, handlers, formatters
here = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(here, os.pardir, os.pardir, os.pardir))
from manifest import manifest, item as manifest_item
from manifest import manifest, item as manifest_item, utils
def rel_path_to_test_url(rel_path):
@ -21,7 +21,9 @@ def rel_path_to_test_url(rel_path):
def SourceFileWithTest(path, hash, cls, *args):
s = mock.Mock(rel_path=path, hash=hash)
path_parts = tuple(path.split("/"))
path = utils.to_os_path(path)
s = mock.Mock(rel_path=path, rel_path_parts=path_parts, hash=hash)
test = cls("/foobar", path, "/", rel_path_to_test_url(path), *args)
s.manifest_items = mock.Mock(return_value=(cls.item_type, [test]))
return s
@ -71,6 +73,7 @@ def update(tests, *logs, **kwargs):
expected_data = {}
metadata.load_expected = lambda _, __, test_path, *args: expected_data.get(test_path)
for test_path, test_ids, test_type, manifest_str in tests:
test_path = utils.to_os_path(test_path)
expected_data[test_path] = manifestupdate.compile(BytesIO(manifest_str),
test_path,
"/",

Просмотреть файл

@ -2,7 +2,8 @@ from io import BytesIO
from mock import Mock
from manifest import manifest as wptmanifest
from manifest.item import TestharnessTest
from manifest.item import TestharnessTest, RefTest
from manifest.utils import to_os_path
from .. import manifestexpected, wpttest
dir_ini_0 = b"""\
@ -199,22 +200,25 @@ def test_expect_any_subtest_status():
def test_metadata_fuzzy():
manifest_data = {
"items": {"reftest": {"a/fuzzy.html": [["a/fuzzy.html",
[["/a/fuzzy-ref.html", "=="]],
{"fuzzy": [[["/a/fuzzy.html", '/a/fuzzy-ref.html', '=='],
[[2, 3], [10, 15]]]]}]]}},
"paths": {"a/fuzzy.html": ["0"*40, "reftest"]},
"version": 7,
"url_base": "/"}
manifest = wptmanifest.Manifest.from_json(".", manifest_data)
item = RefTest(".", "a/fuzzy.html", "/", "a/fuzzy.html",
references=[["/a/fuzzy-ref.html", "=="]],
fuzzy=[[["/a/fuzzy.html", '/a/fuzzy-ref.html', '=='],
[[2, 3], [10, 15]]]])
s = Mock(rel_path="a/fuzzy.html", rel_path_parts=("a", "fuzzy.html"), hash="0"*40)
s.manifest_items = Mock(return_value=(item.item_type, [item]))
manifest = wptmanifest.Manifest()
assert manifest.update([(s, True)]) is True
test_metadata = manifestexpected.static.compile(BytesIO(test_fuzzy),
{},
data_cls_getter=manifestexpected.data_cls_getter,
test_path="a/fuzzy.html",
url_base="/")
test = next(manifest.iterpath("a/fuzzy.html"))
test = next(manifest.iterpath(to_os_path("a/fuzzy.html")))
test_obj = wpttest.from_manifest(manifest, test, [], test_metadata.get_test(test.id))
assert test_obj.fuzzy == {('/a/fuzzy.html', '/a/fuzzy-ref.html', '=='): [[2, 3], [10, 15]]}