Bug 1497898 - Use testfile mtimes to pre-filter files iterated over for the manifest update, r=ato

Depends on D11667 Differential Revision: https://phabricator.services.mozilla.com/D8221 --HG-- extra : moz-landing-system : lando
2018-11-16 18:48:40 +00:00 · 2018-11-16 18:48:40 +00:00 · 51b7f8e4b4
--- a/testing/web-platform/tests/tools/manifest/commands.json
+++ b/testing/web-platform/tests/tools/manifest/commands.json
@ -2,5 +2,4 @@
 {"path": "update.py", "script": "run", "parser": "create_parser", "help": "Update the MANIFEST.json file",
  "virtualenv": false},
 "manifest-download":
- {"path": "download.py", "script": "run", "parser": "create_parser", "help": "Download recent pregenerated MANIFEST.json file",
-  "virtualenv": false}}
+ {"path": "download.py", "script": "run", "parser": "create_parser", "help": "Download recent pregenerated MANIFEST.json file", "virtualenv": false}}
--- a/testing/web-platform/tests/tools/manifest/item.py
+++ b/testing/web-platform/tests/tools/manifest/item.py
@ -2,19 +2,21 @@ from six.moves.urllib.parse import urljoin, urlparse
 from abc import ABCMeta, abstractproperty


-def get_source_file(source_files, tests_root, manifest, path):
-    def make_new():
+class SourceFileCache(object):
+    def __init__(self):
+        self.source_files = {}
+
+    def make_new(self, tests_root, path, url_base):
        from .sourcefile import SourceFile

-        return SourceFile(tests_root, path, manifest.url_base)
+        return SourceFile(tests_root, path, url_base)

-    if source_files is None:
-        return make_new()
+    def get(self, tests_root, manifest, path):

-    if path not in source_files:
-        source_files[path] = make_new()
+        if path not in self.source_files:
+            self.source_files[path] = self.make_new(tests_root, path, manifest.url_base)

-    return source_files[path]
+        return self.source_files[path]


 item_types = {}
@ -37,6 +39,8 @@ class ManifestItem(object):

    item_type = None

+    source_file_cache = SourceFileCache()
+
    def __init__(self, source_file, manifest=None):
        self.manifest = manifest
        self.source_file = source_file
@ -84,8 +88,8 @@ class ManifestItem(object):
        return [{}]

    @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
        return cls(source_file,
                   manifest=manifest)

@ -113,8 +117,8 @@ class URLManifestItem(ManifestItem):
        return rv

    @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
        url, extras = obj
        return cls(source_file,
                   url,
@ -145,8 +149,8 @@ class TestharnessTest(URLManifestItem):
        return rv

    @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)

        url, extras = obj
        return cls(source_file,
@ -187,8 +191,8 @@ class RefTestNode(URLManifestItem):
        return rv

    @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
        url, references, extras = obj
        return cls(source_file,
                   url,
@ -248,8 +252,8 @@ class WebDriverSpecTest(URLManifestItem):
        return rv

    @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)

        url, extras = obj
        return cls(source_file,
--- a/testing/web-platform/tests/tools/manifest/manifest.py
+++ b/testing/web-platform/tests/tools/manifest/manifest.py
@ -1,13 +1,17 @@
 import itertools
-import json
 import os
 from collections import defaultdict
-from six import iteritems, itervalues, viewkeys, string_types
+from six import iteritems, iterkeys, itervalues, string_types

-from .item import ManualTest, WebDriverSpecTest, Stub, RefTestNode, RefTest, TestharnessTest, SupportFile, ConformanceCheckerTest, VisualTest
+from .item import (ManualTest, WebDriverSpecTest, Stub, RefTestNode, RefTest,
+                   TestharnessTest, SupportFile, ConformanceCheckerTest, VisualTest)
 from .log import get_logger
 from .utils import from_os_path, to_os_path

+try:
+    import ujson as json
+except ImportError:
+    import json

 CURRENT_VERSION = 5

@ -27,11 +31,162 @@ def iterfilter(filters, iter):
        yield item


+item_classes = {"testharness": TestharnessTest,
+                "reftest": RefTest,
+                "reftest_node": RefTestNode,
+                "manual": ManualTest,
+                "stub": Stub,
+                "wdspec": WebDriverSpecTest,
+                "conformancechecker": ConformanceCheckerTest,
+                "visual": VisualTest,
+                "support": SupportFile}
+
+
+class TypeData(object):
+    def __init__(self, manifest, type_cls):
+        """Dict-like object containing the TestItems for each test type.
+
+        Loading an actual Item class for each test is unnecessarily
+        slow, so this class allows lazy-loading of the test
+        items. When the manifest is loaded we store the raw json
+        corresponding to the test type, and only create an Item
+        subclass when the test is accessed. In order to remain
+        API-compatible with consumers that depend on getting an Item
+        from iteration, we do egerly load all items when iterating
+        over the class.
+
+        """
+        self.manifest = manifest
+        self.type_cls = type_cls
+        self.data = {}
+        self.json_data = None
+        self.tests_root = None
+
+    def __getitem__(self, key):
+        if key not in self.data:
+            self.load(key)
+        return self.data[key]
+
+    def __bool__(self):
+        return bool(self.data)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __delitem__(self, key):
+        del self.data[key]
+
+    def __setitem__(self, key, value):
+        self.data[key] = value
+
+    def __contains__(self, key):
+        self.load_all()
+        return key in self.data
+
+    def __iter__(self):
+        self.load_all()
+        return self.data.__iter__()
+
+    def pop(self, key, default=None):
+        try:
+            value = self[key]
+        except ValueError:
+            value = default
+        else:
+            del self.data[key]
+        return value
+
+    def get(self, key, default=None):
+        try:
+            return self[key]
+        except ValueError:
+            return default
+
+    def itervalues(self):
+        self.load_all()
+        return itervalues(self.data)
+
+    def iteritems(self):
+        self.load_all()
+        for path, tests in iteritems(self.data):
+            yield path, tests
+
+    def load(self, key):
+        """Load a specific Item given a path"""
+        if self.json_data is not None:
+            data = set()
+            path = from_os_path(key)
+            for test in self.json_data.get(path, []):
+                manifest_item = self.type_cls.from_json(self.manifest,
+                                                        self.tests_root,
+                                                        path,
+                                                        test)
+                data.add(manifest_item)
+            self.data[key] = data
+        else:
+            raise ValueError
+
+    def load_all(self):
+        """Load all test items in this class"""
+        if self.json_data is not None:
+            for path, value in iteritems(self.json_data):
+                key = to_os_path(path)
+                if key in self.data:
+                    continue
+                data = set()
+                for test in self.json_data.get(path, []):
+                    manifest_item = self.type_cls.from_json(self.manifest,
+                                                            self.tests_root,
+                                                            path,
+                                                            test)
+                    data.add(manifest_item)
+                self.data[key] = data
+            self.json_data = None
+
+    def set_json(self, tests_root, data):
+        if not isinstance(data, dict):
+            raise ValueError("Got a %s expected a dict" % (type(data)))
+        self.tests_root = tests_root
+        self.json_data = data
+
+    def paths(self):
+        """Get a list of all paths containing items of this type,
+        without actually constructing all the items"""
+        rv = set(iterkeys(self.data))
+        if self.json_data:
+            rv |= set(to_os_path(item) for item in iterkeys(self.json_data))
+        return rv
+
+
+class ManifestData(dict):
+    def __init__(self, manifest, meta_filters=None):
+        """Dictionary subclass containing a TypeData instance for each test type,
+        keyed by type name"""
+        self.initialized = False
+        for key, value in item_classes.iteritems():
+            self[key] = TypeData(manifest, value, meta_filters=meta_filters)
+        self.initialized = True
+        self.json_obj = None
+
+    def __setitem__(self, key, value):
+        if self.initialized:
+            raise AttributeError
+        dict.__setitem__(self, key, value)
+
+    def paths(self):
+        """Get a list of all paths containing test items
+        without actually constructing all the items"""
+        rv = set()
+        for item_data in itervalues(self):
+            rv |= set(item_data.paths())
+        return rv
+
+
 class Manifest(object):
    def __init__(self, url_base="/"):
        assert url_base is not None
        self._path_hash = {}
-        self._data = defaultdict(dict)
+        self._data = ManifestData(self)
        self._reftest_nodes_by_url = None
        self.url_base = url_base

@ -42,7 +197,7 @@ class Manifest(object):
        if not types:
            types = sorted(self._data.keys())
        for item_type in types:
-            for path, tests in sorted(iteritems(self._data[item_type])):
+            for path, tests in sorted(self._data[item_type]):
                yield item_type, path, tests

    def iterpath(self, path):
@ -74,61 +229,75 @@ class Manifest(object):
        return self.reftest_nodes_by_url.get(url)

    def update(self, tree):
-        new_data = defaultdict(dict)
-        new_hashes = {}
+        """Update the manifest given an iterable of items that make up the updated manifest.

+        The iterable must either generate tuples of the form (SourceFile, True) for paths
+        that are to be updated, or (path, False) for items that are not to be updated. This
+        unusual API is designed as an optimistaion meaning that SourceFile items need not be
+        constructed in the case we are not updating a path, but the absence of an item from
+        the iterator may be used to remove defunct entries from the manifest."""
        reftest_nodes = []
-        old_files = defaultdict(set, {k: set(viewkeys(v)) for k, v in iteritems(self._data)})
+        seen_files = set()

        changed = False
        reftest_changes = False

-        for source_file in tree:
-            rel_path = source_file.rel_path
-            file_hash = source_file.hash
+        prev_files = self._data.paths()

-            is_new = rel_path not in self._path_hash
-            hash_changed = False
+        reftest_types = ("reftest", "reftest_node")

-            if not is_new:
-                old_hash, old_type = self._path_hash[rel_path]
-                old_files[old_type].remove(rel_path)
-                if old_hash != file_hash:
-                    new_type, manifest_items = source_file.manifest_items()
-                    hash_changed = True
-                else:
-                    new_type, manifest_items = old_type, self._data[old_type][rel_path]
-                if old_type in ("reftest", "reftest_node") and new_type != old_type:
-                    reftest_changes = True
+        for source_file, update in tree:
+            if not update:
+                rel_path = source_file
+                seen_files.add(rel_path)
            else:
-                new_type, manifest_items = source_file.manifest_items()
+                rel_path = source_file.rel_path
+                seen_files.add(rel_path)
+
+                file_hash = source_file.hash
+
+                is_new = rel_path not in self._path_hash
+                hash_changed = False
+
+                if not is_new:
+                    old_hash, old_type = self._path_hash[rel_path]
+                    if old_hash != file_hash:
+                        new_type, manifest_items = source_file.manifest_items()
+                        hash_changed = True
+                    else:
+                        new_type, manifest_items = old_type, self._data[old_type][rel_path]
+                    if old_type in reftest_types and new_type != old_type:
+                        reftest_changes = True
+                else:
+                    new_type, manifest_items = source_file.manifest_items()
+
+                if new_type in ("reftest", "reftest_node"):
+                    reftest_nodes.extend(manifest_items)
+                    if is_new or hash_changed:
+                        reftest_changes = True
+                elif new_type:
+                    self._data[new_type][rel_path] = set(manifest_items)
+
+                self._path_hash[rel_path] = (file_hash, new_type)

-            if new_type in ("reftest", "reftest_node"):
-                reftest_nodes.extend(manifest_items)
                if is_new or hash_changed:
-                    reftest_changes = True
-            elif new_type:
-                new_data[new_type][rel_path] = set(manifest_items)
+                    changed = True

-            new_hashes[rel_path] = (file_hash, new_type)
-
-            if is_new or hash_changed:
-                changed = True
-
-        if reftest_changes or old_files["reftest"] or old_files["reftest_node"]:
-            reftests, reftest_nodes, changed_hashes = self._compute_reftests(reftest_nodes)
-            new_data["reftest"] = reftests
-            new_data["reftest_node"] = reftest_nodes
-            new_hashes.update(changed_hashes)
-        else:
-            new_data["reftest"] = self._data["reftest"]
-            new_data["reftest_node"] = self._data["reftest_node"]
-
-        if any(itervalues(old_files)):
+        deleted = prev_files - seen_files
+        if deleted:
            changed = True
+            for rel_path in deleted:
+                _, old_type = self._path_hash[rel_path]
+                if old_type in reftest_types:
+                    reftest_changes = True
+                del self._path_hash[rel_path]
+                del self._data[old_type][rel_path]

-        self._data = new_data
-        self._path_hash = new_hashes
+        if reftest_changes:
+            reftests, reftest_nodes, changed_hashes = self._compute_reftests(reftest_nodes)
+            self._data["reftest"].data = reftests
+            self._data["reftest_node"].data = reftest_nodes
+            self._path_hash.update(changed_hashes)

        return changed

@ -168,7 +337,7 @@ class Manifest(object):
                [t for t in sorted(test.to_json() for test in tests)]
                for path, tests in iteritems(type_paths)
            }
-            for test_type, type_paths in iteritems(self._data)
+            for test_type, type_paths in self._data.iteritems() if type_paths
        }
        rv = {"url_base": self.url_base,
              "paths": {from_os_path(k): v for k, v in iteritems(self._path_hash)},
@ -188,20 +357,8 @@ class Manifest(object):

        self._path_hash = {to_os_path(k): v for k, v in iteritems(obj["paths"])}

-        item_classes = {"testharness": TestharnessTest,
-                        "reftest": RefTest,
-                        "reftest_node": RefTestNode,
-                        "manual": ManualTest,
-                        "stub": Stub,
-                        "wdspec": WebDriverSpecTest,
-                        "conformancechecker": ConformanceCheckerTest,
-                        "visual": VisualTest,
-                        "support": SupportFile}
-
        meta_filters = meta_filters or []

-        source_files = {}
-
        for test_type, type_paths in iteritems(obj["items"]):
            if test_type not in item_classes:
                raise ManifestError
@ -209,18 +366,7 @@ class Manifest(object):
            if types and test_type not in types:
                continue

-            test_cls = item_classes[test_type]
-            tests = defaultdict(set)
-            for path, manifest_tests in iteritems(type_paths):
-                path = to_os_path(path)
-                for test in iterfilter(meta_filters, manifest_tests):
-                    manifest_item = test_cls.from_json(self,
-                                                       tests_root,
-                                                       path,
-                                                       test,
-                                                       source_files=source_files)
-                    tests[path].add(manifest_item)
-            self._data[test_type] = tests
+            self._data[test_type].set_json(tests_root, type_paths)

        return self

@ -252,5 +398,5 @@ def write(manifest, manifest_path):
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    with open(manifest_path, "wb") as f:
-        json.dump(manifest.to_json(), f, sort_keys=True, indent=1, separators=(',', ': '))
+        json.dump(manifest.to_json(), f, sort_keys=True, indent=1)
        f.write("\n")
--- a/testing/web-platform/tests/tools/manifest/tests/test_manifest.py
+++ b/testing/web-platform/tests/tools/manifest/tests/test_manifest.py
@ -72,7 +72,7 @@ def sourcefile_strategy(draw):
 def test_manifest_to_json(s):
    m = manifest.Manifest()

-    assert m.update(s) is True
+    assert m.update((item, True) for item in s) is True

    json_str = m.to_json()
    loaded = manifest.Manifest.from_json("/", json_str)
@ -90,11 +90,11 @@ def test_manifest_to_json(s):
 def test_manifest_idempotent(s):
    m = manifest.Manifest()

-    assert m.update(s) is True
+    assert m.update((item, True) for item in s) is True

    m1 = list(m)

-    assert m.update(s) is False
+    assert m.update((item, True) for item in s) is False

    assert list(m) == m1

@ -104,7 +104,7 @@ def test_manifest_to_json_forwardslash():

    s = SourceFileWithTest("a/b", "0"*40, item.TestharnessTest)

-    assert m.update([s]) is True
+    assert m.update([(s, True)]) is True

    assert m.to_json() == {
        'paths': {
@ -113,8 +113,6 @@ def test_manifest_to_json_forwardslash():
        'version': 5,
        'url_base': '/',
        'items': {
-            'reftest': {},
-            'reftest_node': {},
            'testharness': {
                'a/b': [['/a/b', {}]]
            }
@ -128,7 +126,7 @@ def test_manifest_to_json_backslash():
    s = SourceFileWithTest("a\\b", "0"*40, item.TestharnessTest)

    if os.path.sep == "\\":
-        assert m.update([s]) is True
+        assert m.update([(s, True)]) is True

        assert m.to_json() == {
            'paths': {
@ -137,8 +135,6 @@ def test_manifest_to_json_backslash():
            'version': 5,
            'url_base': '/',
            'items': {
-                'reftest': {},
-                'reftest_node': {},
                'testharness': {
                    'a/b': [['/a/b', {}]]
                }
@ -148,7 +144,7 @@ def test_manifest_to_json_backslash():
        with pytest.raises(ValueError):
            # one of these must raise ValueError
            # the first must return True if it doesn't raise
-            assert m.update([s]) is True
+            assert m.update([(s, True)]) is True
            m.to_json()


@ -160,8 +156,6 @@ def test_manifest_from_json_backslash():
        'version': 5,
        'url_base': '/',
        'items': {
-            'reftest': {},
-            'reftest_node': {},
            'testharness': {
                'a\\b': [['/a/b', {}]]
            }
@ -178,7 +172,7 @@ def test_reftest_computation_chain():
    s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
    s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])

-    m.update([s1, s2])
+    m.update([(s1, True), (s2, True)])

    test1 = s1.manifest_items()[1][0]
    test2 = s2.manifest_items()[1][0]
@ -194,7 +188,7 @@ def test_reftest_computation_chain_update_add():
    s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])
    test2 = s2.manifest_items()[1][0]

-    assert m.update([s2]) is True
+    assert m.update([(s2, True)]) is True

    assert list(m) == [("reftest", test2.path, {test2})]

@ -202,7 +196,7 @@ def test_reftest_computation_chain_update_add():
    test1 = s1.manifest_items()[1][0]

    # s2's hash is unchanged, but it has gone from a test to a node
-    assert m.update([s1, s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True

    test2_node = test2.to_RefTestNode()

@ -216,7 +210,7 @@ def test_reftest_computation_chain_update_remove():
    s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
    s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])

-    assert m.update([s1, s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True

    test1 = s1.manifest_items()[1][0]
    test2 = s2.manifest_items()[1][0]
@ -226,7 +220,7 @@ def test_reftest_computation_chain_update_remove():
                       ("reftest_node", test2.path, {test2_node})]

    # s2's hash is unchanged, but it has gone from a node to a test
-    assert m.update([s2]) is True
+    assert m.update([(s2, True)]) is True

    assert list(m) == [("reftest", test2.path, {test2})]

@ -236,7 +230,7 @@ def test_reftest_computation_chain_update_test_type():

    s1 = SourceFileWithTest("test", "0"*40, item.RefTest, [("/test-ref", "==")])

-    assert m.update([s1]) is True
+    assert m.update([(s1, True)]) is True

    test1 = s1.manifest_items()[1][0]

@ -246,7 +240,7 @@ def test_reftest_computation_chain_update_test_type():
    # based on the file contents). The updated manifest should not includes the
    # old reftest.
    s2 = SourceFileWithTest("test", "1"*40, item.TestharnessTest)
-    assert m.update([s2]) is True
+    assert m.update([(s2, True)]) is True

    test2 = s2.manifest_items()[1][0]

@ -259,7 +253,7 @@ def test_reftest_computation_chain_update_node_change():
    s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
    s2 = SourceFileWithTest("test2", "0"*40, item.RefTestNode, [("/test3", "==")])

-    assert m.update([s1, s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True

    test1 = s1.manifest_items()[1][0]
    test2 = s2.manifest_items()[1][0]
@ -270,7 +264,7 @@ def test_reftest_computation_chain_update_node_change():
    #test2 changes to support type
    s2 = SourceFileWithTest("test2", "1"*40, item.SupportFile)

-    assert m.update([s1,s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True
    test3 = s2.manifest_items()[1][0]

    assert list(m) == [("reftest", test1.path, {test1}),
@ -280,12 +274,14 @@ def test_reftest_computation_chain_update_node_change():
 def test_iterpath():
    m = manifest.Manifest()

+    # This has multiple test types from the same file, which isn't really supported,
+    # so pretend they have different hashes
    sources = [SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test1-ref", "==")]),
               SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test2-ref", "==")]),
-               SourceFileWithTests("test2", "0"*40, item.TestharnessTest, [("/test2-1.html",),
+               SourceFileWithTests("test2", "1"*40, item.TestharnessTest, [("/test2-1.html",),
                                                                           ("/test2-2.html",)]),
               SourceFileWithTest("test3", "0"*40, item.TestharnessTest)]
-    m.update(sources)
+    m.update([(s, True) for s in sources])

    assert set(item.url for item in m.iterpath("test2")) == set(["/test2",
                                                                 "/test2-1.html",
@ -296,12 +292,14 @@ def test_iterpath():
 def test_filter():
    m = manifest.Manifest()

+    # This has multiple test types from the same file, which isn't really supported,
+    # so pretend they have different hashes
    sources = [SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test1-ref", "==")]),
-               SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test2-ref", "==")]),
+               SourceFileWithTest("test2", "1"*40, item.RefTest, [("/test2-ref", "==")]),
               SourceFileWithTests("test2", "0"*40, item.TestharnessTest, [("/test2-1.html",),
                                                                           ("/test2-2.html",)]),
               SourceFileWithTest("test3", "0"*40, item.TestharnessTest)]
-    m.update(sources)
+    m.update([(s, True) for s in sources])

    json = m.to_json()

@ -328,7 +326,7 @@ def test_reftest_node_by_url():
    s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
    s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])

-    m.update([s1, s2])
+    m.update([(s1, True), (s2, True)])

    test1 = s1.manifest_items()[1][0]
    test2 = s2.manifest_items()[1][0]
@ -339,3 +337,44 @@ def test_reftest_node_by_url():
    m._reftest_nodes_by_url = None
    assert m.reftest_nodes_by_url == {"/test1": test1,
                                      "/test2": test2_node}
+
+
+def test_no_update():
+    m = manifest.Manifest()
+
+    s1 = SourceFileWithTest("test1", "0"*40, item.TestharnessTest)
+    s2 = SourceFileWithTest("test2", "0"*40, item.TestharnessTest)
+
+    m.update([(s1, True), (s2, True)])
+
+    test1 = s1.manifest_items()[1][0]
+    test2 = s2.manifest_items()[1][0]
+
+    assert list(m) == [("testharness", test1.path, {test1}),
+                       ("testharness", test2.path, {test2})]
+
+    s1_1 = SourceFileWithTest("test1", "1"*40, item.TestharnessTest)
+
+    m.update([(s1, True), (s2.rel_path, False)])
+
+    test1_1 = s1_1.manifest_items()[1][0]
+
+    assert list(m) == [("testharness", test1_1.path, {test1_1}),
+                       ("testharness", test2.path, {test2})]
+
+
+def test_no_update_delete():
+    m = manifest.Manifest()
+
+    s1 = SourceFileWithTest("test1", "0"*40, item.TestharnessTest)
+    s2 = SourceFileWithTest("test2", "0"*40, item.TestharnessTest)
+
+    m.update([(s1, True), (s2, True)])
+
+    s1_1 = SourceFileWithTest("test1", "1"*40, item.TestharnessTest)
+
+    m.update([(s1, True)])
+
+    test1_1 = s1_1.manifest_items()[1][0]
+
+    assert list(m) == [("testharness", test1_1.path, {test1_1})]
--- a/testing/web-platform/tests/tools/manifest/update.py
+++ b/testing/web-platform/tests/tools/manifest/update.py
@ -13,15 +13,29 @@ wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))

 logger = get_logger()

-def update(tests_root, manifest, working_copy=False):
+
+def update(tests_root, manifest, working_copy=False, cache_root=None, rebuild=False):
    logger.info("Updating manifest")
    tree = None
-    if not working_copy:
-        tree = vcs.Git.for_path(tests_root, manifest.url_base)
-    if tree is None:
-        tree = vcs.FileSystem(tests_root, manifest.url_base)
+    if cache_root is None:
+        cache_root = os.path.join(tests_root, ".cache")
+    if not os.path.exists(cache_root):
+        try:
+            os.makedirs(cache_root)
+        except IOError:
+            cache_root = None

-    return manifest.update(tree)
+    if not working_copy:
+        tree = vcs.Git.for_path(tests_root, manifest.url_base,
+                                cache_path=cache_root, rebuild=rebuild)
+    if tree is None:
+        tree = vcs.FileSystem(tests_root, manifest.url_base,
+                              cache_path=cache_root, rebuild=rebuild)
+
+    try:
+        return manifest.update(tree)
+    finally:
+        tree.dump_caches()


 def update_from_cli(**kwargs):
@ -46,7 +60,9 @@ def update_from_cli(**kwargs):

    changed = update(tests_root,
                     m,
-                     working_copy=kwargs["work"])
+                     working_copy=kwargs["work"],
+                     cache_root=kwargs["cache_root"],
+                     rebuild=kwargs["rebuild"])
    if changed:
        manifest.write(m, path)

@ -73,6 +89,9 @@ def create_parser():
    parser.add_argument(
        "--no-download", dest="download", action="store_false", default=True,
        help="Never attempt to download the manifest.")
+    parser.add_argument(
+        "--cache-root", action="store", default=os.path.join(wpt_root, ".wptcache"),
+        help="Path in which to store any caches (default <tests_root>/.wptcache/")
    return parser


@ -87,10 +106,9 @@ def find_top_repo():
    return rv


-def run(**kwargs):
+def run(*args, **kwargs):
    if kwargs["path"] is None:
        kwargs["path"] = os.path.join(kwargs["tests_root"], "MANIFEST.json")
-
    update_from_cli(**kwargs)


--- a/testing/web-platform/tests/tools/manifest/vcs.py
+++ b/testing/web-platform/tests/tools/manifest/vcs.py
@ -1,12 +1,13 @@
+import json
 import os
-import subprocess
 import platform
+import subprocess

 from .sourcefile import SourceFile


 class Git(object):
-    def __init__(self, repo_root, url_base):
+    def __init__(self, repo_root, url_base, filters=None):
        self.root = os.path.abspath(repo_root)
        self.git = Git.get_func(repo_root)
        self.url_base = url_base
@ -74,27 +75,73 @@ class Git(object):
                                 rel_path,
                                 self.url_base,
                                 hash,
-                                 contents=contents)
+                                 contents=contents), True


 class FileSystem(object):
-    def __init__(self, root, url_base):
+    def __init__(self, root, url_base, mtime_filter):
        self.root = root
        self.url_base = url_base
        from gitignore import gitignore
        self.path_filter = gitignore.PathFilter(self.root, extras=[".git/"])
+        self.mtime_filter = mtime_filter

    def __iter__(self):
-        paths = self.get_paths()
-        for path in paths:
-            yield SourceFile(self.root, path, self.url_base)
+        mtime_cache = self.mtime_cache
+        for dirpath, dirnames, filenames in self.path_filter(walk(".")):
+            for filename, path_stat in filenames:
+                # We strip the ./ prefix off the path
+                path = os.path.join(dirpath, filename)
+                if mtime_cache is None or mtime_cache.updated(path, path_stat):
+                    yield SourceFile(self.root, path, self.url_base), True
+                else:
+                    yield path, False
+        self.ignore_cache.dump()

-    def get_paths(self):
-        for dirpath, dirnames, filenames in os.walk(self.root):
-            for filename in filenames:
-                path = os.path.relpath(os.path.join(dirpath, filename), self.root)
-                if self.path_filter(path):
-                    yield path
+    def dump_caches(self):
+        for cache in [self.mtime_cache, self.ignore_cache]:
+            if cache is not None:
+                cache.dump()

-            dirnames[:] = [item for item in dirnames if self.path_filter(
-                           os.path.relpath(os.path.join(dirpath, item), self.root) + "/")]
+
+class CacheFile(object):
+    file_name = None
+
+    def __init__(self, cache_root, rebuild=False):
+        if not os.path.exists(cache_root):
+            os.makedirs(cache_root)
+        self.path = os.path.join(cache_root, self.file_name)
+        self.data = self.load(rebuild)
+        self.modified = False
+
+    def dump(self):
+        missing = set(self.data.keys()) - self.updated
+        if not missing or not self.modified:
+            return
+        for item in missing:
+            del self.data[item]
+        with open(self.path, 'w') as f:
+            json.dump(self.data, f, indent=1)
+
+    def load(self):
+        try:
+            with open(self.path, 'r') as f:
+                return json.load(f)
+        except IOError:
+            return {}
+
+    def update(self, rel_path, stat=None):
+        self.updated.add(rel_path)
+        try:
+            if stat is None:
+                stat = os.stat(os.path.join(self.root,
+                                            rel_path))
+        except Exception:
+            return True
+
+        mtime = stat.st_mtime
+        if mtime != self.data.get(rel_path):
+            self.modified = True
+            self.data[rel_path] = mtime
+            return True
+        return False