diff --git a/config/check_js_msg_encoding.py b/config/check_js_msg_encoding.py
index 6a0ece99b739..197a51e1fb7f 100644
--- a/config/check_js_msg_encoding.py
+++ b/config/check_js_msg_encoding.py
@@ -54,11 +54,10 @@ def check_files():
     with get_repository_from_env() as repo:
         root = repo.path
 
-        for filename in repo.get_files_in_working_directory():
-            if filename.endswith('.msg'):
-                if filename not in ignore_files:
-                    if not check_single_file(os.path.join(root, filename)):
-                        result = False
+        for filename, _ in repo.get_tracked_files_finder().find('**/*.msg'):
+            if filename not in ignore_files:
+                if not check_single_file(os.path.join(root, filename)):
+                    result = False
 
     return result
 
diff --git a/python/mozbuild/mozpack/files.py b/python/mozbuild/mozpack/files.py
index 215ec950d431..e7e13a081892 100644
--- a/python/mozbuild/mozpack/files.py
+++ b/python/mozbuild/mozpack/files.py
@@ -4,6 +4,7 @@
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import bisect
 import codecs
 import errno
 import inspect
@@ -15,11 +16,12 @@ import stat
 import subprocess
 import uuid
 import mozbuild.makeutil as makeutil
-from itertools import chain
+from itertools import chain, takewhile
 from mozbuild.preprocessor import Preprocessor
 from mozbuild.util import (
     FileAvoidWrite,
     ensure_unicode,
+    memoize,
 )
 from mozpack.executables import (
     is_executable,
@@ -1238,3 +1240,41 @@ class MercurialRevisionFinder(BaseFinder):
         self._files[path] = f
 
         return f
+
+
+class FileListFinder(BaseFinder):
+    """Finder for a literal list of file names."""
+
+    def __init__(self, files):
+        """files must be a sorted list."""
+        self._files = files
+
+    @memoize
+    def _match(self, pattern):
+        """Return a sorted list of all files matching the given pattern."""
+        # We don't use the utility _find_helper method because it's not tuned
+        # for performance in the way that we would like this class to be. That's
+        # a possible avenue for refactoring here.
+        ret = []
+        # We do this as an optimization to figure out where in the sorted list
+        # to search and where to stop searching.
+        components = pattern.split('/')
+        prefix = '/'.join(takewhile(lambda s: '*' not in s, components))
+        start = bisect.bisect_left(self._files, prefix)
+        for i in six.moves.range(start, len(self._files)):
+            f = self._files[i]
+            if not f.startswith(prefix):
+                break
+            # Skip hidden files while scanning. The remainder is anchored with
+            # a leading '/' so that top-level dotfiles are also caught when the
+            # pattern has no literal prefix (e.g. '**/*.msg').
+            if '/.' in '/' + f[len(prefix):]:
+                continue
+            if mozpath.match(f, pattern):
+                ret.append(f)
+        return ret
+
+    def find(self, pattern):
+        pattern = pattern.strip('/')
+        for path in self._match(pattern):
+            yield path, File(path)
diff --git a/python/mozversioncontrol/mozversioncontrol/__init__.py b/python/mozversioncontrol/mozversioncontrol/__init__.py
index ba3296fc5ab7..6ce4b47408b0 100644
--- a/python/mozversioncontrol/mozversioncontrol/__init__.py
+++ b/python/mozversioncontrol/mozversioncontrol/__init__.py
@@ -14,6 +14,7 @@ import sys
 
 from mozbuild.util import ensure_subprocess_env
 from mozfile import which
+from mozpack.files import FileListFinder
 
 
 class MissingVCSTool(Exception):
@@ -216,8 +217,14 @@ class Repository(object):
         '''
 
     @abc.abstractmethod
-    def get_files_in_working_directory(self):
-        """Obtain a list of managed files in the working directory."""
+    def get_tracked_files_finder(self):
+        """Obtain a mozpack.files.BaseFinder of managed files in the working
+        directory.
+
+        The Finder will have its list of all files in the repo cached for its
+        entire lifetime, so results from the Finder will not reflect, for
+        example, commits made to the repo during the Finder's lifetime.
+        """
 
     @abc.abstractmethod
     def working_directory_clean(self, untracked=False, ignored=False):
@@ -419,10 +426,11 @@ class HgRepository(Repository):
 
         return self._run('forget', *paths)
 
-    def get_files_in_working_directory(self):
+    def get_tracked_files_finder(self):
         # Can return backslashes on Windows. Normalize to forward slashes.
-        return list(p.replace('\\', '/') for p in
-                    self._run(b'files', b'-0').split('\0') if p)
+        files = list(p.replace('\\', '/') for p in
+                     self._run(b'files', b'-0').split('\0') if p)
+        return FileListFinder(files)
 
     def working_directory_clean(self, untracked=False, ignored=False):
         args = ['status', '--modified', '--added', '--removed',
@@ -549,8 +557,9 @@ class GitRepository(Repository):
 
         return self._run('reset', *paths)
 
-    def get_files_in_working_directory(self):
-        return [p for p in self._run('ls-files', '-z').split('\0') if p]
+    def get_tracked_files_finder(self):
+        files = [p for p in self._run('ls-files', '-z').split('\0') if p]
+        return FileListFinder(files)
 
     def working_directory_clean(self, untracked=False, ignored=False):
         args = ['status', '--porcelain']
diff --git a/taskcluster/taskgraph/util/hash.py b/taskcluster/taskgraph/util/hash.py
index 2db28ee1c5e7..42692162f161 100644
--- a/taskcluster/taskgraph/util/hash.py
+++ b/taskcluster/taskgraph/util/hash.py
@@ -4,8 +4,8 @@
 
 from __future__ import absolute_import, print_function, unicode_literals
 from mozbuild.util import memoize
-from mozpack.files import FileFinder
 import mozpack.path as mozpath
+from mozversioncontrol import get_repository_object
 import hashlib
 import io
 import six
@@ -21,6 +21,11 @@ def hash_path(path):
         return hashlib.sha256(fh.read()).hexdigest()
 
 
+@memoize
+def get_file_finder(base_path):
+    return get_repository_object(base_path).get_tracked_files_finder()
+
+
 def hash_paths(base_path, patterns):
     """
     Give a list of path patterns, return a digest of the contents of all
@@ -30,7 +35,7 @@ def hash_paths(base_path, patterns):
     Each file is hashed. The list of all hashes and file paths is then
     itself hashed to produce the result.
     """
-    finder = FileFinder(base_path)
+    finder = get_file_finder(base_path)
     h = hashlib.sha256()
     files = {}
     for pattern in patterns: