Bug 1636797 - In `hash.py`, enumerate files from the VCS rather than searching the filesystem directly r=ahal

This resolves a long-standing issue in development where `mach artifact` (and therefore `mach bootstrap`) would fail unpredictably if you had dirty, but ignored, files in your checkout. Resolving this problem often required unwieldy `hg purge`/`git ignore` incantations that are easy to get wrong.

This patch addresses the problem by doing what we "should" have been doing all along, and consulting the VCS to list tracked files rather than listing EVERY file on disk and applying heuristics to determine whether they should be included in the hash.

Differential Revision: https://phabricator.services.mozilla.com/D86780
This commit is contained in:
Ricky Stewart 2020-08-17 15:19:34 +00:00
Родитель a97cc4af06
Коммит 9719a9a5a9
4 изменённых файлов: 66 добавлений и 15 удалений

Просмотреть файл

@ -54,11 +54,10 @@ def check_files():
with get_repository_from_env() as repo:
root = repo.path
for filename in repo.get_files_in_working_directory():
if filename.endswith('.msg'):
if filename not in ignore_files:
if not check_single_file(os.path.join(root, filename)):
result = False
for filename, _ in repo.get_tracked_files_finder().find('**/*.msg'):
if filename not in ignore_files:
if not check_single_file(os.path.join(root, filename)):
result = False
return result

Просмотреть файл

@ -4,6 +4,7 @@
from __future__ import absolute_import, print_function, unicode_literals
import bisect
import codecs
import errno
import inspect
@ -15,11 +16,12 @@ import stat
import subprocess
import uuid
import mozbuild.makeutil as makeutil
from itertools import chain
from itertools import chain, takewhile
from mozbuild.preprocessor import Preprocessor
from mozbuild.util import (
FileAvoidWrite,
ensure_unicode,
memoize
)
from mozpack.executables import (
is_executable,
@ -1238,3 +1240,39 @@ class MercurialRevisionFinder(BaseFinder):
self._files[path] = f
return f
class FileListFinder(BaseFinder):
    """Finder that serves matches out of a fixed, pre-sorted list of paths."""

    def __init__(self, files):
        """Store the list of file names to search.

        files must be a sorted list: _match() binary-searches it and stops
        scanning at the first entry that no longer shares the pattern's
        literal prefix.
        """
        self._files = files

    @memoize
    def _match(self, pattern):
        """Return the entries of the file list that match pattern, in list
        order (i.e. sorted, given the constructor's contract)."""
        # The generic _find_helper utility is deliberately not used here: it
        # is not tuned for performance the way this class wants to be.
        # Unifying the two remains a possible refactoring.

        # Everything in the pattern before the first component containing a
        # '*' is literal text. Only entries starting with that text can match,
        # and in a sorted list those entries form one contiguous run.
        literal_parts = takewhile(lambda part: '*' not in part,
                                  pattern.split('/'))
        prefix = '/'.join(literal_parts)
        prefix_len = len(prefix)
        first = bisect.bisect_left(self._files, prefix)

        matches = []
        for index in six.moves.range(first, len(self._files)):
            candidate = self._files[index]
            if not candidate.startswith(prefix):
                # Past the end of the contiguous run; nothing further can
                # match.
                break
            # Hidden files are skipped. Only the part of the path after the
            # literal prefix is inspected, and only for the substring '/.':
            # a dot-component named explicitly in the prefix is accepted, and
            # so is one that begins the remainder with no '/' before it
            # (e.g. '.foo' when the prefix is empty).
            hidden = '/.' in candidate[prefix_len:]
            if not hidden and mozpath.match(candidate, pattern):
                matches.append(candidate)
        return matches

    def find(self, pattern):
        """Yield (path, File(path)) for every listed file matching pattern.

        Leading and trailing slashes on the pattern are ignored.
        """
        for path in self._match(pattern.strip('/')):
            yield path, File(path)

Просмотреть файл

@ -14,6 +14,7 @@ import sys
from mozbuild.util import ensure_subprocess_env
from mozfile import which
from mozpack.files import FileListFinder
class MissingVCSTool(Exception):
@ -216,8 +217,14 @@ class Repository(object):
'''
@abc.abstractmethod
def get_files_in_working_directory(self):
"""Obtain a list of managed files in the working directory."""
def get_tracked_files_finder(self):
    """Obtain a mozpack.files.BaseFinder of managed files in the working
    directory.

    The Finder caches its list of all files in the repo for its entire
    lifetime. Operations on an existing Finder therefore do not reflect
    later changes to the repo, such as commits made during the Finder's
    lifetime.
    """
@abc.abstractmethod
def working_directory_clean(self, untracked=False, ignored=False):
@ -419,10 +426,11 @@ class HgRepository(Repository):
return
self._run('forget', *paths)
def get_files_in_working_directory(self):
def get_tracked_files_finder(self):
# Can return backslashes on Windows. Normalize to forward slashes.
return list(p.replace('\\', '/') for p in
self._run(b'files', b'-0').split('\0') if p)
files = list(p.replace('\\', '/') for p in
self._run(b'files', b'-0').split('\0') if p)
return FileListFinder(files)
def working_directory_clean(self, untracked=False, ignored=False):
args = ['status', '--modified', '--added', '--removed',
@ -549,8 +557,9 @@ class GitRepository(Repository):
return
self._run('reset', *paths)
def get_files_in_working_directory(self):
return [p for p in self._run('ls-files', '-z').split('\0') if p]
def get_tracked_files_finder(self):
    """Return a FileListFinder over the paths printed by `git ls-files`."""
    # The output is split on NUL (`-z`); empty pieces, such as the one left
    # after a trailing separator, are dropped.
    listing = self._run('ls-files', '-z')
    tracked = [path for path in listing.split('\0') if path]
    return FileListFinder(tracked)
def working_directory_clean(self, untracked=False, ignored=False):
args = ['status', '--porcelain']

Просмотреть файл

@ -4,8 +4,8 @@
from __future__ import absolute_import, print_function, unicode_literals
from mozbuild.util import memoize
from mozpack.files import FileFinder
import mozpack.path as mozpath
from mozversioncontrol import get_repository_object
import hashlib
import io
import six
@ -21,6 +21,11 @@ def hash_path(path):
return hashlib.sha256(fh.read()).hexdigest()
@memoize
def get_file_finder(base_path):
    """Return the tracked-files finder for the repository at base_path.

    The function is wrapped in mozbuild.util.memoize, so the finder is
    expected to be built once per distinct base_path and then reused.
    """
    repository = get_repository_object(base_path)
    return repository.get_tracked_files_finder()
def hash_paths(base_path, patterns):
"""
Give a list of path patterns, return a digest of the contents of all
@ -30,7 +35,7 @@ def hash_paths(base_path, patterns):
Each file is hashed. The list of all hashes and file paths is then
itself hashed to produce the result.
"""
finder = FileFinder(base_path)
finder = get_file_finder(base_path)
h = hashlib.sha256()
files = {}
for pattern in patterns: