Bug 1168607 - Implement a finder that reads from a Mercurial repo; r=glandium

Now that moz.build files use finders for I/O, we can start reading
moz.build data from other sources.

Version control is essentially a filesystem. We implement a finder
that speaks to Mercurial to obtain file data. It is able to obtain
file data from a specific revision in the repository.

We use the hglib package (which uses the Mercurial command server) for
speaking with Mercurial. This adds overhead compared to consuming the
raw Mercurial APIs. However, it also avoids GPL side-effects of
importing Mercurial's Python modules.

Testing shows that performance is good but not great. A follow-up
commit will introduce a GPL licensed Mercurial finder. For now, get
the base functionality in place.

--HG--
extra : commitid : BkwaQOW9MiR
extra : rebase_source : 915d6015317ccc79c228a76eed861d9f43e2fd17
This commit is contained in:
Gregory Szorc 2015-06-09 13:39:01 -07:00
Родитель 8ce366a5f7
Коммит 4f13520461
2 изменённых файлов: 158 добавлений и 13 удалений

Просмотреть файл

@ -34,6 +34,12 @@ from tempfile import (
NamedTemporaryFile,
)
try:
import hglib
except ImportError:
hglib = None
# For clean builds, copying files on win32 using CopyFile through ctypes is
# ~2x as fast as using shutil.copyfile.
if platform.system() != 'Windows':
@ -776,6 +782,30 @@ class BaseFinder(object):
return file
def _find_helper(self, pattern, files, file_getter):
    """Yield ``(path, file)`` pairs for the entries of ``files`` selected
    by ``pattern``.

    Several *Finder classes keep an in-memory collection of known paths
    and resolve patterns against it in the same way; this generator holds
    that shared logic.

    ``files`` is an iterable of known paths that also supports ``in``.
    ``file_getter`` is a callable that receives a path that is known to
    exist and should return a ``BaseFile`` instance for it.

    Selection rules, tried in this order:

    * a pattern containing ``*`` selects every path accepted by
      ``mozpath.match``;
    * the empty pattern selects every path;
    * a pattern equal to a known path selects just that path;
    * any other pattern selects the paths for which
      ``mozpath.basedir(path, [pattern])`` returns ``pattern``
      (presumably the paths located under that directory).
    """
    if '*' in pattern:
        candidates = (path for path in files
                      if mozpath.match(path, pattern))
    elif pattern == '':
        candidates = files
    elif pattern in files:
        candidates = (pattern,)
    else:
        candidates = (path for path in files
                      if mozpath.basedir(path, [pattern]) == pattern)

    # The file object is only created once its path is about to be
    # handed out, so consumers that stop early do not pay for the rest.
    for path in candidates:
        yield path, file_getter(path)
class FileFinder(BaseFinder):
'''
@ -905,19 +935,8 @@ class JarFinder(BaseFinder):
Actual implementation of JarFinder.find(), dispatching to specialized
member functions depending on what kind of pattern was given.
'''
if '*' in pattern:
for p in self._files:
if mozpath.match(p, pattern):
yield p, DeflatedFile(self._files[p])
elif pattern == '':
for p in self._files:
yield p, DeflatedFile(self._files[p])
elif pattern in self._files:
yield pattern, DeflatedFile(self._files[pattern])
else:
for p in self._files:
if mozpath.basedir(p, [pattern]) == pattern:
yield p, DeflatedFile(self._files[p])
return self._find_helper(pattern, self._files,
lambda x: DeflatedFile(self._files[x]))
class ComposedFinder(BaseFinder):
@ -945,3 +964,64 @@ class ComposedFinder(BaseFinder):
def find(self, pattern):
    """Yield ``(path, file)`` for each path that ``self.files.match``
    reports for ``pattern``, looking the file up in ``self.files``.
    """
    matching_paths = self.files.match(pattern)
    for path in matching_paths:
        entry = self.files[path]
        yield path, entry
class MercurialFile(BaseFile):
    """File object backed by content obtained from Mercurial.

    The content is requested from the client once, when the instance is
    created, and kept in memory afterwards.
    """

    def __init__(self, client, rev, path):
        # ``cat`` takes a list of paths; only this single path is requested,
        # at the given revision.
        requested = [path]
        self._content = client.cat(requested, rev=rev)

    def read(self):
        """Return the content fetched at construction time."""
        return self._content
class MercurialRevisionFinder(BaseFinder):
    """A finder that operates on a specific Mercurial revision.

    The names of all files in the revision are collected once, at
    construction time, by running ``files --rev <rev>`` through an hglib
    client. A ``MercurialFile`` for a given path is only created the first
    time that path is requested and is then reused.
    """

    def __init__(self, repo, rev='.', **kwargs):
        """Create a finder attached to a specific revision in a repository.

        If no revision is given, open the parent of the working directory.

        ``repo`` is the path to the repository; it is also passed to
        ``BaseFinder`` as ``base``. ``rev`` identifies the revision to
        read; ``None`` is treated like the default. Any other keyword
        arguments are forwarded to ``BaseFinder``.

        Raises ``Exception`` when the hglib package could not be imported.
        Errors raised while listing the revision's files propagate to the
        caller after the hglib client has been closed.
        """
        if not hglib:
            raise Exception('hglib package not found')

        super(MercurialRevisionFinder, self).__init__(base=repo, **kwargs)

        self._root = repo
        # We change directories here otherwise we have to deal with relative
        # paths.
        oldcwd = os.getcwd()
        os.chdir(self._root)
        try:
            self._client = hglib.open(path=repo, encoding=b'utf-8')
        finally:
            os.chdir(oldcwd)
        self._rev = rev if rev is not None else b'.'
        self._files = OrderedDict()

        # Immediately populate the list of files in the repo since nearly every
        # operation requires this list.
        try:
            out = self._client.rawcommand([b'files', b'--rev', self._rev])
        except Exception:
            # When the constructor fails nobody else holds a reference to
            # the client, so close it here rather than leaking it.
            self._client.close()
            raise

        # NOTE(review): paths are stored exactly as Mercurial prints them;
        # confirm the separator matches what callers pass on Windows.
        for relpath in out.splitlines():
            # None marks "known path, MercurialFile not created yet".
            self._files[relpath] = None

    def _find(self, pattern):
        """Resolve ``pattern`` against the revision's file list."""
        return self._find_helper(pattern, self._files, self._get)

    def get(self, path):
        """Return the file at ``path``, or None if the revision lacks it."""
        try:
            return self._get(path)
        except KeyError:
            return None

    def _get(self, path):
        """Return the ``MercurialFile`` for ``path``, creating it on first
        use. Raises ``KeyError`` when ``path`` is not in the revision.
        """
        # We lazy populate self._files because potentially creating tens of
        # thousands of MercurialFile instances for every file in the repo is
        # inefficient.
        f = self._files[path]
        # Compare against the None placeholder explicitly so an already
        # created file object is never rebuilt, whatever its truthiness.
        if f is None:
            f = MercurialFile(self._client, self._rev, path)
            self._files[path] = f

        return f

Просмотреть файл

@ -19,11 +19,20 @@ from mozpack.files import (
GeneratedFile,
JarFinder,
ManifestFile,
MercurialFile,
MercurialRevisionFinder,
MinifiedJavaScript,
MinifiedProperties,
PreprocessedFile,
XPTFile,
)
# We don't have hglib installed everywhere.
try:
import hglib
except ImportError:
hglib = None
from mozpack.mozjar import (
JarReader,
JarWriter,
@ -1017,5 +1026,61 @@ class TestComposedFinder(MatchTestTemplate, TestWithTmpDir):
self.assertIsInstance(self.finder.get('bar'), File)
@unittest.skipUnless(hglib, 'hglib not available')
class TestMercurialRevisionFinder(MatchTestTemplate, TestWithTmpDir):
    """Tests for MercurialRevisionFinder against a repository created in
    the test's temporary directory.

    Every hglib client opened here is closed again before the test
    returns, so no Mercurial command server is left running (and holding
    on to the temporary repository) when the directory is cleaned up.
    """

    def setUp(self):
        super(TestMercurialRevisionFinder, self).setUp()
        hglib.init(self.tmpdir)

    def add(self, path):
        """Write ``path`` (its content is its own name) into the working
        directory and add it to the repository.
        """
        c = hglib.open(self.tmpdir)
        try:
            ensureParentDir(self.tmppath(path))
            with open(self.tmppath(path), 'wb') as fh:
                fh.write(path)
            c.add(self.tmppath(path))
        finally:
            c.close()

    def do_check(self, pattern, result):
        """Delegate to the module-level ``do_check`` using this test's
        finder.
        """
        do_check(self, self.finder, pattern, result)

    def test_default_revision(self):
        self.prepare_match_test()
        c = hglib.open(self.tmpdir)
        try:
            c.commit('initial commit')
        finally:
            c.close()

        self.finder = MercurialRevisionFinder(self.tmpdir)
        try:
            self.do_match_test()

            self.assertIsNone(self.finder.get('does-not-exist'))
            self.assertIsInstance(self.finder.get('bar'), MercurialFile)
        finally:
            # The finder exposes no public way to release its client.
            self.finder._client.close()

    def test_old_revision(self):
        c = hglib.open(self.tmpdir)
        try:
            with open(self.tmppath('foo'), 'wb') as fh:
                fh.write('foo initial')
            c.add(self.tmppath('foo'))
            c.commit('initial')

            with open(self.tmppath('foo'), 'wb') as fh:
                fh.write('foo second')
            with open(self.tmppath('bar'), 'wb') as fh:
                fh.write('bar second')
            c.add(self.tmppath('bar'))
            c.commit('second')
            # This wipes out the working directory, ensuring the finder isn't
            # finding anything from the filesystem.
            c.rawcommand(['update', 'null'])
        finally:
            c.close()

        finder = MercurialRevisionFinder(self.tmpdir, rev='0')
        try:
            f = finder.get('foo')
            self.assertEqual(f.read(), 'foo initial')
            self.assertEqual(f.read(), 'foo initial',
                             'read again for good measure')
            self.assertIsNone(finder.get('bar'))
        finally:
            finder._client.close()

        finder = MercurialRevisionFinder(self.tmpdir, rev='1')
        try:
            f = finder.get('foo')
            self.assertEqual(f.read(), 'foo second')
            f = finder.get('bar')
            self.assertEqual(f.read(), 'bar second')
        finally:
            finder._client.close()
if __name__ == '__main__':
mozunit.main()