diff --git a/python/mozlint/mozlint/roller.py b/python/mozlint/mozlint/roller.py index b12e49c34cb1..d2e8f6a6619e 100644 --- a/python/mozlint/mozlint/roller.py +++ b/python/mozlint/mozlint/roller.py @@ -10,6 +10,7 @@ import sys import traceback from collections import defaultdict from concurrent.futures import ProcessPoolExecutor +from math import ceil from multiprocessing import cpu_count from subprocess import CalledProcessError @@ -56,6 +57,7 @@ class LintRoller(object): version control or cwd. :param lintargs: Arguments to pass to the underlying linter(s). """ + MAX_PATHS_PER_JOB = 50 # set a max size to prevent command lines that are too long on Windows def __init__(self, root, **lintargs): self.parse = Parser() @@ -82,6 +84,14 @@ class LintRoller(object): for path in paths: self.linters.extend(self.parse(path)) + def _generate_jobs(self, paths, num_procs): + """A job is of the form (, ).""" + chunk_size = min(self.MAX_PATHS_PER_JOB, int(ceil(float(len(paths)) / num_procs))) + while paths: + for linter in self.linters: + yield linter, paths[:chunk_size] + paths = paths[chunk_size:] + def roll(self, paths=None, outgoing=None, workdir=None, num_procs=None): """Run all of the registered linters against the specified file paths. @@ -132,12 +142,10 @@ class LintRoller(object): paths = map(os.path.abspath, paths) num_procs = num_procs or cpu_count() - num_procs = min(num_procs, len(self.linters)) - all_results = defaultdict(list) with ProcessPoolExecutor(num_procs) as executor: - futures = [executor.submit(_run_worker, config, paths, **self.lintargs) - for config in self.linters] + futures = [executor.submit(_run_worker, config, p, **self.lintargs) + for config, p in self._generate_jobs(paths, num_procs)] # ignore SIGINT in parent so we can still get partial results # from child processes. These should shutdown quickly anyway. orig_sigint = signal.signal(signal.SIGINT, signal.SIG_IGN) diff --git a/python/mozlint/test/test_roller.py b/python/mozlint/test/test_roller.py index 631371caf03c..03fdeddc60d4 100644 --- a/python/mozlint/test/test_roller.py +++ b/python/mozlint/test/test_roller.py @@ -5,6 +5,7 @@ from __future__ import absolute_import import os +import platform import sys import mozunit @@ -76,10 +77,47 @@ def test_roll_with_failure_code(lint, lintdir, files): lint.read(os.path.join(lintdir, 'badreturncode.yml')) assert lint.failed is None - result = lint.roll(files) + result = lint.roll(files, num_procs=1) assert len(result) == 0 assert lint.failed == ['BadReturnCodeLinter'] +def fake_run_linters(config, paths, **lintargs): + return {'count': [1]}, [] + + +@pytest.mark.skipif(platform.system() == 'Windows', + reason="monkeypatch issues with multiprocessing on Windows") +@pytest.mark.parametrize('num_procs', [1, 4, 8, 16]) +def test_number_of_jobs(monkeypatch, lint, linters, files, num_procs): + monkeypatch.setattr(sys.modules[lint.__module__], '_run_linters', fake_run_linters) + + lint.read(linters) + num_jobs = len(lint.roll(files, num_procs=num_procs)['count']) + + if len(files) >= num_procs: + assert num_jobs == num_procs * len(linters) + else: + assert num_jobs == len(files) * len(linters) + + +@pytest.mark.skipif(platform.system() == 'Windows', + reason="monkeypatch issues with multiprocessing on Windows") +@pytest.mark.parametrize('max_paths,expected_jobs', [(1, 12), (4, 6), (16, 6)]) +def test_max_paths_per_job(monkeypatch, lint, linters, files, max_paths, expected_jobs): + monkeypatch.setattr(sys.modules[lint.__module__], '_run_linters', fake_run_linters) + + files = files[:4] + assert len(files) == 4 + + linters = linters[:3] + assert len(linters) == 3 + + lint.MAX_PATHS_PER_JOB = max_paths + lint.read(linters) + num_jobs = len(lint.roll(files, num_procs=2)['count']) + assert num_jobs == expected_jobs + + if __name__ == '__main__': mozunit.main()