2016-03-16 21:55:21 +03:00
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
|
2017-09-07 05:52:46 +03:00
|
|
|
from __future__ import absolute_import
|
|
|
|
|
2016-03-16 21:55:21 +03:00
|
|
|
import os
|
Bug 1430825 - [mozlint] Split work up by paths instead of by linters, r=standard8
The initial motivation for this patch, was to prevent command lines that are
too long on Windows. To that end, there is a cap to the number of paths that
can be run per job. For now that cap is set to 50. This will allow for an
average path length of 160 characters, which should be sufficient with room to
spare.
But another big benefit of this patch is that we are running more things in
parallel. Previously, mozlint ran each linter in its own subprocess, but that's
it. If running eslint is 90% of the work, it'll still only get a single
process. This means we are wasting cores as soon as the other linters are
finished.
This patch chunks the number of specified paths such that there will be N*L
jobs where 'N' is the number of cores and 'L' is the number of linters. This
means even when there's a dominant linter, we'll be making better use of our
resources. This isn't perfect of course, as some paths might contain a small
number of files, and some will contain a very large number of files. But it's
a start
A limitation to this approach is that if there are fewer paths specified than
there are cores, we won't schedule enough jobs per linter to use those extra
cores. One idea might be to expand specified directories and individually list
all the paths under the directory. But this has some hairy edge cases that
would be tough to catch. Doing this in a non-hacky way would also require a
medium scale refactor.
So I propose further parallelization efforts be destined for follow-ups.
MozReview-Commit-ID: JRRu13AFaii
--HG--
extra : rebase_source : 242fb71fe0af8bd2a981bd10a7216bb897fe00ac
2018-01-17 00:01:20 +03:00
|
|
|
import platform
|
Bug 1369711 - [mozlint] Make sure KeyboardInterrupts are handled well wherever they happen r=gps
There a few pieces needed here to properly handle KeyboardInterrupts.
1. All in-progress work needs to abort. Ideally the underlying linters will be
able to catch KeyboardInterrupt, and return partial results (like the flake8
linter does). Linters may alternatively allow the KeyboardInterrupt to
propagate up. Mozlint will catch and handle this appropriately, though any
results found will be lost. The only change to this behaviour was fixing a bug
in the flake8 linter.
2. Any unstarted jobs need to be canceled. In concurrent.futures, there are two
different queues. First, jobs are placed on the work queue, which is just a list
maintained by the parent process. As workers become available, jobs are moved
off the work queue, and onto the call queue (which is a multiprocessing.Queue).
Jobs that live on the work queue can be canceled with 'future.cancel()', whereas
jobs that live on the call queue cannot. The number of extra jobs that are stored
on the call queue is determined by this variable:
https://hg.mozilla.org/mozilla-central/file/deb7714a7bcd/third_party/python/futures/concurrent/futures/process.py#l86
In this patch, the parent process' sigint handler (which will be called on Ctrl-C)
is responsible for canceling all the jobs on the work queue. For the jobs on the
call queue, the best we can do is set a global variable that tells workers to
abort early.
3. Idle workers should exit gracefully. When there are no more jobs left, workers
will block on the call queue (either waiting for more jobs, or waiting for the
executor to send the shutdown signal). If a KeyboardInterrupt is received while a
worker is blocking, it isn't possible to intercept that anywhere (due to quirks
of how concurrent.futures is implemented). The InterruptableQueue class was
created to solve this problem. It will return None instead of propagating
KeyboardInterrupt. A None value will wake the worker up and tell it to gracefully
shutdown. This way, we avoid cryptic tracebacks in the output.
With all of these various pieces solved, pressing Ctrl-C appears to always exit
gracefully, sometimes even printing partial results.
MozReview-Commit-ID: 36Pe3bbUKmk
--HG--
extra : rebase_source : d4c312ee5cc3679eeee1407c5521aed679f84ad4
extra : source : a93a00141bf62f6bc9e30934c0e56f6b2e434bf0
2018-02-23 16:55:06 +03:00
|
|
|
import signal
|
|
|
|
import subprocess
|
2016-03-16 21:55:21 +03:00
|
|
|
import sys
|
Bug 1369711 - [mozlint] Make sure KeyboardInterrupts are handled well wherever they happen r=gps
There a few pieces needed here to properly handle KeyboardInterrupts.
1. All in-progress work needs to abort. Ideally the underlying linters will be
able to catch KeyboardInterrupt, and return partial results (like the flake8
linter does). Linters may alternatively allow the KeyboardInterrupt to
propagate up. Mozlint will catch and handle this appropriately, though any
results found will be lost. The only change to this behaviour was fixing a bug
in the flake8 linter.
2. Any unstarted jobs need to be canceled. In concurrent.futures, there are two
different queues. First, jobs are placed on the work queue, which is just a list
maintained by the parent process. As workers become available, jobs are moved
off the work queue, and onto the call queue (which is a multiprocessing.Queue).
Jobs that live on the work queue can be canceled with 'future.cancel()', whereas
jobs that live on the call queue cannot. The number of extra jobs that are stored
on the call queue is determined by this variable:
https://hg.mozilla.org/mozilla-central/file/deb7714a7bcd/third_party/python/futures/concurrent/futures/process.py#l86
In this patch, the parent process' sigint handler (which will be called on Ctrl-C)
is responsible for canceling all the jobs on the work queue. For the jobs on the
call queue, the best we can do is set a global variable that tells workers to
abort early.
3. Idle workers should exit gracefully. When there are no more jobs left, workers
will block on the call queue (either waiting for more jobs, or waiting for the
executor to send the shutdown signal). If a KeyboardInterrupt is received while a
worker is blocking, it isn't possible to intercept that anywhere (due to quirks
of how concurrent.futures is implemented). The InterruptableQueue class was
created to solve this problem. It will return None instead of propagating
KeyboardInterrupt. A None value will wake the worker up and tell it to gracefully
shutdown. This way, we avoid cryptic tracebacks in the output.
With all of these various pieces solved, pressing Ctrl-C appears to always exit
gracefully, sometimes even printing partial results.
MozReview-Commit-ID: 36Pe3bbUKmk
--HG--
extra : rebase_source : d4c312ee5cc3679eeee1407c5521aed679f84ad4
extra : source : a93a00141bf62f6bc9e30934c0e56f6b2e434bf0
2018-02-23 16:55:06 +03:00
|
|
|
import time
|
2016-03-16 21:55:21 +03:00
|
|
|
|
2017-08-29 21:50:33 +03:00
|
|
|
import mozunit
|
2016-09-09 23:20:09 +03:00
|
|
|
import pytest
|
2016-03-16 21:55:21 +03:00
|
|
|
|
2018-02-23 17:02:16 +03:00
|
|
|
from mozlint.errors import LintersNotConfigured
|
2018-08-28 16:51:04 +03:00
|
|
|
from mozlint.result import Issue, ResultSummary
|
2016-03-16 21:55:21 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Absolute path of the directory containing this test module; used to
# locate helper scripts (e.g. runcli.py) relative to the test file.
here = os.path.abspath(os.path.dirname(__file__))
|
|
|
|
|
|
|
|
|
2016-09-09 23:20:09 +03:00
|
|
|
def test_roll_no_linters_configured(lint, files):
    """Rolling before any linter config has been read must raise."""
    with pytest.raises(LintersNotConfigured):
        lint.roll(files)
|
2016-03-16 21:55:21 +03:00
|
|
|
|
|
|
|
|
2016-09-09 23:20:09 +03:00
|
|
|
def test_roll_successful(lint, linters, files):
    """A normal roll reports issues for the offending file and no failures."""
    lint.read(linters('string', 'regex', 'external'))

    result = lint.roll(files)
    assert len(result.issues) == 1
    assert result.failed == set([])

    # dict.keys() returns a non-subscriptable view object on Python 3;
    # wrap in list() so this works on both Python 2 and 3.
    path = list(result.issues.keys())[0]
    assert os.path.basename(path) == 'foobar.js'

    errors = result.issues[path]
    assert isinstance(errors, list)
    assert len(errors) == 6

    container = errors[0]
    assert isinstance(container, Issue)
    assert container.rule == 'no-foobar'
|
2016-03-16 21:55:21 +03:00
|
|
|
|
|
|
|
|
2018-05-11 18:13:36 +03:00
|
|
|
def test_roll_from_subdir(lint, linters):
    """Paths given to roll() resolve against cwd, while vcs paths resolve
    against the lint root."""
    lint.read(linters('string', 'regex', 'external'))

    def assert_clean(result):
        # A clean run: no issues, no failed linters, zero return code.
        assert len(result.issues) == 0
        assert len(result.failed) == 0
        assert result.returncode == 0

    previous_cwd = os.getcwd()
    try:
        os.chdir(os.path.join(lint.root, 'files'))

        # Path relative to cwd works
        assert_clean(lint.roll('no_foobar.js'))

        # Path relative to root doesn't work
        result = lint.roll(os.path.join('files', 'no_foobar.js'))
        assert len(result.issues) == 0
        assert len(result.failed) == 3
        assert result.returncode == 1

        # Paths from vcs are always joined to root instead of cwd
        lint.mock_vcs([os.path.join('files', 'no_foobar.js')])
        assert_clean(lint.roll(outgoing=True))
        assert_clean(lint.roll(workdir=True))
    finally:
        os.chdir(previous_cwd)
|
|
|
|
|
|
|
|
|
2018-09-20 23:45:04 +03:00
|
|
|
def test_roll_catch_exception(lint, linters, files, capfd):
    """An exception raised inside a linter is caught and reported on stderr
    rather than propagated to the caller."""
    lint.read(linters('raises'))

    lint.roll(files)  # assert not raises
    captured_err = capfd.readouterr()[1]
    assert 'LintException' in captured_err
|
2016-03-16 21:55:21 +03:00
|
|
|
|
|
|
|
|
2018-10-17 00:04:15 +03:00
|
|
|
def test_roll_with_global_excluded_path(lint, linters, files):
    """A glob on the global exclude list suppresses all issues for matching
    files."""
    lint.exclude = ['**/foobar.js']
    lint.read(linters('string', 'regex', 'external'))

    result = lint.roll(files)
    assert len(result.issues) == 0
    assert result.failed == set([])
|
2016-03-16 21:55:21 +03:00
|
|
|
|
2016-09-09 23:20:09 +03:00
|
|
|
|
2018-10-17 00:04:15 +03:00
|
|
|
def test_roll_with_local_excluded_path(lint, linters, files):
    """An exclude pattern defined in the linter config itself is honoured."""
    lint.read(linters('excludes'))

    result = lint.roll(files)
    assert len(result.issues) == 0
    assert result.failed == set([])
|
|
|
|
|
|
|
|
|
2018-08-31 19:05:12 +03:00
|
|
|
def test_roll_with_no_files_to_lint(lint, linters, capfd):
    """Rolling with nothing to lint still yields a ResultSummary and warns."""
    lint.read(linters('string', 'regex', 'external'))
    lint.mock_vcs([])

    result = lint.roll([], workdir=True)
    assert isinstance(result, ResultSummary)
    assert len(result.issues) == 0
    assert len(result.failed) == 0

    captured_out = capfd.readouterr()[0]
    assert 'warning: no files linted' in captured_out
|
|
|
|
|
|
|
|
|
2018-09-20 23:45:04 +03:00
|
|
|
def test_roll_with_invalid_extension(lint, linters, filedir):
    """A file whose extension the linter doesn't handle produces no issues."""
    lint.read(linters('external'))

    result = lint.roll(os.path.join(filedir, 'foobar.py'))
    assert len(result.issues) == 0
    assert result.failed == set([])
|
2016-10-13 23:23:13 +03:00
|
|
|
|
|
|
|
|
2018-09-20 23:45:04 +03:00
|
|
|
def test_roll_with_failure_code(lint, linters, files):
    """A linter exiting with a bad return code is recorded as failed."""
    lint.read(linters('badreturncode'))

    result = lint.roll(files, num_procs=1)
    assert len(result.issues) == 0
    assert result.failed == set(['BadReturnCodeLinter'])
|
2016-08-09 23:24:04 +03:00
|
|
|
|
2016-03-16 21:55:21 +03:00
|
|
|
|
2018-09-20 23:45:04 +03:00
|
|
|
def test_roll_warnings(lint, linters, files):
    """Warnings are suppressed by default and surfaced as issues only when
    the show_warnings lint arg is set."""
    lint.read(linters('warning'))

    # By default, warnings end up in the suppressed bucket.
    result = lint.roll(files)
    assert len(result.issues) == 0
    assert result.total_issues == 0
    assert len(result.suppressed_warnings) == 1
    assert result.total_suppressed_warnings == 2

    # With show_warnings enabled they are reported as regular issues.
    lint.lintargs['show_warnings'] = True
    result = lint.roll(files)
    assert len(result.issues) == 1
    assert result.total_issues == 2
    assert len(result.suppressed_warnings) == 0
    assert result.total_suppressed_warnings == 0
|
|
|
|
|
|
|
|
|
2018-02-23 17:02:16 +03:00
|
|
|
def fake_run_worker(config, paths, **lintargs):
    """Stand-in for mozlint's _run_worker that records one entry per job
    under the 'count' key, so tests can count how many jobs were spawned."""
    summary = ResultSummary()
    summary.issues['count'].append(1)
    return summary
|
Bug 1430825 - [mozlint] Split work up by paths instead of by linters, r=standard8
The initial motivation for this patch, was to prevent command lines that are
too long on Windows. To that end, there is a cap to the number of paths that
can be run per job. For now that cap is set to 50. This will allow for an
average path length of 160 characters, which should be sufficient with room to
spare.
But another big benefit of this patch is that we are running more things in
parallel. Previously, mozlint ran each linter in its own subprocess, but that's
it. If running eslint is 90% of the work, it'll still only get a single
process. This means we are wasting cores as soon as the other linters are
finished.
This patch chunks the number of specified paths such that there will be N*L
jobs where 'N' is the number of cores and 'L' is the number of linters. This
means even when there's a dominant linter, we'll be making better use of our
resources. This isn't perfect of course, as some paths might contain a small
number of files, and some will contain a very large number of files. But it's
a start
A limitation to this approach is that if there are fewer paths specified than
there are cores, we won't schedule enough jobs per linter to use those extra
cores. One idea might be to expand specified directories and individually list
all the paths under the directory. But this has some hairy edge cases that
would be tough to catch. Doing this in a non-hacky way would also require a
medium scale refactor.
So I propose further parallelization efforts be destined for follow-ups.
MozReview-Commit-ID: JRRu13AFaii
--HG--
extra : rebase_source : 242fb71fe0af8bd2a981bd10a7216bb897fe00ac
2018-01-17 00:01:20 +03:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(platform.system() == 'Windows',
                    reason="monkeypatch issues with multiprocessing on Windows")
@pytest.mark.parametrize('num_procs', [1, 4, 8, 16])
def test_number_of_jobs(monkeypatch, lint, linters, files, num_procs):
    """The number of jobs scheduled is num_procs (capped by the number of
    files) multiplied by the number of linters."""
    monkeypatch.setattr(sys.modules[lint.__module__], '_run_worker', fake_run_worker)

    linter_paths = linters('string', 'regex', 'external')
    lint.read(linter_paths)
    num_jobs = len(lint.roll(files, num_procs=num_procs).issues['count'])

    # Each linter gets one job per worker, but never more workers than files.
    assert num_jobs == min(len(files), num_procs) * len(linter_paths)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(platform.system() == 'Windows',
                    reason="monkeypatch issues with multiprocessing on Windows")
@pytest.mark.parametrize('max_paths,expected_jobs', [(1, 12), (4, 6), (16, 6)])
def test_max_paths_per_job(monkeypatch, lint, linters, files, max_paths, expected_jobs):
    """MAX_PATHS_PER_JOB caps how many paths a single job may receive,
    which in turn determines the total job count."""
    monkeypatch.setattr(sys.modules[lint.__module__], '_run_worker', fake_run_worker)

    # Pin the fixture sizes so expected_jobs values are deterministic.
    files = files[:4]
    assert len(files) == 4

    linter_paths = linters('string', 'regex', 'external')[:3]
    assert len(linter_paths) == 3

    lint.MAX_PATHS_PER_JOB = max_paths
    lint.read(linter_paths)
    num_jobs = len(lint.roll(files, num_procs=2).issues['count'])
    assert num_jobs == expected_jobs
|
|
|
|
|
|
|
|
|
Bug 1369711 - [mozlint] Make sure KeyboardInterrupts are handled well wherever they happen r=gps
There a few pieces needed here to properly handle KeyboardInterrupts.
1. All in-progress work needs to abort. Ideally the underlying linters will be
able to catch KeyboardInterrupt, and return partial results (like the flake8
linter does). Linters may alternatively allow the KeyboardInterrupt to
propagate up. Mozlint will catch and handle this appropriately, though any
results found will be lost. The only change to this behaviour was fixing a bug
in the flake8 linter.
2. Any unstarted jobs need to be canceled. In concurrent.futures, there are two
different queues. First, jobs are placed on the work queue, which is just a list
maintained by the parent process. As workers become available, jobs are moved
off the work queue, and onto the call queue (which is a multiprocessing.Queue).
Jobs that live on the work queue can be canceled with 'future.cancel()', whereas
jobs that live on the call queue cannot. The number of extra jobs that are stored
on the call queue is determined by this variable:
https://hg.mozilla.org/mozilla-central/file/deb7714a7bcd/third_party/python/futures/concurrent/futures/process.py#l86
In this patch, the parent process' sigint handler (which will be called on Ctrl-C)
is responsible for canceling all the jobs on the work queue. For the jobs on the
call queue, the best we can do is set a global variable that tells workers to
abort early.
3. Idle workers should exit gracefully. When there are no more jobs left, workers
will block on the call queue (either waiting for more jobs, or waiting for the
executor to send the shutdown signal). If a KeyboardInterrupt is received while a
worker is blocking, it isn't possible to intercept that anywhere (due to quirks
of how concurrent.futures is implemented). The InterruptableQueue class was
created to solve this problem. It will return None instead of propagating
KeyboardInterrupt. A None value will wake the worker up and tell it to gracefully
shutdown. This way, we avoid cryptic tracebacks in the output.
With all of these various pieces solved, pressing Ctrl-C appears to always exit
gracefully, sometimes even printing partial results.
MozReview-Commit-ID: 36Pe3bbUKmk
--HG--
extra : rebase_source : d4c312ee5cc3679eeee1407c5521aed679f84ad4
extra : source : a93a00141bf62f6bc9e30934c0e56f6b2e434bf0
2018-02-23 16:55:06 +03:00
|
|
|
@pytest.mark.skipif(platform.system() == 'Windows',
                    reason="signal.CTRL_C_EVENT isn't causing a KeyboardInterrupt on Windows")
def test_keyboard_interrupt():
    """Ctrl-C (SIGINT) mid-run exits gracefully: a warning is printed and no
    traceback leaks into the output."""
    # We use two linters so we'll have two jobs. One (string.yml) will complete
    # quickly. The other (slow.yml) will run slowly. This way the first worker
    # will be be stuck blocking on the ProcessPoolExecutor._call_queue when the
    # signal arrives and the other still be doing work.
    cmd = [sys.executable, 'runcli.py', '-l=string', '-l=slow']
    # universal_newlines=True makes communicate() return str on Python 3
    # (it returns bytes otherwise, breaking the substring checks below).
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            cwd=here, universal_newlines=True)
    time.sleep(1)
    proc.send_signal(signal.SIGINT)

    out = proc.communicate()[0]
    assert 'warning: not all files were linted' in out
    assert 'Traceback' not in out
|
|
|
|
|
|
|
|
|
2018-09-20 23:45:04 +03:00
|
|
|
def test_support_files(lint, linters, filedir, monkeypatch):
    """When a linter's support files are modified, --workdir/--outgoing rolls
    lint the entire root instead of just the requested path."""
    captured_jobs = []

    # Replace the original _generate_jobs with a new one that simply
    # adds jobs to a list (and then doesn't return anything).
    real_generate_jobs = lint._generate_jobs

    def capture_jobs(*args, **kwargs):
        captured_jobs.extend(job[1] for job in real_generate_jobs(*args, **kwargs))
        return []

    monkeypatch.setattr(lint, '_generate_jobs', capture_jobs)

    linter_path = linters('support_files')[0]
    lint.read(linter_path)

    # Modified support files only lint entire root if --outgoing or --workdir
    # are used.
    path = os.path.join(filedir, 'foobar.js')
    lint.mock_vcs([os.path.join(filedir, 'foobar.py')])
    lint.roll(path)
    assert captured_jobs[0] == [path]

    del captured_jobs[:]
    lint.roll(path, workdir=True)
    assert captured_jobs[0] == [lint.root]

    del captured_jobs[:]
    lint.roll(path, outgoing=True)
    assert captured_jobs[0] == [lint.root]

    # Lint config file is implicitly added as a support file
    lint.mock_vcs([linter_path])
    del captured_jobs[:]
    lint.roll(path, outgoing=True, workdir=True)
    assert captured_jobs[0] == [lint.root]
|
|
|
|
|
|
|
|
|
2018-01-25 21:40:02 +03:00
|
|
|
def test_setup(lint, linters, filedir, capfd):
    """setup() requires configured linters, runs every linter's setup hook,
    and records which linters failed to set up."""
    with pytest.raises(LintersNotConfigured):
        lint.setup()

    lint.read(linters('setup', 'setupfailed', 'setupraised'))
    lint.setup()

    stdout = capfd.readouterr()[0]
    for expected in ('setup passed',
                     'setup failed',
                     'setup raised',
                     'error: problem with lint setup, skipping'):
        assert expected in stdout
    assert lint.result.failed_setup == set(['SetupFailedLinter', 'SetupRaisedLinter'])
|
2018-01-25 21:40:02 +03:00
|
|
|
|
|
|
|
|
2016-03-16 21:55:21 +03:00
|
|
|
# Allow running this test module directly (python <file>.py).
if __name__ == '__main__':
    mozunit.main()
|