Improve mypy_runner.py (#171)
This PR reworks mypy_runner.py both to ensure that all files are checked and to speed up the process (from about 3 minutes to about 12 seconds in the PR build). Rather than processing one file at a time, mypy is now invoked repeatedly with "--verbose" set, and its logs are (silently) inspected to see which files have been visited. Visited files are excluded from the set still to be checked, and mypy is invoked again on the remaining ones until none are left, or until no further files are visited (which should not happen, and does not appear to in practice). A rough sketch of this loop is given below. Care is taken to ensure that the script can also be called when this repo is present as a submodule (assumed, as usual, to be named innereye-deeplearning) of another repository; in that case the files inside the submodule are not checked, since they are assumed to have already been checked by this repo's own build. It is also now possible to give the script a specific list of files to check by supplying them on the command line. Running this new version turned up a couple of previously undetected type issues, which are also fixed here.
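The core of the new approach, as a minimal illustrative sketch: the function name check_files and its local variables are placeholders, not the identifiers used in the actual script, and the real implementation is in the mypy_runner.py hunk at the end of this commit.

import subprocess
from typing import List


def check_files(files: List[str]) -> int:
    """Hypothetical sketch of the check-and-retry loop described in the PR text."""
    worst = 0
    while files:
        # Run mypy with --verbose so that every file it processes is named in the stderr log.
        process = subprocess.run(["mypy", "--verbose"] + files,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        print(process.stdout)
        remaining = set(files)
        # Treat any file mentioned anywhere in the verbose log as visited.
        for line in process.stderr.splitlines():
            for token in line.split():
                remaining.discard(token)
        if len(remaining) == len(files):
            # No progress was made; stop rather than loop forever.
            return max(worst, 1)
        worst = max(worst, process.returncode)
        files = sorted(remaining)
    return worst

Supplying file names on the command line simply seeds this loop with those paths instead of the result of globbing the repository.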
This commit is contained in:
Parent: 1794b46ed1
Commit: 1136e23352
@@ -506,16 +506,16 @@ def test_get_single_metric() -> None:
     m.add_metric(m2, v2)
     assert m.get_single_metric(m1, h1) == v1
     assert m.get_single_metric(m2) == v2
-    with pytest.raises(KeyError) as ex:
+    with pytest.raises(KeyError) as ex1:
         m.get_single_metric(m1, "no such hue")
-    assert "no such hue" in str(ex)
-    with pytest.raises(KeyError) as ex:
+    assert "no such hue" in str(ex1)
+    with pytest.raises(KeyError) as ex2:
         m.get_single_metric("no such metric", h1)
-    assert "no such metric" in str(ex)
+    assert "no such metric" in str(ex2)
     m.add_metric(m2, v2)
-    with pytest.raises(ValueError) as ex:
+    with pytest.raises(ValueError) as ex3:
         m.get_single_metric(m2)
-    assert "Expected a single entry" in str(ex)
+    assert "Expected a single entry" in str(ex3)


 def test_aggregate_segmentation_metrics() -> None:
@@ -11,7 +11,7 @@ steps:
       branch_prefix="refs/heads/"
       full_branch_name=$(Build.SourceBranch)
       branch_name_without_prefix=${full_branch_name#$branch_prefix}
-      python $(Agent.TempDirectory)/InnerEye/TestSubmodule/runner.py --submit_to_azureml=True --model="$(model)" --is_train="$(is_train)" $(more_switches) --number_of_cross_validation_splits="$(number_of_cross_validation_splits)" --wait_for_completion="${{parameters.wait_for_completion}}" --pytest_mark="${{parameters.pytest_mark}}" --gpu_cluster_name="$(gpu_cluster_name)" --user_friendly_name="$(user_friendly_name)" --run_recovery_id="$(run_recovery_id)" --tag="$(tags)" --build_number=$(Build.BuildId) --build_user="$(Build.RequestedFor)" --build_branch="$branch_name_without_prefix" --build_source_id="$(Build.SourceVersion)" --build_source_message="$(Build.SourceVersionMessage)" --build_source_author="$(Build.SourceVersionAuthor)" --build_source_repository="$(Build.Repository.Name)"
+      python $(Agent.TempDirectory)/InnerEye/TestSubmodule/test_submodule_runner.py --submit_to_azureml=True --model="$(model)" --is_train="$(is_train)" $(more_switches) --number_of_cross_validation_splits="$(number_of_cross_validation_splits)" --wait_for_completion="${{parameters.wait_for_completion}}" --pytest_mark="${{parameters.pytest_mark}}" --gpu_cluster_name="$(gpu_cluster_name)" --user_friendly_name="$(user_friendly_name)" --run_recovery_id="$(run_recovery_id)" --tag="$(tags)" --build_number=$(Build.BuildId) --build_user="$(Build.RequestedFor)" --build_branch="$branch_name_without_prefix" --build_source_id="$(Build.SourceVersion)" --build_source_message="$(Build.SourceVersionMessage)" --build_source_author="$(Build.SourceVersionAuthor)" --build_source_repository="$(Build.Repository.Name)"
     env:
       PYTHONPATH: $(Agent.TempDirectory)/InnerEye
       APPLICATION_KEY: $(InnerEyeDeepLearningServicePrincipalKey)
@@ -2,31 +2,67 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
 # ------------------------------------------------------------------------------------------
-import os
 import subprocess
 import sys
 from pathlib import Path
 from typing import List
-
-from joblib import Parallel, delayed


-def run_mypy(file: str) -> int:
-    return subprocess.run(["mypy", "--config=mypy.ini", f"{str(file)}"]).returncode
+def run_mypy(files: List[str]) -> int:
+    """
+    Runs mypy on the specified files, printing whatever is sent to stdout (i.e. mypy errors).
+    Because of an apparent bug in mypy, we run mypy in --verbose mode, so that log lines are printed to
+    stderr. We intercept these, and assume that any files mentioned in them have been processed.
+    We run mypy repeatedly on the files that were not mentioned until there are none remaining, or until
+    no further files are mentioned in the logs.
+    :param files: list of .py files to check
+    :return: maximum return code from any of the mypy runs
+    """
+    return_code = 0
+    iteration = 1
+    while files:
+        print(f"Iteration {iteration}: running mypy on {len(files)}{' remaining' if iteration > 1 else ''} files")
+        command = ["mypy", "--config=mypy.ini", "--verbose"] + files
+        # We pipe stdout and then print it, otherwise lines can appear in the wrong order in builds.
+        process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        for line in process.stdout.split("\n"):
+            print(line)
+        # Set of files we are hoping to see mentioned in the mypy log.
+        files_to_do = set(files)
+        # Remove from files_to_do everything that's mentioned in the log.
+        for line in process.stderr.split("\n"):
+            for token in line.split():
+                files_to_do.discard(token)
+        # If we didn't manage to discard any files, there's no point continuing. This should not occur, but if
+        # it does, we don't want to continue indefinitely.
+        if len(files_to_do) == len(files):
+            print("No further files appear to have been checked!")
+            return_code = max(return_code, 1)
+            break
+        files = sorted(files_to_do)
+        return_code = max(return_code, process.returncode)
+        iteration += 1
+    return return_code


 def main() -> int:
-    exclude: List[str] = []
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    files = sorted(map(lambda x: x.relative_to(current_dir), Path.cwd().rglob('*.py')))
-    files = list(filter(lambda x: not any([str(Path(ele)) in str(x) for ele in exclude]), files))
-
-    return_codes = Parallel(n_jobs=os.cpu_count())(delayed(run_mypy)(file) for file in files)
-    if all(v == 0 for v in return_codes):
-        return 0
-    sys.stderr.write("mypy failed")
-    return 1
+    """
+    Runs mypy on the files in the argument list, or every *.py file under the current directory if there are none.
+    """
+    current_dir = Path.cwd()
+    if sys.argv[1:]:
+        file_list = [Path(arg) for arg in sys.argv[1:]]
+    else:
+        # We don't want to check the files in the submodule if any, partly because they should already have
+        # been checked in the original repo, and partly because we don't want the module name clashes mypy would
+        # otherwise report.
+        submodule_name = "innereye-deeplearning"
+        files = set(current_dir.glob('*.py'))
+        for path in current_dir.glob('*'):
+            if path.name != submodule_name:
+                files.update(path.rglob('*.py'))
+        file_list = list(files)
+    return run_mypy(sorted(str(file) for file in file_list))


 if __name__ == "__main__":