Bug fix: deployed models and training code use different versions of hi-ml (#606)

Anton Schwaighofer 2021-12-07 12:45:49 +00:00 committed by GitHub
Parent f5b7298c57
Commit 5a37198b3f
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 50 additions and 37 deletions

View file

@@ -1,5 +1,5 @@
[flake8]
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401, E402
max-line-length = 160
max-complexity = 25
exclude = fastMRI/ test_outputs/ hi-ml/

View file

@@ -63,6 +63,7 @@ gets uploaded to AzureML, by skipping all test folders.
- ([#605](https://github.com/microsoft/InnerEye-DeepLearning/pull/605)) Make build jobs deterministic for regression testing.
### Fixed
- ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
- ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
- ([#587](https://github.com/microsoft/InnerEye-DeepLearning/pull/587)) Bug fix for regression in AzureML's handling of environments: upgrade to hi-ml 0.1.11
- ([#537](https://github.com/microsoft/InnerEye-DeepLearning/pull/537)) Print warning if inference is disabled but comparison requested.

View file

@@ -6,7 +6,7 @@ import logging
import os
import sys
from pathlib import Path
from typing import Optional
from typing import List, Optional, Tuple
from InnerEye.Common.type_annotations import PathOrString
@@ -93,6 +93,15 @@ def get_environment_yaml_file() -> Path:
return env
def get_hi_ml_submodule_relative_paths() -> List[Tuple[Path, str]]:
"""
Returns the paths relative to the repository root where the submodules for hi-ml and hi-ml-azure are expected.
It returns a list of tuples (folder name, expected subfolder in that folder)
"""
return [(Path("hi-ml") / "hi-ml-azure" / "src", "health_azure"),
(Path("hi-ml") / "hi-ml" / "src", "health_ml")]
def add_submodules_to_path() -> None:
"""
This function adds all submodules that the code uses to sys.path and to the environment variables. This is
@@ -104,9 +113,8 @@ def add_submodules_to_path() -> None:
"""
innereye_root = repository_root_directory()
folders_to_add = [(innereye_root, "InnerEye"),
(innereye_root / "fastMRI", "fastmri"),
(innereye_root / "hi-ml" / "hi-ml-azure" / "src", "health_azure"),
(innereye_root / "hi-ml" / "hi-ml" / "src", "health_ml")]
(innereye_root / "fastMRI", "fastmri")]
folders_to_add.extend([(innereye_root / p, folder) for p, folder in get_hi_ml_submodule_relative_paths()])
for (folder, subfolder_that_must_exist) in folders_to_add:
if (folder / subfolder_that_must_exist).is_dir():
folder_str = str(folder)
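
The two helpers above are the heart of the path handling: get_hi_ml_submodule_relative_paths() declares where the hi-ml and hi-ml-azure sources live relative to the repository root, and add_submodules_to_path() prepends those folders to sys.path when they are actually checked out. A minimal, self-contained sketch of that pattern (not the verbatim InnerEye code; the PYTHONPATH handling is an assumption about how spawned worker processes pick up the folders):

import os
import sys
from pathlib import Path
from typing import List, Tuple


def get_hi_ml_submodule_relative_paths() -> List[Tuple[Path, str]]:
    """Folders (relative to the repo root) and the package expected inside each."""
    return [(Path("hi-ml") / "hi-ml-azure" / "src", "health_azure"),
            (Path("hi-ml") / "hi-ml" / "src", "health_ml")]


def add_submodule_sources_to_path(repo_root: Path) -> None:
    """Make the checked-out hi-ml submodules importable in this and child processes."""
    for relative_path, package in get_hi_ml_submodule_relative_paths():
        folder = repo_root / relative_path
        # Skip submodules that are not checked out (e.g. when hi-ml comes from pip).
        if not (folder / package).is_dir():
            continue
        folder_str = str(folder)
        if folder_str not in sys.path:
            sys.path.insert(0, folder_str)
        # Assumption: exporting PYTHONPATH lets spawned processes (e.g. DDP workers) see the folder too.
        os.environ["PYTHONPATH"] = folder_str + os.pathsep + os.environ.get("PYTHONPATH", "")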

View file

@@ -697,10 +697,11 @@ class MLRunner:
:param python_environment: The Python environment that is used in the present AzureML run.
"""
def copy_folder(source_folder: Path, destination_folder: str = "") -> None:
def copy_folder(source_folder: Path, destination_folder: Optional[Path] = None) -> None:
logging.info(f"Copying folder for registration: {source_folder}")
destination_folder = destination_folder or source_folder.name
shutil.copytree(str(source_folder), str(model_folder / destination_folder),
full_destination = model_folder / source_folder.name if destination_folder is None \
else model_folder / destination_folder
shutil.copytree(str(source_folder), str(full_destination),
ignore=shutil.ignore_patterns('*.pyc'))
def copy_file(source: Path, destination_file: str) -> None:
@@ -743,6 +744,11 @@ class MLRunner:
# we can identify it by going up the folder structure off a known file (repository_root does exactly that)
repository_root = fixed_paths.repository_root_directory()
copy_folder(repository_root / INNEREYE_PACKAGE_NAME)
# If hi-ml is used as a submodule, copy that too
for relative_path, _ in fixed_paths.get_hi_ml_submodule_relative_paths():
full_submodule_path = repository_root / relative_path
if full_submodule_path.is_dir():
copy_folder(full_submodule_path, relative_path)
# Extra code directory is expected to be relative to the project root folder.
if self.azure_config.extra_code_directory:
extra_code_folder = self.project_root / self.azure_config.extra_code_directory
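
This hunk is the core of the fix: when hi-ml is consumed as a git submodule rather than as a pip package, its source folders must travel with the registered model, and they must keep their repository-relative layout (hi-ml/hi-ml/src, hi-ml/hi-ml-azure/src) so that add_submodules_to_path() resolves them identically inside the deployed model. A rough sketch of that copy step, assuming repository_root and model_folder variables like those in the diff and the helper sketched earlier:

import shutil
from pathlib import Path


def copy_submodule_sources(repository_root: Path, model_folder: Path) -> None:
    """Copy each checked-out hi-ml submodule into the model folder, preserving its relative path."""
    for relative_path, _ in get_hi_ml_submodule_relative_paths():
        source = repository_root / relative_path
        if source.is_dir():
            destination = model_folder / relative_path
            # Same exclusion as copy_folder in the diff: no compiled bytecode in the registered model.
            shutil.copytree(str(source), str(destination), ignore=shutil.ignore_patterns("*.pyc"))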

View file

@@ -9,10 +9,6 @@ import uuid
from pathlib import Path
from typing import Optional, Tuple
# Suppress all errors here because the imports after code cause loads of warnings. We can't specifically suppress
# individual warnings only.
# flake8: noqa
# Workaround for an issue with how AzureML and Pytorch Lightning interact: When spawning additional processes for DDP,
# the working directory is not correctly picked up in sys.path
print(f"Starting InnerEye runner at {sys.argv[0]}")
@@ -24,6 +20,8 @@ if (innereye_root / "InnerEye").is_dir():
sys.path.insert(0, innereye_root_str)
from InnerEye.Common import fixed_paths
# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
# in a submodule
fixed_paths.add_submodules_to_path()
from azureml._base_sdk_common import user_agent
@@ -47,7 +45,7 @@ from InnerEye.Azure.azure_util import (RUN_CONTEXT, RUN_RECOVERY_ID_KEY_NAME, ge
is_offline_run_context)
from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
from InnerEye.Common.common_util import (FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE,
disable_logging_to_file, is_linux, logging_to_stdout)
is_linux, logging_to_stdout)
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.deep_learning_config import DeepLearningConfig
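
The ordering here is deliberate: fixed_paths.add_submodules_to_path() must run before any import that resolves to health_azure or health_ml, otherwise Python would never find the submodule copy of hi-ml. This is also why E402 ("module level import not at top of file") was added to the flake8 ignore list in the first hunk. A minimal sketch of the pattern, with module names taken from the diff (not the full runner.py):

import sys
from pathlib import Path

# Make the repository root importable first (mirrors the existing runner logic).
innereye_root = Path(__file__).resolve().parent
if str(innereye_root) not in sys.path:
    sys.path.insert(0, str(innereye_root))

from InnerEye.Common import fixed_paths  # noqa: E402

# Must happen before any import that may come from the hi-ml submodule.
fixed_paths.add_submodules_to_path()

from health_azure import create_run_configuration, submit_run  # noqa: E402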

View file

@@ -12,14 +12,14 @@ from typing import Dict, List, Optional
import param
import requests
from azureml.core import Model, ScriptRunConfig
from health_azure import create_run_configuration, submit_run
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.fixed_paths import DEFAULT_DATA_FOLDER, DEFAULT_RESULT_IMAGE_NAME, DEFAULT_RESULT_ZIP_DICOM_NAME, \
DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, ENVIRONMENT_YAML_FILE_NAME, PYTHON_ENVIRONMENT_NAME, \
DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, PYTHON_ENVIRONMENT_NAME, \
RUN_SCORING_SCRIPT, SCORE_SCRIPT, SETTINGS_YAML_FILE, repository_root_directory
from InnerEye.Common.generic_parsing import GenericConfig
from health_azure import create_run_configuration, submit_run
class SubmitForInferenceConfig(GenericConfig):
@@ -149,17 +149,8 @@ def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConf
logging.info(f"Building inference run submission in {source_directory_path}")
image_folder = source_directory_path / DEFAULT_DATA_FOLDER
image = copy_image_file(args.image_file, image_folder, args.use_dicom)
model_sas_urls = model.get_sas_urls()
# Identifies all the files with basename "environment.yml" in the model and downloads them.
# These downloads should go into a temp folder that will most likely not be included in the model itself,
# because the AzureML run will later download the model into the same folder structure, and the file names might
# clash.
temp_folder = source_directory_path / "temp_for_scoring"
conda_files = download_files_from_model(model_sas_urls, ENVIRONMENT_YAML_FILE_NAME, dir_path=temp_folder)
if len(conda_files) != 1:
raise ValueError("Exactly 1 Conda environment definition must exist in the model.")
# Retrieve the name of the Python environment that the training run used. This environment should have been
# registered. If no such environment exists, it will be re-create from the Conda files provided.
# registered at training.
python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
if not python_environment_name:
raise ValueError(f"The model did not contain tag {PYTHON_ENVIRONMENT_NAME} for the AzureML environment to use.")

View file

@@ -50,11 +50,11 @@ from InnerEye.ML.visualizers.plot_cross_validation import PlotCrossValidationCon
from InnerEye.Scripts import submit_for_inference
from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_default_workspace, get_nifti_shape
FALLBACK_SINGLE_RUN = "refs_pull_593_merge_1637188926_7ba554ba"
FALLBACK_ENSEMBLE_RUN = "refs_pull_545_merge:HD_caea82ae-9603-48ba-8280-7d2bc6272411"
FALLBACK_2NODE_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538178_9f3023b2"
FALLBACK_SINGLE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867172_17ba8dc5"
FALLBACK_ENSEMBLE_RUN = "refs_pull_606_merge:HD_b8a6ad93-8c19-45de-8ea1-f87fce92c3bd"
FALLBACK_2NODE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867224_8d8072fe"
FALLBACK_CV_GLAUCOMA = "refs_pull_545_merge:HD_72ecc647-07c3-4353-a538-620346114ebd"
FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538216_3eb92f09"
FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867108_789991ac"
def get_most_recent_run_id(fallback_run_id_for_local_execution: str = FALLBACK_SINGLE_RUN) -> str:

View file

@@ -52,6 +52,8 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
project_root = Path(__file__).parent.parent
ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
model_folder = test_output_dirs.root_dir / "final"
hi_ml_submodules = [p for p, _ in fixed_paths.get_hi_ml_submodule_relative_paths()]
has_submodule = any(folder.is_dir() for folder in hi_ml_submodules)
ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
expected_files = [
fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
@@ -61,6 +63,9 @@
"InnerEye/Common/fixed_paths.py",
"InnerEye/Common/common_util.py",
]
if has_submodule:
expected_files.extend(["hi-ml/hi-ml/src/health_ml/__init__.py",
"hi-ml/hi-ml-azure/src/health_azure/__init__.py"])
for r in checkpoints_relative:
expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
for expected_file in expected_files:

View file

@@ -26,8 +26,6 @@ dependencies:
- gitpython==3.1.7
- gputil==1.4.0
- h5py==2.10.0
- hi-ml==0.1.11
- hi-ml-azure==0.1.11
- InnerEye-DICOM-RT==1.0.1
- joblib==0.16.0
- jupyter==1.0.0
@@ -57,6 +55,7 @@ dependencies:
- pytorch-lightning==1.3.8
- rich==5.1.1
- rpdb==0.1.6
- ruamel.yaml==0.16.12
- runstats==1.8.0
- scikit-image==0.17.2
- scikit-learn==0.23.2

View file

@@ -2,21 +2,26 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from collections import defaultdict
import logging
import os
import sys
import zipfile
from collections import defaultdict
from pathlib import Path
from typing import List, Optional, Tuple
import zipfile
import numpy as np
import param
from azureml.core import Run
from InnerEye_DICOM_RT.nifti_to_dicom_rt_converter import rtconvert
from azureml.core import Run
from InnerEye.Common import fixed_paths
# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
# in a submodule
fixed_paths.add_submodules_to_path()
from InnerEye.Azure.azure_util import is_offline_run_context
from InnerEye.Common import fixed_paths
from InnerEye.Common.fixed_paths import DEFAULT_RESULT_ZIP_DICOM_NAME
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.Common.type_annotations import TupleFloat3, TupleInt3
@@ -27,8 +32,8 @@ from InnerEye.ML.photometric_normalization import PhotometricNormalization
from InnerEye.ML.pipelines.ensemble import EnsemblePipeline
from InnerEye.ML.pipelines.inference import FullImageInferencePipelineBase, InferencePipeline
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.io_util import ImageWithHeader, load_nifti_image, reverse_tuple_float3, store_as_ubyte_nifti, \
load_dicom_series_and_save
from InnerEye.ML.utils.io_util import ImageWithHeader, load_dicom_series_and_save, load_nifti_image, \
reverse_tuple_float3, store_as_ubyte_nifti
class ScorePipelineConfig(GenericConfig):