Bug fix: deployed models and training code use different versions of hi-ml (#606)

Anton Schwaighofer 2021-12-07 12:45:49 +00:00 committed by GitHub
Parent f5b7298c57
Commit 5a37198b3f
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 50 additions and 37 deletions

View file

@@ -1,5 +1,5 @@
[flake8]
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401, E402
max-line-length = 160
max-complexity = 25
exclude = fastMRI/ test_outputs/ hi-ml/

View file

@@ -63,6 +63,7 @@ gets uploaded to AzureML, by skipping all test folders.
- ([#605](https://github.com/microsoft/InnerEye-DeepLearning/pull/605)) Make build jobs deterministic for regression testing.
### Fixed
- ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
- ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
- ([#587](https://github.com/microsoft/InnerEye-DeepLearning/pull/587)) Bug fix for regression in AzureML's handling of environments: upgrade to hi-ml 0.1.11
- ([#537](https://github.com/microsoft/InnerEye-DeepLearning/pull/537)) Print warning if inference is disabled but comparison requested.

View file

@@ -6,7 +6,7 @@ import logging
import os
import sys
from pathlib import Path
from typing import Optional
from typing import List, Optional, Tuple
from InnerEye.Common.type_annotations import PathOrString
@@ -93,6 +93,15 @@ def get_environment_yaml_file() -> Path:
return env
def get_hi_ml_submodule_relative_paths() -> List[Tuple[Path, str]]:
"""
Returns the paths relative to the repository root where the submodules for hi-ml and hi-ml-azure are expected.
It returns a list of tuples (folder name, expected subfolder in that folder)
"""
return [(Path("hi-ml") / "hi-ml-azure" / "src", "health_azure"),
(Path("hi-ml") / "hi-ml" / "src", "health_ml")]
def add_submodules_to_path() -> None:
"""
This function adds all submodules that the code uses to sys.path and to the environment variables. This is
@@ -104,9 +113,8 @@ def add_submodules_to_path() -> None:
"""
innereye_root = repository_root_directory()
folders_to_add = [(innereye_root, "InnerEye"),
(innereye_root / "fastMRI", "fastmri"),
(innereye_root / "hi-ml" / "hi-ml-azure" / "src", "health_azure"),
(innereye_root / "hi-ml" / "hi-ml" / "src", "health_ml")]
(innereye_root / "fastMRI", "fastmri")]
folders_to_add.extend([(innereye_root / p, folder) for p, folder in get_hi_ml_submodule_relative_paths()])
for (folder, subfolder_that_must_exist) in folders_to_add:
if (folder / subfolder_that_must_exist).is_dir():
folder_str = str(folder)
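
The two helpers above are the heart of the path handling: get_hi_ml_submodule_relative_paths() declares where the hi-ml and hi-ml-azure sources live relative to the repository root, and add_submodules_to_path() prepends those folders to sys.path when they are actually checked out. A minimal, self-contained sketch of that pattern (not the verbatim InnerEye code; the PYTHONPATH handling is an assumption about how spawned worker processes pick up the folders):

import os
import sys
from pathlib import Path
from typing import List, Tuple


def get_hi_ml_submodule_relative_paths() -> List[Tuple[Path, str]]:
    """Folders (relative to the repo root) and the package expected inside each."""
    return [(Path("hi-ml") / "hi-ml-azure" / "src", "health_azure"),
            (Path("hi-ml") / "hi-ml" / "src", "health_ml")]


def add_submodule_sources_to_path(repo_root: Path) -> None:
    """Make the checked-out hi-ml submodules importable in this and child processes."""
    for relative_path, package in get_hi_ml_submodule_relative_paths():
        folder = repo_root / relative_path
        # Skip submodules that are not checked out (e.g. when hi-ml comes from pip).
        if not (folder / package).is_dir():
            continue
        folder_str = str(folder)
        if folder_str not in sys.path:
            sys.path.insert(0, folder_str)
        # Assumption: exporting PYTHONPATH lets spawned processes (e.g. DDP workers) see the folder too.
        os.environ["PYTHONPATH"] = folder_str + os.pathsep + os.environ.get("PYTHONPATH", "")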

View file

@@ -697,10 +697,11 @@ class MLRunner:
:param python_environment: The Python environment that is used in the present AzureML run.
"""
def copy_folder(source_folder: Path, destination_folder: str = "") -> None:
def copy_folder(source_folder: Path, destination_folder: Optional[Path] = None) -> None:
logging.info(f"Copying folder for registration: {source_folder}")
destination_folder = destination_folder or source_folder.name
shutil.copytree(str(source_folder), str(model_folder / destination_folder),
full_destination = model_folder / source_folder.name if destination_folder is None \
else model_folder / destination_folder
shutil.copytree(str(source_folder), str(full_destination),
ignore=shutil.ignore_patterns('*.pyc'))
def copy_file(source: Path, destination_file: str) -> None:
@@ -743,6 +744,11 @@ class MLRunner:
# we can identify it by going up the folder structure off a known file (repository_root does exactly that)
repository_root = fixed_paths.repository_root_directory()
copy_folder(repository_root / INNEREYE_PACKAGE_NAME)
# If hi-ml is used as a submodule, copy that too
for relative_path, _ in fixed_paths.get_hi_ml_submodule_relative_paths():
full_submodule_path = repository_root / relative_path
if full_submodule_path.is_dir():
copy_folder(full_submodule_path, relative_path)
# Extra code directory is expected to be relative to the project root folder.
if self.azure_config.extra_code_directory:
extra_code_folder = self.project_root / self.azure_config.extra_code_directory
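
This hunk is the core of the fix: when hi-ml is consumed as a git submodule rather than as a pip package, its source folders must travel with the registered model, and they must keep their repository-relative layout (hi-ml/hi-ml/src, hi-ml/hi-ml-azure/src) so that add_submodules_to_path() resolves them identically inside the deployed model. A rough sketch of that copy step, assuming repository_root and model_folder variables like those in the diff and the helper sketched earlier:

import shutil
from pathlib import Path


def copy_submodule_sources(repository_root: Path, model_folder: Path) -> None:
    """Copy each checked-out hi-ml submodule into the model folder, preserving its relative path."""
    for relative_path, _ in get_hi_ml_submodule_relative_paths():
        source = repository_root / relative_path
        if source.is_dir():
            destination = model_folder / relative_path
            # Same exclusion as copy_folder in the diff: no compiled bytecode in the registered model.
            shutil.copytree(str(source), str(destination), ignore=shutil.ignore_patterns("*.pyc"))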

View file

@@ -9,10 +9,6 @@ import uuid
from pathlib import Path
from typing import Optional, Tuple
# Suppress all errors here because the imports after code cause loads of warnings. We can't specifically suppress
# individual warnings only.
# flake8: noqa
# Workaround for an issue with how AzureML and Pytorch Lightning interact: When spawning additional processes for DDP,
# the working directory is not correctly picked up in sys.path
print(f"Starting InnerEye runner at {sys.argv[0]}")
@@ -24,6 +20,8 @@ if (innereye_root / "InnerEye").is_dir():
sys.path.insert(0, innereye_root_str)
from InnerEye.Common import fixed_paths
# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
# in a submodule
fixed_paths.add_submodules_to_path()
from azureml._base_sdk_common import user_agent
@@ -47,7 +45,7 @@ from InnerEye.Azure.azure_util import (RUN_CONTEXT, RUN_RECOVERY_ID_KEY_NAME, ge
is_offline_run_context)
from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
from InnerEye.Common.common_util import (FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE,
disable_logging_to_file, is_linux, logging_to_stdout)
is_linux, logging_to_stdout)
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.deep_learning_config import DeepLearningConfig
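
The ordering here is deliberate: fixed_paths.add_submodules_to_path() must run before any import that resolves to health_azure or health_ml, otherwise Python would never find the submodule copy of hi-ml. This is also why E402 ("module level import not at top of file") was added to the flake8 ignore list in the first hunk. A minimal sketch of the pattern, with module names taken from the diff (not the full runner.py):

import sys
from pathlib import Path

# Make the repository root importable first (mirrors the existing runner logic).
innereye_root = Path(__file__).resolve().parent
if str(innereye_root) not in sys.path:
    sys.path.insert(0, str(innereye_root))

from InnerEye.Common import fixed_paths  # noqa: E402

# Must happen before any import that may come from the hi-ml submodule.
fixed_paths.add_submodules_to_path()

from health_azure import create_run_configuration, submit_run  # noqa: E402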

View file

@@ -12,14 +12,14 @@ from typing import Dict, List, Optional
import param
import requests
from azureml.core import Model, ScriptRunConfig
from health_azure import create_run_configuration, submit_run
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.fixed_paths import DEFAULT_DATA_FOLDER, DEFAULT_RESULT_IMAGE_NAME, DEFAULT_RESULT_ZIP_DICOM_NAME, \
DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, ENVIRONMENT_YAML_FILE_NAME, PYTHON_ENVIRONMENT_NAME, \
DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, PYTHON_ENVIRONMENT_NAME, \
RUN_SCORING_SCRIPT, SCORE_SCRIPT, SETTINGS_YAML_FILE, repository_root_directory
from InnerEye.Common.generic_parsing import GenericConfig
from health_azure import create_run_configuration, submit_run
class SubmitForInferenceConfig(GenericConfig):
@@ -149,17 +149,8 @@ def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConf
logging.info(f"Building inference run submission in {source_directory_path}")
image_folder = source_directory_path / DEFAULT_DATA_FOLDER
image = copy_image_file(args.image_file, image_folder, args.use_dicom)
model_sas_urls = model.get_sas_urls()
# Identifies all the files with basename "environment.yml" in the model and downloads them.
# These downloads should go into a temp folder that will most likely not be included in the model itself,
# because the AzureML run will later download the model into the same folder structure, and the file names might
# clash.
temp_folder = source_directory_path / "temp_for_scoring"
conda_files = download_files_from_model(model_sas_urls, ENVIRONMENT_YAML_FILE_NAME, dir_path=temp_folder)
if len(conda_files) != 1:
raise ValueError("Exactly 1 Conda environment definition must exist in the model.")
# Retrieve the name of the Python environment that the training run used. This environment should have been
# registered. If no such environment exists, it will be re-create from the Conda files provided.
# registered at training.
python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
if not python_environment_name:
raise ValueError(f"The model did not contain tag {PYTHON_ENVIRONMENT_NAME} for the AzureML environment to use.")

View file

@@ -50,11 +50,11 @@ from InnerEye.ML.visualizers.plot_cross_validation import PlotCrossValidationCon
from InnerEye.Scripts import submit_for_inference
from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_default_workspace, get_nifti_shape
FALLBACK_SINGLE_RUN = "refs_pull_593_merge_1637188926_7ba554ba"
FALLBACK_ENSEMBLE_RUN = "refs_pull_545_merge:HD_caea82ae-9603-48ba-8280-7d2bc6272411"
FALLBACK_2NODE_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538178_9f3023b2"
FALLBACK_SINGLE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867172_17ba8dc5"
FALLBACK_ENSEMBLE_RUN = "refs_pull_606_merge:HD_b8a6ad93-8c19-45de-8ea1-f87fce92c3bd"
FALLBACK_2NODE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867224_8d8072fe"
FALLBACK_CV_GLAUCOMA = "refs_pull_545_merge:HD_72ecc647-07c3-4353-a538-620346114ebd"
FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538216_3eb92f09"
FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867108_789991ac"
def get_most_recent_run_id(fallback_run_id_for_local_execution: str = FALLBACK_SINGLE_RUN) -> str:

View file

@@ -52,6 +52,8 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
project_root = Path(__file__).parent.parent
ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
model_folder = test_output_dirs.root_dir / "final"
hi_ml_submodules = [p for p, _ in fixed_paths.get_hi_ml_submodule_relative_paths()]
has_submodule = any(folder.is_dir() for folder in hi_ml_submodules)
ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
expected_files = [
fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
@@ -61,6 +63,9 @@
"InnerEye/Common/fixed_paths.py",
"InnerEye/Common/common_util.py",
]
if has_submodule:
expected_files.extend(["hi-ml/hi-ml/src/health_ml/__init__.py",
"hi-ml/hi-ml-azure/src/health_azure/__init__.py"])
for r in checkpoints_relative:
expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
for expected_file in expected_files:

View file

@@ -26,8 +26,6 @@ dependencies:
- gitpython==3.1.7
- gputil==1.4.0
- h5py==2.10.0
- hi-ml==0.1.11
- hi-ml-azure==0.1.11
- InnerEye-DICOM-RT==1.0.1
- joblib==0.16.0
- jupyter==1.0.0
@@ -57,6 +55,7 @@ dependencies:
- pytorch-lightning==1.3.8
- rich==5.1.1
- rpdb==0.1.6
- ruamel.yaml==0.16.12
- runstats==1.8.0
- scikit-image==0.17.2
- scikit-learn==0.23.2

View file

@@ -2,21 +2,26 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from collections import defaultdict
import logging
import os
import sys
import zipfile
from collections import defaultdict
from pathlib import Path
from typing import List, Optional, Tuple
import zipfile
import numpy as np
import param
from azureml.core import Run
from InnerEye_DICOM_RT.nifti_to_dicom_rt_converter import rtconvert
from azureml.core import Run
from InnerEye.Common import fixed_paths
# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
# in a submodule
fixed_paths.add_submodules_to_path()
from InnerEye.Azure.azure_util import is_offline_run_context
from InnerEye.Common import fixed_paths
from InnerEye.Common.fixed_paths import DEFAULT_RESULT_ZIP_DICOM_NAME
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.Common.type_annotations import TupleFloat3, TupleInt3
@@ -27,8 +32,8 @@ from InnerEye.ML.photometric_normalization import PhotometricNormalization
from InnerEye.ML.pipelines.ensemble import EnsemblePipeline
from InnerEye.ML.pipelines.inference import FullImageInferencePipelineBase, InferencePipeline
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.io_util import ImageWithHeader, load_nifti_image, reverse_tuple_float3, store_as_ubyte_nifti, \
load_dicom_series_and_save
from InnerEye.ML.utils.io_util import ImageWithHeader, load_dicom_series_and_save, load_nifti_image, \
reverse_tuple_float3, store_as_ubyte_nifti
class ScorePipelineConfig(GenericConfig):