Bug fix: deployed models and training code use different versions of hi-ml (#606)
Parent: f5b7298c57
Commit: 5a37198b3f

.flake8 (2 changes)

@@ -1,5 +1,5 @@
 [flake8]
-ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401
+ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401, E402
 max-line-length = 160
 max-complexity = 25
 exclude = fastMRI/ test_outputs/ hi-ml/

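For context (not part of the diff itself): flake8 code E402 is "module level import not at top of file". This commit introduces exactly that pattern in runner.py and score.py, where fixed_paths.add_submodules_to_path() has to run between imports, so E402 is now ignored repo-wide. A minimal illustration of the pattern:

    # Hypothetical file layout that would trigger E402 without the new ignore entry
    from InnerEye.Common import fixed_paths

    fixed_paths.add_submodules_to_path()  # executable statement before the remaining imports

    from InnerEye.Azure.azure_util import is_offline_run_context  # E402 would be reported here
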
@@ -63,6 +63,7 @@ gets uploaded to AzureML, by skipping all test folders.
 - ([#605](https://github.com/microsoft/InnerEye-DeepLearning/pull/605)) Make build jobs deterministic for regression testing.

 ### Fixed
+- ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
 - ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
 - ([#587](https://github.com/microsoft/InnerEye-DeepLearning/pull/587)) Bug fix for regression in AzureML's handling of environments: upgrade to hi-ml 0.1.11
 - ([#537](https://github.com/microsoft/InnerEye-DeepLearning/pull/537)) Print warning if inference is disabled but comparison requested.

@@ -6,7 +6,7 @@ import logging
 import os
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional, Tuple

 from InnerEye.Common.type_annotations import PathOrString

@@ -93,6 +93,15 @@ def get_environment_yaml_file() -> Path:
     return env


+def get_hi_ml_submodule_relative_paths() -> List[Tuple[Path, str]]:
+    """
+    Returns the paths relative to the repository root where the submodules for hi-ml and hi-ml-azure are expected.
+    It returns a list with a tuple (folder name, expected subfolder in that folder)
+    """
+    return [(Path("hi-ml") / "hi-ml-azure" / "src", "health_azure"),
+            (Path("hi-ml") / "hi-ml" / "src", "health_ml")]
+
+
 def add_submodules_to_path() -> None:
     """
     This function adds all submodules that the code uses to sys.path and to the environment variables. This is

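A quick usage sketch (not part of the diff) of the new helper; add_submodules_to_path below and MLRunner.copy_child_paths_to_folder consume it in the same way:

    from pathlib import Path
    from InnerEye.Common import fixed_paths

    repo_root = Path("/path/to/InnerEye-DeepLearning")  # placeholder for repository_root_directory()
    for relative_src, expected_package in fixed_paths.get_hi_ml_submodule_relative_paths():
        submodule_src = repo_root / relative_src
        if (submodule_src / expected_package).is_dir():
            # e.g. hi-ml/hi-ml/src contains health_ml, so the submodule is checked out
            print(f"Found {expected_package} at {submodule_src}")
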
@@ -104,9 +113,8 @@ def add_submodules_to_path() -> None:
     """
     innereye_root = repository_root_directory()
     folders_to_add = [(innereye_root, "InnerEye"),
-                      (innereye_root / "fastMRI", "fastmri"),
-                      (innereye_root / "hi-ml" / "hi-ml-azure" / "src", "health_azure"),
-                      (innereye_root / "hi-ml" / "hi-ml" / "src", "health_ml")]
+                      (innereye_root / "fastMRI", "fastmri")]
+    folders_to_add.extend([(innereye_root / p, folder) for p, folder in get_hi_ml_submodule_relative_paths()])
     for (folder, subfolder_that_must_exist) in folders_to_add:
         if (folder / subfolder_that_must_exist).is_dir():
             folder_str = str(folder)

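The remainder of the loop body falls outside this hunk. Based on the docstring ("adds all submodules ... to sys.path and to the environment variables"), the per-folder step presumably looks roughly like the following sketch; the exact code is an assumption, not taken from the commit:

    import os
    import sys

    def _add_folder_to_path(folder_str: str) -> None:
        # Make the submodule importable in the current process ...
        if folder_str not in sys.path:
            sys.path.insert(0, folder_str)
        # ... and in child processes that inherit the environment (e.g. DDP workers).
        existing = os.environ.get("PYTHONPATH", "")
        os.environ["PYTHONPATH"] = folder_str if not existing else f"{folder_str}{os.pathsep}{existing}"
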
@@ -697,10 +697,11 @@ class MLRunner:
         :param python_environment: The Python environment that is used in the present AzureML run.
         """

-        def copy_folder(source_folder: Path, destination_folder: str = "") -> None:
+        def copy_folder(source_folder: Path, destination_folder: Optional[Path] = None) -> None:
             logging.info(f"Copying folder for registration: {source_folder}")
-            destination_folder = destination_folder or source_folder.name
-            shutil.copytree(str(source_folder), str(model_folder / destination_folder),
+            full_destination = model_folder / source_folder.name if destination_folder is None \
+                else model_folder / destination_folder
+            shutil.copytree(str(source_folder), str(full_destination),
                             ignore=shutil.ignore_patterns('*.pyc'))

         def copy_file(source: Path, destination_file: str) -> None:

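Why the destination argument changed from str to Optional[Path]: with the old default of source_folder.name, both hi-ml submodules would have been copied to a top-level folder literally called "src" and clashed. Passing the repository-relative path keeps the nested layout that the import machinery and the updated tests expect. Illustrative path arithmetic only (the model_folder value here is made up):

    from pathlib import Path

    model_folder = Path("outputs/final")             # hypothetical registration folder
    relative_path = Path("hi-ml") / "hi-ml" / "src"  # as returned by get_hi_ml_submodule_relative_paths()

    # New behaviour: the nested structure is preserved
    # -> outputs/final/hi-ml/hi-ml/src/health_ml/...
    full_destination = model_folder / relative_path

    # Old behaviour (destination_folder="") would have used source_folder.name only
    # -> outputs/final/src/...  (the two submodules would collide here)
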
@@ -743,6 +744,11 @@ class MLRunner:
         # we can identify it by going up the folder structure off a known file (repository_root does exactly that)
         repository_root = fixed_paths.repository_root_directory()
         copy_folder(repository_root / INNEREYE_PACKAGE_NAME)
+        # If hi-ml is used as a submodule, copy that too
+        for relative_path, _ in fixed_paths.get_hi_ml_submodule_relative_paths():
+            full_submodule_path = repository_root / relative_path
+            if full_submodule_path.is_dir():
+                copy_folder(full_submodule_path, relative_path)
         # Extra code directory is expected to be relative to the project root folder.
         if self.azure_config.extra_code_directory:
             extra_code_folder = self.project_root / self.azure_config.extra_code_directory

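With the submodule present, the registered model folder therefore contains the submodule sources under their original relative paths, alongside the InnerEye package, the environment file and the checkpoints. Roughly (layout inferred from the updated test expectations below):

    <model_folder>/
        InnerEye/...
        hi-ml/hi-ml/src/health_ml/...
        hi-ml/hi-ml-azure/src/health_azure/...
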
@@ -9,10 +9,6 @@ import uuid
 from pathlib import Path
 from typing import Optional, Tuple

-# Suppress all errors here because the imports after code cause loads of warnings. We can't specifically suppress
-# individual warnings only.
-# flake8: noqa
-
 # Workaround for an issue with how AzureML and Pytorch Lightning interact: When spawning additional processes for DDP,
 # the working directory is not correctly picked up in sys.path
 print(f"Starting InnerEye runner at {sys.argv[0]}")

@@ -24,6 +20,8 @@ if (innereye_root / "InnerEye").is_dir():
     sys.path.insert(0, innereye_root_str)
 from InnerEye.Common import fixed_paths

+# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
+# in a submodule
 fixed_paths.add_submodules_to_path()

 from azureml._base_sdk_common import user_agent

@@ -47,7 +45,7 @@ from InnerEye.Azure.azure_util import (RUN_CONTEXT, RUN_RECOVERY_ID_KEY_NAME, ge
                                        is_offline_run_context)
 from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
 from InnerEye.Common.common_util import (FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE,
-                                         disable_logging_to_file, is_linux, logging_to_stdout)
+                                         is_linux, logging_to_stdout)
 from InnerEye.Common.generic_parsing import GenericConfig
 from InnerEye.ML.common import DATASET_CSV_FILE_NAME
 from InnerEye.ML.deep_learning_config import DeepLearningConfig

@@ -12,14 +12,14 @@ from typing import Dict, List, Optional
 import param
 import requests
 from azureml.core import Model, ScriptRunConfig
+from health_azure import create_run_configuration, submit_run

 from InnerEye.Azure.azure_config import AzureConfig
 from InnerEye.Common.common_util import logging_to_stdout
 from InnerEye.Common.fixed_paths import DEFAULT_DATA_FOLDER, DEFAULT_RESULT_IMAGE_NAME, DEFAULT_RESULT_ZIP_DICOM_NAME, \
-    DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, ENVIRONMENT_YAML_FILE_NAME, PYTHON_ENVIRONMENT_NAME, \
+    DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, PYTHON_ENVIRONMENT_NAME, \
     RUN_SCORING_SCRIPT, SCORE_SCRIPT, SETTINGS_YAML_FILE, repository_root_directory
 from InnerEye.Common.generic_parsing import GenericConfig
-from health_azure import create_run_configuration, submit_run


 class SubmitForInferenceConfig(GenericConfig):

@@ -149,17 +149,8 @@ def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConf
     logging.info(f"Building inference run submission in {source_directory_path}")
     image_folder = source_directory_path / DEFAULT_DATA_FOLDER
     image = copy_image_file(args.image_file, image_folder, args.use_dicom)
-    model_sas_urls = model.get_sas_urls()
-    # Identifies all the files with basename "environment.yml" in the model and downloads them.
-    # These downloads should go into a temp folder that will most likely not be included in the model itself,
-    # because the AzureML run will later download the model into the same folder structure, and the file names might
-    # clash.
-    temp_folder = source_directory_path / "temp_for_scoring"
-    conda_files = download_files_from_model(model_sas_urls, ENVIRONMENT_YAML_FILE_NAME, dir_path=temp_folder)
-    if len(conda_files) != 1:
-        raise ValueError("Exactly 1 Conda environment definition must exist in the model.")
     # Retrieve the name of the Python environment that the training run used. This environment should have been
-    # registered. If no such environment exists, it will be re-create from the Conda files provided.
+    # registered at training.
     python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
     if not python_environment_name:
         raise ValueError(f"The model did not contain tag {PYTHON_ENVIRONMENT_NAME} for the AzureML environment to use.")

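After this change the inference submission no longer downloads Conda files from the registered model and rebuilds an environment; it relies on the AzureML environment registered during training, whose name is carried in the model tag. The retrieval step itself sits outside this hunk; assuming the standard azureml-core API, it would look something like:

    from azureml.core import Environment, Model, Workspace
    from InnerEye.Common.fixed_paths import PYTHON_ENVIRONMENT_NAME

    workspace = Workspace.from_config()             # assumed: an authenticated AzureML workspace
    model = Model(workspace, id="my_model:1")       # hypothetical model id
    env_name = model.tags[PYTHON_ENVIRONMENT_NAME]  # tag written when the model was registered at training time
    environment = Environment.get(workspace, name=env_name)  # reuse the registered training environment
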
@@ -50,11 +50,11 @@ from InnerEye.ML.visualizers.plot_cross_validation import PlotCrossValidationCon
 from InnerEye.Scripts import submit_for_inference
 from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_default_workspace, get_nifti_shape

-FALLBACK_SINGLE_RUN = "refs_pull_593_merge_1637188926_7ba554ba"
-FALLBACK_ENSEMBLE_RUN = "refs_pull_545_merge:HD_caea82ae-9603-48ba-8280-7d2bc6272411"
-FALLBACK_2NODE_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538178_9f3023b2"
+FALLBACK_SINGLE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867172_17ba8dc5"
+FALLBACK_ENSEMBLE_RUN = "refs_pull_606_merge:HD_b8a6ad93-8c19-45de-8ea1-f87fce92c3bd"
+FALLBACK_2NODE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867224_8d8072fe"
 FALLBACK_CV_GLAUCOMA = "refs_pull_545_merge:HD_72ecc647-07c3-4353-a538-620346114ebd"
-FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538216_3eb92f09"
+FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867108_789991ac"


 def get_most_recent_run_id(fallback_run_id_for_local_execution: str = FALLBACK_SINGLE_RUN) -> str:

@@ -52,6 +52,8 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
     project_root = Path(__file__).parent.parent
     ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
     model_folder = test_output_dirs.root_dir / "final"
+    hi_ml_submodules = [p for p, _ in fixed_paths.get_hi_ml_submodule_relative_paths()]
+    has_submodule = any(folder.is_dir() for folder in hi_ml_submodules)
     ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
     expected_files = [
         fixed_paths.ENVIRONMENT_YAML_FILE_NAME,

@@ -61,6 +63,9 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
         "InnerEye/Common/fixed_paths.py",
         "InnerEye/Common/common_util.py",
     ]
+    if has_submodule:
+        expected_files.extend(["hi-ml/hi-ml/src/health_ml/__init__.py",
+                               "hi-ml/hi-ml-azure/src/health_azure/__init__.py"])
     for r in checkpoints_relative:
         expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
     for expected_file in expected_files:

@@ -26,8 +26,6 @@ dependencies:
     - gitpython==3.1.7
     - gputil==1.4.0
     - h5py==2.10.0
-    - hi-ml==0.1.11
-    - hi-ml-azure==0.1.11
     - InnerEye-DICOM-RT==1.0.1
     - joblib==0.16.0
     - jupyter==1.0.0

@@ -57,6 +55,7 @@ dependencies:
     - pytorch-lightning==1.3.8
     - rich==5.1.1
     - rpdb==0.1.6
     - ruamel.yaml==0.16.12
     - runstats==1.8.0
     - scikit-image==0.17.2
     - scikit-learn==0.23.2

score.py (17 changes)

@@ -2,21 +2,26 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
 # ------------------------------------------------------------------------------------------
-from collections import defaultdict
 import logging
 import os
 import sys
+import zipfile
+from collections import defaultdict
 from pathlib import Path
 from typing import List, Optional, Tuple
-import zipfile

 import numpy as np
 import param
-from azureml.core import Run
 from InnerEye_DICOM_RT.nifti_to_dicom_rt_converter import rtconvert
+from azureml.core import Run

+from InnerEye.Common import fixed_paths
+
+# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
+# in a submodule
+fixed_paths.add_submodules_to_path()
+
 from InnerEye.Azure.azure_util import is_offline_run_context
-from InnerEye.Common import fixed_paths
 from InnerEye.Common.fixed_paths import DEFAULT_RESULT_ZIP_DICOM_NAME
 from InnerEye.Common.generic_parsing import GenericConfig
 from InnerEye.Common.type_annotations import TupleFloat3, TupleInt3

@@ -27,8 +32,8 @@ from InnerEye.ML.photometric_normalization import PhotometricNormalization
 from InnerEye.ML.pipelines.ensemble import EnsemblePipeline
 from InnerEye.ML.pipelines.inference import FullImageInferencePipelineBase, InferencePipeline
 from InnerEye.ML.utils.config_loader import ModelConfigLoader
-from InnerEye.ML.utils.io_util import ImageWithHeader, load_nifti_image, reverse_tuple_float3, store_as_ubyte_nifti, \
-    load_dicom_series_and_save
+from InnerEye.ML.utils.io_util import ImageWithHeader, load_dicom_series_and_save, load_nifti_image, \
+    reverse_tuple_float3, store_as_ubyte_nifti


 class ScorePipelineConfig(GenericConfig):