Adding cross-validation to the hi-ml runner (#198)

* Adding cross-validation
* Simplified model config loading for models in the histopathology folder
* Editor setup for VSCode
* Simplified pytest usage, also from within VSCode
Anton Schwaighofer 2022-03-07 12:21:09 +00:00 committed by GitHub
Parent 92d2f22650
Commit 314433cb95
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
54 changed files with 1244 additions and 685 deletions

16
.amlignore Normal file

@ -0,0 +1,16 @@
/.git
/.github
/azure-pipelines
/docs
/.idea
.pytest_cache
.mypy_cache
logs
outputs
config.json
*.egg-info
# Temporary files generated from conda merging
temp_environment-*
.config
.vscode
node_modules


@ -2,7 +2,7 @@
omit =
**/pytest
**/__init__.py
*/hello_container_2.py
**/temp_config_for_unittests.py
[html]
skip_empty = true

71
.github/workflows/build-test-pr.yml vendored

@ -75,6 +75,8 @@ jobs:
strategy:
matrix:
folder: [ hi-ml, hi-ml-azure, hi-ml-histopathology ]
# This will let all parts of the matrix run, to collect as many errors as possible, rather than aborting after the first failure
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
@ -85,7 +87,7 @@ jobs:
with:
python-version: ${{ env.pythonVersion }}
- name: Install dependencies
- name: Install hi-ml-azure in editable mode
if: ${{ matrix.folder != 'hi-ml-azure' }}
run: |
cd hi-ml-azure
@ -93,6 +95,14 @@ jobs:
# Install local package in editable mode
make pip_local
- name: Install hi-ml in editable mode
if: ${{ matrix.folder != 'hi-ml-azure' && matrix.folder != 'hi-ml' }}
run: |
cd hi-ml
# Install local package in editable mode
make pip_local
- name: Test with pytest, fast only
run: |
cd ${{ matrix.folder }}
@ -108,7 +118,7 @@ jobs:
needs: [ pytest_fast ]
strategy:
matrix:
folder: [ hi-ml, hi-ml-azure, hi-ml-histopathology ]
folder: [ hi-ml, hi-ml-azure ]
steps:
- uses: actions/checkout@v2
with:
@ -134,7 +144,7 @@ jobs:
needs: [ build-python ]
strategy:
matrix:
folder: [ hi-ml, hi-ml-azure, hi-ml-histopathology ]
folder: [ hi-ml, hi-ml-azure ]
packageName: [ '*.whl', '*.tar.gz' ]
steps:
- uses: actions/checkout@v2
@ -199,6 +209,61 @@ jobs:
with:
folder: ${{ matrix.folder }}
test-project-folders:
runs-on: ubuntu-18.04
needs: [ build-python ]
strategy:
matrix:
folder: [ hi-ml-histopathology ]
packageName: [ '*.whl' ]
steps:
- uses: actions/checkout@v2
with:
lfs: true
- name: Set up Python ${{ env.pythonVersion }}
uses: actions/setup-python@v2
with:
python-version: ${{ env.pythonVersion }}
- name: PIP upgrade
run: |
cd hi-ml-azure
make pip_upgrade
- name: Download hi-ml-azure
id: download_himlazure
uses: ./.github/actions/download_package_artifacts
with:
folder: hi-ml-azure
package_type: ${{ matrix.packageName }}
- name: Download hi-ml
id: download_himl
uses: ./.github/actions/download_package_artifacts
with:
folder: hi-ml
package_type: ${{ matrix.packageName }}
- name: Install dependencies
run: |
cd hi-ml-azure
pip install ${{ steps.download_himlazure.outputs.package_filename }}
cd ../hi-ml
pip install ${{ steps.download_himl.outputs.package_filename }}
- name: Run tests
run: |
cd ${{ matrix.folder }}
make pip_local
make pytest_and_coverage
- name: Upload coverage artifacts
uses: ./.github/actions/upload_coverage_artifacts
if: ${{ matrix.packageName == '*.whl' }}
with:
folder: ${{ matrix.folder }}
build-coverage:
runs-on: ubuntu-18.04

9
.gitignore vendored

@ -150,4 +150,11 @@ node_modules/
!.github/actions/format_coverage/dist/
package-lock.json
logs/
logs/
# Temporary files generated from conda merging
temp_environment-*
# Temporary files from unittesting
temp_config_for_unittests.py
# Temp file from building requirements for histo
temp_requirements.txt


@ -2,7 +2,6 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/hi-ml-azure/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/hi-ml/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/hi-ml-azure/testazure" isTestSource="false" />

8
.vscode/extensions.json vendored Normal file

@ -0,0 +1,8 @@
{
"recommendations": [
"njpwerner.autodocstring",
"doi.fileheadercomment",
"ms-python.python",
"ms-python.vscode-pylance"
]
}

48
.vscode/launch.json vendored

@ -11,6 +11,54 @@
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "Python: Run the HelloWorld model locally",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
"args": [
"--model=HelloWorld"
],
"console": "integratedTerminal"
},
{
"name": "Python: Run the HelloWorld model in AzureML",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
"args": [
"--model=HelloWorld",
"--azureml",
"--cluster=lite-testing-ds2",
],
"console": "integratedTerminal"
},
{
"name": "Python: Run HelloWorld with cross-validation",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
"args": [
"--model=HelloWorld",
"--azureml",
"--cluster=lite-testing-ds2",
"--crossval_count=2"
],
"console": "integratedTerminal"
},
{
"name": "Python: Run DeepSMILECrck in AzureML",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/hi-ml/src/health_ml/runner.py",
"cwd": "${workspaceFolder}/hi-ml-histopathology",
"args": [
"--model=histopathology.configs.classification.DeepSMILECrck",
"--azureml",
"--cluster=lite-testing-ds2",
],
"console": "integratedTerminal"
},
{
"name": "Elevate: HI-ML-AZURE Local",
"type": "python",

17
.vscode/settings.json vendored

@ -1,4 +1,5 @@
{
"autoDocstring.docstringFormat": "sphinx",
"fileHeaderComment.parameter":{
"*":{
"line": "-------------------------------------------------------------------------------------------",
@ -49,4 +50,20 @@
"[python]": {
"editor.rulers": [120],
},
"python.analysis.extraPaths": [
"./hi-ml-azure/src",
"./hi-ml-azure/testazure",
"./hi-ml/src",
"./hi-ml/testhiml",
"./hi-ml-histopathology/src",
"./hi-ml-histopathology/testhisto",
"./hi-ml-histopathology/testSSL",
],
"terminal.integrated.defaultProfile.windows": "Command Prompt",
"terminal.integrated.env.windows": {
"PYTHONPATH":"${workspaceFolder}/hi-ml/src:${workspaceFolder}/hi-ml-azure/src:${workspaceFolder}/hi-ml-histopathology/src"
},
"terminal.integrated.env.linux": {
"PYTHONPATH":"${workspaceFolder}/hi-ml/src:${workspaceFolder}/hi-ml-azure/src:${workspaceFolder}/hi-ml-histopathology/src"
}
}


@ -20,12 +20,17 @@ the environment file since it is necessary for the augmentations.
- ([#181](https://github.com/microsoft/hi-ml/pull/181)) Add computational pathology tools in hi-ml-histopathology folder.
- ([#187](https://github.com/microsoft/hi-ml/pull/187)) Add mean pooling layer for MIL.
- ([#186](https://github.com/microsoft/hi-ml/pull/186)) Add inference to hi-ml runner.
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) Add cross-validation to hi-ml runner.
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) Improved editor setup for VSCode.
### Changed
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) The model config loader is now more flexible: it accepts either a fully qualified class name, or just the top-level module name and the class name (like `histopathology.DeepSMILECrck`)
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) The runner raises an error when a Conda environment file contains a pip include (`-r`) statement
- ([#196](https://github.com/microsoft/hi-ml/pull/196)) Show current workspace name in error message.
### Fixed
- ([#198](https://github.com/microsoft/hi-ml/pull/198)) Dependencies for histopathology folder are no longer specified in `test_requirements.txt`, but correctly in the histopathology Conda environment.
- ([#188](https://github.com/microsoft/hi-ml/pull/188)) Updated DeepSMILES models. They are now up to date with innereye-dl.
- ([#179](https://github.com/microsoft/hi-ml/pull/179)) HEDJitter was jittering the D channel as well. StainNormalization was relying on skimage.
- ([#195](https://github.com/microsoft/hi-ml/pull/195)) Fix DeepMIL metrics bug whereby hard labels were used instead of probabilities.


@ -6,13 +6,11 @@ This toolbox aims at providing low-level and high-level building blocks for Mach
practitioners. It helps to simplify and streamline work on deep learning models for healthcare and life sciences,
by providing tested components (data loaders, pre-processing), deep learning models, and cloud integration tools.
This toolbox is still in very early stages, and presently offers only the cloud integration components. ML components
will be added in the next few weeks.
This repository consists of two Python packages, as well as project-specific codebases:
This toolbox consists of two Python projects:
* [hi-ml-azure](https://pypi.org/project/hi-ml-azure/) - providing helper functions for running in AzureML.
* [hi-ml](https://pypi.org/project/hi-ml/) - providing ML components.
* PyPi package [hi-ml-azure](https://pypi.org/project/hi-ml-azure/) - providing helper functions for running in AzureML.
* PyPi package [hi-ml](https://pypi.org/project/hi-ml/) - providing ML components.
* hi-ml-histopathology: Models and workflows for working with histopathology images
## Getting started
@ -24,6 +22,10 @@ For just the AzureML helper functions:
* Install from `pypi` via `pip`, by running `pip install hi-ml-azure`
For the histopathology workflows, please follow the instructions [here](hi-ml-histopathology/README.md).
If you would like to contribute to the code, please check the [developer guide](docs/source/developers.md).
## Documentation
The detailed package documentation, with examples and API reference, is on


@ -12,6 +12,10 @@ If in doubt, reach out to the core `hi-ml` team before starting your work.
Please look through the existing folder structure to find a good home for your contribution.
## Setting up your dev environment
Please see the detailed instructions [here](developers.md).
## Submitting a Pull Request
If you'd like to submit a PR to the codebase, please ensure you:


@ -1,5 +1,13 @@
# Notes for developers
## Development environment
We suggest using Visual Studio Code (VSCode), available for multiple platforms [here](https://code.visualstudio.com/).
On Windows systems, we recommend using WSL (the Windows Subsystem for Linux), because some PyTorch features are not available on Windows.
Inside VSCode, please install the extensions that are recommended for this project - they are available in `.vscode/extensions.json` in the
repository root.
## Creating a Conda environment
To create a separate Conda environment with all packages that `hi-ml` requires for running and testing,
@ -15,6 +23,9 @@ outside the Conda environment. For WSL, these are the required steps (see also
[here](https://docs.microsoft.com/en-us/windows/dev-environment/javascript/nodejs-on-wsl)):
```shell
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash
```
Close your terminal and re-open it, then run:
```shell
nvm install node
npm install -g pyright
```
@ -103,3 +114,13 @@ To create a new package release, follow these steps:
* Click "Auto-generate release notes" to pull in the titles of the Pull Requests since the last release.
* Before the auto-generated "What's changed" section, add a few sentences that summarize what's new.
* Click "Publish release"
## Troubleshooting
### Debugging a test in VSCode fails on Windows
* Symptom: Debugging just does not seem to do anything
* Check: Debug Console shows error `from _sqlite3 import *: ImportError: DLL load failed: The specified module could not be found.`
* Fix: [see here](https://stackoverflow.com/questions/54876404/unable-to-import-sqlite3-using-anaconda-python)
* Run `conda info --envs` to see where your Conda environment lives, then place `sqlite3.dll` into the `DLLs` folder inside of the environment


@ -11,17 +11,36 @@ use of these features:
This can be used by invoking the hi-ml runner and providing the name of the container class, like this:
`himl-runner --model=MyContainer`.
There is a fully working example [HelloContainer](../../hi-ml/src/health-ml/configs/hello_container.py), that
There is a fully working example [HelloWorld](../../hi-ml/src/health-ml/configs/hello_world.py), that
implements a simple 1-dimensional regression model from data stored in a CSV file. You can run that
from the command line by `himl-runner --model=HelloContainer`.
from the command line by `himl-runner --model=HelloWorld`.
# Running ML experiments in Azure ML
## Specifying the model to run
The `--model` argument specifies the name of a class that should be used for model training. The class needs to
be a subclass of `LightningContainer`, see below. There are different ways of telling the runner where to find
that class:
* If just providing a single class name, like `--model=HelloWorld`, the class is expected somewhere in the
`health_ml.configs` namespace. It can be in any module/folder inside of that namespace.
* If the class is outside of the `health_ml.configs` namespace (as would be normal when using the `himl-runner` from a package),
you need to provide a "hint" about where to start searching. It is enough to provide the start of the namespace string:
for example, `--model histopathology.PandaImageNetMIL` is effectively telling the runner to search for the
`PandaImageNetMIL` class _anywhere_ in the `histopathology` namespace. You can think of this as
`histopathology.*.PandaImageNetMIL`
## Running ML experiments in Azure ML
To train in AzureML, add a `--azureml` flag. Use the flag `--cluster` to specify the name of the cluster
in your Workspace that you want to submit the job to. So the whole command would look like:
`himl-runner --model=HelloWorld --cluster=my_cluster_name --azureml`. You can also specify `--num_nodes` if
you wish to distribute the model training.
When starting the runner, do so from a directory that contains all the code that your experiment needs:
the current working directory will be used as the root of all files that are copied to AzureML to run your experiment.
(The only exception to this rule is when starting the runner from within a clone of the HI-ML GitHub repository.)
AzureML needs to know which Python/Conda environment it should use. For that, the runner expects a file `environment.yml`
in the current working directory, containing a Conda environment definition.
## Setup - creating your model config file
@ -140,7 +159,7 @@ By default, config files will be looked for in the folder "health_ml.configs". T
that live elsewhere, use a fully qualified name for the parameter `--model` - e.g. "MyModule.Configs.my_config.py"
### Outputting files during training
## Outputting files during training
The Lightning model returned by `create_model` needs to write its output files to the current working directory.
When running inside of AzureML, the output folders will be directly under the project root. If not running inside
@ -150,7 +169,7 @@ When running in AzureML, the folder structure will be set up such that all files
to the current working directory are later uploaded to Azure blob storage at the end of the AzureML job. The files
will also be later available via the AzureML UI.
### Trainer arguments
## Trainer arguments
All arguments that control the PyTorch Lightning `Trainer` object are defined in the class `TrainerParams`. A
`LightningContainer` object inherits from this class. The most essential one is the `max_epochs` field, which controls
the `max_epochs` argument of the `Trainer`.
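As a minimal, hypothetical sketch (the container name and the epoch count are illustrative, not taken from the repository), overriding such a field could look like this:

```python
from health_ml.lightning_container import LightningContainer


class MyContainer(LightningContainer):  # hypothetical example container
    def __init__(self) -> None:
        super().__init__()
        # A TrainerParams field: passed through to the max_epochs argument of the PyTorch Lightning Trainer
        self.max_epochs = 20
```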


@ -1,3 +1,5 @@
# This environment definition contains all packages needed for hi-ml and hi-ml-azure development, building, and
# testing
name: himl
channels:
- defaults
@ -5,7 +7,10 @@ channels:
dependencies:
- pip=20.1.1
- python=3.7.3
- pytorch=1.8.0
- pytorch=1.10.0
- cudatoolkit=11.3.1
- pip:
- -r hi-ml-azure/run_requirements.txt
- -r hi-ml/run_requirements.txt
- -r build_requirements.txt
- -r test_requirements.txt


@ -1,8 +1,10 @@
[pytest]
testpaths = testazure
norecursedirs = outputs
log_cli = True
log_cli_level = DEBUG
adopts = --strict-markers
addopts = --strict-markers
markers =
fast: Tests that should run very fast, and can act as smoke tests to see if something goes terribly wrong.
slow: Tests that are slow to run and not crucial to the build.
timeout: Tests will terminate and fail if not completed within this length of time.
timeout: Tests will terminate and fail if not completed within this length of time.


@ -1,7 +1,8 @@
azureml-sdk==1.32.0
azureml-tensorboard==1.32.0
conda-merge==0.1.5
pandas==1.3.4
param==1.9.3
ruamel.yaml==0.16.12
tensorboard==2.6.0
azureml-sdk>=1.36.0
azureml-tensorboard>=1.36.0
conda-merge>=0.1.5
pandas>=1.3.4
param>=1.12
pysocks>=1.5.8
ruamel.yaml>=0.16.12
tensorboard>=2.6.0


@ -27,7 +27,7 @@ from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
from azureml.train.hyperdrive import HyperDriveConfig, GridParameterSampling, PrimaryMetricGoal, choice
from azureml.dataprep.fuse.daemon import MountContext
from health_azure.utils import (create_python_environment, create_run_recovery_id, _find_file,
from health_azure.utils import (create_python_environment, create_run_recovery_id, find_file_in_parent_to_pythonpath,
is_run_and_child_runs_completed, is_running_in_azure_ml, register_environment,
run_duration_string_to_seconds, to_azure_friendly_string, RUN_CONTEXT, get_workspace,
PathOrString, DEFAULT_ENVIRONMENT_VARIABLES)
@ -178,7 +178,7 @@ def create_run_configuration(workspace: Workspace,
def create_crossval_hyperdrive_config(num_splits: int,
cross_val_index_arg_name: str = "cross_validation_split_index",
cross_val_index_arg_name: str = "crossval_index",
metric_name: str = "val/loss") -> HyperDriveConfig:
"""
Creates an Azure ML HyperDriveConfig object for running cross validation. Note: this config expects a metric
@ -186,20 +186,21 @@ def create_crossval_hyperdrive_config(num_splits: int,
https://docs.microsoft.com/en-us/azure/machine-learning/how-to-tune-hyperparameters#log-metrics-for-hyperparameter-tuning))
:param num_splits: The number of splits for k-fold cross validation
:param cross_val_index_arg_name: The name of the argument received by each of the child runs that indicates which
split that child represents.
:param cross_val_index_arg_name: The name of the commandline argument that each of the child runs gets, to
indicate which split they should work on.
:param metric_name: The name of the metric that the HyperDriveConfig will compare runs by. Please note that it is
your responsibility to make sure a metric with this name is logged to the Run in your training script
:return: an Azure ML HyperDriveConfig object
"""
logging.info(f"Creating a HyperDriveConfig. Please be aware that this expects to find the metric {metric_name}"
f" logged to the Run during your training script.")
logging.info(f"Creating a HyperDriveConfig. Please note that this expects to find the specified "
f"metric '{metric_name}' logged to AzureML from your training script (for example, using the "
f"AzureMLLogger with Pytorch Lightning)")
parameter_dict = {
cross_val_index_arg_name: choice(list(range(num_splits))),
}
return HyperDriveConfig(
run_config=ScriptRunConfig(""),
hyperparameter_sampling=GridParameterSampling(
{
cross_val_index_arg_name: choice(list(range(num_splits)))
}),
hyperparameter_sampling=GridParameterSampling(parameter_dict),
primary_metric_name=metric_name,
primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
max_total_runs=num_splits
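As a rough usage sketch (the cluster name and entry script are illustrative; `submit_to_azure_if_needed` is the submission function shown further below in this file):

```python
from health_azure import create_crossval_hyperdrive_config, submit_to_azure_if_needed

# Run 5-fold cross-validation in AzureML: each child run receives --crossval_index=<0..4>
# and must log the metric "val/loss" for HyperDrive to compare the runs.
hyperdrive_config = create_crossval_hyperdrive_config(num_splits=5)
run_info = submit_to_azure_if_needed(
    entry_script="runner.py",  # illustrative
    compute_cluster_name="my-cluster",  # illustrative
    hyperdrive_config=hyperdrive_config,
)
```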
@ -337,7 +338,8 @@ def submit_to_azure_if_needed( # type: ignore
submit_to_azureml: Optional[bool] = None,
tags: Optional[Dict[str, str]] = None,
after_submission: Optional[Callable[[Run], None]] = None,
hyperdrive_config: Optional[HyperDriveConfig] = None
hyperdrive_config: Optional[HyperDriveConfig] = None,
create_output_folders: bool = True,
) -> AzureRunInfo: # pragma: no cover
"""
Submit a folder to Azure, if needed and run it.
@ -390,6 +392,7 @@ def submit_to_azure_if_needed( # type: ignore
for local execution (i.e., return immediately) will be executed. If not provided (None), submission to AzureML
will be triggered if the commandline flag '--azureml' is present in sys.argv
:param hyperdrive_config: A configuration object for Hyperdrive (hyperparameter search).
:param create_output_folders: If True (default), create folders "outputs" and "logs" in the current working folder.
:return: If the script is submitted to AzureML then we terminate python as the script should be executed in AzureML,
otherwise we return a AzureRunInfo object.
"""
@ -448,7 +451,7 @@ def submit_to_azure_if_needed( # type: ignore
workspace = get_workspace(aml_workspace, workspace_config_path)
if conda_environment_file is None:
conda_environment_file = _find_file(CONDA_ENVIRONMENT_FILE)
conda_environment_file = find_file_in_parent_to_pythonpath(CONDA_ENVIRONMENT_FILE)
conda_environment_file = _str_to_path(conda_environment_file)
logging.info(f"Loaded AzureML workspace {workspace.name}")


@ -0,0 +1,48 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import logging
from pathlib import Path
ENVIRONMENT_YAML_FILE_NAME = "environment.yml"
REPO_HIML_FOLDER = "hi-ml"
REPO_HIML_AZURE_FOLDER = "hi-ml-azure"
def is_himl_used_from_git_repo() -> bool:
"""Returns False if HI-ML was installed as a package into site-packages. Returns True if the HI-ML codebase is
used from a clone of the full git repository.
:return: False if HI-ML is installed as a package, True if used via source from git.
:rtype: bool
"""
health_ml_root = Path(__file__).parent.parent
logging.debug(f"health_ml root: {health_ml_root}")
if health_ml_root.parent.stem == "site-packages":
return False
himl_root = health_ml_root.parent.parent
# These two folders are present in the top-level folder of the git repo
expected_folders = [REPO_HIML_FOLDER, REPO_HIML_AZURE_FOLDER]
all_folders_exist = all((himl_root / folder).is_dir() for folder in expected_folders)
if all_folders_exist:
return True
raise ValueError(
"Unable to determine the installation status: Code is not used from site-packages, but the "
"expected top-level folders are not present?"
)
def git_repo_root_folder() -> Path:
"""
Attempts to return the path to the top-level hi-ml repo that contains the hi-ml and hi-ml-azure packages.
This top-level repo will only be present if hi-ml has been installed as a git submodule, or the repo has
been directly downloaded. Otherwise (e.g. if hi-ml has been installed as a pip package) a ValueError is raised.
:return: Path to the himl root directory if it exists.
"""
if not is_himl_used_from_git_repo():
raise ValueError("This function can only be used if the HI-ML package is used directly from the git repo.")
return Path(__file__).parent.parent.parent.parent
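A typical use of these two helpers, mirroring the tests later in this diff:

```python
# Locate the repository-level Conda file, but only when running from a git checkout
if is_himl_used_from_git_repo():
    env_file = git_repo_root_folder() / ENVIRONMENT_YAML_FILE_NAME
```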


@ -65,6 +65,13 @@ RUN_CONTEXT = Run.get_context()
PARENT_RUN_CONTEXT = getattr(RUN_CONTEXT, "parent", None)
WORKSPACE_CONFIG_JSON = "config.json"
# Names for sections in a Conda environment definition
CONDA_NAME = "name"
CONDA_CHANNELS = "channels"
CONDA_DEPENDENCIES = "dependencies"
CONDA_PIP = "pip"
# By default, define several environment variables that work around known issues in the software stack
DEFAULT_ENVIRONMENT_VARIABLES = {
"AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": "3600",
@ -112,14 +119,15 @@ class GenericConfig(param.Parameterized):
"""
# check if illegal arguments are passed in
legal_params = self.get_overridable_parameters()
illegal = [k for k, v in params.items() if (k in self.params().keys()) and (k not in legal_params)]
current_param_names = self.param.values().keys()
illegal = [k for k, v in params.items() if (k in current_param_names) and (k not in legal_params)]
if illegal:
raise ValueError(f"The following parameters cannot be overridden as they are either "
f"readonly, constant, or private members : {illegal}")
if throw_if_unknown_param:
# check if parameters not defined by the config class are passed in
unknown = [k for k, v in params.items() if (k not in self.params().keys())]
unknown = [k for k, v in params.items() if (k not in current_param_names)]
if unknown:
raise ValueError(f"The following parameters do not exist: {unknown}")
# set known arguments
@ -358,7 +366,7 @@ def get_overridable_parameters(config: Any) -> Dict[str, param.Parameter]:
:return: A dictionary of parameter names and their definitions.
"""
assert isinstance(config, param.Parameterized)
return dict((k, v) for k, v in config.params().items()
return dict((k, v) for k, v in config.param.params().items()
if reason_not_overridable(v) is None)
@ -424,16 +432,17 @@ def report_on_overrides(config: Any, overrides_to_apply: Dict[str, Any], keys_to
:param keys_to_ignore: set of dictionary keys not to report on
"""
assert isinstance(config, param.Parameterized)
current_params = config.param.params()
for key, desired in overrides_to_apply.items():
if key in keys_to_ignore:
continue
actual = getattr(config, key, None)
if actual == desired:
continue
if key not in config.params():
if key not in current_params:
reason = "parameter is undefined"
else:
val = config.params()[key]
val = current_params[key]
reason = reason_not_overridable(val) # type: ignore
if reason is None:
reason = "for UNKNOWN REASONS"
@ -456,7 +465,7 @@ def create_from_matching_params(from_object: param.Parameterized, cls_: Type[T])
c = cls_()
if not isinstance(c, param.Parameterized):
raise ValueError(f"The created object must be a subclass of param.Parameterized, but got {type(c)}")
for param_name, p in c.params().items():
for param_name, p in c.param.params().items():
if not p.constant and not p.readonly:
setattr(c, param_name, getattr(from_object, param_name))
return c
@ -648,38 +657,39 @@ def determine_run_id_type(run_or_recovery_id: str) -> str:
return run_or_recovery_id
def _find_file(file_name: str, stop_at_pythonpath: bool = True) -> Optional[Path]:
def find_file_in_parent_folders(file_name: str, stop_at_path: List[Path]) -> Optional[Path]:
"""Searches for a file of the given name in the current working directory, or any of its parent folders.
Searching stops if either the file is found, or no parent folder can be found, or the search has reached any
of the given folders in stop_at_path.
:param file_name: The name of the file to find.
:param stop_at_path: A list of folders. If any of them is reached, search stops.
:return: The absolute path of the file if found, or None if it was not found.
"""
Recurse up the file system, starting at the current working directory, to find a file. Optionally stop when we hit
the PYTHONPATH root (defaults to stopping).
:param file_name: The file name of the file to find.
:param stop_at_pythonpath: (Defaults to True.) Whether to stop at the PYTHONPATH root.
:return: The path to the file, or None if it cannot be found.
"""
def return_file_or_parent(
start_at: Path,
file_name: str,
stop_at_pythonpath: bool,
pythonpaths: List[Path]) -> Optional[Path]:
logging.info(f"Searching for file {file_name} in {start_at}")
def return_file_or_parent(start_at: Path) -> Optional[Path]:
logging.debug(f"Searching for file {file_name} in {start_at}")
expected = start_at / file_name
if expected.is_file() and expected.name == file_name:
return expected
if start_at.parent == start_at or start_at in pythonpaths:
if start_at.parent == start_at or start_at in stop_at_path:
return None
return return_file_or_parent(start_at.parent, file_name, stop_at_pythonpath, pythonpaths)
return return_file_or_parent(start_at.parent)
return return_file_or_parent(start_at=Path.cwd())
def find_file_in_parent_to_pythonpath(file_name: str) -> Optional[Path]:
"""
Recurse up the file system, starting at the current working directory, to find a file. Stop when we hit
any of the folders in PYTHONPATH.
:param file_name: The file name of the file to find.
:return: The path to the file, or None if it cannot be found.
"""
pythonpaths: List[Path] = []
if 'PYTHONPATH' in os.environ:
pythonpaths = [Path(path_string) for path_string in os.environ['PYTHONPATH'].split(os.pathsep)]
return return_file_or_parent(
start_at=Path.cwd(),
file_name=file_name,
stop_at_pythonpath=stop_at_pythonpath,
pythonpaths=pythonpaths)
return find_file_in_parent_folders(file_name=file_name, stop_at_path=pythonpaths)
def get_workspace(aml_workspace: Optional[Workspace] = None, workspace_config_path: Optional[Path] = None) -> Workspace:
@ -705,7 +715,7 @@ def get_workspace(aml_workspace: Optional[Workspace] = None, workspace_config_pa
return aml_workspace
if workspace_config_path is None:
workspace_config_path = _find_file(WORKSPACE_CONFIG_JSON)
workspace_config_path = find_file_in_parent_to_pythonpath(WORKSPACE_CONFIG_JSON)
if workspace_config_path:
logging.info(f"Using the workspace config file {str(workspace_config_path.absolute())}")
else:
@ -715,7 +725,9 @@ def get_workspace(aml_workspace: Optional[Workspace] = None, workspace_config_pa
raise ValueError("Workspace config path is not a path, check your input.")
elif workspace_config_path.is_file():
auth = get_authentication()
return Workspace.from_config(path=str(workspace_config_path), auth=auth)
workspace = Workspace.from_config(path=str(workspace_config_path), auth=auth)
logging.info(f"Logged into AzureML workspace {workspace.name}")
return workspace
raise ValueError("Workspace config file does not exist or cannot be read.")
@ -902,6 +914,52 @@ def _retrieve_unique_deps(dependencies: List[str], keep_method: str = "first") -
return unique_deps_list
def _get_pip_dependencies(parsed_yaml: Any) -> Optional[Tuple[int, List[Any]]]:
"""Gets the first pip dependencies section of a Conda yaml file. Returns the index at which the pip section
was found, and the pip section itself. If no pip section was found, returns None
"""
if CONDA_DEPENDENCIES in parsed_yaml:
for i, dep in enumerate(parsed_yaml.get(CONDA_DEPENDENCIES)):
if isinstance(dep, dict) and CONDA_PIP in dep:
return i, dep[CONDA_PIP]
return None
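As a quick illustration (hypothetical input, shaped like the dictionaries that `conda_merge.read_file` returns):

```python
# One pip section, found at index 1 of the dependencies list:
parsed_yaml = {"dependencies": ["python=3.7.3", {"pip": ["foo==1.0"]}]}
assert _get_pip_dependencies(parsed_yaml) == (1, ["foo==1.0"])
# No pip section at all:
assert _get_pip_dependencies({"dependencies": ["python=3.7.3"]}) is None
```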
def is_pip_include_dependency(package: str) -> bool:
"""Returns True if the given package name (as used in a Conda environment file) relies on PIP includes,
in the format "-r requirements.txt"
:param package: The name of the PIP dependency to check.
:return: True if the package name is a PIP include statement.
"""
return package.strip().startswith("-r ")
def is_conda_file_with_pip_include(conda_file: Path) -> Tuple[bool, Dict]:
"""Checks if the given Conda environment file uses the "include" syntax in the pip section, like
`-r requirements.txt`. If it uses pip includes, the function returns True and a modified Conda YAML
with all the pip include statements removed. If no pip include statements are found, it returns False and the
unmodified Conda YAML.
:param conda_file: The path of a Conda environment file.
:return: True if the file uses pip includes, False if not. The second return value is the modified Conda environment
without the pip include statements.
"""
conda_yaml = conda_merge.read_file(str(conda_file))
pip_dep = _get_pip_dependencies(conda_yaml)
if pip_dep is not None:
pip_index, pip = pip_dep
pip_without_include = [package for package in pip if not is_pip_include_dependency(package)]
if len(pip) != len(pip_without_include):
if len(pip_without_include) == 0:
# Avoid empty PIP dependencies section, this causes a failure in conda_merge
conda_yaml.get(CONDA_DEPENDENCIES).pop(pip_index)
else:
conda_yaml.get(CONDA_DEPENDENCIES)[pip_index] = {CONDA_PIP: pip_without_include}
return True, conda_yaml
return False, conda_yaml
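A sketch of how a caller might use this function (the file path is illustrative):

```python
uses_includes, cleaned_yaml = is_conda_file_with_pip_include(Path("environment.yml"))
if uses_includes:
    # cleaned_yaml is the parsed environment with all "-r ..." entries removed
    logging.warning("Conda file contains pip include statements; they were stripped before merging.")
```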
def merge_conda_files(conda_files: List[Path], result_file: Path, pip_files: Optional[List[Path]] = None,
pip_clash_keep_method: str = "first") -> None:
"""
@ -914,43 +972,42 @@ def merge_conda_files(conda_files: List[Path], result_file: Path, pip_files: Opt
:param pip_clash_keep_method: If two or more pip packages are specified with the same name, this determines
which one should be kept. Current options: ['first', 'last']
"""
env_definitions = [conda_merge.read_file(str(f)) for f in conda_files]
env_definitions: List[Any] = []
for file in conda_files:
_, pip_without_include = is_conda_file_with_pip_include(file)
env_definitions.append(pip_without_include)
unified_definition = {}
NAME = "name"
CHANNELS = "channels"
DEPENDENCIES = "dependencies"
extra_pip_deps = []
for pip_file in pip_files or []:
with open(pip_file, "r") as f_path:
additional_pip_deps = [d for d in f_path.read().split("\n") if d]
extra_pip_deps.extend(additional_pip_deps)
additional_pip_deps = [d for d in pip_file.read_text().split("\n") if d and not is_pip_include_dependency(d)]
extra_pip_deps.extend(additional_pip_deps)
name = conda_merge.merge_names(env.get(NAME) for env in env_definitions)
name = conda_merge.merge_names(env.get(CONDA_NAME) for env in env_definitions)
if name:
unified_definition[NAME] = name
unified_definition[CONDA_NAME] = name
try:
channels = conda_merge.merge_channels(env.get(CHANNELS) for env in env_definitions)
channels = conda_merge.merge_channels(env.get(CONDA_CHANNELS) for env in env_definitions)
except conda_merge.MergeError:
logging.error("Failed to merge channel priorities.")
raise
if channels:
unified_definition[CHANNELS] = channels
unified_definition[CONDA_CHANNELS] = channels
try:
deps_to_merge = [env.get(DEPENDENCIES) for env in env_definitions]
deps_to_merge = [env.get(CONDA_DEPENDENCIES) for env in env_definitions]
if len(extra_pip_deps) > 0:
deps_to_merge.extend([[{"pip": extra_pip_deps}]])
deps_to_merge.append([{CONDA_PIP: extra_pip_deps}])
deps = conda_merge.merge_dependencies(deps_to_merge)
# Remove duplicated pip packages from merged dependencies sections. Note that for a package that is
# duplicated, the first value encountered will be retained.
pip_deps_entries = [d for d in deps if isinstance(d, dict) and "pip" in d] # type: ignore
pip_deps_entries = [d for d in deps if isinstance(d, dict) and CONDA_PIP in d] # type: ignore
if len(pip_deps_entries) == 0:
raise ValueError("Didn't find a dictionary with the key 'pip' in the list of dependencies")
pip_deps_entry: Dict[str, List[str]] = pip_deps_entries[0]
pip_deps = pip_deps_entry["pip"]
pip_deps = pip_deps_entry[CONDA_PIP]
# temporarily remove pip dependencies from deps to be added back after deduplication
deps.remove(pip_deps_entry)
@ -960,13 +1017,13 @@ def merge_conda_files(conda_files: List[Path], result_file: Path, pip_files: Opt
unique_pip_deps = _retrieve_unique_deps(pip_deps, keep_method=pip_clash_keep_method)
# finally add back the deduplicated list of dependencies
unique_deps.append({"pip": unique_pip_deps}) # type: ignore
unique_deps.append({CONDA_PIP: unique_pip_deps}) # type: ignore
except conda_merge.MergeError:
logging.error("Failed to merge dependencies.")
raise
if unique_deps:
unified_definition[DEPENDENCIES] = unique_deps
unified_definition[CONDA_DEPENDENCIES] = unique_deps
else:
raise ValueError("No dependencies found in any of the conda files.")
@ -1439,7 +1496,7 @@ def _get_runs_from_script_config(script_config: AmlRunScriptConfig, workspace: W
if script_config.run is None:
if script_config.experiment is None:
# default to latest run file
latest_run_file = _find_file("most_recent_run.txt")
latest_run_file = find_file_in_parent_to_pythonpath("most_recent_run.txt")
if latest_run_file is None:
raise ValueError("Could not find most_recent_run.txt")
runs = [get_most_recent_run(latest_run_file, workspace)]
@ -1678,7 +1735,7 @@ def aml_workspace_for_unittests() -> Workspace:
is found, the workspace details are read from environment variables. Authentication information is also read
from environment variables.
"""
config_json = _find_file(WORKSPACE_CONFIG_JSON)
config_json = find_file_in_parent_to_pythonpath(WORKSPACE_CONFIG_JSON)
if config_json is not None:
return Workspace.from_config(path=str(config_json))
else:


@ -1,6 +0,0 @@
name: test-env
dependencies:
- pip=20.1.1
- python=3.7.3
- pip:
- -r ../test_requirements.txt


@ -3,13 +3,19 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import shutil
import sys
import uuid
from pathlib import Path
from typing import Generator
import pytest
from health_azure.himl import _package_setup
full_folder = str(Path(__file__).parent.parent / "src")
if full_folder not in sys.path:
print(f"Adding to sys.path for running hi-ml-azure: {full_folder}")
sys.path.insert(0, str(full_folder))
from health_azure.himl import _package_setup # noqa: E402
def outputs_for_tests() -> Path:


@ -29,6 +29,7 @@ from azureml.core import Experiment, Run, ScriptRunConfig, Workspace
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.environment import CondaDependencies
from azureml.data.azure_storage_datastore import AzureBlobDatastore
from health_azure import paths
import health_azure.utils as util
from health_azure.himl import AML_IGNORE_FILE, append_to_amlignore
@ -61,10 +62,10 @@ def test_find_file(tmp_path: Path) -> None:
start_path.mkdir(exist_ok=False)
where_are_we_now = Path.cwd()
os.chdir(start_path)
found_file = util._find_file(file_name, False)
found_file = util.find_file_in_parent_to_pythonpath(file_name)
assert found_file
with mock.patch.dict(os.environ, {"PYTHONPATH": str(python_root.absolute())}):
found_file = util._find_file(file_name)
found_file = util.find_file_in_parent_to_pythonpath(file_name)
assert not found_file
os.chdir(where_are_we_now)
@ -384,6 +385,110 @@ dependencies:
util.merge_conda_files(files, merged_file)
def test_merge_conda_pip_include(random_folder: Path) -> None:
"""
Tests the logic to exclude PIP include statements from Conda environments.
"""
env1 = """
channels:
- default
dependencies:
- conda_both=3.0
- pip:
- -r requirements.txt
- foo==1.0
"""
file1 = random_folder / "env1.yml"
file1.write_text(env1)
merged_file = random_folder / "merged.yml"
util.merge_conda_files([file1], merged_file)
merged_contents = merged_file.read_text()
assert "-r requirements.txt" not in merged_contents
file2 = random_folder / "requirements.txt"
file2.write_text("package==1.0.0")
merged_file2 = random_folder / "merged2.yml"
util.merge_conda_files([file1], merged_file2, pip_files=[file2])
merged_contents2 = merged_file2.read_text()
assert merged_contents2 == """channels:
- default
dependencies:
- conda_both=3.0
- pip:
- foo==1.0
- package==1.0.0
"""
def test_merge_conda_pip_include2(random_folder: Path) -> None:
"""
Tests the logic to exclude PIP include statements from Conda environments, on the root level environment file.
"""
if paths.is_himl_used_from_git_repo():
root_yaml = paths.git_repo_root_folder() / paths.ENVIRONMENT_YAML_FILE_NAME
requirements = paths.git_repo_root_folder() / "hi-ml-azure" / "run_requirements.txt"
merged_file2 = random_folder / "merged2.yml"
util.merge_conda_files([root_yaml], merged_file2, pip_files=[requirements])
def assert_pip_length(yaml: Any, expected_length: int) -> None:
"""Checks if the pip dependencies section of a Conda YAML file has the expected number of entries
"""
pip = util._get_pip_dependencies(yaml)
assert pip is not None
assert len(pip[1]) == expected_length
@pytest.mark.fast
def test_pip_include_1() -> None:
"""Test if Conda files that use PIP include are handled correctly. This uses the top-level environment.yml
file in the repository.
"""
if paths.is_himl_used_from_git_repo():
root_yaml = paths.git_repo_root_folder() / paths.ENVIRONMENT_YAML_FILE_NAME
assert root_yaml.is_file()
original_yaml = conda_merge.read_file(root_yaml)
# At the time of writing, the top-level environment file had only 4 include statements in the pip
# section; they should all be filtered out.
assert_pip_length(original_yaml, 4)
uses_pip_include, modified_yaml = util.is_conda_file_with_pip_include(root_yaml)
assert uses_pip_include
pip = util._get_pip_dependencies(modified_yaml)
# The pip section of the top-level yaml has nothing but include statements, so after filtering the
# pip section is empty. In this case, no pip section should be present at all.
assert pip is None
@pytest.mark.fast
def test_pip_include_2(tmp_path: Path) -> None:
"""Test if Conda files that use PIP include are recognized.
"""
# Environment file without a "-r" include statement
conda_str = """name: simple-envpip
dependencies:
- pip:
- azureml-sdk==1.23.0
- more_conda
"""
tmp_conda = tmp_path / "env.yml"
tmp_conda.write_text(conda_str)
uses_pip_include, modified_yaml = util.is_conda_file_with_pip_include(tmp_conda)
assert not uses_pip_include
assert_pip_length(modified_yaml, 1)
# Environment file that has a "-r" include statement
conda_str = """name: simple-env
dependencies:
- pip:
- -r foo.txt
- any_package
"""
tmp_conda.write_text(conda_str)
uses_pip_include, modified_yaml = util.is_conda_file_with_pip_include(tmp_conda)
assert uses_pip_include
assert util._get_pip_dependencies(modified_yaml) == (0, ["any_package"])
@pytest.mark.parametrize(["s", "expected"],
[
("1s", 1),
@ -392,6 +497,7 @@ dependencies:
("1.0d", 24 * 3600),
("", None),
]) # NOQA
@pytest.mark.fast
def test_run_duration(s: str, expected: Optional[float]) -> None:
actual = util.run_duration_string_to_seconds(s)
assert actual == expected
@ -399,11 +505,13 @@ def test_run_duration(s: str, expected: Optional[float]) -> None:
assert isinstance(actual, int)
@pytest.mark.fast
def test_run_duration_fails() -> None:
with pytest.raises(Exception):
util.run_duration_string_to_seconds("17b")
@pytest.mark.fast
def test_repository_root() -> None:
root = repository_root()
assert (root / "SECURITY.md").is_file()


@ -17,9 +17,12 @@ pip_build:
pip_test:
$(call call_parent,pip_test)
# pip install local package in editable mode for development and testing
# pip install all requirements for histo, read from the Conda file. This is somewhat hacky;
# we could also build a full Conda environment before starting the tests, but the performance
# impact of that is unclear.
call_pip_local:
ls
sed -e '1,/pip:/ d' environment.yml | cut -d "-" -f 2- > temp_requirements.txt
pip install -r temp_requirements.txt
# pip upgrade and install local package in editable mode
pip_local: pip_upgrade call_pip_local
@ -38,13 +41,6 @@ clean:
rm -vrf ./testhisto/testhisto/test_outputs ./testhistotestSSL/test_ouputs
rm -vf ./coverage ./coverage.txt ./coverage.xml
# build package, assuming build requirements already installed
call_build:
ls
# pip install build requirements and build package
build: pip_build call_build
# run flake8, assuming test requirements already installed
call_flake8:
flake8 --count --statistics --config=../.flake8 .
@ -91,4 +87,4 @@ call_pytest_and_coverage:
pytest_and_coverage: pip_test call_pytest_and_coverage
# install test requirements and run all tests
test_all: pip_test call_flake8 call_mypy call_pytest_and_coverage
test_all: pip_test call_flake8 call_mypy call_pytest_and_coverage


@ -0,0 +1,9 @@
# Histopathology Models and Workflows
## Getting started
- Build environment
- Download config to AzureML workspace
- Run a first workflow.
To be completed.


@ -4,29 +4,29 @@ channels:
- pytorch
- conda-forge
dependencies:
- cudatoolkit=11.1
- cudatoolkit=11.3.1
- pip=20.1.1
- python=3.7.3
- pytorch=1.10.0
- python-blosc==1.7.0
- torchvision=0.11.1
- pip:
- -r ../test_requirements.txt
- azureml-sdk==1.36.0
- cryptography==3.3.2
- docker==4.3.1
- flask==2.0.1
- gputil==1.4.0
- hi-ml>=0.1.12
- joblib==0.16.0
- jupyter==1.0.0
- jupyter-client==6.1.5
- lightning-bolts==0.4.0
- mlflow==1.17.0
- monai==0.6.0
- more-itertools==8.10.0
- mypy-extensions==0.4.3
- numba==0.51.2
- numpy==1.19.1
- opencv-python-headless==4.5.1.48
- pandas==1.3.4
- pillow==9.0.0
- psutil==5.7.2
- pydicom==2.0.0
- pyflakes==2.2.0
- PyJWT==1.7.1
- rich==5.1.1
@ -34,8 +34,12 @@ dependencies:
- runstats==1.8.0
- scikit-image==0.17.2
- scipy==1.5.2
- simpleitk==1.2.4
- six==1.15.0
- stopit==1.1.2
- tabulate==0.8.7
- torchprof==1.3.3
- torch>=1.10.0
- torchvision>=0.11.1
- torchmetrics==0.6.0
- umap-learn==0.5.2
- yacs==0.1.8


@ -3,6 +3,6 @@ testpaths = testhisto testSSL
norecursedirs = docs logs outputs test_data
log_cli = true
log_cli_level = DEBUG
adopts = --strict-markers
addopts = --strict-markers
markers =
fast: Tests that should run very fast, and can act as smoke tests to see if something goes terribly wrong.


@ -51,6 +51,8 @@ class BaseMIL(LightningContainer):
"`none` (default),`cpu`, `gpu`")
encoding_chunk_size: int = param.Integer(0, doc="If > 0 performs encoding in chunks, by loading"
"enconding_chunk_size tiles per chunk")
is_finetune: bool = param.Boolean(False, doc="If True, fine-tune the encoder during training. If False, "
"keep the encoder frozen.")
# local_dataset (used as data module root_path) is declared in DatasetParams superclass
@property


@ -58,8 +58,8 @@ class DeepSMILECrck(BaseMIL):
# declared in TrainerParams:
max_epochs=50,
# declared in WorkflowParams:
# number_of_cross_validation_splits=5,
# cross_validation_split_index=0,
# crossval_count=5,
# crossval_index=0,
# declared in OptimizerParams:
l_rate=5e-4,
weight_decay=1e-4,
@ -122,8 +122,8 @@ class DeepSMILECrck(BaseMIL):
cache_mode=self.cache_mode,
precache_location=self.precache_location,
cache_dir=self.cache_dir,
number_of_cross_validation_splits=self.number_of_cross_validation_splits,
cross_validation_split_index=self.cross_validation_split_index,
crossval_count=self.crossval_count,
crossval_index=self.crossval_index,
)
def get_callbacks(self) -> List[Callback]:


@ -60,8 +60,8 @@ class DeepSMILEPanda(BaseMIL):
# use_mixed_precision = True,
# declared in WorkflowParams:
number_of_cross_validation_splits=5,
cross_validation_split_index=0,
crossval_count=5,
crossval_index=0,
# declared in OptimizerParams:
l_rate=5e-4,
@ -125,8 +125,8 @@ class DeepSMILEPanda(BaseMIL):
cache_mode=self.cache_mode,
precache_location=self.precache_location,
cache_dir=self.cache_dir,
# number_of_cross_validation_splits=self.number_of_cross_validation_splits,
# cross_validation_split_index=self.cross_validation_split_index,
# crossval_count=self.crossval_count,
# crossval_index=self.crossval_index,
)
# TODO: move self.class_names somewhere else since this is almost an exact copy of create_model in BaseMIL


@ -39,8 +39,8 @@ class TilesDataModule(LightningDataModule):
cache_mode: CacheMode = CacheMode.NONE,
precache_location: CacheLocation = CacheLocation.NONE,
cache_dir: Optional[Path] = None,
number_of_cross_validation_splits: int = 0,
cross_validation_split_index: int = 0) -> None:
crossval_count: int = 0,
crossval_index: int = 0) -> None:
"""
:param root_path: Root directory of the source dataset.
:param max_bag_size: Upper bound on number of tiles in each loaded bag. If 0 (default),
@ -67,8 +67,8 @@ class TilesDataModule(LightningDataModule):
device it was saved from;
If cache_mode is `DISK` precache_location `CPU` and `GPU` are equivalent.
:param cache_dir: The directory onto which to cache data if caching is enabled.
:param number_of_cross_validation_splits: Number of folds to perform.
:param cross_validation_split_index: Index of the cross validation split to be performed.
:param crossval_count: Number of folds to perform.
:param crossval_index: Index of the cross-validation split to be performed.
"""
if precache_location is not CacheLocation.NONE and cache_mode is CacheMode.NONE:
raise ValueError("Can only pre-cache if caching is enabled")
@ -85,8 +85,8 @@ class TilesDataModule(LightningDataModule):
self.precache_location = precache_location
self.cache_dir = cache_dir
self.batch_size = batch_size
self.number_of_cross_validation_splits = number_of_cross_validation_splits
self.cross_validation_split_index = cross_validation_split_index
self.crossval_count = crossval_count
self.crossval_index = crossval_index
self.train_dataset, self.val_dataset, self.test_dataset = self.get_splits()
self.class_weights = self.train_dataset.get_class_weights()
self.seed = seed


@ -31,9 +31,9 @@ class TcgaCrckTilesDataModule(TilesDataModule):
group_column=trainval_dataset.SLIDE_ID_COLUMN,
random_seed=5)
# if self.number_of_cross_validation_splits > 1:
# if self.crossval_count > 1:
# # Function get_k_fold_cross_validation_splits() will concatenate train and val splits
# splits = splits.get_k_fold_cross_validation_splits(self.number_of_cross_validation_splits)
# splits = splits.get_k_fold_cross_validation_splits(self.crossval_count)
# [self.cross_validation_split_index]
return (TcgaCrck_TilesDataset(self.root_path, dataset_df=splits.train),


@ -116,7 +116,7 @@ def test_ssl_container_cifar10_resnet_simclr() -> None:
- checkpoint loading and ImageClassifier module creation
- training of image classifier for one epoch.
"""
model_namespace_simclr = "hi-ml-histopathology.SSL.configs.CIFAR10SimCLR"
model_namespace_simclr = "SSL.configs.CIFAR10SimCLR"
args = common_test_args + [f"--model={model_namespace_simclr}"]
runner = default_runner()
with check_config_json(Path.cwd()):
@ -160,7 +160,7 @@ def test_ssl_container_cifar10_resnet_simclr() -> None:
assert SslOnlineEvaluatorHiml.EVALUATOR_STATE_NAME in callback_state
# Now run the actual SSL classifier off the stored checkpoint
model_namespace_cifar = "hi-ml-histopathology.SSL.configs.SSLClassifierCIFAR"
model_namespace_cifar = "SSL.configs.SSLClassifierCIFAR"
args = common_test_args + [f"--model={model_namespace_cifar}",
f"--local_ssl_weights_path={checkpoint_path}"]
with check_config_json(Path.cwd()):
@ -180,7 +180,7 @@ def test_load_ssl_container_cifar10_cifar100_resnet_byol() -> None:
Tests that the parameters feed into the BYOL model and online evaluator are
indeed the one we fed through our command line args
"""
model_namespace_byol = "hi-ml-histopathology.SSL.configs.CIFAR10CIFAR100BYOL"
model_namespace_byol = "SSL.configs.CIFAR10CIFAR100BYOL"
args = common_test_args + [f"--model={model_namespace_byol}"]
runner = default_runner()
with mock.patch("sys.argv", args):
@ -199,7 +199,7 @@ def test_ssl_container_rsna() -> None:
runner = default_runner()
path_to_cxr_test_dataset = TEST_OUTPUTS_PATH / "cxr_test_dataset"
# Test training of SSL model
model_namespace_byol = "hi-ml-histopathology.SSL.configs.NIH_RSNA_BYOL"
model_namespace_byol = "SSL.configs.NIH_RSNA_BYOL"
args = common_test_args + [f"--model={model_namespace_byol}",
f"--local_datasets={str(path_to_cxr_test_dataset)},{str(path_to_cxr_test_dataset)}",
"--use_balanced_binary_loss_for_linear_head=True",
@ -249,7 +249,7 @@ def test_ssl_container_rsna() -> None:
# Check that we are able to load the checkpoint and create classifier model
checkpoint_path = loaded_config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
model_namespace_cxr = "hi-ml-histopathology.SSL.configs.CXRImageClassifier"
model_namespace_cxr = "SSL.configs.CXRImageClassifier"
args = common_test_args + [f"--model={model_namespace_cxr}",
f"--local_datasets={str(path_to_cxr_test_dataset)}",
"--use_balanced_binary_loss_for_linear_head=True",

12
hi-ml/environment.yml Normal file

@ -0,0 +1,12 @@
# This environment definition only specifies python and pytorch itself. It will be picked up automatically
# for AzureML jobs that are started from within the hi-ml folder. The submission logic will automatically add the
# pip packages required by health_azure and health_ml
name: himl-basic
channels:
- defaults
- pytorch
dependencies:
- pip=20.1.1
- python=3.7.3
- pytorch=1.10.0
- cudatoolkit=11.3.1


@ -1,6 +1,8 @@
[pytest]
testpaths = testhiml
norecursedirs = outputs test_data
log_cli = True
log_cli_level = DEBUG
adopts = --strict-markers
addopts = --strict-markers
markers =
fast: Tests that should run very fast, and can act as smoke tests to see if something goes terribly wrong.


@ -1,10 +1,10 @@
dataclasses-json==0.5.2
hi-ml-azure>=0.1.8
jinja2==3.0.2
matplotlib==3.4.3
opencv-python-headless==4.5.1.48
pandas==1.3.4
pytorch-lightning==1.5.5
rpdb==0.1.6
torchvision==0.11.1
torch>=1.8
jinja2>=3.0.2
matplotlib>=3.4.3
opencv-python-headless>=4.5.1.48
pandas>=1.3.4
pytorch-lightning>=1.5.5
rpdb>=0.1.6
torchvision>=0.11.1
torch>=1.10.0


@ -5,7 +5,6 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from torchmetrics import MeanAbsoluteError
@ -16,63 +15,91 @@ from torch.utils.data import DataLoader, Dataset
from health_ml.lightning_container import LightningContainer
class HelloDataset(Dataset):
def _create_1d_regression_dataset(n: int = 100, seed: int = 0) -> torch.Tensor:
"""Creates a simple 1-D dataset of a noisy linear function.
:param n: The number of datapoints to generate, defaults to 100
:type n: int, optional
:param seed: Random number generator seed, defaults to 0
:type seed: int, optional
:return: A tensor that contains X values in [:, 0] and Y values in [:, 1]
:rtype: torch.Tensor
"""
A simple 1dim regression task, read from a data file stored in the test data folder.
torch.manual_seed(seed)
x = torch.rand((n, 1)) * 10
y = 0.2 * x + 0.1 * torch.randn(x.size())
xy = torch.cat((x, y), dim=1)
return xy
def _split_crossval(xy: torch.Tensor, crossval_count: int, crossval_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
"""
# Creating the data file:
# import numpy as np
# import torch
#
# N = 100
# x = torch.rand((N, 1)) * 10
# y = 0.2 * x + 0.1 * torch.randn(x.size())
# xy = torch.cat((x, y), dim=1)
# np.savetxt("health_ml/configs/hellocontainer.csv", xy.numpy(), delimiter=",")
def __init__(self, raw_data: List[List[float]]) -> None:
Generates a split of the given dataset along the first dimension for cross-validation.
:param xy: The data that should be split. The split will be generated across dimension 0.
:type xy: torch.Tensor
:param crossval_count: The number of splits in total
:type crossval_count: int
:param crossval_index: The index of the split that should be generated (0 <= crossval_index < crossval_count)
:type crossval_index: int
:return: A tuple of (training data, validation data)
:rtype: Tuple[torch.Tensor, torch.Tensor]
"""
n = xy.shape[0]
split_size = n // crossval_count
val_start = crossval_index * split_size
val_end = (crossval_index + 1) * split_size
train1_start = 0 if crossval_index == 0 else (crossval_index - 1) * split_size
train1_end = 0 if crossval_index == 0 else val_start
train2_start = val_end if crossval_index < (crossval_count - 1) else 0
train2_end = n if crossval_index < (crossval_count - 1) else 0
val = xy[val_start:val_end]
train = torch.concat([xy[train1_start:train1_end], xy[train2_start:train2_end]])
return (train, val)
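A quick sanity check of the split logic above (the numbers follow directly from the code):

```python
import torch

xy = torch.arange(10).reshape(10, 1)  # 10 datapoints
train, val = _split_crossval(xy, crossval_count=5, crossval_index=1)
# split_size = 10 // 5 = 2, so fold 1 takes rows 2..3 as the validation split
assert val.tolist() == [[2], [3]]
assert len(train) == 8  # all remaining rows form the training split
```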
class HelloWorldDataset(Dataset):
"""
A simple 1-dim regression task
"""
def __init__(self, xy: torch.Tensor) -> None:
"""
Creates the 1-dim regression dataset.
:param raw_data: The raw data. This must be numeric data which can be converted into a tensor.
See the static method from_path_and_indexes for an example call.
:param xy: The raw data, x in the first column, y in the second column
"""
super().__init__() # type: ignore
self.data = torch.tensor(raw_data, dtype=torch.float)
self.xy = xy
def __len__(self) -> int:
return self.data.shape[0]
return self.xy.shape[0]
def __getitem__(self, item: int) -> Dict[str, torch.Tensor]:
return {'x': self.data[item][0:1], 'y': self.data[item][1:2]}
@staticmethod
def from_path_and_indexes(
root_folder: Path,
start_index: int,
end_index: int) -> 'HelloDataset':
"""
Static method to instantiate a HelloDataset from the root folder with the start and end indexes.
:param root_folder: The folder in which the data file lives ("hellocontainer.csv")
:param start_index: The first row to read.
:param end_index: The last row to read (exclusive)
:return: A new instance based on the root folder and the start and end indexes.
"""
raw_data = np.loadtxt(root_folder / "hellocontainer.csv", delimiter=",")[start_index:end_index]
return HelloDataset(raw_data)
return {"x": self.xy[item][0:1], "y": self.xy[item][1:2]}
class HelloWorldDataModule(LightningDataModule):
"""
A data module that gives the training, validation and test data for a simple 1-dim regression task.
"""
def __init__(self, crossval_count: int, crossval_index: int) -> None:
super().__init__()
n_total = 200
xy = _create_1d_regression_dataset(n=n_total)
n_test = 40
n_val = 50
self.test = HelloWorldDataset(xy=xy[:n_test])
if crossval_count <= 1:
self.val = HelloWorldDataset(xy=xy[n_test:(n_test + n_val)])
self.train = HelloWorldDataset(xy=xy[(n_test + n_val):])
else:
# This could be done via a library function like sklearn's KFold function, but we don't want to add
# scikit-learn as a dependency just for this example.
train, val = _split_crossval(xy[n_test:], crossval_count=crossval_count, crossval_index=crossval_index)
self.val = HelloWorldDataset(xy=val)
self.train = HelloWorldDataset(xy=train)
def prepare_data(self, *args: Any, **kwargs: Any) -> None:
pass
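# For illustration (a sketch, not part of the committed code): what a single
# cross-validation worker sees, given the sizes above (n_total=200, n_test=40;
# worker index 1 chosen arbitrarily). The 160 non-test rows split into 32
# validation rows and 128 training rows.
def _demo_data_module_sizes() -> None:
    data_module = HelloWorldDataModule(crossval_count=5, crossval_index=1)
    assert len(data_module.test) == 40
    assert len(data_module.val) == 32
    assert len(data_module.train) == 128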
@ -126,8 +153,9 @@ class HelloRegression(LightningModule):
self.log("loss", loss, on_epoch=True, on_step=False)
return loss
def validation_step(self, batch: Dict[str, torch.Tensor], *args: Any, # type: ignore
**kwargs: Any) -> torch.Tensor:
def validation_step( # type: ignore
self, batch: Dict[str, torch.Tensor], *args: Any, **kwargs: Any
) -> torch.Tensor:
"""
This method is part of the standard PyTorch Lightning interface. For an introduction, please see
https://pytorch-lightning.readthedocs.io/en/stable/starter/converting.html
@ -208,7 +236,7 @@ class HelloRegression(LightningModule):
Path("test_mae.txt").write_text(str(self.test_mae.compute().item()))
class HelloContainer(LightningContainer):
class HelloWorld(LightningContainer):
"""
An example container for using the hi-ml runner. This container has methods
to generate the actual Lightning model, and read out the datamodule that will be used for training.
@ -231,5 +259,7 @@ class HelloContainer(LightningContainer):
# in turn contains 3 data loaders for training, validation, and test set.
def get_data_module(self) -> LightningDataModule:
# If you would like to use the built-in cross validation functionality that runs training in parallel,
# you need to provide the crossvalidation parameters in the LightningContainer to the datamodule. The
# datamodule must carry out appropriate splitting of the data.
return HelloWorldDataModule(crossval_count=self.crossval_count, crossval_index=self.crossval_index)
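# For illustration (not part of the committed code): a cross-validation run of this
# container would be launched roughly as follows (cluster name is a placeholder; the
# flag names follow WorkflowParams and ExperimentConfig in this commit):
#
#   python health_ml/runner.py --model=HelloWorld --cluster=my-cluster --crossval_count=5
#
# The runner then submits one AzureML HyperDrive child run per crossval_index in 0..4.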


@ -10,15 +10,18 @@ from pathlib import Path
from typing import List, Optional
import param
from azureml.train.hyperdrive import HyperDriveConfig
from param import Parameterized
from health_azure import create_crossval_hyperdrive_config
from health_azure.utils import RUN_CONTEXT, PathOrString, is_running_in_azure_ml
from health_ml.utils import fixed_paths
from health_ml.utils.common_utils import (CHECKPOINT_FOLDER,
create_unique_timestamp_id,
DEFAULT_AML_UPLOAD_DIR,
DEFAULT_LOGS_DIR_NAME, is_windows, parse_model_id_and_version)
DEFAULT_LOGS_DIR_NAME,
parse_model_id_and_version)
from health_ml.utils.type_annotations import TupleFloat2
@ -43,16 +46,6 @@ class LRSchedulerType(Enum):
MultiStep = "MultiStep"
@unique
class MultiprocessingStartMethod(Enum):
"""
Different methods for starting data loader processes.
"""
fork = "fork"
forkserver = "forkserver"
spawn = "spawn"
@unique
class OptimizerType(Enum):
"""
@ -79,7 +72,7 @@ class ExperimentFolderHandler(Parameterized):
def create(project_root: Path,
is_offline_run: bool,
model_name: str,
output_to: Path = Path()) -> ExperimentFolderHandler:
output_to: Optional[Path] = None) -> ExperimentFolderHandler:
"""
Creates a new object that holds output folder configurations. When running inside of AzureML, the output
folders will be directly under the project root. If not running inside AzureML, a folder with a timestamp
@ -96,8 +89,7 @@ class ExperimentFolderHandler(Parameterized):
"""
if not project_root.is_absolute():
raise ValueError(f"The project root is required to be an absolute path, but got {project_root}")
# output_to by default will be Path() which is not None, but Path().stem is None
if is_offline_run or output_to.stem:
if is_offline_run or output_to:
if output_to:
logging.info(f"All results will be written to the specified output folder {output_to}")
root = Path(output_to).absolute()
@ -140,20 +132,17 @@ class WorkflowParams(param.Parameterized):
model_id: str = param.String(default="",
doc="A model id string in the form 'model name:version' "
"to use a registered model for inference.")
multiprocessing_start_method: MultiprocessingStartMethod = \
param.ClassSelector(class_=MultiprocessingStartMethod,
default=(MultiprocessingStartMethod.spawn if is_windows()
else MultiprocessingStartMethod.fork),
doc="Method to be used to start child processes in pytorch. Should be one of forkserver, "
"fork or spawn. If not specified, fork is used on Linux and spawn on Windows. "
"Set to forkserver as a possible remedy for stuck jobs.")
regression_test_folder: Optional[Path] = \
param.ClassSelector(class_=Path, default=None, allow_None=True,
doc="A path to a folder that contains a set of files. At the end of training and "
"model evaluation, all files given in that folder must be present in the job's output "
"folder, and their contents must match exactly. When running in AzureML, you need to "
"ensure that this folder is part of the snapshot that gets uploaded. The path should "
"be relative to the repository root directory.")
crossval_count: int = param.Integer(default=1, bounds=(0, None),
doc="The number of splits to use when doing cross-validation. "
"Use 1 to disable cross-validation")
crossval_index: int = param.Integer(default=0, bounds=(0, None),
doc="When doing cross validation, this is the index of the current "
"split. Valid values: 0 .. (crossval_count -1)")
hyperdrive: bool = param.Boolean(False, doc="If True, use the Hyperdrive configuration specified in the "
"LightningContainer to run hyperparameter tuning. If False, just "
"run a plain single training job.")
CROSSVAL_INDEX_ARG_NAME = "crossval_index"
CROSSVAL_COUNT_ARG_NAME = "crossval_count"
def validate(self) -> None:
if sum([bool(param) for param in [self.weights_url, self.local_weights_path, self.model_id]]) > 1:
@ -162,6 +151,10 @@ class WorkflowParams(param.Parameterized):
if self.model_id:
parse_model_id_and_version(self.model_id)
if self.crossval_count > 1:
if not (0 <= self.crossval_index < self.crossval_count):
raise ValueError(f"Attribute crossval_index out of bounds (crossval_count = {self.crossval_count})")
@property
def is_running_in_aml(self) -> bool:
"""
@ -180,21 +173,37 @@ class WorkflowParams(param.Parameterized):
seed = self.random_seed
return seed
@property
def is_crossvalidation_enabled(self) -> bool:
"""
Returns True if the present parameters indicate that cross-validation should be used.
"""
return self.crossval_count > 1
def get_crossval_hyperdrive_config(self) -> HyperDriveConfig:
# For crossvalidation, the name of the metric to monitor does not matter because no early termination or such
# is specified.
return create_crossval_hyperdrive_config(num_splits=self.crossval_count,
cross_val_index_arg_name=self.CROSSVAL_INDEX_ARG_NAME,
metric_name="val/loss"
)
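# For illustration (a simplification, not the verbatim health_azure implementation):
# conceptually, create_crossval_hyperdrive_config builds a grid over the split index,
# one child run per fold, roughly like
#
#   sampling = GridParameterSampling({CROSSVAL_INDEX_ARG_NAME: choice(*range(num_splits))})
#   HyperDriveConfig(run_config=...,
#                    hyperparameter_sampling=sampling,
#                    primary_metric_name="val/loss",
#                    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
#                    max_total_runs=num_splits)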
class DatasetParams(param.Parameterized):
azure_datasets: List[str] = param.List(default=[], class_=str,
doc="If provided, the ID of one or more datasets to use when running in"
" AzureML.This dataset must exist as a folder of the same name in the"
" 'datasets' container in the datasets storage account. This dataset"
" will be mounted and made available at the 'local_dataset' path"
" when running in AzureML.")
" AzureML. This dataset must exist as a folder of the same name "
"in the 'datasets' container in the datasets storage account. This "
"dataset will be mounted and made available at the 'local_dataset' "
"path when running in AzureML.")
local_datasets: List[Path] = param.List(default=[], class_=Path,
doc="A list of one or more paths to the dataset to use, when training"
" outside of Azure ML.")
dataset_mountpoints: List[Path] = param.List(default=[], class_=Path,
doc="The path at which the AzureML dataset should be made available "
"via mounting or downloading. This only affects jobs running in "
"AzureML. If empty, use a random mount/download point.")
doc="The path at which the AzureML dataset should be made "
"available via mounting or downloading. This only affects "
"jobs running in AzureML. If empty, use a random "
"mount/download point.")
def validate(self) -> None:
if (not self.azure_datasets) and (not self.local_datasets):
@ -207,10 +216,10 @@ class DatasetParams(param.Parameterized):
class OutputParams(param.Parameterized):
output_to: Path = param.ClassSelector(class_=Path, default=Path(),
doc="If provided, the run outputs will be written to the given folder. If "
"not provided, outputs will go into a subfolder of the project root "
"folder.")
output_to: Optional[Path] = param.ClassSelector(class_=Path, default=None,
doc="If provided, the run outputs will be written to the given "
"folder. If not provided, outputs will go into a subfolder "
"of the project root folder.")
file_system_config: ExperimentFolderHandler = param.ClassSelector(default=ExperimentFolderHandler(),
class_=ExperimentFolderHandler,
instantiate=False,
@ -229,14 +238,15 @@ class OutputParams(param.Parameterized):
def set_output_to(self, output_to: PathOrString) -> None:
"""
Adjusts the file system settings in the present object such that all outputs are written to the given folder.
Adjusts the file system settings in the present object such that all outputs are written to the given
folder.
:param output_to: The absolute path to a folder that should contain the outputs.
"""
self.output_to = Path(output_to)
self.create_filesystem()
self.create_filesystem(project_root=fixed_paths.repository_root_directory())
def create_filesystem(self, project_root: Path = fixed_paths.repository_root_directory()) -> None:
def create_filesystem(self, project_root: Path) -> None:
"""
Creates new file system settings (outputs folder, logs folder) based on the information stored in the
present object. If any of the folders do not yet exist, they are created.
@ -268,7 +278,8 @@ class OutputParams(param.Parameterized):
class OptimizerParams(param.Parameterized):
l_rate: float = param.Number(1e-4, doc="The initial learning rate", bounds=(0, None))
_min_l_rate: float = param.Number(0.0, doc="The minimum learning rate for the Polynomial and Cosine schedulers.",
_min_l_rate: float = param.Number(0.0,
doc="The minimum learning rate for the Polynomial and Cosine schedulers.",
bounds=(0.0, None))
l_rate_scheduler: LRSchedulerType = param.ClassSelector(default=LRSchedulerType.Polynomial,
class_=LRSchedulerType,
@ -338,19 +349,20 @@ class TrainerParams(param.Parameterized):
autosave_every_n_val_epochs: int = param.Integer(1, bounds=(0, None),
doc="Save epoch checkpoints every N validation epochs. "
"If pl_check_val_every_n_epoch > 1, this means that "
"checkpoints are saved every N * pl_check_val_every_n_epoch "
"training epochs.")
"checkpoints are saved every "
"N * pl_check_val_every_n_epoch training epochs.")
detect_anomaly: bool = param.Boolean(False, doc="If true, test gradients for anomalies (NaN or Inf) during "
"training.")
use_mixed_precision: bool = param.Boolean(False, doc="If true, mixed precision training is activated during "
"training.")
max_num_gpus: int = param.Integer(default=-1, doc="The maximum number of GPUS to use. If set to a value < 0, use"
"all available GPUs. In distributed training, this is the "
"maximum number of GPUs per node.")
max_num_gpus: int = param.Integer(default=-1,
doc="The maximum number of GPUS to use. If set to a value < 0, use"
"all available GPUs. In distributed training, this is the "
"maximum number of GPUs per node.")
pl_progress_bar_refresh_rate: Optional[int] = \
param.Integer(default=None,
doc="PyTorch Lightning trainer flag 'progress_bar_refresh_rate': How often to refresh progress "
"bar (in steps). Value 0 disables progress bar. Value None chooses automatically.")
doc="PyTorch Lightning trainer flag 'progress_bar_refresh_rate': How often to refresh "
"progress bar (in steps). Value 0 disables progress bar. If None choose, automatically.")
pl_num_sanity_val_steps: int = \
param.Integer(default=0,
doc="PyTorch Lightning trainer flag 'num_sanity_val_steps': Number of validation "
@ -358,8 +370,8 @@ class TrainerParams(param.Parameterized):
pl_deterministic: bool = \
param.Boolean(default=False,
doc="Controls the PyTorch Lightning trainer flags 'deterministic' and 'benchmark'. If "
"'pl_deterministic' is True, results are perfectly reproducible. If False, they are not, but "
"you may see training speed increases.")
"'pl_deterministic' is True, results are perfectly reproducible. If False, they are not, "
"but you may see training speed increases.")
pl_find_unused_parameters: bool = \
param.Boolean(default=False,
doc="Controls the PyTorch Lightning flag 'find_unused_parameters' for the DDP plugin. "
@ -382,9 +394,9 @@ class TrainerParams(param.Parameterized):
monitor_loading: bool = param.Boolean(default=False,
doc="If True, add the BatchTimeCallback callback to the Lightning trainer "
"object. This will monitor how long individual batches take to load.")
additional_env_files: List[str] = param.List(class_=Path, default=[],
doc="Additional conda environment (.yml) files to merge into the"
" overall environment definition")
additional_env_files: List[Path] = param.List(class_=Path, default=[],
doc="Additional conda environment (.yml) files to merge into the"
" overall environment definition")
@property
def use_gpu(self) -> bool:
@ -411,5 +423,6 @@ class TrainerParams(param.Parameterized):
num_gpus = self.max_num_gpus
logging.info(f"Restricting the number of GPUs to {num_gpus}")
elif self.max_num_gpus > num_gpus:
logging.warning(f"You requested max_num_gpus {self.max_num_gpus} but there are only {num_gpus} available.")
logging.warning(
f"You requested max_num_gpus {self.max_num_gpus} but there are only {num_gpus} available.")
return num_gpus


@ -1,11 +1,10 @@
import param
from typing import Optional
class ExperimentConfig(param.Parameterized):
cluster: Optional[str] = param.String(default=None, allow_None=True,
doc="The name of the GPU or CPU cluster inside the AzureML workspace"
"that should execute the job.")
cluster: str = param.String(default="", allow_None=False,
doc="The name of the GPU or CPU cluster inside the AzureML workspace"
"that should execute the job.")
num_nodes: int = param.Integer(default=1, doc="The number of virtual machines that will be allocated for this "
"job in AzureML.")
model: str = param.String(doc="The fully qualified name of the model to train/test - e.g."


@ -30,6 +30,7 @@ class LightningContainer(WorkflowParams,
should be trained is returned by the `get_model` method. The training data must be returned in the form of
a LightningDataModule, by the `get_data_module` method.
"""
def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)
self._model: Optional[LightningModule] = None
@ -77,12 +78,20 @@ class LightningContainer(WorkflowParams,
"""
return []
def get_parameter_search_hyperdrive_config(self, _: ScriptRunConfig) -> HyperDriveConfig: # type: ignore
def get_parameter_tuning_config(self, run_config: ScriptRunConfig) -> HyperDriveConfig: # type: ignore
"""
Parameter search is not implemented. It should be implemented in a sub class if needed.
Returns a configuration for hyperparameter tuning via AzureML's Hyperdrive capability.
Hyperparameter tuning can be triggered on the commandline via the "--hyperdrive" flag.
Override this method in your LightningContainer to use hyperparameter tuning.
The HyperDriveConfig config object needs to specify which parameters should be searched over, and which
metric should be monitored.
:param run_config: The ScriptRunConfig object that needs to be passed into the constructor of
HyperDriveConfig.
"""
raise NotImplementedError("Parameter search is not implemented. It should be implemented in"
"a sub class if needed.")
raise NotImplementedError("Parameter search is not implemented. Please override 'get_parameter_tuning_config' "
"in your model container.")
def update_experiment_config(self, experiment_config: ExperimentConfig) -> None:
"""
@ -148,14 +157,17 @@ class LightningContainer(WorkflowParams,
self._model._optimizer_params = create_from_matching_params(self, OptimizerParams)
self._model._trainer_params = create_from_matching_params(self, TrainerParams)
def get_hyperdrive_config(self, run_config: ScriptRunConfig) -> HyperDriveConfig:
def get_hyperdrive_config(self) -> Optional[HyperDriveConfig]:
"""
Returns the HyperDrive config for either parameter search
Returns the HyperDrive config for either hyperparameter tuning or cross validation.
:param run_config: AzureML estimator
:return: HyperDriveConfigs
:return: A configuration object for HyperDrive
"""
return self.get_parameter_search_hyperdrive_config(run_config)
if self.is_crossvalidation_enabled:
return self.get_crossval_hyperdrive_config()
if self.hyperdrive:
return self.get_parameter_tuning_config(ScriptRunConfig(source_directory=""))
return None
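# For illustration (a sketch, not part of the committed code): the resulting precedence is
#
#   container.crossval_count > 1   ->  get_hyperdrive_config() returns the cross-validation grid
#   container.hyperdrive == True   ->  get_hyperdrive_config() returns the tuning config
#   otherwise                      ->  get_hyperdrive_config() returns None (plain single run)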
def load_model_checkpoint(self, checkpoint_path: Path) -> None:
"""


@ -19,11 +19,10 @@ from health_azure.utils import (ENV_GLOBAL_RANK, ENV_LOCAL_RANK, ENV_NODE_RANK,
from health_ml.lightning_container import LightningContainer
from health_ml.utils import AzureMLLogger, AzureMLProgressBar
from health_ml.utils.checkpoint_utils import cleanup_checkpoints
from health_ml.utils.common_utils import AUTOSAVE_CHECKPOINT_FILE_NAME, EXPERIMENT_SUMMARY_FILE
from health_ml.utils.common_utils import (AUTOSAVE_CHECKPOINT_FILE_NAME, EXPERIMENT_SUMMARY_FILE,
change_working_directory)
from health_ml.utils.lightning_loggers import StoringLogger
TEMP_PREFIX = "temp/"
T = TypeVar('T')
@ -213,9 +212,11 @@ def model_train(checkpoint_path: Optional[Path],
logging.info(f"Environment variables: {rank_info}. trainer.global_rank: {trainer.global_rank}")
# get recovery checkpoint if it exists
logging.info("Starting training")
trainer.fit(lightning_model, datamodule=data_module)
# Change to the outputs folder so that the model can write to current working directory, and still everything
# is put into the right place in AzureML (only the contents of the "outputs" folder is treated as a result file)
with change_working_directory(container.outputs_folder):
trainer.fit(lightning_model, datamodule=data_module)
assert trainer.logger is not None
trainer.logger.finalize('success')
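# For illustration (a minimal stand-in, not the exact code of the helper in
# health_ml.utils.common_utils): change_working_directory is a small context manager.
#
#   import os
#   from contextlib import contextmanager
#
#   @contextmanager
#   def change_working_directory(folder):
#       old = os.getcwd()      # remember the previous working directory
#       os.chdir(str(folder))  # run the body inside `folder`
#       try:
#           yield
#       finally:
#           os.chdir(old)      # always restore, even if the body raises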


@ -20,7 +20,7 @@ from health_ml.model_trainer import create_lightning_trainer, model_train
from health_ml.utils import fixed_paths
from health_ml.utils.checkpoint_utils import CheckpointHandler
from health_ml.utils.common_utils import (
EFFECTIVE_RANDOM_SEED_KEY_NAME, logging_section,
EFFECTIVE_RANDOM_SEED_KEY_NAME, change_working_directory, logging_section,
RUN_RECOVERY_ID_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME)
from health_ml.utils.lightning_loggers import StoringLogger
from health_ml.utils.type_annotations import PathOrString
@ -181,10 +181,13 @@ class MLRunner:
trainer, _ = create_lightning_trainer(self.container, num_nodes=1)
self.container.load_model_checkpoint(checkpoint_path=checkpoint_paths[0])
# Change the current working directory to ensure that test files go to the right folder
data_module = self.container.get_data_module()
_ = trainer.test(self.container.model, datamodule=data_module)
# Change to the outputs folder so that the model can write to current working directory, and still
# everything is put into the right place in AzureML (there, only the contents of the "outputs" folder
# is retained)
with change_working_directory(self.container.outputs_folder):
_ = trainer.test(self.container.model, datamodule=data_module)
else:
logging.warning("None of the suitable test methods is overridden. Skipping inference completely.")


@ -9,27 +9,23 @@ import param
import sys
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Optional, Tuple
import matplotlib
from azureml.core import Workspace
# Add hi-ml packages to sys.path so that AML can find them
# Optionally add the histopathology module, if this exists
# Add hi-ml packages to sys.path so that AML can find them if we are using the runner directly from the git repo
himl_root = Path(__file__).absolute().parent.parent.parent.parent
print(f"Starting the himl runner at {himl_root}")
print(f"health_ml pkg root: {himl_root}")
health_ml_pkg = himl_root / "hi-ml" / "src"
health_azure_pkg = himl_root / "hi-ml-azure" / "src"
health_histopathology_dir = himl_root / "hi-ml-histopathology" / "src"
if health_histopathology_dir.exists():
sys.path.insert(0, str(health_histopathology_dir))
sys.path.insert(0, str(health_azure_pkg))
sys.path.insert(0, str(health_ml_pkg))
print(f"sys path: {sys.path}")
folders_to_add = [himl_root / "hi-ml" / "src",
himl_root / "hi-ml-azure" / "src",
himl_root / "hi-ml-histopathology" / "src"]
for folder in folders_to_add:
if folder.is_dir():
sys.path.insert(0, str(folder))
from health_azure import AzureRunInfo, submit_to_azure_if_needed # noqa: E402
from health_azure.datasets import create_dataset_configs # noqa: E402
from health_azure.paths import is_himl_used_from_git_repo # noqa: E402
from health_azure.utils import (get_workspace, is_local_rank_zero, merge_conda_files, # noqa: E402
set_environment_variables_for_multi_node, create_argparser, parse_arguments,
ParserResult, apply_overrides)
@ -38,12 +34,11 @@ from health_ml.experiment_config import ExperimentConfig # noqa: E402
from health_ml.lightning_container import LightningContainer # noqa: E402
from health_ml.run_ml import MLRunner # noqa: E402
from health_ml.utils import fixed_paths # noqa: E402
from health_ml.utils.common_utils import (get_all_environment_files, # noqa: E402
from health_ml.utils.common_utils import (check_conda_environments, get_all_environment_files, # noqa: E402
get_all_pip_requirements_files,
is_linux, logging_to_stdout)
from health_ml.utils.config_loader import ModelConfigLoader # noqa: E402
DEFAULT_DOCKER_BASE_IMAGE = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
@ -142,8 +137,7 @@ class Runner:
self.experiment_config = experiment_config
if not experiment_config.model:
raise ValueError("Parameter 'model' needs to be set to specify which model to run.")
print(f"Creating model loader with the following args: {parser_result.args}")
model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser_result.args)
model_config_loader: ModelConfigLoader = ModelConfigLoader()
# Create the model as per the "model" commandline option. This is a LightningContainer.
container = model_config_loader.create_model_config_from_name(model_name=experiment_config.model)
@ -161,6 +155,23 @@ class Runner:
return parser_result_
def validate(self) -> None:
"""
Runs sanity checks on the whole experiment.
"""
if not self.experiment_config.azureml:
if self.lightning_container.hyperdrive:
logging.info("You have turned on HyperDrive for parameter tuning. This can "
"only be run in AzureML. We switched on submitting to AzureML.")
self.experiment_config.azureml = True
if self.lightning_container.is_crossvalidation_enabled:
logging.info("You have turned on cross-validation. This can "
"only be run in AzureML. We switched on submitting to AzureML.")
self.experiment_config.azureml = True
if self.experiment_config.cluster:
logging.info("You have provided a compute cluster name, hence we switched on submitting to AzureML.")
self.experiment_config.azureml = True
def run(self) -> Tuple[LightningContainer, AzureRunInfo]:
"""
The main entry point for training and testing models from the commandline. This chooses a model to train
@ -174,6 +185,7 @@ class Runner:
logging_to_stdout(logging.INFO if is_local_rank_zero() else "ERROR")
initialize_rpdb()
self.parse_and_load_model()
self.validate()
azure_run_info = self.submit_to_azureml_if_needed()
self.run_in_situ(azure_run_info)
return self.lightning_container, azure_run_info
@ -191,30 +203,18 @@ class Runner:
entry_script = Path(sys.argv[0]).resolve()
script_params = sys.argv[1:]
additional_conda_env_files = self.lightning_container.additional_env_files
additional_env_files: Optional[List[Path]]
if additional_conda_env_files is not None:
additional_env_files = [Path(f) for f in additional_conda_env_files]
else:
additional_env_files = None
conda_dependencies_files = get_all_environment_files(self.project_root,
additional_files=additional_env_files)
pip_requirements_files = get_all_pip_requirements_files()
# Merge the project-specific dependencies with the packages and write unified definition
# to temp file. In case of version conflicts, the package version in the outer project is given priority.
temp_conda: Optional[Path] = None
if len(conda_dependencies_files) > 1 or len(pip_requirements_files) > 0:
temp_conda = root_folder / f"temp_environment-{uuid.uuid4().hex[:8]}.yml"
merge_conda_files(conda_dependencies_files, temp_conda, pip_files=pip_requirements_files)
# TODO: Update environment variables
environment_variables: Dict[str, Any] = {}
# get default datastore from provided workspace
workspace = get_workspace()
default_datastore = workspace.get_default_datastore().name
# Get default datastore from the provided workspace. Authentication can take a few seconds, hence only do
# that if we are really submitting to AzureML.
workspace: Optional[Workspace] = None
if self.experiment_config.azureml:
try:
workspace = get_workspace()
except ValueError:
logging.warning("No configuration file for an AzureML workspace was found.")
default_datastore = workspace.get_default_datastore().name if workspace is not None else ""
local_datasets = self.lightning_container.local_datasets
all_local_datasets = [Path(p) for p in local_datasets] if len(local_datasets) > 0 else []
@ -223,27 +223,48 @@ class Runner:
all_dataset_mountpoints=self.lightning_container.dataset_mountpoints,
all_local_datasets=all_local_datasets, # type: ignore
datastore=default_datastore)
if self.lightning_container.is_crossvalidation_enabled and not self.experiment_config.azureml:
raise ValueError("Cross-validation is only supported when submitting the job to AzureML.")
hyperdrive_config = self.lightning_container.get_hyperdrive_config()
temp_conda: Optional[Path] = None
try:
if self.experiment_config.azureml:
conda_files = get_all_environment_files(root_folder,
additional_files=self.lightning_container.additional_env_files)
check_conda_environments(conda_files)
# This adds all pip packages required by hi-ml and hi-ml-azure in case the code is used directly from
# source (submodule) rather than installed as a package.
pip_requirements_files = get_all_pip_requirements_files()
# Merge the project-specific dependencies with the packages and write unified definition to temp file.
if len(conda_files) > 1 or len(pip_requirements_files) > 0:
temp_conda = root_folder / f"temp_environment-{uuid.uuid4().hex[:8]}.yml"
merge_conda_files(conda_files, temp_conda, pip_files=pip_requirements_files)
if workspace is None:
raise ValueError("Unable to submit the script to AzureML because no workspace configuration file "
"(config.json) was found.")
if not self.experiment_config.cluster:
raise ValueError("You need to specify a cluster name via '--cluster NAME' to submit"
raise ValueError("You need to specify a cluster name via '--cluster NAME' to submit "
"the script to run in AzureML")
azure_run_info = submit_to_azure_if_needed(
entry_script=entry_script,
snapshot_root_directory=root_folder,
script_params=script_params,
conda_environment_file=temp_conda or conda_dependencies_files[0],
conda_environment_file=temp_conda or conda_files[0],
aml_workspace=workspace,
compute_cluster_name=self.experiment_config.cluster,
environment_variables=environment_variables,
default_datastore=default_datastore,
experiment_name=self.lightning_container.name, # create_experiment_name(),
experiment_name=self.lightning_container.model_name, # create_experiment_name(),
input_datasets=input_datasets, # type: ignore
num_nodes=self.experiment_config.num_nodes,
wait_for_completion=False,
ignored_folders=[],
submit_to_azureml=self.experiment_config.azureml,
docker_base_image=DEFAULT_DOCKER_BASE_IMAGE,
hyperdrive_config=hyperdrive_config,
create_output_folders=False,
tags=additional_run_tags(
commandline_args=" ".join(script_params))
)
@ -252,7 +273,7 @@ class Runner:
input_datasets=input_datasets, # type: ignore
submit_to_azureml=False)
finally:
if temp_conda:
if temp_conda and temp_conda.is_file():
temp_conda.unlink()
# submit_to_azure_if_needed calls sys.exit after submitting to AzureML. We only reach this when running
# the script locally or in AzureML.
@ -296,7 +317,7 @@ def run(project_root: Path) -> Tuple[LightningContainer, AzureRunInfo]:
def main() -> None:
run(project_root=fixed_paths.repository_root_directory())
run(project_root=fixed_paths.repository_root_directory() if is_himl_used_from_git_repo() else Path.cwd())
if __name__ == '__main__':


@ -10,10 +10,11 @@ from typing import Any, Generator, Iterable, List, Optional, Union
import torch
from torch.nn import Module
from health_azure import utils
from health_azure import paths
from health_azure.paths import ENVIRONMENT_YAML_FILE_NAME, git_repo_root_folder, is_himl_used_from_git_repo
from health_azure.utils import PathOrString
from health_ml.utils import fixed_paths
from health_azure.utils import PathOrString, is_conda_file_with_pip_include
MAX_PATH_LENGTH = 260
@ -25,15 +26,17 @@ string_to_path = lambda x: None if (x is None or len(x.strip()) == 0) else Path(
# file and directory names
CHECKPOINT_SUFFIX = ".ckpt"
AUTOSAVE_CHECKPOINT_FILE_NAME = "autosave"
AUTOSAVE_CHECKPOINT_CANDIDATES = [AUTOSAVE_CHECKPOINT_FILE_NAME + CHECKPOINT_SUFFIX,
AUTOSAVE_CHECKPOINT_FILE_NAME + "-v1" + CHECKPOINT_SUFFIX]
AUTOSAVE_CHECKPOINT_CANDIDATES = [
AUTOSAVE_CHECKPOINT_FILE_NAME + CHECKPOINT_SUFFIX,
AUTOSAVE_CHECKPOINT_FILE_NAME + "-v1" + CHECKPOINT_SUFFIX,
]
CHECKPOINT_FOLDER = "checkpoints"
DEFAULT_AML_UPLOAD_DIR = "outputs"
DEFAULT_LOGS_DIR_NAME = "logs"
EXPERIMENT_SUMMARY_FILE = "experiment_summary.txt"
# run recovery
RUN_RECOVERY_ID_KEY = 'run_recovery_id'
RUN_RECOVERY_ID_KEY = "run_recovery_id"
RUN_RECOVERY_FROM_ID_KEY_NAME = "recovered_from"
# other
@ -45,6 +48,7 @@ class ModelExecutionMode(Enum):
"""
Model execution mode
"""
TRAIN = "Train"
TEST = "Test"
VAL = "Val"
@ -111,8 +115,7 @@ def _add_formatter(handler: logging.StreamHandler) -> None:
"""
Adds a logging formatter that includes the timestamp and the logging level.
"""
formatter = logging.Formatter(fmt="%(asctime)s %(levelname)-8s %(message)s",
datefmt="%Y-%m-%dT%H:%M:%SZ")
formatter = logging.Formatter(fmt="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%dT%H:%M:%SZ")
# noinspection PyTypeHints
formatter.converter = time.gmtime # type: ignore
handler.setFormatter(formatter)
@ -129,6 +132,7 @@ def logging_section(gerund: str) -> Generator:
:param gerund: string expressing what happens in this section of the log.
"""
from time import time
logging.info("")
msg = f"**** STARTING: {gerund} "
logging.info(msg + (100 - len(msg)) * "*")
@ -152,14 +156,14 @@ def is_windows() -> bool:
"""
Returns True if the host operating system is Windows.
"""
return os.name == 'nt'
return os.name == "nt"
def is_linux() -> bool:
"""
Returns True if the host operating system is a flavour of Linux.
"""
return os.name == 'posix'
return os.name == "posix"
def check_properties_are_not_none(obj: Any, ignore: Optional[List[str]] = None) -> None:
@ -206,16 +210,31 @@ def _create_generator(seed: Optional[int] = None) -> torch.Generator:
def get_all_environment_files(project_root: Path, additional_files: Optional[List[Path]] = None) -> List[Path]:
"""
Returns a list of all Conda environment files that should be used. This is just an
environment.yml file that lives at the project root folder, plus any additional files provided.
environment.yml file that lives at the project root folder, plus any additional files provided in the model.
:param project_root: The root folder of the code that starts the present training run.
:param additional_files: Optional list of additional environment files to merge
:return: A list with 1 entry that is the root level repo's conda environment files.
:return: A list of Conda environment files to use.
"""
env_files = []
project_yaml = project_root / paths.ENVIRONMENT_YAML_FILE_NAME
if paths.is_himl_used_from_git_repo():
logging.info("Searching for Conda files in the parent folders")
git_repo_root = paths.git_repo_root_folder()
env_file = utils.find_file_in_parent_folders(
file_name=paths.ENVIRONMENT_YAML_FILE_NAME, stop_at_path=[git_repo_root]
)
assert env_file is not None, "Expected to find at least the environment definition file at repo root"
logging.info(f"Using Conda environment in {env_file}")
env_files.append(env_file)
elif project_yaml.exists():
logging.info(f"Using Conda environment in current folder: {project_yaml}")
env_files.append(project_yaml)
if not env_files and not additional_files:
raise ValueError(
"No Conda environment files were found in the repository, and none were specified in the " "model itself."
)
if additional_files:
for additional_file in additional_files:
if additional_file.exists():
@ -223,26 +242,44 @@ def get_all_environment_files(project_root: Path, additional_files: Optional[Lis
return env_files
def check_conda_environments(env_files: List[Path]) -> None:
"""Tests if all conda environment files are valid. In particular, they must not contain "include" statements
in the pip section.
:param env_files: The list of Conda environment YAML files to check.
"""
if is_himl_used_from_git_repo():
repo_root_yaml: Optional[Path] = git_repo_root_folder() / ENVIRONMENT_YAML_FILE_NAME
else:
repo_root_yaml = None
for file in env_files:
has_pip_include, _ = is_conda_file_with_pip_include(file)
# PIP include statements are only valid when reading from the repository root YAML file, because we
# are manually adding the included files in get_all_pip_requirements_files
if has_pip_include and file != repo_root_yaml:
raise ValueError(
f"The Conda environment definition in {file} uses '-r' to reference pip requirements "
"files. This does not work in AzureML. Please add the pip dependencies directly."
)
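# For illustration (made-up file content, not part of this commit): an environment file
# like the following would be rejected by check_conda_environments, because its pip
# section pulls in another requirements file via "-r":
#
#   name: my-env
#   dependencies:
#     - pip:
#       - -r requirements.txt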
def get_all_pip_requirements_files() -> List[Path]:
"""
If the root level hi-ml directory is available (e.g. it has been installed as a submodule or
downloaded directly into a parent repo) then we must add its pip requirements to any environment
definition. This function returns a list of the necessary pip requirements files. If the hi-ml
root directory does not exist (e.g. hi-ml has been installed as a pip package), this is not necessary
and so this function returns an empty list.
:return: A list of pip requirements files in the hi-ml and hi-ml-azure packages if relevant,
or else an empty list.
"""
files = []
himl_root_dir = fixed_paths.himl_root_dir()
if himl_root_dir is not None:
himl_yaml = himl_root_dir / "hi-ml" / "run_requirements.txt"
himl_az_yaml = himl_root_dir / "hi-ml-azure" / "run_requirements.txt"
files.append(himl_yaml)
files.append(himl_az_yaml)
return files
return []
if paths.is_himl_used_from_git_repo():
git_root = paths.git_repo_root_folder()
for folder in [Path("hi-ml") / "run_requirements.txt", Path("hi-ml-azure") / "run_requirements.txt"]:
files.append(git_root / folder)
return files
def create_unique_timestamp_id() -> str:
@ -270,8 +307,7 @@ def parse_model_id_and_version(model_id_and_version: str) -> None:
expected format
"""
if len(model_id_and_version.split(":")) != 2:
raise ValueError(
f"model id should be in the form 'model_name:version', got {model_id_and_version}")
raise ValueError(f"model id should be in the form 'model_name:version', got {model_id_and_version}")
@contextmanager


@ -7,171 +7,157 @@ from __future__ import annotations
import importlib
import inspect
import logging
from importlib._bootstrap import ModuleSpec
from importlib.util import find_spec
from pathlib import Path
from typing import Dict, List, Optional
from health_ml.lightning_container import LightningContainer
class ModelConfigLoader:
"""
Helper class to manage model config loading.
"""
def __init__(self) -> None:
pass
def default_module_spec(self) -> ModuleSpec:
from health_ml import configs  # type: ignore
default_module = configs.__name__
return find_spec(default_module)
def find_module_search_specs(self, model_name: str) -> ModuleSpec:
"""
Given model name (either only the class name or fully qualified), return the ModuleSpec that should be used for
loading. If the model name is only the class name, the function will return the result of calling
default_module_spec. Otherwise, this will return the module of the (fully qualified) model name.
"""
model_namespace_parts = model_name.split(".")
if len(model_namespace_parts) == 1:
# config must be in the default path, nothing to be done
return self.default_module_spec()
module_name = ".".join(model_namespace_parts[:-1])
logging.debug(f"Getting specification for module {module_name}")
try:
custom_spec: Optional[ModuleSpec] = find_spec(module_name)
except Exception:
custom_spec = None
if custom_spec is None:
raise ValueError(f"Module {module_name} was not found.")
return custom_spec
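# For illustration (module names below are examples, not part of this commit):
# how model names resolve to module specs.
#
#   loader = ModelConfigLoader()
#   loader.find_module_search_specs("HelloWorld")                 # -> health_ml.configs
#   loader.find_module_search_specs("mymodule.configs.MyModel")   # -> mymodule.configs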
def _get_model_config(self, module_spec: ModuleSpec, model_name: str) -> Optional[LightningContainer]:
"""
Given a module specification check to see if it has a class property with
the <model_name> provided, and instantiate that config class. Otherwise, return None.
:param module_spec: The module spec in which to look for the model class.
:param model_name: The name of the model class to find.
:return: Instantiated model config if it was found.
"""
# noinspection PyBroadException
try:
logging.debug(f"Importing {module_spec.name}")
target_module = importlib.import_module(module_spec.name)
# The "if" clause checks that obj is a class, of the desired name, that is
# defined in this module rather than being imported into it (and hence potentially
# being found twice).
_class = next(
obj
for name, obj in inspect.getmembers(target_module)
if inspect.isclass(obj) and name == model_name and inspect.getmodule(obj) == target_module
)
logging.info(f"Found class {_class} in file {module_spec.origin}")
# ignore the exception which will occur if the provided module cannot be loaded
# or the loaded module does not have the required class as a member
except Exception as e:
exception_text = str(e)
if exception_text != "":
logging.warning(f"Error when trying to import module {module_spec.name}: {exception_text}")
return None
model_config = _class()
return model_config
def _search_recursively_and_store(self, module_spec: ModuleSpec, model_name: str) -> Dict[str, LightningContainer]:
"""
Given a root namespace eg: A.B.C searches recursively in all child namespaces
for class property with the <model_name> provided. If found, this is
instantiated and added to the returned configs dictionary.
:param module_spec: The module spec that defines where to start the recursive search.
:param model_name: The name of the model class to find.
"""
configs: Dict[str, LightningContainer] = {}
root_namespace = module_spec.name
namespaces_to_search: List[str] = []
if module_spec.submodule_search_locations:
logging.debug(
f"Searching through {len(module_spec.submodule_search_locations)} folders that match namespace "
f"{module_spec.name}: {module_spec.submodule_search_locations}"
)
for root in module_spec.submodule_search_locations:
# List all python files in all the dirs under root, except for private dirs (prefixed with .)
all_py_files = [x for x in Path(root).rglob("*.py") if ".." not in str(x)]
for f in all_py_files:
if f.is_file() and "__pycache__" not in str(f) and f.name != "setup.py":
sub_namespace = path_to_namespace(f, root=root)
namespaces_to_search.append(root_namespace + "." + sub_namespace)
elif module_spec.origin:
# The module search spec already points to a python file: Search only that.
namespaces_to_search.append(module_spec.name)
else:
raise ValueError(f"Unable to process module spec: {module_spec}")
for n in namespaces_to_search: # type: ignore
_module_spec = None
# noinspection PyBroadException
try:
_module_spec = find_spec(n) # type: ignore
except Exception:
continue
if _module_spec:
config = self._get_model_config(_module_spec, model_name=model_name)
if config:
configs[n] = config # type: ignore
return configs
def create_model_config_from_name(self, model_name: str) -> LightningContainer:
"""
Returns a model configuration for a model of the given name.
:param model_name: Class name (for example, "HelloWorld") if the model config is in the default search
namespace, or the fully qualified name of the model, like mymodule.configs.MyConfig
"""
if not model_name:
raise ValueError("Unable to load a model configuration because the model name is missing.")
logging.info(f"Trying to locate model {model_name}")
name_parts = model_name.split(".")
class_name = name_parts[-1]
module_spec = self.find_module_search_specs(model_name)
configs = self._search_recursively_and_store(module_spec=module_spec, model_name=class_name)
if len(configs) == 0:
raise ValueError(f"Model '{model_name}' was not found in search namespace {module_spec.name}")
elif len(configs) > 1:
raise ValueError(
f"Multiple instances of model name {model_name} were found in namespaces: {configs.keys()}.")
f"Multiple instances of model '{model_name}' were found in namespaces: {[*configs.keys()]}"
)
else:
return list(configs.values())[0]
def path_to_namespace(path: Path, root: Path) -> str:
"""
Given a path (in form R/A/B/C) and a root directory R, create a namespace string A.B.C.
The path must be located under the root directory.
:param path: Path to convert to namespace
:param root: Path prefix to remove from namespace.
:return: A Python namespace string
"""
return ".".join([Path(x).stem for x in path.relative_to(root).parts])


@ -4,61 +4,21 @@
# ------------------------------------------------------------------------------------------
from dataclasses import dataclass
from pathlib import Path
from health_azure.paths import git_repo_root_folder, is_himl_used_from_git_repo
def repository_root_directory() -> Path:
"""
Gets the full path to the root directory that holds the present repository.
This function should only be called if the repository is available (for example, in unit tests).
It will raise a ValueError if the repo is not available.
:return: The full path to the repository's root directory, with symlinks resolved if any.
"""
if is_himl_used_from_git_repo():
return git_repo_root_folder()
raise ValueError("This function should not be used if hi-ml is used as an installed package.")
@dataclass(frozen=True)
@ -66,6 +26,7 @@ class OutputFolderForTests:
"""
Data class for the output directories for a given test
"""
root_dir: Path
def create_file_or_folder_path(self, file_or_folder_name: str) -> Path:


@ -1,10 +0,0 @@
name: test-env
channels:
- defaults
- pytorch
dependencies:
- pip=20.1.1
- python=3.7.3
- pytorch=1.8.0
- pip:
- -r ../test_requirements.txt


@ -0,0 +1,14 @@
import logging
import sys
from pathlib import Path
root = Path(__file__).parent.parent.parent
for folder in ["hi-ml-azure", "hi-ml"]:
full_folder = str(root / folder / "src")
if full_folder not in sys.path:
print(f"Adding to sys.path for running hi-ml: {full_folder}")
sys.path.insert(0, full_folder)
# Matplotlib is very talkative in DEBUG mode
logging.getLogger('matplotlib').setLevel(logging.INFO)


@ -1,139 +1,88 @@
import shutil
from pathlib import Path
import pytest
from health_ml.configs import hello_world as hello_config
from health_ml.lightning_container import LightningContainer
from health_ml.utils.config_loader import ModelConfigLoader, path_to_namespace
def test_find_module_search_specs() -> None:
config_loader = ModelConfigLoader()
module_spec = config_loader.find_module_search_specs(model_name="health_ml.utils.config_loader.Foo")
assert module_spec.name == "health_ml.utils.config_loader"
module_spec = config_loader.find_module_search_specs(model_name="DoesNotExist")
assert module_spec.name == "health_ml.configs"
def test_get_default_search_module() -> None:
config_loader = ModelConfigLoader()
search_module = config_loader.default_module_spec()
assert search_module.name == "health_ml.configs"
def test_create_model_config_from_name_errors() -> None:
config_loader = ModelConfigLoader()
# if no model name is given, an exception should be raised
with pytest.raises(Exception) as e:
config_loader.create_model_config_from_name("")
assert "the model name is missing" in str(e)
assert "the model name is missing" in str(e)
# if no config is found matching the model name, an exception should be raised
with pytest.raises(Exception) as e:
config_loader.create_model_config_from_name("idontexist")
assert "was not found in search namespaces" in str(e)
assert "was not found in search namespace" in str(e)
with pytest.raises(Exception) as e:
config_loader.create_model_config_from_name("testhiml.idontexist.idontexist")
assert "Module testhiml.idontexist was not found" in str(e)
def test_create_model_config_from_name_duplicates() -> None:
config_loader = ModelConfigLoader()
config_name = "HelloWorld"
# if exactly one config is found, expect a LightningContainer to be returned
container = config_loader.create_model_config_from_name(config_name)
assert isinstance(container, LightningContainer)
assert container.model_name == config_name
def test_config_in_dif_location(tmp_path: Path, hello_config: Any) -> None:
himl_root = Path(hello_config.__file__).parent.parent
model_name = "HelloContainer"
new_config_path = himl_root / "hello_container_to_delete.py"
new_config_path.touch()
# if > 1 config is found matching the model name, an exception should be raised
hello_config_path = Path(hello_config.__file__)
shutil.copyfile(str(hello_config_path), str(new_config_path))
config_loader = ModelConfigLoader(model=model_name)
# Trying to find this config should now cause an exception as it should find it in both "health_ml" and
# in "health_ml.configs"
# This file must be excluded from coverage reports, check .coveragerc
duplicate_config_file = hello_config_path.parent / "temp_config_for_unittests.py"
shutil.copyfile(hello_config_path, duplicate_config_file)
with pytest.raises(Exception) as e:
config_loader.create_model_config_from_name(model_name)
assert "Multiple instances of model name HelloContainer were found in namespaces: " \
"dict_keys(['health_ml.configs.hello_container', 'health_ml.hello_container_to_delete']) " in str(e)
new_config_path.unlink()
with pytest.raises(Exception) as e:
config_loader.create_model_config_from_name(model_name)
assert "Multiple instances of model " in str(e)
duplicate_config_file.unlink()
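# A sketch (assumed logic, not hi-ml's actual code) of the uniqueness check that the
# "Multiple instances of model name" assertions above exercise: the loader collects at most
# one match per namespace and refuses to choose when several namespaces match.
from typing import Dict, Type


def check_unique_match(model_name: str, matches: Dict[str, Type]) -> Type:
    # matches maps namespace -> matching class; two entries arise when a config file is
    # duplicated into a second module, as in the test above
    if not matches:
        raise ValueError(f"Model name {model_name} was not found in search namespaces")
    if len(matches) > 1:
        raise ValueError(f"Multiple instances of model name {model_name} were found in "
                         f"namespaces: {matches.keys()}")
    return next(iter(matches.values()))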
@pytest.mark.parametrize("is_external", [True, False])
def test_path_to_namespace(is_external: bool) -> None:
def test_path_to_namespace() -> None:
"""
A test to check conversion between namespaces and paths for InnerEye and external namespaces
A test to check conversion between paths and Python namespaces.
"""
tests_root_dir = tests_root_directory()
if is_external:
folder_name = "logs"
full_folder = tests_root_dir / folder_name
assert path_to_namespace(
path=full_folder,
root=tests_root_dir
) == folder_name
else:
assert path_to_namespace(
path=full_test_data_path(),
root=tests_root_dir
) == "test_data"
assert path_to_namespace(Path("/foo/bar/baz"), root=Path("/foo")) == "bar.baz"
def test_config_fully_qualified() -> None:
"""
Test if we can load model configs when giving a full Python namespace.
"""
# This name was deliberately chosen to be outside the default search namespace
model_name = "health_ml.utils.config_loader.ModelConfigLoader"
config_loader = ModelConfigLoader()
model = config_loader.create_model_config_from_name(model_name=model_name)
assert type(model).__name__ == "ModelConfigLoader"
def test_config_fully_qualified_invalid() -> None:
"""
Test error handling when the given namespace is too long.
"""
namespace = "health_ml.utils.config_loader.foo"
model_name = namespace + ".Foo"
config_loader = ModelConfigLoader()
with pytest.raises(ValueError) as ex:
config_loader.create_model_config_from_name(model_name=model_name)
assert f"Module {namespace} was not found" in str(ex)

View file

@ -60,6 +60,7 @@ def test_workflow_params_get_effective_random_seed() -> None:
assert seed == params.random_seed
@pytest.mark.fast
def test_validate_dataset_params() -> None:
# DatasetParams cannot be initialized when neither azure_datasets nor local_datasets is set
with pytest.raises(ValueError) as ex:
@ -78,11 +79,11 @@ def test_validate_dataset_params() -> None:
# local datasets and dataset_mountpoints must be Paths
with pytest.raises(Exception) as e:
DatasetParams(local_datasets=["foo"])
assert "is not an instance of" in str(e)
assert "items must be instances of type <class 'pathlib.Path'>" in str(e)
with pytest.raises(Exception) as e:
DatasetParams(dataset_mountpoints=["foo"])
assert "is not an instance of" in str(e)
assert "items must be instances of type <class 'pathlib.Path'>" in str(e)
# The following should be okay
DatasetParams(local_datasets=[Path("foo")]).validate()

View file

@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch, Mock
from pytorch_lightning import Callback, Trainer
from pytorch_lightning.callbacks import GradientAccumulationScheduler, ModelCheckpoint, ModelSummary, TQDMProgressBar
from health_ml.configs.hello_container import HelloContainer # type: ignore
from health_ml.configs.hello_world import HelloWorld # type: ignore
from health_ml.lightning_container import LightningContainer
from health_ml.model_trainer import (create_lightning_trainer, write_experiment_summary_file, model_train)
from health_ml.utils.common_utils import EXPERIMENT_SUMMARY_FILE
@ -65,8 +65,8 @@ def test_create_lightning_trainer_with_callbacks() -> None:
callbacks = [MyCallback()]
return {"callbacks": callbacks}
model_name = "HelloContainer"
model_config_loader = ModelConfigLoader(model=model_name)
model_name = "HelloWorld"
model_config_loader = ModelConfigLoader()
container = model_config_loader.create_model_config_from_name(model_name)
container.monitor_gpu = False
container.monitor_loading = False
@ -88,7 +88,7 @@ def test_create_lightning_trainer_with_callbacks() -> None:
def test_model_train() -> None:
container = HelloContainer()
container = HelloWorld()
container.create_lightning_module_and_store()
with patch.object(container, "get_data_module"):

View file

@ -5,7 +5,7 @@ import pytest
from typing import Generator, Tuple
from unittest.mock import patch
from health_ml.configs.hello_container import HelloContainer
from health_ml.configs.hello_world import HelloWorld
from health_ml.experiment_config import ExperimentConfig
from health_ml.lightning_container import LightningContainer
from health_ml.run_ml import MLRunner
@ -34,7 +34,7 @@ def ml_runner() -> Generator:
@pytest.fixture(scope="module")
def ml_runner_with_container() -> Generator:
experiment_config = ExperimentConfig(model="HelloContainer")
container = HelloContainer()
container = HelloWorld()
runner = MLRunner(experiment_config=experiment_config, container=container)
runner.setup()
yield runner
@ -92,10 +92,12 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> No
"""
Test that run_inference gets called as expected.
"""
def _expected_files_exist() -> int:
def _expected_files_exist() -> bool:
output_dir = ml_runner_with_container.container.outputs_folder
expected_files = [Path("test_mse.txt"), Path("test_mae.txt")]
return sum([p.exists() for p in expected_files] + [output_dir.is_dir()])
if not output_dir.is_dir():
return False
expected_files = ["test_mse.txt", "test_mae.txt"]
return all([(output_dir / p).exists() for p in expected_files])
# create the test data
import numpy as np
@ -112,7 +114,7 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> No
assert not expected_ckpt_path.exists()
# update the container to look for test data at this location
ml_runner_with_container.container.local_dataset_dir = tmp_path
assert _expected_files_exist() == 0
assert not _expected_files_exist()
actual_train_ckpt_path = ml_runner_with_container.checkpoint_handler.get_recovery_or_checkpoint_path_train()
assert actual_train_ckpt_path is None
@ -123,8 +125,8 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> No
actual_test_ckpt_path = ml_runner_with_container.checkpoint_handler.get_checkpoints_to_test()
assert actual_test_ckpt_path == [expected_ckpt_path]
assert actual_test_ckpt_path[0].exists()
# After training, the outputs directory should now exist
assert _expected_files_exist() == 3
# After training, the outputs directory should now exist and contain the 2 error files
assert _expected_files_exist()
# if no checkpoint handler, no checkpoint paths will be saved and these are required for
# inference so ValueError will be raised
@ -132,6 +134,3 @@ def test_run_inference(ml_runner_with_container: MLRunner, tmp_path: Path) -> No
ml_runner_with_container.checkpoint_handler = None # type: ignore
ml_runner_with_container.run()
assert "expects exactly 1 checkpoint for inference, but got 0" in str(e)
Path("test_mae.txt").unlink()
Path("test_mse.txt").unlink()

View file

@ -8,8 +8,11 @@ from typing import List, Optional
from unittest.mock import patch, MagicMock
import pytest
from azureml.train.hyperdrive import HyperDriveConfig
from health_azure import AzureRunInfo, DatasetConfig
from health_ml.configs.hello_world import HelloWorld
from health_ml.deep_learning_config import WorkflowParams
from health_ml.lightning_container import LightningContainer
from health_ml.runner import Runner
@ -21,12 +24,12 @@ def mock_runner(tmp_path: Path) -> Runner:
@pytest.mark.parametrize("model_name, cluster, num_nodes, should_raise_value_error", [
("HelloContainer", "dummyCluster", 1, False),
("HelloWorld", "dummyCluster", 1, False),
("", "", None, True),
("HelloContainer", "", None, False),
("HelloWorld", "", None, False),
("a", None, 0, True),
(None, "b", 10, True),
("HelloContainer", "b", 10, False)
("HelloWorld", "b", 10, False)
])
def test_parse_and_load_model(mock_runner: Runner, model_name: Optional[str], cluster: Optional[str],
num_nodes: Optional[int], should_raise_value_error: bool) -> None:
@ -63,7 +66,7 @@ def test_parse_and_load_model(mock_runner: Runner, model_name: Optional[str], cl
def test_run(mock_runner: Runner) -> None:
model_name = "HelloContainer"
model_name = "HelloWorld"
arguments = ["", f"--model={model_name}"]
with patch("health_ml.runner.Runner.run_in_situ") as mock_run_in_situ:
with patch("health_ml.runner.get_workspace"):
@ -113,3 +116,94 @@ def test_submit_to_azureml_if_needed(mock_get_workspace: MagicMock,
assert run_info.input_datasets == []
assert run_info.is_running_in_azure_ml is False
assert run_info.output_folder is None
def test_crossvalidation_flag() -> None:
"""
Checks the basic use of the flags that trigger cross validation
:return:
"""
container = HelloWorld()
assert not container.is_crossvalidation_enabled
container.crossval_count = 2
assert container.is_crossvalidation_enabled
container.validate()
# Validation should fail if the cross validation index is out of bounds
container.crossval_index = container.crossval_count
with pytest.raises(ValueError):
container.validate()
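# A plausible sketch of the bounds check that validate() performs here (assumed from the
# assertions above, not copied from hi-ml):
def validate_crossval(crossval_count: int, crossval_index: int) -> None:
    # crossval_count > 1 switches cross-validation on; the index must then address one
    # of the crossval_count splits, i.e. lie in [0, crossval_count - 1]
    if crossval_count > 1 and not (0 <= crossval_index < crossval_count):
        raise ValueError(f"crossval_index must be in [0, {crossval_count - 1}], "
                         f"but got {crossval_index}")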
def test_crossval_config() -> None:
"""
Check if the flags to trigger Hyperdrive runs work as expected.
"""
mock_tuning_config = "foo"
container = HelloWorld()
with patch("health_ml.configs.hello_world.HelloWorld.get_parameter_tuning_config",
return_value=mock_tuning_config):
# Without any flags set, no Hyperdrive config should be returned
assert container.get_hyperdrive_config() is None
# To trigger a hyperparameter search, the commandline flag for hyperdrive must be present
container.hyperdrive = True
assert container.get_hyperdrive_config() == mock_tuning_config
# Triggering cross validation works by just setting crossval_count
container.hyperdrive = False
container.crossval_count = 2
assert container.is_crossvalidation_enabled
crossval_config = container.get_hyperdrive_config()
assert isinstance(crossval_config, HyperDriveConfig)
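# For context, a cross-validation HyperDriveConfig of the kind this test expects could be
# assembled roughly as follows. This is a sketch: the metric name and the run config are
# assumptions, not what hi-ml actually uses.
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive import (GridParameterSampling, HyperDriveConfig,
                                      PrimaryMetricGoal, choice)


def build_crossval_hyperdrive(run_config: ScriptRunConfig, crossval_count: int) -> HyperDriveConfig:
    # One child run per split: crossval_index is drawn from choice(0, ..., crossval_count - 1),
    # which serializes to ["choice", [[0, 1, ...]]] in the generator's parameter space,
    # matching the assertion in test_submit_to_azure_hyperdrive below
    sampling = GridParameterSampling({"crossval_index": choice(*range(crossval_count))})
    return HyperDriveConfig(
        run_config=run_config,
        hyperparameter_sampling=sampling,
        primary_metric_name="val_loss",  # assumed; any metric logged by the model would do
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=crossval_count,
    )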
def test_crossval_argument_names() -> None:
"""
Cross-validation uses hardcoded argument names; check that they match the field names.
"""
container = HelloWorld()
crossval_count = 8
crossval_index = 5
container.crossval_count = crossval_count
container.crossval_index = crossval_index
assert getattr(container, container.CROSSVAL_INDEX_ARG_NAME) == crossval_index
def test_submit_to_azure_hyperdrive(mock_runner: Runner) -> None:
"""
Test if the hyperdrive configurations are passed to the submission function.
"""
model_name = "HelloWorld"
crossval_count = 2
arguments = ["", f"--model={model_name}", "--cluster=foo", "--crossval_count", str(crossval_count)]
with patch("health_ml.runner.Runner.run_in_situ") as mock_run_in_situ:
with patch("health_ml.runner.get_workspace"):
with patch.object(sys, "argv", arguments):
with patch("health_ml.runner.submit_to_azure_if_needed") as mock_submit_to_aml:
mock_runner.run()
mock_run_in_situ.assert_called_once()
mock_submit_to_aml.assert_called_once()
# call_args is a tuple of (args, kwargs)
call_kwargs = mock_submit_to_aml.call_args[1]
# Submission to AzureML should have been turned on because a cluster name was supplied
assert mock_runner.experiment_config.azureml
assert call_kwargs["submit_to_azureml"]
# Check details of the Hyperdrive config
hyperdrive_config = call_kwargs["hyperdrive_config"]
parameter_space = hyperdrive_config._generator_config["parameter_space"]
assert parameter_space[WorkflowParams.CROSSVAL_INDEX_ARG_NAME] == ["choice", [list(range(crossval_count))]]
def test_run_hello_world(mock_runner: Runner) -> None:
"""Test running a model end-to-end via the commandline runner
"""
model_name = "HelloWorld"
arguments = ["", f"--model={model_name}"]
with patch("health_ml.runner.get_workspace") as mock_get_workspace:
with patch.object(sys, "argv", arguments):
mock_runner.run()
# get_workspace should not be called when using the runner outside AzureML, to avoid the
# time-consuming authentication
mock_get_workspace.assert_not_called()
# The experiment summary file is written at the start, the other files during inference
expected_files = ["experiment_summary.txt", "test_mae.txt", "test_mse.txt"]
for file in expected_files:
assert (mock_runner.lightning_container.outputs_folder / file).is_file(), f"Missing file: {file}"

View file

@ -14,16 +14,26 @@
"reportPrivateImportUsage": false,
"executionEnvironments": [
{
"root": "hi-ml/src"
"root": "hi-ml/src",
"extraPaths": [
"hi-ml-azure/src"
]
},
{
"root": "hi-ml/testhiml"
"root": "hi-ml/testhiml",
"extraPaths": [
"hi-ml-azure/src",
"hi-ml/src",
]
},
{
"root": "hi-ml-azure/src"
},
{
"root": "hi-ml-azure/testazure"
"root": "hi-ml-azure/testazure",
"extraPaths": [
"hi-ml-azure/src",
]
}
]
}

View file

@ -1,30 +1,10 @@
coverage==5.5
conda-merge==0.1.5
flake8==3.8.4
gitpython==3.1.7
lightning-bolts==0.4.0
matplotlib==3.3.0
monai==0.6.0
more-itertools==8.10.0
mypy==0.910
opencv-python-headless==4.5.1.48
pandas==1.3.4
param==1.9.3
pillow==9.0.0
pydicom==2.0.0
pylint==2.9.5
black==22.1.0
coverage==6.3.2
flake8==4.0.1
mypy==0.931
pylint==2.12.2
pycobertura==2.0.1
pytest==6.2.2
pytest-cov==2.11.1
pytest-timeout==2.0.1
pytorch-lightning==1.5.5
ruamel.yaml==0.16.12
rpdb==0.1.6
scikit-learn==1.0
seaborn==0.10.1
simpleitk==1.2.4
torch==1.10.0
torchmetrics==0.6.0
torchvision==0.11.1
types-requests==2.25.6
yacs==0.1.8
scikit-learn # This is needed to make pyright pass on the docs folder