Mirror of https://github.com/microsoft/hi-ml.git
ENH: Group AML experiments by PR number (#681)
Ensure that all AML experiments that originate from a PR are submitted to the same experiment. - Add capability to hardcode experiment name for AML submission via an environment variable - Add a PR job that looks for running AML jobs and cancels them - Add timeouts for all smoke test jobs
This commit is contained in:
Parent
7c95a4f471
Commit
e81323f589
|
@ -0,0 +1,50 @@
|
|||
# Composite GitHub Action that prepares a Conda environment with the AzureML SDK
# and then runs the helper script that cancels all unfinished AzureML jobs for
# the experiment named by the HIML_EXPERIMENT_NAME environment variable.
name: 'Cancel environment setup'
description: 'Set up environment hi-ml-cpath workflows'

runs:
  using: "composite"
  steps:
    # Writes the AzureML workspace config.json from workflow secrets, so that
    # the Python script below can connect to the workspace.
    - name: Create AzureML config.json file
      shell: bash
      run: ./create_config.sh

    # Use a cache action to save the full conda environment, so that we don't have to reinstall it every time.
    # Paths are tied to the location of the miniconda installation, and may need adjustment on a different OS.
    - name: Retrieve cached Conda environment
      id: cache-conda
      uses: actions/cache@v3
      with:
        path: /usr/share/miniconda/envs/AzureML_SDK
        # Cache key is derived from the environment file, so editing the file invalidates the cache.
        key: azureml-conda-${{ hashFiles('.github/actions/cancel_azureml_jobs/azureml-env.yml') }}

    # If the cache action didn't find a cache, then install the conda environment afresh.
    - name: Build Conda environment from scratch
      uses: conda-incubator/setup-miniconda@v2
      if: steps.cache-conda.outputs.cache-hit != 'true'
      with:
        environment-file: .github/actions/cancel_azureml_jobs/azureml-env.yml
        activate-environment: AzureML_SDK

    # Modify the path to point to the new or cached Conda environment.
    # This is effectively also what `conda activate` does.
    - name: Activate environment
      shell: bash
      run: |
        echo "Adding Conda bin folder to path"
        echo "/usr/share/miniconda/envs/AzureML_SDK/bin" >> $GITHUB_PATH

    # Diagnostics only: show the Conda setup and the Python interpreter in use.
    - name: Conda info
      shell: bash
      run: conda info

    - name: Show active Python path
      shell: bash
      run: which python

    # The AzureML experiment where the jobs are deleted is read from
    # the environment variable HIML_EXPERIMENT_NAME set in the workflow file.
    - name: Cancel AzureML jobs
      shell: bash
      run: |
        echo "Cancelling all unfinished AzureML jobs for this PR."
        python .github/actions/cancel_azureml_jobs/cancel_aml_jobs.py
|
|
@ -0,0 +1,8 @@
|
|||
# Minimal Conda environment for the AzureML job-cancellation helper
# (.github/actions/cancel_azureml_jobs/cancel_aml_jobs.py).
# Versions are pinned so that the CI cache key (a hash of this file) stays stable.
name: AzureML_SDK
channels:
  - defaults
dependencies:
  - pip=20.1.1
  - python=3.7.3
  - pip:
      - azureml-sdk==1.36.0
|
|
@ -0,0 +1,42 @@
|
|||
# ------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
import os
|
||||
|
||||
from azureml._restclient.constants import RunStatus
|
||||
from azureml.core import Experiment, Run, Workspace
|
||||
from azureml.core.authentication import ServicePrincipalAuthentication
|
||||
|
||||
|
||||
def cancel_running_and_queued_jobs() -> None:
    """Cancel every AzureML run (including child runs) of one experiment that is
    not yet finished or already being cancelled.

    The experiment is named by the environment variable HIML_EXPERIMENT_NAME.
    Authentication uses a service principal whose credentials come from
    HIML_SERVICE_PRINCIPAL_ID and HIML_SERVICE_PRINCIPAL_PASSWORD; the workspace
    is located via HIML_SUBSCRIPTION_ID and HIML_RESOURCE_GROUP.
    """
    print("Authenticating")
    service_principal = ServicePrincipalAuthentication(
        tenant_id='72f988bf-86f1-41af-91ab-2d7cd011db47',
        service_principal_id=os.environ["HIML_SERVICE_PRINCIPAL_ID"],
        service_principal_password=os.environ["HIML_SERVICE_PRINCIPAL_PASSWORD"])
    print("Getting AML workspace")
    aml_workspace = Workspace.get(
        name="hi-ml",
        auth=service_principal,
        subscription_id=os.environ["HIML_SUBSCRIPTION_ID"],
        resource_group=os.environ["HIML_RESOURCE_GROUP"]
    )
    # "/" is not valid in AzureML experiment names. github.ref values like
    # "refs/pull/123/merge" must be sanitized the same way as at submission time.
    experiment_name = os.environ["HIML_EXPERIMENT_NAME"].replace("/", "_")
    print(f"Experiment: {experiment_name}")
    experiment = Experiment(aml_workspace, name=experiment_name)
    print(f"Retrieved experiment {experiment.name}")
    # Runs in these states need no (further) cancellation.
    finished_states = (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.FINALIZING, RunStatus.CANCELED,
                       RunStatus.CANCEL_REQUESTED)
    for run in experiment.get_runs(include_children=True, properties={}):
        assert isinstance(run, Run)
        status_suffix = f"'{run.status}' run {run.id} ({run.display_name})"
        if run.status in finished_states:
            print(f"Skipping {status_suffix}")
        else:
            print(f"Cancelling {status_suffix}")
            run.cancel()


if __name__ == "__main__":
    cancel_running_and_queued_jobs()
|
|
@ -23,9 +23,20 @@ env:
|
|||
HIML_WORKSPACE_NAME: ${{ secrets.HIML_WORKSPACE_NAME }}
|
||||
HIML_SERVICE_PRINCIPAL_ID: ${{ secrets.HIML_SERVICE_PRINCIPAL_ID }}
|
||||
HIML_SERVICE_PRINCIPAL_PASSWORD: ${{ secrets.HIML_SERVICE_PRINCIPAL_PASSWORD }}
|
||||
# Set the AML experiment name for all AML jobs submitted during tests. Github.ref looks like
|
||||
# "refs/pull/123/merge" for PR builds.
|
||||
HIML_EXPERIMENT_NAME: ${{ github.ref }}
|
||||
|
||||
jobs:
|
||||
|
||||
cancel-azureml:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Cancel previous AzureML runs
|
||||
uses: ./.github/actions/cancel_azureml_jobs
|
||||
|
||||
flake8:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
|
@ -64,6 +75,7 @@ jobs:
|
|||
|
||||
pytest:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -85,10 +97,7 @@ jobs:
|
|||
if: always()
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
branch_prefix="refs/heads/"
|
||||
full_branch_name=$GITHUB_REF
|
||||
branch_name_without_prefix=${full_branch_name#$branch_prefix}
|
||||
python hi-ml-azure/run_pytest.py --mark=gpu --cluster=pr-gpu --conda_env=${{ env.folder }}/environment.yml --folder=${{ env.folder }} --coverage_module=${{ env.module_for_coverage_reporting }} --experiment="$branch_name_without_prefix"
|
||||
python hi-ml-azure/run_pytest.py --mark=gpu --cluster=pr-gpu --conda_env=${{ env.folder }}/environment.yml --folder=${{ env.folder }} --coverage_module=${{ env.module_for_coverage_reporting }}
|
||||
|
||||
- name: Upload coverage reports to Codecov
|
||||
# Coverage should also be uploaded if tests still fail.
|
||||
|
@ -100,6 +109,7 @@ jobs:
|
|||
|
||||
smoke_test_cucim_slidespandaimagenetmil:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -115,6 +125,7 @@ jobs:
|
|||
|
||||
smoke_test_openslide_slidespandaimagenetmil:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -130,6 +141,7 @@ jobs:
|
|||
|
||||
smoke_test_tilespandaimagenetmil:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -145,6 +157,7 @@ jobs:
|
|||
|
||||
smoke_test_tcgacrckimagenetmil:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -160,6 +173,7 @@ jobs:
|
|||
|
||||
smoke_test_tcgacrcksslmil:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -175,6 +189,7 @@ jobs:
|
|||
|
||||
smoke_test_crck_simclr:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -190,6 +205,7 @@ jobs:
|
|||
|
||||
smoke_test_crck_flexible_finetuning:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -205,6 +221,7 @@ jobs:
|
|||
|
||||
smoke_test_crck_loss_analysis:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -220,6 +237,7 @@ jobs:
|
|||
|
||||
smoke_test_slides_panda_loss_analysis:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -235,6 +253,7 @@ jobs:
|
|||
|
||||
smoke_test_slides_panda_no_ddp_sampler:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
@ -250,6 +269,7 @@ jobs:
|
|||
|
||||
smoke_test_tiles_panda_no_ddp_sampler:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ cancel-azureml ]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
|
|
|
@ -22,11 +22,22 @@ env:
|
|||
HIML_DIST_ARTIFACT_SUFFIX: '-dist'
|
||||
HIML_PACKAGE_NAME_ARTIFACT_SUFFIX: '-package_name'
|
||||
HIML_VERSION_ARTIFACT_SUFFIX: '-latest_version'
|
||||
# Set the AML experiment name for all AML jobs submitted during tests. Github.ref looks like
|
||||
# "refs/pull/123/merge" for PR builds.
|
||||
HIML_EXPERIMENT_NAME: ${{ github.ref }}
|
||||
PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
|
||||
PYPI_TEST_API_TOKEN: ${{ secrets.PYPI_TEST_API_TOKEN }}
|
||||
|
||||
jobs:
|
||||
|
||||
cancel-azureml:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Cancel previous AzureML runs
|
||||
uses: ./.github/actions/cancel_azureml_jobs
|
||||
|
||||
flake8:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
|
@ -144,7 +155,7 @@ jobs:
|
|||
|
||||
test-artifact-pkg:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: [ build-python ]
|
||||
needs: [ build-python, cancel-azureml ]
|
||||
strategy:
|
||||
matrix:
|
||||
folder: [ hi-ml, hi-ml-azure ]
|
||||
|
|
|
@ -35,7 +35,8 @@ from azureml.train.hyperdrive import HyperDriveConfig, GridParameterSampling, Pr
|
|||
from azureml.dataprep.fuse.daemon import MountContext
|
||||
|
||||
from health_azure.amulet import (ENV_AMLT_DATAREFERENCE_DATA, ENV_AMLT_DATAREFERENCE_OUTPUT, is_amulet_job)
|
||||
from health_azure.utils import (create_python_environment, create_run_recovery_id, find_file_in_parent_to_pythonpath,
|
||||
from health_azure.utils import (ENV_EXPERIMENT_NAME, create_python_environment, create_run_recovery_id,
|
||||
find_file_in_parent_to_pythonpath,
|
||||
is_run_and_child_runs_completed, is_running_in_azure_ml, register_environment,
|
||||
run_duration_string_to_seconds, to_azure_friendly_string, RUN_CONTEXT, get_workspace,
|
||||
PathOrString, DEFAULT_ENVIRONMENT_VARIABLES, get_ml_client,
|
||||
|
@ -424,6 +425,30 @@ def get_display_name_v2(tags: Optional[Dict[str, Any]] = None) -> str:
|
|||
return display_name
|
||||
|
||||
|
||||
def effective_experiment_name(experiment_name: Optional[str],
                              entry_script: Optional[PathOrString] = None) -> str:
    """Choose the experiment name to use for the run.

    Precedence: a non-empty value of the environment variable HIML_EXPERIMENT_NAME wins,
    then the `experiment_name` argument, then the stem of the entry point script. The
    chosen name is sanitized via `to_azure_friendly_string` before being returned.

    :param experiment_name: The name of the AzureML experiment in which the run should be submitted.
    :param entry_script: The script that should be run in AzureML.
    :return: The effective experiment name to use, based on the fallback rules above.
    :raises ValueError: If neither an experiment name nor an entry script is available.
    """
    raw_value = os.environ.get(ENV_EXPERIMENT_NAME, "") or experiment_name
    if not raw_value:
        if entry_script is None:
            raise ValueError("No experiment name provided, and no entry script provided. ")
        raw_value = Path(entry_script).stem
    cleaned_value = to_azure_friendly_string(raw_value)
    assert cleaned_value is not None, "Expecting an actual string"
    return cleaned_value
|
||||
|
||||
|
||||
def submit_run_v2(workspace: Optional[Workspace],
|
||||
experiment_name: str,
|
||||
environment: EnvironmentV2,
|
||||
|
@ -852,6 +877,7 @@ def submit_to_azure_if_needed( # type: ignore
|
|||
|
||||
amlignore_path = snapshot_root_directory / AML_IGNORE_FILE
|
||||
lines_to_append = [str(path) for path in (ignored_folders or [])]
|
||||
|
||||
with append_to_amlignore(amlignore=amlignore_path, lines_to_append=lines_to_append):
|
||||
if strictly_aml_v1:
|
||||
run_config = create_run_configuration(
|
||||
|
@ -880,10 +906,8 @@ def submit_to_azure_if_needed( # type: ignore
|
|||
else:
|
||||
config_to_submit = script_run_config
|
||||
|
||||
effective_experiment_name = experiment_name or Path(script_run_config.script).stem
|
||||
|
||||
run = submit_run(workspace=workspace,
|
||||
experiment_name=effective_experiment_name,
|
||||
experiment_name=effective_experiment_name(experiment_name, script_run_config.script),
|
||||
script_run_config=config_to_submit,
|
||||
tags=tags,
|
||||
wait_for_completion=wait_for_completion,
|
||||
|
@ -898,7 +922,6 @@ def submit_to_azure_if_needed( # type: ignore
|
|||
if entry_script is None:
|
||||
entry_script = Path(sys.argv[0])
|
||||
script_params = script_params or sys.argv[1:]
|
||||
effective_experiment_name = experiment_name or Path(entry_script).stem
|
||||
|
||||
ml_client = get_ml_client(ml_client=ml_client, aml_workspace=workspace)
|
||||
registered_env = register_environment_v2(environment, ml_client)
|
||||
|
@ -908,7 +931,7 @@ def submit_to_azure_if_needed( # type: ignore
|
|||
run = submit_run_v2(workspace=workspace,
|
||||
input_datasets_v2=input_datasets_v2,
|
||||
output_datasets_v2=output_datasets_v2,
|
||||
experiment_name=effective_experiment_name,
|
||||
experiment_name=effective_experiment_name(experiment_name, entry_script),
|
||||
environment=registered_env,
|
||||
snapshot_root_directory=snapshot_root_directory,
|
||||
entry_script=entry_script,
|
||||
|
|
|
@ -76,6 +76,9 @@ ENV_LOCAL_RANK = "LOCAL_RANK"
|
|||
ENV_RANK = "RANK"
|
||||
MASTER_PORT_DEFAULT = 6105
|
||||
|
||||
# Environment variables that affect job submission, in particular in builds
|
||||
ENV_EXPERIMENT_NAME = "HIML_EXPERIMENT_NAME"
|
||||
|
||||
# Other Azure ML related variables
|
||||
ENVIRONMENT_VERSION = "1"
|
||||
FINAL_MODEL_FOLDER = "final_model"
|
||||
|
|
|
@ -35,17 +35,16 @@ from azure.core.exceptions import ClientAuthenticationError, ResourceNotFoundErr
|
|||
from azureml.data.azure_storage_datastore import AzureBlobDatastore
|
||||
|
||||
import health_azure.utils as util
|
||||
from health_azure.himl import AML_IGNORE_FILE, append_to_amlignore
|
||||
from health_azure.himl import AML_IGNORE_FILE, append_to_amlignore, effective_experiment_name
|
||||
from health_azure.utils import (ENV_MASTER_ADDR, ENV_MASTER_PORT, MASTER_PORT_DEFAULT,
|
||||
PackageDependency, create_argparser, get_credential)
|
||||
from testazure.test_himl import RunTarget, render_and_run_test_script
|
||||
from testazure.utils_testazure import (DEFAULT_IGNORE_FOLDERS, DEFAULT_WORKSPACE, MockRun, change_working_directory,
|
||||
himl_azure_root, repository_root)
|
||||
experiment_for_unittests, himl_azure_root, repository_root)
|
||||
|
||||
RUN_ID = uuid4().hex
|
||||
RUN_NUMBER = 42
|
||||
EXPERIMENT_NAME = "fancy-experiment"
|
||||
AML_TESTS_EXPERIMENT = "test_experiment"
|
||||
|
||||
|
||||
def oh_no() -> None:
|
||||
|
@ -1103,16 +1102,16 @@ def test_get_latest_aml_run_from_experiment(num_runs: int, tags: Dict[str, str],
|
|||
assert len(aml_runs) == expected_num_returned
|
||||
|
||||
|
||||
def test_get_latest_aml_run_from_experiment_remote(tmp_path: Path) -> None:
|
||||
def test_get_latest_aml_run_from_experiment_remote() -> None:
|
||||
"""
|
||||
Test that a remote run with particular tags can be correctly retrieved, ignoring any more recent
|
||||
experiments which do not have the correct tags. Note: this test will instantiate 2 new Runs in the
|
||||
workspace described in your config.json file, under an experiment defined by AML_TESTS_EXPERIMENT
|
||||
workspace described in your config.json file, under an experiment defined by experiment_for_unittests()
|
||||
"""
|
||||
ws = DEFAULT_WORKSPACE.workspace
|
||||
assert True
|
||||
|
||||
experiment = Experiment(ws, AML_TESTS_EXPERIMENT)
|
||||
experiment = Experiment(ws, experiment_for_unittests())
|
||||
config = ScriptRunConfig(
|
||||
source_directory=".",
|
||||
command=["cd ."], # command that does nothing
|
||||
|
@ -1123,6 +1122,7 @@ def test_get_latest_aml_run_from_experiment_remote(tmp_path: Path) -> None:
|
|||
amlignore=Path("") / AML_IGNORE_FILE,
|
||||
lines_to_append=DEFAULT_IGNORE_FOLDERS):
|
||||
first_run = experiment.submit(config)
|
||||
first_run.diplay_name = "test_get_latest_aml_run_from_experiment_remote"
|
||||
tags = {"experiment_type": "great_experiment"}
|
||||
first_run.set_tags(tags)
|
||||
first_run.wait_for_completion()
|
||||
|
@ -1136,7 +1136,7 @@ def test_get_latest_aml_run_from_experiment_remote(tmp_path: Path) -> None:
|
|||
second_run.remove_tags(tags)
|
||||
|
||||
# Retrieve latest run with given tags (expect first_run to be returned)
|
||||
retrieved_runs = util.get_latest_aml_runs_from_experiment(AML_TESTS_EXPERIMENT, tags=tags, aml_workspace=ws)
|
||||
retrieved_runs = util.get_latest_aml_runs_from_experiment(experiment_for_unittests(), tags=tags, aml_workspace=ws)
|
||||
assert len(retrieved_runs) == 1
|
||||
assert retrieved_runs[0].id == first_run.id
|
||||
assert retrieved_runs[0].get_tags() == tags
|
||||
|
@ -1258,7 +1258,7 @@ def test_download_file_from_run(tmp_path: Path, dummy_env_vars: Dict[str, str],
|
|||
def test_download_file_from_run_remote(tmp_path: Path) -> None:
|
||||
# This test will create a Run in your workspace (using only local compute)
|
||||
ws = DEFAULT_WORKSPACE.workspace
|
||||
experiment = Experiment(ws, AML_TESTS_EXPERIMENT)
|
||||
experiment = Experiment(ws, experiment_for_unittests())
|
||||
config = ScriptRunConfig(
|
||||
source_directory=".",
|
||||
command=["cd ."], # command that does nothing
|
||||
|
@ -1268,6 +1268,7 @@ def test_download_file_from_run_remote(tmp_path: Path) -> None:
|
|||
amlignore=Path("") / AML_IGNORE_FILE,
|
||||
lines_to_append=DEFAULT_IGNORE_FOLDERS):
|
||||
run = experiment.submit(config)
|
||||
run.diplay_name = "test_download_file_from_run_remote"
|
||||
|
||||
file_to_upload = tmp_path / "dummy_file.txt"
|
||||
file_contents = "Hello world"
|
||||
|
@ -1308,7 +1309,7 @@ def test_download_run_file_during_run(tmp_path: Path) -> None:
|
|||
information about the workspace to use, but pick up the current workspace.
|
||||
"""
|
||||
# Create a run that contains a simple txt file
|
||||
experiment_name = "himl-tests"
|
||||
experiment_name = effective_experiment_name("himl-tests")
|
||||
run_to_download_from = util.create_aml_run_object(experiment_name=experiment_name,
|
||||
workspace=DEFAULT_WORKSPACE.workspace)
|
||||
file_contents = "Hello World!"
|
||||
|
@ -1582,7 +1583,7 @@ def test_checkpoint_download_remote(tmp_path: Path) -> None:
|
|||
prefix = "outputs/checkpoints/"
|
||||
|
||||
ws = DEFAULT_WORKSPACE.workspace
|
||||
experiment = Experiment(ws, AML_TESTS_EXPERIMENT)
|
||||
experiment = Experiment(ws, experiment_for_unittests())
|
||||
config = ScriptRunConfig(
|
||||
source_directory=".",
|
||||
command=["cd ."], # command that does nothing
|
||||
|
@ -1592,6 +1593,7 @@ def test_checkpoint_download_remote(tmp_path: Path) -> None:
|
|||
amlignore=Path("") / AML_IGNORE_FILE,
|
||||
lines_to_append=DEFAULT_IGNORE_FOLDERS):
|
||||
run = experiment.submit(config)
|
||||
run.display_name = "test_checkpoint_download_remote"
|
||||
|
||||
file_contents = "Hello world"
|
||||
file_name = "" # for pyright
|
||||
|
@ -2341,8 +2343,8 @@ def test_create_run() -> None:
|
|||
"""
|
||||
Test if we can create an AML run object here in the test suite, write logs and read them back in.
|
||||
"""
|
||||
run_name = "foo"
|
||||
experiment_name = "himl-tests"
|
||||
run_name = "test_create_run"
|
||||
experiment_name = effective_experiment_name("himl-tests")
|
||||
run: Optional[Run] = None
|
||||
try:
|
||||
run = util.create_aml_run_object(experiment_name=experiment_name, run_name=run_name,
|
||||
|
|
|
@ -45,6 +45,7 @@ from health_azure.datasets import (
|
|||
)
|
||||
from health_azure.utils import (
|
||||
DEFAULT_ENVIRONMENT_VARIABLES,
|
||||
ENV_EXPERIMENT_NAME,
|
||||
ENVIRONMENT_VERSION,
|
||||
EXPERIMENT_RUN_SEPARATOR,
|
||||
WORKSPACE_CONFIG_JSON,
|
||||
|
@ -741,6 +742,7 @@ def test_submit_run(mock_workspace: mock.MagicMock,
|
|||
assert "AzureML completed" not in out
|
||||
|
||||
|
||||
@pytest.mark.fast
|
||||
def test_submit_run_v2(tmp_path: Path) -> None:
|
||||
def _mock_sweep(*args: Any, **kwargs: Any) -> MagicMock:
|
||||
assert kwargs.get("compute") == dummy_compute_target
|
||||
|
@ -864,7 +866,7 @@ def test_submit_run_v2(tmp_path: Path) -> None:
|
|||
# 'command' should be called with the same args
|
||||
print(mock_command.call)
|
||||
# command should be called once when initialising the command job and once when updating the param sampling
|
||||
mock_command.call_count == 2
|
||||
assert mock_command.call_count == 2
|
||||
|
||||
mock_command.assert_any_call(
|
||||
code=str(dummy_root_directory),
|
||||
|
@ -1657,6 +1659,7 @@ def test_extract_v2_inputs_outputs_from_args() -> None:
|
|||
assert len(output_datasets) == 0
|
||||
|
||||
|
||||
@pytest.mark.fast
|
||||
def test_get_display_name_v2() -> None:
|
||||
dummy_display_name = "job display name"
|
||||
expected_display_name = "job-display-name"
|
||||
|
@ -1676,3 +1679,24 @@ def test_get_display_name_v2() -> None:
|
|||
# if no tags provided, display name should be empty
|
||||
display_name = himl.get_display_name_v2()
|
||||
assert display_name == ""
|
||||
|
||||
|
||||
@pytest.mark.fast
def test_experiment_name() -> None:
    """Test the logic for choosing experiment names: the environment variable HIML_EXPERIMENT_NAME
    takes precedence, then the explicitly given experiment name, then the entry script name."""
    # When the test suite runs on the Github, the environment variable "HIML_EXPERIMENT_NAME" will be set.
    # Remove it to test the default behaviour.
    with mock.patch.dict(os.environ):
        os.environ.pop(ENV_EXPERIMENT_NAME, None)
        # With no environment variable, the explicit argument is used.
        assert himl.effective_experiment_name("explicit", Path()) == "explicit"
        # With no explicit name either, the experiment is named after the entry script's stem.
        assert himl.effective_experiment_name("", Path("from_script.py")) == "from_script"
        # The chosen name is passed through to_azure_friendly_string before being returned.
        with mock.patch("health_azure.himl.to_azure_friendly_string", return_value="mock_return"):
            assert himl.effective_experiment_name("explicit", Path()) == "mock_return"
        # Provide experiment names with special characters here that should be filtered out
        assert himl.effective_experiment_name("explicit/", Path()) == "explicit_"
    # The environment variable overrides both the explicit name and the script name,
    # and is itself sanitized.
    with mock.patch.dict(os.environ, {ENV_EXPERIMENT_NAME: "name/from/env"}):
        assert himl.effective_experiment_name("explicit", Path()) == "name_from_env"
    with mock.patch.dict(os.environ, {ENV_EXPERIMENT_NAME: "name_from_env"}):
        assert himl.effective_experiment_name("explicit", Path()) == "name_from_env"
        assert himl.effective_experiment_name("", Path("from_script.py")) == "name_from_env"
||||
|
|
|
@ -10,7 +10,8 @@ from contextlib import contextmanager
|
|||
from pathlib import Path
|
||||
from typing import Dict, Generator, Optional
|
||||
|
||||
from health_azure.utils import (UnitTestWorkspaceWrapper, WORKSPACE_CONFIG_JSON)
|
||||
from health_azure.utils import (ENV_EXPERIMENT_NAME, WORKSPACE_CONFIG_JSON, UnitTestWorkspaceWrapper,
|
||||
to_azure_friendly_string)
|
||||
|
||||
DEFAULT_DATASTORE = "himldatasets"
|
||||
FALLBACK_SINGLE_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538212_d2b07afd"
|
||||
|
@ -46,6 +47,15 @@ def repository_root() -> Path:
|
|||
return himl_azure_root().parent
|
||||
|
||||
|
||||
def experiment_for_unittests() -> str:
    """
    Gets the name of the AzureML experiment that unit tests should use.

    Reads the environment variable HIML_EXPERIMENT_NAME (falling back to "unittests")
    and sanitizes it via `to_azure_friendly_string`.
    """
    raw_name = os.getenv(ENV_EXPERIMENT_NAME, "unittests")
    sanitized_name = to_azure_friendly_string(raw_name)
    assert sanitized_name is not None
    return sanitized_name
|
||||
|
||||
|
||||
@contextmanager
|
||||
def change_working_directory(path_or_str: Path) -> Generator:
|
||||
"""
|
||||
|
|
|
@ -160,11 +160,11 @@ define DEFAULT_SMOKE_TEST_ARGS
|
|||
endef
|
||||
|
||||
define AML_ONE_DEVICE_ARGS
|
||||
--cluster=testing-nc6 --wait_for_completion --num_nodes=1 --max_num_gpus=1 --strictly_aml_v1=True
|
||||
--cluster=testing-nc6 --wait_for_completion --num_nodes=1 --max_num_gpus=1 --strictly_aml_v1=True --max_run_duration=1h
|
||||
endef
|
||||
|
||||
define AML_MULTIPLE_DEVICE_ARGS
|
||||
--cluster=dedicated-nc24s-v2 --wait_for_completion --strictly_aml_v1=True
|
||||
--cluster=dedicated-nc24s-v2 --wait_for_completion --strictly_aml_v1=True --max_run_duration=1h
|
||||
endef
|
||||
|
||||
define DEEPSMILEDEFAULT_SMOKE_TEST_ARGS
|
||||
|
@ -206,7 +206,7 @@ smoke_test_cucim_slidespandaimagenetmil_local:
|
|||
# Once running in AML the following test takes around 6 minutes
|
||||
smoke_test_cucim_slidespandaimagenetmil_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDASLIDES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_cucim_slidespandaimagenetmil_aml;}
|
||||
|
||||
smoke_test_openslide_slidespandaimagenetmil_local:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDASLIDES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
|
@ -214,7 +214,7 @@ smoke_test_openslide_slidespandaimagenetmil_local:
|
|||
|
||||
smoke_test_openslide_slidespandaimagenetmil_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDASLIDES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${OPENSLIDE_BACKEND_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${OPENSLIDE_BACKEND_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_openslide_slidespandaimagenetmil_aml;}
|
||||
|
||||
# The following test takes about 6 minutes
|
||||
smoke_test_tilespandaimagenetmil_local:
|
||||
|
@ -223,7 +223,7 @@ smoke_test_tilespandaimagenetmil_local:
|
|||
|
||||
smoke_test_tilespandaimagenetmil_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDATILES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_tilespandaimagenetmil_aml;}
|
||||
|
||||
# The following test takes about 30 seconds
|
||||
smoke_test_tcgacrcksslmil_local:
|
||||
|
@ -232,7 +232,7 @@ smoke_test_tcgacrcksslmil_local:
|
|||
|
||||
smoke_test_tcgacrcksslmil_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${TCGACRCKSSLMIL_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${TCGACRCKSSLMIL_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${TCGACRCKSSLMIL_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_tcgacrcksslmil_aml;}
|
||||
|
||||
smoke_test_tcgacrckimagenetmil_local:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${TCGACRCKIMANEGETMIL_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
|
@ -240,7 +240,7 @@ smoke_test_tcgacrckimagenetmil_local:
|
|||
|
||||
smoke_test_tcgacrckimagenetmil_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${TCGACRCKIMANEGETMIL_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_tcgacrckimagenetmil_aml;}
|
||||
|
||||
# The following test takes about 3 minutes
|
||||
smoke_test_crck_simclr_local:
|
||||
|
@ -249,7 +249,7 @@ smoke_test_crck_simclr_local:
|
|||
|
||||
smoke_test_crck_simclr_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${CRCKSIMCLR_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${CRCKSIMCLR_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${CRCKSIMCLR_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_crck_simclr_aml;}
|
||||
|
||||
smoke_test_crck_flexible_finetuning_local:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${TCGACRCKSSLMIL_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
|
@ -257,7 +257,7 @@ smoke_test_crck_flexible_finetuning_local:
|
|||
|
||||
smoke_test_crck_flexible_finetuning_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${TCGACRCKSSLMIL_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${TCGACRCKSSLMIL_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} ${CRCK_TUNING_ARGS};}
|
||||
${TCGACRCKSSLMIL_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} ${CRCK_TUNING_ARGS} --tag smoke_test_crck_flexible_finetuning_aml;}
|
||||
|
||||
smoke_test_crck_loss_analysis_local:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${TCGACRCKSSLMIL_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
|
@ -265,7 +265,7 @@ smoke_test_crck_loss_analysis_local:
|
|||
|
||||
smoke_test_crck_loss_analysis_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${TCGACRCKSSLMIL_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${TCGACRCKSSLMIL_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} ${LOSS_ANALYSIS_ARGS};}
|
||||
${TCGACRCKSSLMIL_SMOKE_TEST_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} ${LOSS_ANALYSIS_ARGS} --tag smoke_test_crck_loss_analysis_aml;}
|
||||
|
||||
smoke_test_slides_panda_loss_analysis_local:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDASLIDES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
|
@ -273,7 +273,7 @@ smoke_test_slides_panda_loss_analysis_local:
|
|||
|
||||
smoke_test_slides_panda_loss_analysis_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDASLIDES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${LOSS_ANALYSIS_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${LOSS_ANALYSIS_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_slides_panda_loss_analysis_aml;}
|
||||
|
||||
smoke_test_slides_panda_no_ddp_sampler_local:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDASLIDES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
|
@ -281,7 +281,7 @@ smoke_test_slides_panda_no_ddp_sampler_local:
|
|||
|
||||
smoke_test_slides_panda_no_ddp_sampler_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDASLIDES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${DDP_SAMPLER_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${DDP_SAMPLER_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_slides_panda_no_ddp_sampler_aml;}
|
||||
|
||||
smoke_test_tiles_panda_no_ddp_sampler_local:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDATILES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
|
@ -289,7 +289,7 @@ smoke_test_tiles_panda_no_ddp_sampler_local:
|
|||
|
||||
smoke_test_tiles_panda_no_ddp_sampler_aml:
|
||||
{ ${BASE_CPATH_RUNNER_COMMAND} ${DEEPSMILEPANDATILES_ARGS} ${DEFAULT_SMOKE_TEST_ARGS} \
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${DDP_SAMPLER_ARGS} ${AML_MULTIPLE_DEVICE_ARGS};}
|
||||
${DEEPSMILEDEFAULT_SMOKE_TEST_ARGS} ${DDP_SAMPLER_ARGS} ${AML_MULTIPLE_DEVICE_ARGS} --tag smoke_test_tiles_panda_no_ddp_sampler_aml;}
|
||||
|
||||
smoke tests local: smoke_test_cucim_slidespandaimagenetmil_local smoke_test_openslide_slidespandaimagenetmil_local smoke_test_tilespandaimagenetmil_local smoke_test_tcgacrcksslmil_local smoke_test_crck_simclr_local smoke_test_crck_flexible_finetuning_local smoke_test_tcgacrckimagenetmil_local smoke_test_crck_loss_analysis_local smoke_test_slides_panda_loss_analysis_local smoke_test_slides_panda_no_ddp_sampler_local smoke_test_tiles_panda_no_ddp_sampler_local
|
||||
|
||||
|
|
|
@ -50,3 +50,7 @@ class ExperimentConfig(param.Parameterized):
|
|||
param.ClassSelector(class_=Path, default=None, allow_None=True,
|
||||
doc="The path to the AzureML workspace configuration file. If not specified, the "
|
||||
"configuration file in the current folder or one of its parents will be used.")
|
||||
max_run_duration: str = param.String(
|
||||
default="", doc="The maximum runtime that is allowed for this job in AzureML. This is given as a floating"
|
||||
"point number with a string suffix s, m, h, d for seconds, minutes, hours, day. Examples: '3.5h', '2d'"
|
||||
)
|
||||
|
|
|
@ -281,6 +281,7 @@ class Runner:
|
|||
input_datasets=input_datasets, # type: ignore
|
||||
num_nodes=self.experiment_config.num_nodes,
|
||||
wait_for_completion=self.experiment_config.wait_for_completion,
|
||||
max_run_duration=self.experiment_config.max_run_duration,
|
||||
ignored_folders=[],
|
||||
submit_to_azureml=bool(self.experiment_config.cluster),
|
||||
docker_base_image=DEFAULT_DOCKER_BASE_IMAGE,
|
||||
|
|
|
@ -28,7 +28,7 @@ from health_ml.utils.regression_test_utils import (
|
|||
compare_folder_contents,
|
||||
compare_folders_and_run_outputs,
|
||||
)
|
||||
from testazure.utils_testazure import DEFAULT_WORKSPACE
|
||||
from testazure.utils_testazure import DEFAULT_WORKSPACE, experiment_for_unittests
|
||||
|
||||
|
||||
def create_folder_and_write_text(file: Path, text: str) -> None:
|
||||
|
@ -242,7 +242,9 @@ def upload_to_run_and_compare(regression_test_subfolder: str, run_to_mock: str,
|
|||
file_contents = "some file contents"
|
||||
file_name = "contents.txt"
|
||||
regression_test_folder = tmp_path / "expected"
|
||||
run = create_aml_run_object(workspace=DEFAULT_WORKSPACE.workspace, experiment_name="test_regression_test_utils")
|
||||
run = create_aml_run_object(workspace=DEFAULT_WORKSPACE.workspace,
|
||||
experiment_name=experiment_for_unittests(),
|
||||
run_name="upload_to_run_and_compare")
|
||||
# Upload a single file to the newly created run. When comparing the run output files,
|
||||
# and seeing this in the set of files that are expected to exist on the run, this should pass.
|
||||
file1 = tmp_path / file_name
|
||||
|
|
|
@ -2,12 +2,14 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
|
||||
# ------------------------------------------------------------------------------------------
|
||||
import os
|
||||
import shutil
|
||||
import pytest
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Generator
|
||||
from unittest.mock import DEFAULT, MagicMock, Mock, patch
|
||||
from unittest import mock
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
from _pytest.logging import LogCaptureFixture
|
||||
from pytorch_lightning import LightningModule
|
||||
|
||||
|
@ -21,8 +23,8 @@ from health_ml.run_ml import MLRunner, get_mlflow_run_id_from_previous_loggers
|
|||
from health_ml.utils.checkpoint_utils import CheckpointParser
|
||||
from health_ml.utils.common_utils import is_gpu_available
|
||||
from health_ml.utils.lightning_loggers import HimlMLFlowLogger, StoringLogger
|
||||
from health_azure.utils import is_global_rank_zero
|
||||
from testazure.utils_testazure import DEFAULT_WORKSPACE
|
||||
from health_azure.utils import ENV_EXPERIMENT_NAME, is_global_rank_zero
|
||||
from testazure.utils_testazure import DEFAULT_WORKSPACE, experiment_for_unittests
|
||||
from testhiml.utils.fixed_paths_for_tests import mock_run_id
|
||||
|
||||
no_gpu = not is_gpu_available()
|
||||
|
@ -299,11 +301,11 @@ def test_run(run_inference_only: bool, run_extra_val_epoch: bool, ml_runner_with
|
|||
with patch("health_ml.run_ml.create_lightning_trainer") as mock_create_trainer:
|
||||
with patch.multiple(
|
||||
ml_runner_with_container,
|
||||
checkpoint_handler=DEFAULT,
|
||||
load_model_checkpoint=DEFAULT,
|
||||
run_training=DEFAULT,
|
||||
run_validation=DEFAULT,
|
||||
run_inference=DEFAULT,
|
||||
checkpoint_handler=mock.DEFAULT,
|
||||
load_model_checkpoint=mock.DEFAULT,
|
||||
run_training=mock.DEFAULT,
|
||||
run_validation=mock.DEFAULT,
|
||||
run_inference=mock.DEFAULT,
|
||||
) as mocks:
|
||||
mock_create_trainer.return_value = MagicMock(), MagicMock()
|
||||
ml_runner_with_container.run()
|
||||
|
@ -327,9 +329,9 @@ def test_run_inference_only(ml_runner_with_run_id: MLRunner) -> None:
|
|||
with patch("health_ml.run_ml.create_lightning_trainer") as mock_create_trainer:
|
||||
with patch.multiple(
|
||||
ml_runner_with_run_id,
|
||||
run_training=DEFAULT,
|
||||
run_validation=DEFAULT,
|
||||
validate_model_weights=DEFAULT
|
||||
run_training=mock.DEFAULT,
|
||||
run_validation=mock.DEFAULT,
|
||||
validate_model_weights=mock.DEFAULT
|
||||
) as mocks:
|
||||
mock_trainer = MagicMock()
|
||||
mock_create_trainer.return_value = mock_trainer, MagicMock()
|
||||
|
@ -349,7 +351,7 @@ def test_resume_training_from_run_id(run_extra_val_epoch: bool, ml_runner_with_r
|
|||
ml_runner_with_run_id.container.max_num_gpus = 0
|
||||
ml_runner_with_run_id.container.max_epochs += 10
|
||||
assert ml_runner_with_run_id.checkpoint_handler.trained_weights_path
|
||||
with patch.multiple(ml_runner_with_run_id, run_validation=DEFAULT, run_inference=DEFAULT) as mocks:
|
||||
with patch.multiple(ml_runner_with_run_id, run_validation=mock.DEFAULT, run_inference=mock.DEFAULT) as mocks:
|
||||
ml_runner_with_run_id.run()
|
||||
assert mocks["run_validation"].called == run_extra_val_epoch
|
||||
mocks["run_inference"].assert_called_once()
|
||||
|
@ -381,8 +383,7 @@ def test_log_on_vm(log_from_vm: bool) -> None:
|
|||
container = HelloWorld()
|
||||
container.max_epochs = 1
|
||||
# Mimic an experiment name given on the command line.
|
||||
experiment_name = "unittest"
|
||||
container.experiment = experiment_name
|
||||
container.experiment = experiment_for_unittests()
|
||||
# The tag is used to identify the run, similar to the behaviour when submitting a run to AzureML.
|
||||
tag = f"test_log_on_vm [{log_from_vm}]"
|
||||
container.tag = tag
|
||||
|
@ -404,18 +405,23 @@ def test_log_on_vm(log_from_vm: bool) -> None:
|
|||
assert isinstance(logger, HimlMLFlowLogger)
|
||||
|
||||
|
||||
@pytest.mark.fast
|
||||
def test_experiment_name() -> None:
|
||||
"""Test that the experiment name is set correctly, choosing either the experiment name given on the commandline
|
||||
or the model name"""
|
||||
container = HelloWorld()
|
||||
# No experiment name given on the commandline: use the model name
|
||||
model_name = "some_model"
|
||||
container._model_name = model_name
|
||||
assert container.effective_experiment_name == model_name
|
||||
# Experiment name given on the commandline: use the experiment name
|
||||
experiment_name = "unittest"
|
||||
container.experiment = experiment_name
|
||||
assert container.effective_experiment_name == experiment_name
|
||||
# When the test suite runs on the Github, the environment variable "HIML_EXPERIMENT_NAME" will be set.
|
||||
# Remove it to test the default behaviour.
|
||||
with patch.dict(os.environ):
|
||||
os.environ.pop(ENV_EXPERIMENT_NAME, None)
|
||||
container = HelloWorld()
|
||||
# No experiment name given on the commandline: use the model name
|
||||
model_name = "some_model"
|
||||
container._model_name = model_name
|
||||
assert container.effective_experiment_name == model_name
|
||||
# Experiment name given on the commandline: use the experiment name
|
||||
experiment_name = experiment_for_unittests()
|
||||
container.experiment = experiment_name
|
||||
assert container.effective_experiment_name == experiment_name
|
||||
|
||||
|
||||
def test_get_mlflow_run_id_from_previous_loggers() -> None:
|
||||
|
|
|
@ -12,6 +12,7 @@ from torchvision.transforms import Compose, Resize, CenterCrop
|
|||
from azureml.core import Run
|
||||
|
||||
from health_azure import object_to_yaml, create_aml_run_object
|
||||
from health_azure.himl import effective_experiment_name
|
||||
from health_azure.utils import is_running_in_azure_ml
|
||||
from health_ml.utils.serialization import ModelInfo
|
||||
from testazure.utils_testazure import DEFAULT_WORKSPACE
|
||||
|
@ -107,7 +108,7 @@ def test_serialization_roundtrip() -> None:
|
|||
def test_get_metadata() -> None:
|
||||
"""Test if model metadata is read correctly from the AzureML run."""
|
||||
run_name = "foo"
|
||||
experiment_name = "himl-tests"
|
||||
experiment_name = effective_experiment_name("himl-tests")
|
||||
run: Optional[Run] = None
|
||||
try:
|
||||
run = create_aml_run_object(
|
||||
|
|
|
@ -6,6 +6,7 @@ from pathlib import Path
|
|||
from typing import Optional
|
||||
from functools import lru_cache
|
||||
|
||||
from health_azure.himl import effective_experiment_name
|
||||
from health_azure.utils import PathOrString
|
||||
from health_azure.utils import create_aml_run_object
|
||||
from testazure.utils_testazure import DEFAULT_WORKSPACE
|
||||
|
@ -50,7 +51,7 @@ def mock_run_id(id: int = 0) -> str:
|
|||
:return: The run id of the created run that contains the checkpoint.
|
||||
"""
|
||||
|
||||
experiment_name = "himl-tests"
|
||||
experiment_name = effective_experiment_name("himl-tests")
|
||||
run_to_download_from = create_aml_run_object(experiment_name=experiment_name, workspace=DEFAULT_WORKSPACE.workspace)
|
||||
full_file_path = full_test_data_path(suffix="hello_world_checkpoint.ckpt")
|
||||
|
||||
|
|
|
@ -20,13 +20,14 @@ from azureml._restclient.constants import RunStatus
|
|||
from azureml.core import Run
|
||||
|
||||
from health_azure import RUN_CONTEXT, create_aml_run_object
|
||||
from health_azure.himl import effective_experiment_name
|
||||
from health_ml.utils import AzureMLLogger, AzureMLProgressBar, log_learning_rate, log_on_epoch
|
||||
from health_ml.utils.logging import _preprocess_hyperparams
|
||||
from testhiml.utils_testhiml import DEFAULT_WORKSPACE
|
||||
|
||||
|
||||
def create_unittest_run_object(snapshot_directory: Optional[Path] = None) -> Run:
|
||||
return create_aml_run_object(experiment_name="himl-tests",
|
||||
return create_aml_run_object(experiment_name=effective_experiment_name("himl-tests"),
|
||||
workspace=DEFAULT_WORKSPACE.workspace,
|
||||
snapshot_directory=snapshot_directory or ".")
|
||||
|
||||
|
@ -295,7 +296,9 @@ def test_azureml_logger_actual_run() -> None:
|
|||
"""
|
||||
When running outside of AzureML, a new run should be created.
|
||||
"""
|
||||
logger = AzureMLLogger(enable_logging_outside_azure_ml=True, workspace=DEFAULT_WORKSPACE.workspace)
|
||||
logger = AzureMLLogger(enable_logging_outside_azure_ml=True,
|
||||
workspace=DEFAULT_WORKSPACE.workspace,
|
||||
run_name="test_azureml_logger_actual_run")
|
||||
assert not logger.is_running_in_azure_ml
|
||||
assert logger.run is not None
|
||||
assert logger.run != RUN_CONTEXT
|
||||
|
|
Загрузка…
Ссылка в новой задаче