ENH: Allow command in script run config (#909)

This commit is contained in:
Anton Schwaighofer 2023-11-08 12:41:46 +00:00 коммит произвёл GitHub
Родитель 83df149051
Коммит 2d8a380108
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 76 добавлений и 26 удалений

Просмотреть файл

@ -343,8 +343,9 @@ def create_crossval_hyperparam_args_v2(
def create_script_run( def create_script_run(
script_params: List[str], script_params: List[str],
snapshot_root_directory: Optional[Path], snapshot_root_directory: Optional[Path] = None,
entry_script: Optional[PathOrString], entry_script: Optional[PathOrString] = None,
entry_command: Optional[PathOrString] = None,
) -> ScriptRunConfig: ) -> ScriptRunConfig:
""" """
Creates an AzureML ScriptRunConfig object, that holds the information about the snapshot, the entry script, and Creates an AzureML ScriptRunConfig object, that holds the information about the snapshot, the entry script, and
@ -354,13 +355,20 @@ def create_script_run(
parameters can be generated using the ``_get_script_params()`` function. parameters can be generated using the ``_get_script_params()`` function.
:param snapshot_root_directory: The directory that contains all code that should be packaged and sent to AzureML. :param snapshot_root_directory: The directory that contains all code that should be packaged and sent to AzureML.
All Python code that the script uses must be copied over. All Python code that the script uses must be copied over.
:param entry_script: The script that should be run in AzureML. If None, the current main Python file will be :param entry_script: The Python script that should be run in AzureML. If None, the current main Python file will be
executed. executed. If entry_command is provided, this argument is ignored.
:return: :param entry_command: The command that should be run in AzureML. Command arguments will be taken from
the 'script_params' argument. If provided, this will override the entry_script argument.
:return: A configuration object for a script run.
""" """
snapshot_root = sanitize_snapshoot_directory(snapshot_root_directory) snapshot_root = sanitize_snapshoot_directory(snapshot_root_directory)
entry_script_relative = sanitize_entry_script(entry_script, snapshot_root) if entry_command is not None:
return ScriptRunConfig(source_directory=str(snapshot_root), script=entry_script_relative, arguments=script_params) return ScriptRunConfig(source_directory=str(snapshot_root), command=[entry_command, *script_params])
else:
entry_script_relative = sanitize_entry_script(entry_script, snapshot_root)
return ScriptRunConfig(
source_directory=str(snapshot_root), script=entry_script_relative, arguments=script_params
)
def effective_experiment_name(experiment_name: Optional[str], entry_script: Optional[PathOrString] = None) -> str: def effective_experiment_name(experiment_name: Optional[str], entry_script: Optional[PathOrString] = None) -> str:
@ -393,9 +401,10 @@ def effective_experiment_name(experiment_name: Optional[str], entry_script: Opti
def submit_run_v2( def submit_run_v2(
ml_client: MLClient, ml_client: MLClient,
environment: EnvironmentV2, environment: EnvironmentV2,
entry_script: PathOrString,
script_params: List[str],
compute_target: str, compute_target: str,
entry_script: Optional[PathOrString] = None,
script_params: Optional[List[str]] = None,
entry_command: Optional[PathOrString] = None,
environment_variables: Optional[Dict[str, str]] = None, environment_variables: Optional[Dict[str, str]] = None,
experiment_name: Optional[str] = None, experiment_name: Optional[str] = None,
input_datasets_v2: Optional[Dict[str, Input]] = None, input_datasets_v2: Optional[Dict[str, Input]] = None,
@ -416,7 +425,10 @@ def submit_run_v2(
:param ml_client: An Azure MLClient object for interacting with Azure resources. :param ml_client: An Azure MLClient object for interacting with Azure resources.
:param environment: An AML v2 Environment object. :param environment: An AML v2 Environment object.
:param entry_script: The script that should be run in AzureML. :param entry_script: The Python script that should be run in AzureML. If None, the current main Python file will be
executed. If entry_command is provided, this argument is ignored.
:param entry_command: The command that should be run in AzureML. Command arguments will be taken from
the 'script_params' argument. If provided, this will override the entry_script argument.
:param script_params: A list of parameter to pass on to the script as it runs in AzureML. :param script_params: A list of parameter to pass on to the script as it runs in AzureML.
:param compute_target: The name of a compute target in Azure ML to submit the job to. :param compute_target: The name of a compute target in Azure ML to submit the job to.
:param environment_variables: The environment variables that should be set when running in AzureML. :param environment_variables: The environment variables that should be set when running in AzureML.
@ -443,14 +455,15 @@ def submit_run_v2(
:return: An AzureML Run object. :return: An AzureML Run object.
""" """
root_dir = sanitize_snapshoot_directory(snapshot_root_directory) root_dir = sanitize_snapshoot_directory(snapshot_root_directory)
entry_script_relative = sanitize_entry_script(entry_script, root_dir)
experiment_name = effective_experiment_name(experiment_name, entry_script_relative)
script_params = script_params or [] script_params = script_params or []
script_param_str = create_v2_job_command_line_args_from_params(script_params) script_param_str = create_v2_job_command_line_args_from_params(script_params)
if entry_command is None:
cmd = " ".join(["python", str(entry_script_relative), script_param_str]) entry_script_relative = sanitize_entry_script(entry_script, root_dir)
experiment_name = effective_experiment_name(experiment_name, entry_script_relative)
cmd = " ".join(["python", str(entry_script_relative), script_param_str])
else:
experiment_name = effective_experiment_name(experiment_name, entry_command)
cmd = " ".join([str(entry_command), script_param_str])
print(f"The following command will be run in AzureML: {cmd}") print(f"The following command will be run in AzureML: {cmd}")
@ -730,6 +743,7 @@ def submit_to_azure_if_needed( # type: ignore
pytorch_processes_per_node_v2: Optional[int] = None, pytorch_processes_per_node_v2: Optional[int] = None,
use_mpi_run_for_single_node_jobs: bool = True, use_mpi_run_for_single_node_jobs: bool = True,
display_name: Optional[str] = None, display_name: Optional[str] = None,
entry_command: Optional[PathOrString] = None,
) -> AzureRunInfo: # pragma: no cover ) -> AzureRunInfo: # pragma: no cover
""" """
Submit a folder to Azure, if needed and run it. Submit a folder to Azure, if needed and run it.
@ -747,7 +761,10 @@ def submit_to_azure_if_needed( # type: ignore
floating point number with a string suffix s, m, h, d for seconds, minutes, hours, day. Examples: '3.5h', '2d' floating point number with a string suffix s, m, h, d for seconds, minutes, hours, day. Examples: '3.5h', '2d'
:param experiment_name: The name of the AzureML experiment in which the run should be submitted. If omitted, :param experiment_name: The name of the AzureML experiment in which the run should be submitted. If omitted,
this is created based on the name of the current script. this is created based on the name of the current script.
:param entry_script: The script that should be run in AzureML :param entry_script: The Python script that should be run in AzureML. If None, the current main Python file will be
executed. If entry_command is provided, this argument is ignored.
:param entry_command: The command that should be run in AzureML. Command arguments will be taken from
the 'script_params' argument. If provided, this will override the entry_script argument.
:param compute_cluster_name: The name of the AzureML cluster that should run the job. This can be a cluster with :param compute_cluster_name: The name of the AzureML cluster that should run the job. This can be a cluster with
CPU or GPU machines. CPU or GPU machines.
:param conda_environment_file: The conda configuration file that describes which packages are necessary for your :param conda_environment_file: The conda configuration file that describes which packages are necessary for your
@ -915,6 +932,7 @@ def submit_to_azure_if_needed( # type: ignore
script_params=script_params, script_params=script_params,
snapshot_root_directory=snapshot_root_directory, snapshot_root_directory=snapshot_root_directory,
entry_script=entry_script, entry_script=entry_script,
entry_command=entry_command,
) )
script_run_config.run_config = run_config script_run_config.run_config = run_config
@ -942,9 +960,6 @@ def submit_to_azure_if_needed( # type: ignore
environment = create_python_environment_v2( environment = create_python_environment_v2(
conda_environment_file=conda_environment_file, docker_base_image=docker_base_image conda_environment_file=conda_environment_file, docker_base_image=docker_base_image
) )
if entry_script is None:
entry_script = Path(sys.argv[0])
registered_env = register_environment_v2(environment, ml_client) registered_env = register_environment_v2(environment, ml_client)
input_datasets_v2 = create_v2_inputs(ml_client, cleaned_input_datasets) input_datasets_v2 = create_v2_inputs(ml_client, cleaned_input_datasets)
output_datasets_v2 = create_v2_outputs(ml_client, cleaned_output_datasets) output_datasets_v2 = create_v2_outputs(ml_client, cleaned_output_datasets)
@ -959,6 +974,7 @@ def submit_to_azure_if_needed( # type: ignore
snapshot_root_directory=snapshot_root_directory, snapshot_root_directory=snapshot_root_directory,
entry_script=entry_script, entry_script=entry_script,
script_params=script_params, script_params=script_params,
entry_command=entry_command,
compute_target=compute_cluster_name, compute_target=compute_cluster_name,
tags=tags, tags=tags,
display_name=display_name, display_name=display_name,

Просмотреть файл

@ -455,8 +455,11 @@ def get_authentication() -> Union[InteractiveLoginAuthentication, ServicePrincip
tenant_id = get_secret_from_environment(ENV_TENANT_ID, allow_missing=True) tenant_id = get_secret_from_environment(ENV_TENANT_ID, allow_missing=True)
service_principal_password = get_secret_from_environment(ENV_SERVICE_PRINCIPAL_PASSWORD, allow_missing=True) service_principal_password = get_secret_from_environment(ENV_SERVICE_PRINCIPAL_PASSWORD, allow_missing=True)
# Check if all 3 environment variables are set # Check if all 3 environment variables are set
if bool(service_principal_id) and bool(tenant_id) and bool(service_principal_password): if service_principal_id and tenant_id and service_principal_password:
logging.info("Found all necessary environment variables for Service Principal authentication.") print(
"Found environment variables for Service Principal authentication: First characters of App ID "
f"are {service_principal_id[:8]}... in tenant {tenant_id[:8]}..."
)
return ServicePrincipalAuthentication( return ServicePrincipalAuthentication(
tenant_id=tenant_id, tenant_id=tenant_id,
service_principal_id=service_principal_id, service_principal_id=service_principal_id,
@ -1935,7 +1938,10 @@ def get_credential() -> Optional[TokenCredential]:
tenant_id = get_secret_from_environment(ENV_TENANT_ID, allow_missing=True) tenant_id = get_secret_from_environment(ENV_TENANT_ID, allow_missing=True)
service_principal_password = get_secret_from_environment(ENV_SERVICE_PRINCIPAL_PASSWORD, allow_missing=True) service_principal_password = get_secret_from_environment(ENV_SERVICE_PRINCIPAL_PASSWORD, allow_missing=True)
if service_principal_id and tenant_id and service_principal_password: if service_principal_id and tenant_id and service_principal_password:
logger.debug("Found environment variables for Service Principal authentication") print(
"Found environment variables for Service Principal authentication: First characters of App ID "
f"are {service_principal_id[:8]}... in tenant {tenant_id[:8]}..."
)
return _get_legitimate_service_principal_credential(tenant_id, service_principal_id, service_principal_password) return _get_legitimate_service_principal_credential(tenant_id, service_principal_id, service_principal_password)
try: try:

Просмотреть файл

@ -40,7 +40,7 @@ def test_get_credential() -> None:
ENV_SERVICE_PRINCIPAL_PASSWORD: "baz", ENV_SERVICE_PRINCIPAL_PASSWORD: "baz",
} }
with patch.object(os.environ, "get", return_value=mock_env_vars): with patch.dict(os.environ, mock_env_vars):
with patch.multiple( with patch.multiple(
"health_azure.utils", "health_azure.utils",
is_running_in_azure_ml=DEFAULT, is_running_in_azure_ml=DEFAULT,

Просмотреть файл

@ -464,6 +464,16 @@ def test_invalid_entry_script(tmp_path: Path) -> None:
assert script_run.script == "some_string" assert script_run.script == "some_string"
assert script_run.arguments == ["--foo"] assert script_run.arguments == ["--foo"]
# When proving a full command, this should override whatever is given in script and params
entry_command = "cmd"
script_params = ["arg1"]
script_run = himl.create_script_run(
snapshot_root_directory=None, entry_script="entry", entry_command="cmd", script_params=script_params
)
assert script_run.script is None
assert script_run.arguments is None
assert script_run.command == [entry_command, *script_params]
@pytest.mark.fast @pytest.mark.fast
def test_get_script_params() -> None: def test_get_script_params() -> None:
@ -1869,6 +1879,7 @@ def test_submitting_script_with_sdk_v2(tmp_path: Path, wait_for_completion: bool
assert after_submission_called, "after_submission callback was not called" assert after_submission_called, "after_submission callback was not called"
@pytest.mark.fast
def test_submitting_script_with_sdk_v2_accepts_relative_path(tmp_path: Path) -> None: def test_submitting_script_with_sdk_v2_accepts_relative_path(tmp_path: Path) -> None:
""" """
Test that submission of a script with AML V2 works when the script path is relative to the current working folder. Test that submission of a script with AML V2 works when the script path is relative to the current working folder.
@ -1903,6 +1914,20 @@ def test_submitting_script_with_sdk_v2_accepts_relative_path(tmp_path: Path) ->
expected_command = "python " + script_name expected_command = "python " + script_name
assert call_kwargs.get("command").startswith(expected_command), "Incorrect script argument" assert call_kwargs.get("command").startswith(expected_command), "Incorrect script argument"
with pytest.raises(NotImplementedError):
himl.submit_to_azure_if_needed(
entry_command="foo",
script_params=["bar"],
conda_environment_file=conda_env_path,
snapshot_root_directory=tmp_path,
submit_to_azureml=True,
strictly_aml_v1=False,
)
assert mock_command.call_count == 3
_, call_kwargs = mock_command.call_args
# The constructed command should be constructed from the entry_command and script_params arguments
assert call_kwargs.get("command").startswith("foo bar"), "Incorrect script argument"
# Submission should fail with an error if the entry script is not inside the snapshot root # Submission should fail with an error if the entry script is not inside the snapshot root
with pytest.raises(ValueError, match="entry script must be inside of the snapshot root"): with pytest.raises(ValueError, match="entry script must be inside of the snapshot root"):
with pytest.raises(NotImplementedError): with pytest.raises(NotImplementedError):
@ -1915,6 +1940,7 @@ def test_submitting_script_with_sdk_v2_accepts_relative_path(tmp_path: Path) ->
) )
@pytest.mark.fast
def test_submitting_script_with_sdk_v2_passes_display_name(tmp_path: Path) -> None: def test_submitting_script_with_sdk_v2_passes_display_name(tmp_path: Path) -> None:
""" """
Test that submission of a script with SDK v2 passes the display_name parameter to the "command" function Test that submission of a script with SDK v2 passes the display_name parameter to the "command" function
@ -1981,6 +2007,7 @@ def test_submitting_script_with_sdk_v2_passes_environment_variables(tmp_path: Pa
assert call_kwargs.get("environment_variables") == environment_variables, "environment_variables not passed" assert call_kwargs.get("environment_variables") == environment_variables, "environment_variables not passed"
@pytest.mark.fast
def test_conda_env_missing(tmp_path: Path) -> None: def test_conda_env_missing(tmp_path: Path) -> None:
""" """
Test that submission fails if no Conda environment file is found. Test that submission fails if no Conda environment file is found.

Просмотреть файл

@ -143,7 +143,7 @@ def test_ssl_container_cifar10_resnet_simclr() -> None:
# Note: It is possible that after the PyTorch 1.10 upgrade, we can't get parity between local runs and runs on # Note: It is possible that after the PyTorch 1.10 upgrade, we can't get parity between local runs and runs on
# the hosted build agents. If that suspicion is confirmed, we need to add branching for local and cloud results. # the hosted build agents. If that suspicion is confirmed, we need to add branching for local and cloud results.
expected_metrics = { expected_metrics = {
'simclr/val/loss': 2.859630584716797, 'simclr/val/loss': 2.8596301078796387,
'ssl_online_evaluator/val/loss': 2.2664988040924072, 'ssl_online_evaluator/val/loss': 2.2664988040924072,
'ssl_online_evaluator/val/AccuracyAtThreshold05': 0.20000000298023224, 'ssl_online_evaluator/val/AccuracyAtThreshold05': 0.20000000298023224,
'simclr/train/loss': 3.6261773109436035, 'simclr/train/loss': 3.6261773109436035,
@ -152,7 +152,8 @@ def test_ssl_container_cifar10_resnet_simclr() -> None:
'ssl_online_evaluator/train/online_AccuracyAtThreshold05': 0.0, 'ssl_online_evaluator/train/online_AccuracyAtThreshold05': 0.0,
} }
_compare_stored_metrics(runner, expected_metrics, abs=5e-5) # After package upgrades in #912, this is no longer reproducible with higher accuracy (was 5e-5)
_compare_stored_metrics(runner, expected_metrics, abs=1e-2)
# Check that the checkpoint contains both the optimizer for the embedding and for the linear head # Check that the checkpoint contains both the optimizer for the embedding and for the linear head
checkpoint_path = loaded_config.outputs_folder / "checkpoints" / "last.ckpt" checkpoint_path = loaded_config.outputs_folder / "checkpoints" / "last.ckpt"