зеркало из https://github.com/microsoft/hi-ml.git
BUG: Allow relative path for entry script for SDK v2 submission (#904)
When submitting a script with a path relative to current working directory, SDK v2 raises an error, even though the script exists. Closes #890
This commit is contained in:
Родитель
1a754d8fcc
Коммит
f3525192e1
|
@ -64,6 +64,9 @@ from health_azure.utils import (
|
|||
run_duration_string_to_seconds,
|
||||
to_azure_friendly_string,
|
||||
wait_for_job_completion,
|
||||
sanitize_snapshoot_directory,
|
||||
sanitize_entry_script,
|
||||
_is_module_calling_syntax,
|
||||
)
|
||||
|
||||
logger = logging.getLogger('health_azure')
|
||||
|
@ -340,8 +343,8 @@ def create_crossval_hyperparam_args_v2(
|
|||
|
||||
def create_script_run(
|
||||
script_params: List[str],
|
||||
snapshot_root_directory: Optional[Path] = None,
|
||||
entry_script: Optional[PathOrString] = None,
|
||||
snapshot_root_directory: Optional[Path],
|
||||
entry_script: Optional[PathOrString],
|
||||
) -> ScriptRunConfig:
|
||||
"""
|
||||
Creates an AzureML ScriptRunConfig object, that holds the information about the snapshot, the entry script, and
|
||||
|
@ -355,31 +358,9 @@ def create_script_run(
|
|||
executed.
|
||||
:return:
|
||||
"""
|
||||
if snapshot_root_directory is None:
|
||||
print("No snapshot root directory given. All files in the current working directory will be copied to AzureML.")
|
||||
snapshot_root_directory = Path.cwd()
|
||||
else:
|
||||
print(f"All files in this folder will be copied to AzureML: {snapshot_root_directory}")
|
||||
if entry_script is None:
|
||||
entry_script = Path(sys.argv[0])
|
||||
print("No entry script given. The current main Python file will be executed in AzureML.")
|
||||
elif isinstance(entry_script, str):
|
||||
entry_script = Path(entry_script)
|
||||
if entry_script.is_absolute():
|
||||
try:
|
||||
# The entry script always needs to use Linux path separators, even when submitting from Windows
|
||||
entry_script_relative = entry_script.relative_to(snapshot_root_directory).as_posix()
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
"The entry script must be inside of the snapshot root directory. "
|
||||
f"Snapshot root: {snapshot_root_directory}, entry script: {entry_script}"
|
||||
)
|
||||
else:
|
||||
entry_script_relative = str(entry_script)
|
||||
print(f"This command will be run in AzureML: {entry_script_relative} {' '.join(script_params)}")
|
||||
return ScriptRunConfig(
|
||||
source_directory=str(snapshot_root_directory), script=entry_script_relative, arguments=script_params
|
||||
)
|
||||
snapshot_root = sanitize_snapshoot_directory(snapshot_root_directory)
|
||||
entry_script_relative = sanitize_entry_script(entry_script, snapshot_root)
|
||||
return ScriptRunConfig(source_directory=str(snapshot_root), script=entry_script_relative, arguments=script_params)
|
||||
|
||||
|
||||
def effective_experiment_name(experiment_name: Optional[str], entry_script: Optional[PathOrString] = None) -> str:
|
||||
|
@ -398,7 +379,7 @@ def effective_experiment_name(experiment_name: Optional[str], entry_script: Opti
|
|||
elif experiment_name:
|
||||
raw_value = experiment_name
|
||||
elif entry_script is not None:
|
||||
if str(entry_script)[:3] == "-m ":
|
||||
if _is_module_calling_syntax(str(entry_script)):
|
||||
raw_value = str(entry_script)[3:]
|
||||
else:
|
||||
raw_value = Path(entry_script).stem
|
||||
|
@ -461,18 +442,15 @@ def submit_run_v2(
|
|||
display name will be generated by AzureML.
|
||||
:return: An AzureML Run object.
|
||||
"""
|
||||
snapshot_root_directory = snapshot_root_directory or Path.cwd()
|
||||
root_dir = Path(snapshot_root_directory)
|
||||
root_dir = sanitize_snapshoot_directory(snapshot_root_directory)
|
||||
entry_script_relative = sanitize_entry_script(entry_script, root_dir)
|
||||
|
||||
if str(entry_script)[:2] != "-m":
|
||||
entry_script = Path(entry_script).relative_to(root_dir).as_posix()
|
||||
|
||||
experiment_name = effective_experiment_name(experiment_name, entry_script)
|
||||
experiment_name = effective_experiment_name(experiment_name, entry_script_relative)
|
||||
|
||||
script_params = script_params or []
|
||||
script_param_str = create_v2_job_command_line_args_from_params(script_params)
|
||||
|
||||
cmd = " ".join(["python", str(entry_script), script_param_str])
|
||||
cmd = " ".join(["python", str(entry_script_relative), script_param_str])
|
||||
|
||||
print(f"The following command will be run in AzureML: {cmd}")
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ import logging
|
|||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
@ -2066,3 +2067,42 @@ def filter_v2_input_output_args(args: List[str]) -> List[str]:
|
|||
any integer.
|
||||
"""
|
||||
return [a for a in args if not re.match(V2_INPUT_DATASET_PATTERN, a) and not re.match(V2_OUTPUT_DATASET_PATTERN, a)]
|
||||
|
||||
|
||||
def _is_module_calling_syntax(entry_script: str) -> bool:
|
||||
"""Returns True if the entry script is of the format seen when calling Python like 'python -m Foo.bar'"""
|
||||
return entry_script.startswith("-m ")
|
||||
|
||||
|
||||
def sanitize_snapshoot_directory(snapshot_root_directory: Optional[PathOrString]) -> Path:
|
||||
"""Sets the default values for the snapshoot root directory, which is the current working directory."""
|
||||
if snapshot_root_directory is None:
|
||||
print("No snapshot root directory given. All files in the current working directory will be copied to AzureML.")
|
||||
return Path.cwd()
|
||||
else:
|
||||
print(f"All files in this folder will be copied to AzureML: {snapshot_root_directory}")
|
||||
return Path(snapshot_root_directory)
|
||||
|
||||
|
||||
def sanitize_entry_script(entry_script: Optional[PathOrString], snapshot_root: Path) -> str:
|
||||
if entry_script is None:
|
||||
print("No entry script given. The current main Python file will be executed in AzureML.")
|
||||
entry_script_path = Path(sys.argv[0])
|
||||
elif isinstance(entry_script, str):
|
||||
if _is_module_calling_syntax(entry_script):
|
||||
return entry_script
|
||||
entry_script_path = Path(entry_script)
|
||||
elif isinstance(entry_script, Path):
|
||||
entry_script_path = entry_script
|
||||
else:
|
||||
raise ValueError(f"entry_script must be a string or Path, but got {type(entry_script)}")
|
||||
if entry_script_path.is_absolute():
|
||||
try:
|
||||
entry_script_path = entry_script_path.relative_to(snapshot_root)
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
"The entry script must be inside of the snapshot root directory. "
|
||||
f"Snapshot root: {snapshot_root}, entry script: {entry_script}"
|
||||
)
|
||||
# The entry script always needs to use Linux path separators, even when submitting from Windows
|
||||
return str(entry_script_path.as_posix())
|
||||
|
|
|
@ -42,12 +42,15 @@ from health_azure.utils import (
|
|||
download_files_by_suffix,
|
||||
download_file_if_necessary,
|
||||
resolve_workspace_config_path,
|
||||
sanitize_snapshoot_directory,
|
||||
sanitize_entry_script,
|
||||
)
|
||||
from testazure.test_himl import RunTarget, render_and_run_test_script
|
||||
from testazure.utils_testazure import (
|
||||
DEFAULT_IGNORE_FOLDERS,
|
||||
DEFAULT_WORKSPACE,
|
||||
MockRun,
|
||||
change_working_directory,
|
||||
create_unittest_run_object,
|
||||
experiment_for_unittests,
|
||||
repository_root,
|
||||
|
@ -2143,3 +2146,39 @@ def test_resolve_workspace_config_path_missing(tmp_path: Path) -> None:
|
|||
mocked_file = tmp_path / "foo.json"
|
||||
with pytest.raises(FileNotFoundError, match="Workspace config file does not exist"):
|
||||
resolve_workspace_config_path(mocked_file)
|
||||
|
||||
|
||||
@pytest.mark.fast
|
||||
def test_sanitize_snapshot_root(tmp_path: Path) -> None:
|
||||
"""Test if the snapshot root directory will default to the current working directory if not provided.
|
||||
Otherwise, pass through unchanged.
|
||||
"""
|
||||
with change_working_directory(tmp_path):
|
||||
assert sanitize_snapshoot_directory(None) == tmp_path
|
||||
folder = tmp_path / "foo"
|
||||
assert sanitize_snapshoot_directory(folder) == folder
|
||||
assert sanitize_snapshoot_directory(str(folder)) == folder
|
||||
|
||||
|
||||
@pytest.mark.fast
|
||||
def test_sanitize_entry_script(tmp_path: Path) -> None:
|
||||
script = "some_script"
|
||||
# If no entry script is given, derive from sys.argv
|
||||
with patch("sys.argv", [script]):
|
||||
assert sanitize_entry_script(None, tmp_path) == script
|
||||
# If an entry script is given and it is valid, pass through unchanged
|
||||
assert sanitize_entry_script(script, tmp_path) == script
|
||||
# If the entry script is in a subfolder, we want to get the script with folder back, as a string.
|
||||
folder = "folder"
|
||||
script_with_folder = tmp_path / folder / script
|
||||
assert sanitize_entry_script(script_with_folder, tmp_path) == f"{folder}/{script}"
|
||||
# If the entry script is of the format "-m Foo.bar", return unchanged.
|
||||
entry_module = "-m Foo.bar"
|
||||
assert sanitize_entry_script(entry_module, tmp_path) == entry_module
|
||||
|
||||
# Error case: Invalid argument
|
||||
with pytest.raises(ValueError, match="entry_script must be a string or Path"):
|
||||
assert sanitize_entry_script([], tmp_path) # type: ignore
|
||||
# Error case: Entry script is not in the snapshot
|
||||
with pytest.raises(ValueError, match="entry script must be inside of the snapshot root"):
|
||||
assert sanitize_entry_script(script_with_folder, tmp_path / "other_folder")
|
||||
|
|
|
@ -448,15 +448,19 @@ def test_invalid_entry_script(tmp_path: Path) -> None:
|
|||
)
|
||||
assert "entry script must be inside of the snapshot root directory" in str(e)
|
||||
|
||||
# If no info is provided, the script run should copy all of the current working directory, and read the entry
|
||||
# script from sys.argv
|
||||
with mock.patch("sys.argv", ["foo"]):
|
||||
script_params = himl._get_script_params()
|
||||
script_run = himl.create_script_run(script_params)
|
||||
script_run = himl.create_script_run(script_params, snapshot_root_directory=None, entry_script=None)
|
||||
assert script_run.source_directory == str(Path.cwd())
|
||||
assert script_run.script == "foo"
|
||||
assert script_run.arguments == []
|
||||
|
||||
# Entry scripts where the path is not absolute should be left unchanged
|
||||
script_run = himl.create_script_run(entry_script="some_string", script_params=["--foo"])
|
||||
script_run = himl.create_script_run(
|
||||
script_params=["--foo"], snapshot_root_directory=None, entry_script="some_string"
|
||||
)
|
||||
assert script_run.script == "some_string"
|
||||
assert script_run.arguments == ["--foo"]
|
||||
|
||||
|
@ -1865,6 +1869,52 @@ def test_submitting_script_with_sdk_v2(tmp_path: Path, wait_for_completion: bool
|
|||
assert after_submission_called, "after_submission callback was not called"
|
||||
|
||||
|
||||
def test_submitting_script_with_sdk_v2_accepts_relative_path(tmp_path: Path) -> None:
|
||||
"""
|
||||
Test that submission of a script with AML V2 works when the script path is relative to the current working folder.
|
||||
"""
|
||||
# Create a minimal script in a temp folder. Script
|
||||
script_name = "test_script.py"
|
||||
test_script = tmp_path / script_name
|
||||
test_script.write_text("print('hello world')")
|
||||
conda_env_path = create_empty_conda_env(tmp_path)
|
||||
|
||||
with patch.multiple(
|
||||
"health_azure.himl",
|
||||
get_ml_client=DEFAULT,
|
||||
create_python_environment_v2=DEFAULT,
|
||||
register_environment_v2=DEFAULT,
|
||||
):
|
||||
with patch("health_azure.himl.command", side_effect=NotImplementedError) as mock_command:
|
||||
# Try calling the submission code with both relative and absolute paths. In either case, the entry script
|
||||
# should be converted to a single string that is relative to the snapshot root directory.
|
||||
for index, entry_script in enumerate([script_name, test_script]):
|
||||
with pytest.raises(NotImplementedError):
|
||||
himl.submit_to_azure_if_needed(
|
||||
entry_script=entry_script, # type: ignore
|
||||
conda_environment_file=conda_env_path,
|
||||
snapshot_root_directory=tmp_path,
|
||||
submit_to_azureml=True,
|
||||
strictly_aml_v1=False,
|
||||
)
|
||||
assert mock_command.call_count == index + 1
|
||||
_, call_kwargs = mock_command.call_args
|
||||
# The constructed command is to start python with the given entry script, given as a relative path
|
||||
expected_command = "python " + script_name
|
||||
assert call_kwargs.get("command").startswith(expected_command), "Incorrect script argument"
|
||||
|
||||
# Submission should fail with an error if the entry script is not inside the snapshot root
|
||||
with pytest.raises(ValueError, match="entry script must be inside of the snapshot root"):
|
||||
with pytest.raises(NotImplementedError):
|
||||
himl.submit_to_azure_if_needed(
|
||||
entry_script=test_script,
|
||||
conda_environment_file=conda_env_path,
|
||||
snapshot_root_directory=tmp_path / "subfolder",
|
||||
submit_to_azureml=True,
|
||||
strictly_aml_v1=False,
|
||||
)
|
||||
|
||||
|
||||
def test_submitting_script_with_sdk_v2_passes_display_name(tmp_path: Path) -> None:
|
||||
"""
|
||||
Test that submission of a script with SDK v2 passes the display_name parameter to the "command" function
|
||||
|
|
Загрузка…
Ссылка в новой задаче