* Remove blobxfer

* Update CHANGELOG.md

* Remove configs that are not required

* Remove from environment.yml

* Fix numba issue

* Improve CHANGELOG.md

* Fix tests

* Remove configs that are not required
Javier 2020-12-03 10:44:05 +00:00 committed by GitHub
Parent 014c74e34f
Commit a2c27e19d7
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 13 additions and 334 deletions

View file

@@ -24,6 +24,7 @@ folder structure is present irrespective of using InnerEye as a submodule or not
environment will be contained in the model.
### Removed
- Removed blobxfer completely. AzureML Data-stores for reading datasets make the following configs obsolete: 'datasets_storage_account' and 'datasets_storage_account_key'; they are no longer supported.
### Deprecated

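The CHANGELOG entry above replaces the blobxfer storage-account settings with AzureML data-stores. A minimal sketch of the new access pattern, assuming the azureml-core SDK; the datastore name comes from settings.yml below, while the dataset folder and target path are placeholders:

from azureml.core import Dataset, Datastore, Workspace

# Credentials live in the workspace and its registered datastore, so no storage
# account name or key is needed here.
workspace = Workspace.from_config()                       # reads the local config.json
datastore = Datastore.get(workspace, "innereyedatasets")  # the 'azureml_datastore' setting
dataset = Dataset.File.from_files(path=(datastore, "my_dataset/"))  # placeholder folder
dataset.download(target_path="/tmp/my_dataset", overwrite=False)
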
View file

@@ -59,13 +59,6 @@ class AzureConfig(GenericConfig):
subscription_id: str = param.String(doc="The ID of your Azure subscription.")
tenant_id: str = param.String(doc="The Azure tenant ID.")
application_id: str = param.String(doc="Optional: The ID of the Service Principal for authentication to Azure.")
datasets_storage_account: str = \
param.String(doc="Optional: The blob storage account to use when downloading datasets for use outside of "
"AzureML. This storage account must be the same as the one configured as a 'datastore' "
"in AzureML.")
datasets_storage_account_key: str = \
param.String(doc="Optional: The access key for the storage account that holds the datasets. "
"This is only used for downloading datasets outside of AzureML.")
datasets_container: str = param.String(doc="Optional: The blob storage container with the datasets.")
azureml_datastore: str = param.String(doc="The name of the AzureML datastore that holds the input training data. "
"This must be created manually, and point to a folder inside the "
@@ -196,13 +189,6 @@ class AzureConfig(GenericConfig):
config.project_root = project_root
return config
def get_dataset_storage_account_key(self) -> Optional[str]:
"""
Gets the storage account key for the storage account that holds the dataset.
"""
secrets_handler = SecretsHandling(project_root=self.project_root)
return secrets_handler.get_secret_from_environment(fixed_paths.DATASETS_ACCOUNT_KEY, allow_missing=True)
def get_workspace(self) -> Workspace:
"""
Return a workspace object for an existing Azure Machine Learning Workspace (or default from YAML).

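The surviving 'azureml_datastore' setting above must name a datastore that was created manually. A hedged sketch of that one-off registration with azureml-core; the account name and key are placeholders:

from azureml.core import Datastore, Workspace

workspace = Workspace.from_config()
# After this one-off registration, training runs only refer to the datastore by name.
Datastore.register_azure_blob_container(
    workspace=workspace,
    datastore_name="innereyedatasets",    # matches 'azureml_datastore' in settings.yml
    container_name="datasets",            # matches 'datasets_container'
    account_name="<storage-account>",     # placeholder
    account_key="<storage-account-key>",  # placeholder; a SAS token can be used instead
)
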
View file

@@ -248,15 +248,6 @@ def get_run_id(run: Optional[Run] = None) -> str:
return run_context.id
def storage_account_from_full_name(full_account_name: str) -> str:
"""
Extracts the actual storage account name from the full name, like "/subscriptions/abc123../something/account_name"
:param full_account_name: Full name of account
:return: Storage account name
"""
return full_account_name.split("/")[-1]
def get_cross_validation_split_index(run: Run) -> int:
"""
Gets the cross validation index from the run's tags or returns the default
@@ -425,6 +416,7 @@ def is_run_and_child_runs_completed(run: Run) -> bool:
:param run: The AzureML run to check.
:return: True if the run and all child runs completed successfully.
"""
def is_completed(run: Run) -> bool:
status = run.get_status()
if run.status == RunStatus.COMPLETED:

View file

@@ -26,19 +26,16 @@ This means: slices 79 to 107 inclusive in the z direction are missing, i.e. ther
import csv
import os
import sys
from pathlib import Path
from typing import Dict, Iterator, List, Optional, Set, TextIO, Tuple
import numpy as np
import param
from azure.storage.blob import BlockBlobService
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common import fixed_paths
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.utils.blobxfer_util import download_blobs
from InnerEye.ML.utils.io_util import read_image_as_array_with_header
MISSING_SLICE_MARKER = "Ms:"
@@ -78,7 +75,8 @@ def report_structure_extremes(dataset_dir: str, azure_config: AzureConfig) -> No
:param azure_config: An object with all necessary information for accessing Azure.
:param dataset_dir: directory containing subject subdirectories with integer names.
"""
download_dataset_directory(azure_config, dataset_dir)
if not os.path.isdir(dataset_dir):
raise ValueError(f"Invalid path: {dataset_dir}")
subjects: Set[int] = set()
series_map = None
institution_map = None
@@ -122,37 +120,11 @@ def report_structure_extremes(dataset_dir: str, azure_config: AzureConfig) -> No
if index % 25 == 0:
print(f"Processed {index} subjects")
print(f"Processed all {len(subjects)} subjects")
upload_to_dataset_directory(azure_config, dataset_dir, files_created)
# If we found any structures with missing slices, raise an exception, which should be
# uncaught where necessary to make any appropriate build step fail.
if n_missing > 0:
raise ValueError(f"Found {n_missing} structures with missing slices")
def download_dataset_directory(azure_config: AzureConfig, dataset_dir: str) -> bool:
if os.path.isdir(dataset_dir):
return False
account_key = azure_config.get_dataset_storage_account_key()
blobs_root_path = os.path.join(azure_config.datasets_container, os.path.basename(dataset_dir)) + "/"
sys.stdout.write(f"Downloading data to {dataset_dir} ...")
assert account_key is not None # for mypy
download_blobs(azure_config.datasets_storage_account, account_key, blobs_root_path, Path(dataset_dir))
sys.stdout.write("done\n")
return True
def upload_to_dataset_directory(azure_config: AzureConfig, dataset_dir: str, files: Set[str]) -> None:
if not files:
return
account_key = azure_config.get_dataset_storage_account_key()
block_blob_service = BlockBlobService(account_name=azure_config.datasets_storage_account, account_key=account_key)
container_name = os.path.join(azure_config.datasets_container, os.path.basename(dataset_dir))
for path in files:
blob_name = path[len(dataset_dir) + 1:]
block_blob_service.create_blob_from_path(container_name, blob_name, path)
print(f"Uploaded {path} to {azure_config.datasets_storage_account}:{container_name}/{blob_name}")
def report_structure_extremes_for_subject(subj_dir: str, series_id: str) -> Iterator[str]:
"""
:param subj_dir: subject directory, containing <structure>.nii.gz files

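Since the blobxfer download/upload helpers are gone, report_structure_extremes now expects the dataset folder to exist on local disk. A usage sketch, assuming it runs in the same module as the function above and that the folder was fetched beforehand (for example with the AzureML dataset download sketched earlier); the path is a placeholder:

from InnerEye.Azure.azure_config import AzureConfig

# An AzureConfig with defaults is enough here: the function no longer touches blob storage.
azure_config = AzureConfig()
dataset_dir = "/data/my_dataset"  # placeholder; must already exist, otherwise ValueError is raised
report_structure_extremes(dataset_dir=dataset_dir, azure_config=azure_config)
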
View file

@@ -54,8 +54,6 @@ PRIVATE_SETTINGS_FILE = "InnerEyePrivateSettings.yml"
# Names of secrets stored as environment variables or in the PROJECT_SECRETS_FILE:
# Secret for the Service Principal
SERVICE_PRINCIPAL_KEY = "APPLICATION_KEY"
# The access key for the Azure storage account that holds the datasets.
DATASETS_ACCOUNT_KEY = "DATASETS_ACCOUNT_KEY"
INNEREYE_PACKAGE_ROOT = repository_root_directory(INNEREYE_PACKAGE_NAME)
SETTINGS_YAML_FILE_NAME = "settings.yml"

View file

@@ -5,6 +5,7 @@
import copy
import logging
import shutil
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@@ -25,7 +26,7 @@ from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, \
from InnerEye.Common import fixed_paths
from InnerEye.Common.build_config import ExperimentResultLocation, build_information_to_dot_net_json_file
from InnerEye.Common.common_util import ModelProcessing, is_windows, logging_section, print_exception
from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, PROJECT_SECRETS_FILE
from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME
from InnerEye.ML.common import DATASET_CSV_FILE_NAME, ModelExecutionMode
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.deep_learning_config import CHECKPOINT_FOLDER, FINAL_ENSEMBLE_MODEL_FOLDER, FINAL_MODEL_FOLDER, \
@@ -38,7 +39,6 @@ from InnerEye.ML.model_training import model_train
from InnerEye.ML.runner import ModelDeploymentHookSignature, Runner, get_all_environment_files
from InnerEye.ML.scalar_config import ScalarModelBase
from InnerEye.ML.utils import ml_util
from InnerEye.ML.utils.blobxfer_util import download_blobs
from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler
from InnerEye.ML.utils.ml_util import make_pytorch_reproducible
from InnerEye.ML.visualizers import activation_maps
@@ -61,45 +61,6 @@ def try_to_mount_input_dataset(run_context: Any) -> Optional[Path]:
return None
def download_dataset_via_blobxfer(dataset_id: str,
azure_config: AzureConfig,
target_folder: Path) -> Optional[Path]:
"""
Attempts to download a dataset from the Azure storage account for datasets, with download happening via
blobxfer. This is only possible if the datasets storage account and key are present in the `azure_config`.
The function returns None if the required settings were not present.
:param dataset_id: The folder of the dataset, expected in the container given by azure_config.datasets_container.
:param azure_config: The object with all Azure-related settings.
:param target_folder: The local folder into which the dataset should be downloaded.
:return: The folder that contains the downloaded dataset. Returns None if the datasets account name or password
were not present.
"""
datasets_account_key = azure_config.get_dataset_storage_account_key()
if not datasets_account_key:
logging.info("No account key for the dataset storage account was found.")
logging.info(f"We checked in environment variables and in the file {PROJECT_SECRETS_FILE}")
return None
if (not azure_config.datasets_container) or (not azure_config.datasets_storage_account):
logging.info("Datasets storage account or container missing.")
return None
target_folder.mkdir(exist_ok=True)
result_folder = target_folder / dataset_id
# only download if hasn't already been downloaded
if result_folder.is_dir():
logging.info(f"Folder already exists, skipping download: {result_folder}")
return result_folder
with logging_section(f"Downloading dataset {dataset_id}"):
download_blobs(
account=azure_config.datasets_storage_account,
account_key=datasets_account_key,
# When specifying the blobs root path, ensure that there is a slash at the end, otherwise
# all datasets with that dataset_id as a prefix get downloaded.
blobs_root_path=f"{azure_config.datasets_container}/{dataset_id}/",
destination=result_folder
)
return result_folder
def download_dataset(azure_dataset_id: str,
target_folder: Path,
azure_config: AzureConfig) -> Path:
@@ -109,20 +70,11 @@ def download_dataset(azure_dataset_id: str,
AzureML dataset attached to the given AzureML workspace. The dataset is downloaded into the `target_folder`,
in a subfolder that has the same name as the dataset. If there already appears to be such a folder, and the folder
contains a dataset.csv file, no download is started.
:param local_dataset: The path to an existing local dataset.
:param azure_dataset_id: The name of a dataset that is registered in the AzureML workspace.
:param target_folder: The folder in which to download the dataset from Azure.
:param azure_config: All Azure-related configuration options.
:return: A path on the local machine that contains the dataset.
"""
try:
downloaded_via_blobxfer = download_dataset_via_blobxfer(dataset_id=azure_dataset_id,
azure_config=azure_config,
target_folder=target_folder)
if downloaded_via_blobxfer:
return downloaded_via_blobxfer
except Exception as ex:
print_exception(ex, message="Unable to download dataset via blobxfer.")
logging.info("Trying to download dataset via AzureML datastore now.")
azure_dataset = get_or_create_dataset(azure_config, azure_dataset_id)
if not isinstance(azure_dataset, FileDataset):
@@ -136,7 +88,10 @@ def download_dataset(azure_dataset_id: str,
return expected_dataset_path
logging.info("Starting to download the dataset - WARNING, this could take very long!")
with logging_section("Downloading dataset"):
t0 = time.perf_counter()
azure_dataset.download(target_path=str(expected_dataset_path), overwrite=False)
t1 = time.perf_counter() - t0
logging.info(f"Azure dataset '{azure_dataset_id}' downloaded in {t1} seconds")
logging.info(f"Azure dataset '{azure_dataset_id}' is now available in {expected_dataset_path}")
return expected_dataset_path

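download_dataset above delegates to a get_or_create_dataset helper defined elsewhere in the repository. As a hedged sketch only, not the project's actual implementation, such a helper typically resolves the name against the workspace and falls back to registering a FileDataset rooted in the datastore:

from azureml.core import Dataset, Datastore, Workspace
from azureml.data.file_dataset import FileDataset

from InnerEye.Azure.azure_config import AzureConfig


def get_or_create_dataset_sketch(azure_config: AzureConfig, azure_dataset_id: str) -> FileDataset:
    workspace: Workspace = azure_config.get_workspace()
    try:
        # Re-use the dataset if it is already registered under this name.
        return Dataset.get_by_name(workspace, name=azure_dataset_id)
    except Exception:
        # Otherwise point a FileDataset at the matching folder in the datastore and register it.
        datastore = Datastore.get(workspace, azure_config.azureml_datastore)
        dataset = Dataset.File.from_files(path=(datastore, azure_dataset_id))
        return dataset.register(workspace, name=azure_dataset_id)
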
View file

@@ -1,177 +0,0 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from __future__ import annotations
import logging
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import blobxfer
import blobxfer.models.azure as azmodels
from blobxfer.api import AzureStorageCredentials, ConcurrencyOptions, DownloadOptions, GeneralOptions, SkipOnOptions, \
UploadOptions
from blobxfer.models.options import FileProperties, Timeout, VectoredIo
from blobxfer.models.upload import VectoredIoDistributionMode
from InnerEye.Azure.azure_util import storage_account_from_full_name, to_azure_friendly_container_path
# Azure storage is extremely talkative, printing out each client request (thousands of them)
logger = logging.getLogger('azure.storage')
logger.setLevel(logging.WARNING)
# Blobxfer also prints at single line per file at least.
logger = logging.getLogger('blobxfer')
logger.setLevel(logging.WARNING)
@dataclass
class BlobXFerConfig:
"""
Class to hold blobxfer configuration and helper functions for downloading from and
uploading to Azure.
"""
account_name: str
account_key: str
concurrency: ConcurrencyOptions
timeout: Timeout
general: GeneralOptions
file_properties: FileProperties
skipon_options: SkipOnOptions
@staticmethod
def create_default(account: str, account_key: str) -> BlobXFerConfig:
"""
Returns the default configuration.
:param account: Name of the Azure storage account
:param account_key: Key to this storage account
:return: default BlobXFerConfig
"""
concurrency = ConcurrencyOptions(crypto_processes=2, md5_processes=2, disk_threads=16,
transfer_threads=16, action=1)
timeout = Timeout(connect=20, read=60, max_retries=3)
general = GeneralOptions(concurrency, progress_bar=False, verbose=False, timeout=timeout, quiet=True)
file_properties = FileProperties(attributes=False, cache_control=None, content_type=None,
lmt=False, md5=None)
skipon_options = SkipOnOptions(filesize_match=True, lmt_ge=True, md5_match=True)
return BlobXFerConfig(
account_name=storage_account_from_full_name(account),
account_key=account_key,
concurrency=concurrency,
timeout=timeout,
general=general,
file_properties=file_properties,
skipon_options=skipon_options
)
def get_download_options(self, num_folders_to_strip: int = 0) -> DownloadOptions:
"""
Returns a blobxfer DownloadOptions object.
:param num_folders_to_strip: The filenames will be stripped off their leading directories, up to this level e.g.
if original path is 'container/foo/1.txt' and number_folders_to_strip is 2, and destination folder is 'bar',
the downloaded file will be 'bar/1.txt'
"""
return DownloadOptions(check_file_md5=True,
chunk_size_bytes=4194304,
delete_extraneous_destination=False,
delete_only=False,
max_single_object_concurrency=8,
mode=azmodels.StorageModes.Auto,
overwrite=True,
recursive=True,
rename=False,
restore_file_properties=self.file_properties,
rsa_private_key=None,
strip_components=num_folders_to_strip)
def get_upload_options(self, num_folders_to_strip: int = 0) -> UploadOptions:
"""
Returns a UploadOptions object.
:param num_folders_to_strip: The filenames will be stripped off their leading directories, up to this level e.g.
if original path is 'container/foo/1.txt' and number_folders_to_strip is 2, and destination folder is 'bar',
the downloaded file will be 'bar/1.txt'
"""
return UploadOptions(
access_tier=None,
one_shot_bytes=33554432,
rsa_public_key=None,
stdin_as_page_blob_size=0,
store_file_properties=self.file_properties,
vectored_io=VectoredIo(
stripe_chunk_size_bytes=0,
distribution_mode=VectoredIoDistributionMode.Disabled
),
chunk_size_bytes=4194304,
delete_extraneous_destination=False,
delete_only=False,
mode=azmodels.StorageModes.Auto,
overwrite=True,
recursive=True,
rename=False,
strip_components=num_folders_to_strip
)
def get_credentials(self) -> AzureStorageCredentials:
credentials = AzureStorageCredentials(self.general)
credentials.add_storage_account(self.account_name, self.account_key, endpoint="core.windows.net")
return credentials
def download_blobs(account: str, account_key: str, blobs_root_path: str, destination: Path,
is_file: bool = False, config: Optional[BlobXFerConfig] = None) -> Path:
"""
Download a given set of files in Azure blob storage to the local destination path, via blobxfer.
:param account: The name of the storage account to access the files.
:param account_key: The key for the storage account.
:param blobs_root_path: The path of the files that should be downloaded. This must be in format
'container/file_prefix/', ending with a slash (will be added if not provided and is_file is False).
:param destination: The destination folder for the copied files on the local machine.
:param is_file: If True then only a single file is required to be downloaded
:param config: BlobXFerConfig to use for download configuration, use default presets if None.
The filenames will be stripped off their leading directories, up to the level given by blobs_root_path.
For example, if blobs_root_path is 'container/foo/'
and contains a file 'container/foo/1.txt', and destination is 'bar', the downloaded file will be 'bar/1.txt'
"""
if not config:
config = BlobXFerConfig.create_default(account=account, account_key=account_key)
start_time = time.time()
# the account name can be an Azure Resource ID so extract the name from it if this is the case
logging.info(f"Downloading '{blobs_root_path}' from storage account {config.account_name} to {destination}")
blobs_root_path = to_azure_friendly_container_path(Path(blobs_root_path))
if not (blobs_root_path.endswith("/") or is_file):
blobs_root_path += "/"
blobs_root_path_dirs = blobs_root_path.rstrip("/").split("/")
num_folders_to_strip = len(blobs_root_path_dirs) - 1
blobs_path_without_container = "/".join(blobs_root_path_dirs[1:])
logging.info(f"Cleaned download path: '{blobs_root_path}' from storage account {config.account_name}")
download = config.get_download_options(num_folders_to_strip)
local_path = blobxfer.api.LocalDestinationPath(str(destination))
# noinspection PyTypeChecker
download_spec = blobxfer.api.DownloadSpecification(download, config.skipon_options, local_path)
source = blobxfer.api.AzureSourcePath()
source.add_path_with_storage_account(blobs_root_path, config.account_name)
if not is_file:
source.add_includes([f"{blobs_path_without_container}/*"])
download_spec.add_azure_source_path(source)
# noinspection PyTypeChecker
downloader = blobxfer.api.Downloader(config.general, config.get_credentials(), download_spec)
downloader.start()
elapsed = time.time() - start_time
logging.info(f"Finished downloading in {elapsed:0.2f}sec.")
if is_file:
destination = destination / Path(blobs_root_path).name
if destination.exists():
return destination
raise ValueError(f"Unable to download {blobs_root_path} from "
f"storage account {config.account_name} to {destination}")
else:
return destination

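The num_folders_to_strip / strip_components behaviour described in the deleted docstrings above is the part most easily misread. A standalone illustration in plain Python (no blobxfer involved) of what stripping two leading components does to a blob path:

from pathlib import Path


def strip_components(blob_path: str, count: int) -> Path:
    # Drop the first 'count' path components, mirroring blobxfer's strip_components option.
    return Path(*Path(blob_path).parts[count:])


# 'container/foo/1.txt' with two components stripped lands at 'bar/1.txt' under destination 'bar'.
assert strip_components("container/foo/1.txt", 2) == Path("1.txt")
print(Path("bar") / strip_components("container/foo/1.txt", 2))  # bar/1.txt
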
View file

@@ -2,7 +2,6 @@ variables:
tenant_id: '72f988bf-86f1-41af-91ab-2d7cd011db47'
subscription_id: ''
application_id: ''
datasets_storage_account: 'innereyepublicdatasets'
datasets_container: 'datasets'
azureml_datastore: 'innereyedatasets'
resource_group: 'InnerEye-DeepLearning'

View file

@@ -86,7 +86,7 @@ def test_create_runner_parser(with_config: bool) -> None:
azure_parser = create_runner_parser(SegmentationModelBase if with_config else None)
args_list = ["--model=Lung", "--train=False", "--l_rate=100.0",
"--unknown=1", "--subscription_id", "Test1", "--tenant_id=Test2",
"--application_id", "Test3", "--datasets_storage_account=Test4",
"--application_id", "Test3",
"--log_level=INFO",
# Normally we don't use extra index URLs in InnerEye, hence this won't be set in YAML.
"--pip_extra_index_url=foo"]
@@ -96,7 +96,6 @@ def test_create_runner_parser(with_config: bool) -> None:
# These values have been set on the commandline, to values that are not the parser defaults.
non_default_args = {
"datasets_storage_account": "Test4",
"train": False,
"model": "Lung",
"subscription_id": "Test1",

View file

@@ -34,7 +34,7 @@ def test_create_ml_runner_args(is_default_namespace: bool,
args_list = [f"--model={model_name}", "--train=True", "--l_rate=100.0",
"--norm_method=Simple Norm", "--subscription_id", "Test1", "--tenant_id=Test2",
"--application_id", "Test3", "--datasets_storage_account=Test4", "--datasets_container", "Test5",
"--application_id", "Test3", "--datasets_container", "Test5",
"--pytest_mark", "gpu", f"--output_to={outputs_folder}"]
if not is_default_namespace:
args_list.append(f"--model_configs_namespace={model_configs_namespace}")
@@ -45,7 +45,6 @@ def test_create_ml_runner_args(is_default_namespace: bool,
runner.parse_and_load_model()
azure_config = runner.azure_config
model_config = runner.model_config
assert azure_config.datasets_storage_account == "Test4"
assert azure_config.model == model_name
assert model_config.l_rate == 100.0
assert model_config.norm_method == PhotometricNormalizationMethod.SimpleNorm
@@ -60,7 +59,7 @@ def test_create_ml_runner_args(is_default_namespace: bool,
assert model_config.outputs_folder == (project_root / DEFAULT_AML_UPLOAD_DIR)
assert model_config.logs_folder == (project_root / DEFAULT_LOGS_DIR_NAME)
assert not hasattr(model_config, "datasets_storage_account")
assert not hasattr(model_config, "datasets_container")
assert azure_config.pytest_mark == "gpu"
@@ -126,7 +125,6 @@ def test_parsing_with_custom_yaml(test_output_dirs: OutputFolderForTests) -> Non
yaml_file = test_output_dirs.root_dir / "custom.yml"
yaml_file.write_text("""variables:
tenant_id: 'foo'
datasets_storage_account: 'account'
start_epoch: 7
random_seed: 1
""")
@@ -143,7 +141,6 @@ def test_parsing_with_custom_yaml(test_output_dirs: OutputFolderForTests) -> Non
assert loader_result is not None
assert runner.azure_config is not None
# This is only present in yaml
assert runner.azure_config.datasets_storage_account == "account"
# This is present in yaml and command line, and the latter should be used.
assert runner.azure_config.tenant_id == "bar"
# Settings in model config: start_epoch is only in yaml

View file

@@ -12,11 +12,9 @@ from InnerEye.Azure.azure_util import fetch_child_runs, fetch_run, get_results_b
from InnerEye.Common import common_util, fixed_paths
from InnerEye.Common.common_util import OTHER_RUNS_SUBDIR_NAME, logging_section, logging_to_stdout
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML import run_ml
from InnerEye.ML.common import CHECKPOINT_FILE_SUFFIX, DATASET_CSV_FILE_NAME
from InnerEye.ML.model_config_base import ModelConfigBase
from InnerEye.ML.run_ml import MLRunner
from InnerEye.ML.utils.blobxfer_util import download_blobs
from InnerEye.ML.utils.run_recovery import RunRecovery
from Tests.Common.test_util import DEFAULT_ENSEMBLE_RUN_RECOVERY_ID, DEFAULT_RUN_RECOVERY_ID
from Tests.ML.util import get_default_azure_config
@@ -128,44 +126,3 @@ def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> Non
for file in ["ct", "esophagus", "heart", "lung_l", "lung_r", "spinalcord"]:
f = (sub_folder / file).with_suffix(".nii.gz")
assert f.is_file()
def test_download_dataset_via_blobxfer(test_output_dirs: OutputFolderForTests) -> None:
azure_config = get_default_azure_config()
result_path = run_ml.download_dataset_via_blobxfer(dataset_id="test-dataset",
azure_config=azure_config,
target_folder=test_output_dirs.root_dir)
assert result_path
assert result_path.is_dir()
dataset_csv = Path(result_path) / DATASET_CSV_FILE_NAME
assert dataset_csv.exists()
@pytest.mark.parametrize("is_file", [True, False])
def test_download_blobxfer(test_output_dirs: OutputFolderForTests, is_file: bool, runner_config: AzureConfig) -> None:
"""
Test for a bug in early versions of download_blobs: download is happening via prefixes, but because of
stripping leading directory names, blobs got overwritten.
"""
root = test_output_dirs.root_dir
account_key = runner_config.get_dataset_storage_account_key()
assert account_key is not None
# Expected test data in Azure blobs:
# folder1/folder1.txt with content "folder1.txt"
# folder1_with_suffix/folder2.txt with content "folder2.txt"
# folder1_with_suffix/folder1.txt with content "this comes from folder2"
# with bug present, folder1_with_suffix/folder1.txt will overwrite folder1/folder1.txt
blobs_root_path = "data-for-testsuite/folder1"
if is_file:
blobs_root_path += "/folder1.txt"
download_blobs(runner_config.datasets_storage_account, account_key, blobs_root_path, root, is_file)
folder1 = root / "folder1.txt"
assert folder1.exists()
if not is_file:
otherfile = root / "otherfile.txt"
folder2 = root / "folder2.txt"
assert folder1.read_text().strip() == "folder1.txt"
assert otherfile.exists()
assert otherfile.read_text().strip() == "folder1.txt"
assert not folder2.exists()

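The deleted test above documents the bug it guarded against: a blob prefix without a trailing slash also matches sibling folders, and once the leading directories are stripped their files collide locally. A plain-Python illustration of that collision:

from pathlib import Path

blobs = [
    "data-for-testsuite/folder1/folder1.txt",
    "data-for-testsuite/folder1_with_suffix/folder1.txt",
    "data-for-testsuite/folder1_with_suffix/folder2.txt",
]
prefix = "data-for-testsuite/folder1"  # no trailing slash: also matches folder1_with_suffix
matched = [b for b in blobs if b.startswith(prefix)]
# Stripping the two leading components maps both folder1.txt blobs onto the same local file name.
local_names = [str(Path(*Path(b).parts[2:])) for b in matched]
print(local_names)  # ['folder1.txt', 'folder1.txt', 'folder2.txt'] -> the first file gets overwritten
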
View file

@@ -14,7 +14,6 @@ dependencies:
- azureml-mlflow==1.17.0
- azureml-sdk==1.17.0
- azureml-tensorboard==1.17.0
- blobxfer==1.9.4
- conda-merge==0.1.5
- dataclasses-json==0.5.2
- flake8==3.8.3
@@ -29,6 +28,7 @@ dependencies:
- mypy==0.770
- mypy-extensions==0.4.3
- numpy==1.19.1
- numba==0.51.2
- pandas==1.1.0
- papermill==2.2.2
- param==1.9.3