From e6f792151bf77c4c92fd4725f0723c02a25c98b5 Mon Sep 17 00:00:00 2001 From: Hong Ooi Date: Sun, 10 Feb 2019 17:35:50 +1100 Subject: [PATCH] Generics (#15) * first commit * add tests * better filenames * documenting * update readme * rewording * missed one unrename * one more doc pass * fixup * really fixup --- NAMESPACE | 56 +++++- NEWS.md | 7 +- R/adls_client_funcs.R | 7 +- R/{storage.R => az_storage.R} | 0 R/azcopy.R | 1 + R/blob_client_funcs.R | 7 +- R/{client.R => client_endpoint.R} | 110 ------------ R/client_generics.R | 283 ++++++++++++++++++++++++++++++ R/file_client_funcs.R | 7 +- R/storage_utils.R | 9 +- R/transfer_generics.R | 165 +++++++++++++++++ README.md | 84 +++++++-- man/adls.Rd | 2 +- man/adls_filesystem.Rd | 2 +- man/az_storage.Rd | 2 +- man/azcopy.Rd | 1 + man/blob.Rd | 2 +- man/blob_container.Rd | 2 +- man/file.Rd | 2 +- man/file_share.Rd | 2 +- man/file_transfer.Rd | 104 ++++++++--- man/generics.Rd | 194 ++++++++++++++++++++ man/storage_endpoint.Rd | 2 +- tests/testthat/test05_generics.R | 126 +++++++++++++ 24 files changed, 1003 insertions(+), 174 deletions(-) rename R/{storage.R => az_storage.R} (100%) rename R/{client.R => client_endpoint.R} (65%) create mode 100644 R/client_generics.R create mode 100644 R/transfer_generics.R create mode 100644 man/generics.Rd create mode 100644 tests/testthat/test05_generics.R diff --git a/NAMESPACE b/NAMESPACE index 245564d..3c8f5c5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,14 @@ S3method(create_blob_container,character) S3method(create_file_share,character) S3method(create_file_share,file_endpoint) S3method(create_file_share,file_share) +S3method(create_storage_container,adls_endpoint) +S3method(create_storage_container,blob_endpoint) +S3method(create_storage_container,character) +S3method(create_storage_container,file_endpoint) +S3method(create_storage_container,storage_container) +S3method(create_storage_dir,adls_filesystem) +S3method(create_storage_dir,blob_container) 
+S3method(create_storage_dir,file_share) S3method(delete_adls_filesystem,adls_endpoint) S3method(delete_adls_filesystem,adls_filesystem) S3method(delete_adls_filesystem,character) @@ -22,6 +30,17 @@ S3method(delete_blob_container,character) S3method(delete_file_share,character) S3method(delete_file_share,file_endpoint) S3method(delete_file_share,file_share) +S3method(delete_storage_container,adls_endpoint) +S3method(delete_storage_container,blob_endpoint) +S3method(delete_storage_container,character) +S3method(delete_storage_container,file_endpoint) +S3method(delete_storage_container,storage_container) +S3method(delete_storage_dir,adls_filesystem) +S3method(delete_storage_dir,blob_container) +S3method(delete_storage_dir,file_share) +S3method(delete_storage_file,adls_filesystem) +S3method(delete_storage_file,blob_container) +S3method(delete_storage_file,file_share) S3method(file_share,character) S3method(file_share,file_endpoint) S3method(get_storage_properties,blob_container) @@ -33,11 +52,34 @@ S3method(list_blob_containers,blob_endpoint) S3method(list_blob_containers,character) S3method(list_file_shares,character) S3method(list_file_shares,file_endpoint) +S3method(list_storage_containers,adls_endpoint) +S3method(list_storage_containers,blob_endpoint) +S3method(list_storage_containers,character) +S3method(list_storage_containers,file_endpoint) +S3method(list_storage_files,adls_filesystem) +S3method(list_storage_files,blob_container) +S3method(list_storage_files,file_share) S3method(print,adls_endpoint) S3method(print,adls_filesystem) S3method(print,blob_container) S3method(print,file_share) S3method(print,storage_endpoint) +S3method(storage_container,adls_endpoint) +S3method(storage_container,blob_endpoint) +S3method(storage_container,character) +S3method(storage_container,file_endpoint) +S3method(storage_download,adls_filesystem) +S3method(storage_download,blob_container) +S3method(storage_download,file_share) +S3method(storage_multidownload,adls_filesystem) 
+S3method(storage_multidownload,blob_container) +S3method(storage_multidownload,file_share) +S3method(storage_multiupload,adls_filesystem) +S3method(storage_multiupload,blob_container) +S3method(storage_multiupload,file_share) +S3method(storage_upload,adls_filesystem) +S3method(storage_upload,blob_container) +S3method(storage_upload,file_share) export(acquire_lease) export(adls_endpoint) export(adls_filesystem) @@ -53,6 +95,8 @@ export(create_adls_filesystem) export(create_azure_dir) export(create_blob_container) export(create_file_share) +export(create_storage_container) +export(create_storage_dir) export(delete_adls_dir) export(delete_adls_file) export(delete_adls_filesystem) @@ -62,10 +106,12 @@ export(delete_blob) export(delete_blob_container) export(delete_file_share) export(delete_pool) +export(delete_storage_container) +export(delete_storage_dir) +export(delete_storage_file) export(download_adls_file) export(download_azure_file) export(download_blob) -export(download_from_azure) export(download_from_url) export(file_endpoint) export(file_share) @@ -80,6 +126,8 @@ export(list_azure_files) export(list_blob_containers) export(list_blobs) export(list_file_shares) +export(list_storage_containers) +export(list_storage_files) export(multidownload_adls_file) export(multidownload_azure_file) export(multidownload_blob) @@ -88,11 +136,15 @@ export(multiupload_azure_file) export(multiupload_blob) export(release_lease) export(renew_lease) +export(storage_container) +export(storage_download) export(storage_endpoint) +export(storage_multidownload) +export(storage_multiupload) +export(storage_upload) export(upload_adls_file) export(upload_azure_file) export(upload_blob) -export(upload_to_azure) export(upload_to_url) import(AzureRMR) importFrom(utils,URLencode) diff --git a/NEWS.md b/NEWS.md index 0a58faa..5fc8128 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,12 +5,15 @@ * Support authentication via Azure Active Directory tokens for blob and ADLSgen2 storage. 
* Support uploading and downloading to in-memory R objects, without having to create a temporary file. Uploading can be done with `src` a `rawConnection` or `textConnection` object. For downloading, if `dest` is `NULL`, the downloaded data is returned as a raw vector, or if `dest` is a `rawConnection`, in the connection object. See the examples in the documentation. * Implement parallel file transfers using a background pool of R processes. This can significantly speed up transfers when working with multiple small files. -* Rename `upload_to_url`/`download_from_url` to `upload_to_azure` and `download_from_azure` respectively, to emphasise that these functions are for interacting with Azure storage, not websites in general. The old functions will still work, but print a warning. * Experimental support for using the Microsoft AzCopy commandline utility to perform file transfers. Set the argument `use_azcopy=TRUE` in any upload or download function to call AzCopy rather than relying on internal R code. The `call_azcopy` function also allows you to run AzCopy with arbitrary arguments. Requires [AzCopy version 10](https://github.com/Azure/azure-storage-azcopy). +* New generics for storage operations: + - `storage_container`, `create_storage_container`, `delete_storage_container`, `list_storage_containers` for managing containers (blob containers, file shares, ADLSgen2 filesystems) + - `storage_upload`, `storage_download`, `storage_multiupload`, `storage_multidownload` for file transfers + - `list_storage_files`, `create_storage_dir`, `delete_storage_dir`, `delete_storage_file` for managing objects within a container ## Other changes -* Add ADLS upload/download support to `upload_to_azure` and `download_from_azure`. +* Add ADLS upload/download support to `upload_to_url` and `download_from_url`. * Set default blocksize for `upload_azure_file` to 4MB, the maximum permitted by the API (#5). 
* Allow resource group and subscription accessor methods to work when AzureStor is not on the search path. diff --git a/R/adls_client_funcs.R b/R/adls_client_funcs.R index b1ebaa5..33dc776 100644 --- a/R/adls_client_funcs.R +++ b/R/adls_client_funcs.R @@ -22,7 +22,8 @@ #' #' For `list_adls_filesystems`, a list of such objects. #' -#' @seealso [storage_endpoint], [az_storage] +#' @seealso +#' [storage_endpoint], [az_storage], [storage_container] #' #' @examples #' \dontrun{ @@ -64,7 +65,7 @@ adls_filesystem.character <- function(endpoint, key=NULL, token=NULL, sas=NULL, adls_filesystem.adls_endpoint <- function(endpoint, name, ...) { obj <- list(name=name, endpoint=endpoint) - class(obj) <- "adls_filesystem" + class(obj) <- c("adls_filesystem", "storage_container") obj } @@ -230,7 +231,7 @@ delete_adls_filesystem.adls_endpoint <- function(endpoint, name, confirm=TRUE, . #' For `download_adls_file`, if `dest=NULL`, the contents of the downloaded file as a raw vector. #' #' @seealso -#' [adls_filesystem], [az_storage] +#' [adls_filesystem], [az_storage], [storage_download], [call_azcopy] #' #' @examples #' \dontrun{ diff --git a/R/storage.R b/R/az_storage.R similarity index 100% rename from R/storage.R rename to R/az_storage.R diff --git a/R/azcopy.R b/R/azcopy.R index a401bf6..0994686 100644 --- a/R/azcopy.R +++ b/R/azcopy.R @@ -16,6 +16,7 @@ #' [AzCopy page on Microsoft Docs](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10) #' #' [AzCopy GitHub repo](https://github.com/Azure/azure-storage-azcopy) +#' @aliases azcopy #' @rdname azcopy #' @export call_azcopy <- function(...) diff --git a/R/blob_client_funcs.R b/R/blob_client_funcs.R index b2ce1dd..ade5269 100644 --- a/R/blob_client_funcs.R +++ b/R/blob_client_funcs.R @@ -24,7 +24,8 @@ #' #' For `list_blob_containers`, a list of such objects. 
#' -#' @seealso [storage_endpoint], [az_storage] +#' @seealso +#' [storage_endpoint], [az_storage], [storage_container] #' #' @examples #' \dontrun{ @@ -73,7 +74,7 @@ blob_container.character <- function(endpoint, key=NULL, token=NULL, sas=NULL, blob_container.blob_endpoint <- function(endpoint, name, ...) { obj <- list(name=name, endpoint=endpoint) - class(obj) <- "blob_container" + class(obj) <- c("blob_container", "storage_container") obj } @@ -246,7 +247,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le #' For `list_blobs`, details on the blobs in the container. For `download_blob`, if `dest=NULL`, the contents of the downloaded blob as a raw vector. #' #' @seealso -#' [blob_container], [az_storage] +#' [blob_container], [az_storage], [storage_download], [call_azcopy] #' #' [AzCopy version 10 on GitHub](https://github.com/Azure/azure-storage-azcopy) #' diff --git a/R/client.R b/R/client_endpoint.R similarity index 65% rename from R/client.R rename to R/client_endpoint.R index 8da392e..ae19e9f 100644 --- a/R/client.R +++ b/R/client_endpoint.R @@ -164,113 +164,3 @@ print.adls_endpoint <- function(x, ...) invisible(x) } - - -#' Generic upload and download -#' -#' @param src,dest The source and destination files/URLs. Paths are allowed. -#' @param key,token,sas Authentication arguments: an access key, Azure Active Directory (AAD) token or a shared access signature (SAS). If multiple arguments are supplied, a key takes priority over a token, which takes priority over a SAS. -#' @param ... Further arguments to pass to lower-level functions. -#' @param overwrite For downloading, whether to overwrite any destination files that exist. -#' -#' @details -#' These functions allow you to transfer files to and from a storage account, given the URL of the destination (for uploading) or source (for downloading). 
They dispatch to [upload_azure_file]/[download_azure_file] for a file storage URL, [upload_blob]/[download_blob] for a blob storage URL, and [upload_adls_file]/[download_adls_file] for an ADLSgen2 URL respectively. -#' -#' You can provide a SAS either as part of the URL itself, or in the `sas` argument. -#' -#' @seealso -#' [download_azure_file], [download_blob], [az_storage] -#' -#' @examples -#' \dontrun{ -#' -#' # authenticated download with an access key -#' download_from_azure("https://mystorage.blob.core.windows.net/mycontainer/bigfile.zip", -#' "~/bigfile.zip", -#' key="access_key") -#' -#' } -#' @rdname file_transfer -#' @export -download_from_azure <- function(src, dest, key=NULL, token=NULL, sas=NULL, ..., overwrite=FALSE) -{ - az_path <- parse_storage_url(src) - if(is.null(sas)) - sas <- find_sas(src) - endpoint <- storage_endpoint(az_path[1], key=key, token=token, sas=sas, ...) - - if(inherits(endpoint, "blob_endpoint")) - { - cont <- blob_container(endpoint, az_path[2]) - download_blob(cont, az_path[3], dest, overwrite=overwrite) - } - else if(inherits(endpoint, "file_endpoint")) - { - share <- file_share(endpoint, az_path[2]) - download_azure_file(share, az_path[3], dest, overwrite=overwrite) - } - else if(inherits(endpoint, "adls_endpoint")) - { - fs <- adls_filesystem(endpoint, az_path[2]) - download_adls_file(fs, az_path[3], dest, overwrite=overwrite) - } - else stop("Unknown storage endpoint", call.=FALSE) -} - - -#' @rdname file_transfer -#' @export -upload_to_azure <- function(src, dest, key=NULL, token=token, sas=NULL, ...) -{ - az_path <- parse_storage_url(dest) - if(is.null(sas)) - sas <- find_sas(dest) - endpoint <- storage_endpoint(az_path[1], key=key, token=token, sas=sas, ...) 
- - if(inherits(endpoint, "blob_endpoint")) - { - cont <- blob_container(endpoint, az_path[2]) - upload_blob(cont, src, az_path[3]) - } - else if(inherits(endpoint, "file_endpoint")) - { - share <- file_share(endpoint, az_path[2]) - upload_azure_file(share, src, az_path[3]) - } - else if(inherits(endpoint, "adls_endpoint")) - { - fs <- adls_endpoint(endpoint, az_path[2]) - upload_adls_file(fs, src, az_path[3]) - } - else stop("Unknown storage endpoint", call.=FALSE) -} - - -find_sas <- function(url) -{ - querymark <- regexpr("\\?sv", url) - if(querymark == -1) - NULL - else substr(url, querymark + 1, nchar(url)) -} - - -## deprecate old functions - -#' @rdname file_transfer -#' @export -download_from_url <- function(src, dest, key=NULL, token=NULL, sas=NULL, ..., overwrite=FALSE) -{ - .Deprecated("download_from_azure") - download_from_azure(src, dest, key=key, token=token, sas=sas, ..., overwrite=overwrite) -} - - -#' @rdname file_transfer -#' @export -upload_to_url <- function(src, dest, key=NULL, token=NULL, sas=NULL, ...) -{ - .Deprecated("upload_to_azure") - upload_to_azure(src, dest, key=key, token=token, sas=sas, ...) -} - diff --git a/R/client_generics.R b/R/client_generics.R new file mode 100644 index 0000000..3a9d808 --- /dev/null +++ b/R/client_generics.R @@ -0,0 +1,283 @@ +#' Storage client generics +#' +#' @param endpoint A storage endpoint object, or for the character methods, a string giving the full URL to the container. +#' @param container A storage container object. +#' @param key,token,sas For the character methods, authentication credentials for the container: either an access key, an Azure Active Directory (AAD) token, or a SAS. If multiple arguments are supplied, a key takes priority over a token, which takes priority over a SAS. +#' @param name For the storage container management methods, a container name. +#' @param file,dir For the storage object management methods, a file or directory name. 
+#' @param confirm For the deletion methods, whether to ask for confirmation first. +#' @param ... Further arguments to pass to lower-level functions. +#' +#' @details +#' These methods provide a framework for all storage management tasks supported by AzureStor. They dispatch to the appropriate functions for each type of storage. +#' +#' Storage container management methods: +#' - `storage_container` dispatches to `blob_container`, `file_share` or `adls_filesystem` +#' - `create_storage_container` dispatches to `create_blob_container`, `create_file_share` or `create_adls_filesystem` +#' - `delete_storage_container` dispatches to `delete_blob_container`, `delete_file_share` or `delete_adls_filesystem` +#' - `list_storage_containers` dispatches to `list_blob_containers`, `list_file_shares` or `list_adls_filesystems` +#' +#' Storage object management methods: +#' - `list_storage_files` dispatches to `list_blobs`, `list_azure_files` or `list_adls_files` +#' - `create_storage_dir` dispatches to `create_azure_dir` or `create_adls_dir`; throws an error if passed a blob container +#' - `delete_storage_dir` dispatches to `delete_azure_dir` or `delete_adls_dir`; throws an error if passed a blob container +#' - `delete_storage_file` dispatches to `delete_blob`, `delete_azure_file` or `delete_adls_file` +#' +#' @seealso +#' [storage_endpoint], [blob_container], [file_share], [adls_filesystem] +#' +#' [list_blobs], [list_azure_files], [list_adls_files] +#' +#' Similar generics exist for file transfer methods; see the page for [storage_download]. 
+#' +#' @examples +#' \dontrun{ +#' +#' # storage endpoints for the one account +#' bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key") +#' fl <- storage_endpoint("https://mystorage.file.core.windows.net/", key="access_key") +#' +#' list_storage_containers(bl) +#' list_storage_containers(fl) +#' +#' # creating containers +#' cont <- create_storage_container(bl, "newblobcontainer") +#' fs <- create_storage_container(fl, "newfileshare") +#' +#' # creating directories (if possible) +#' create_storage_dir(cont, "newdir") # will error out +#' create_storage_dir(fs, "newdir") +#' +#' # transfer a file +#' storage_upload(cont, "~/file.txt", "storage_file.txt") +#' storage_upload(fs, "~/file.txt", "newdir/storage_file.txt") +#' +#' } +#' @aliases storage_generics +#' @rdname generics +#' @export +storage_container <- function(endpoint, ...) +UseMethod("storage_container") + +#' @rdname generics +#' @export +storage_container.blob_endpoint <- function(endpoint, name, ...) +blob_container(endpoint, name, ...) + +#' @rdname generics +#' @export +storage_container.file_endpoint <- function(endpoint, name, ...) +file_share(endpoint, name, ...) + +#' @rdname generics +#' @export +storage_container.adls_endpoint <- function(endpoint, name, ...) +adls_filesystem(endpoint, name, ...) + +#' @rdname generics +#' @export +storage_container.character <- function(endpoint, key=NULL, token=NULL, sas=NULL, ...) +{ + lst <- parse_storage_url(endpoint) + endpoint <- storage_endpoint(lst[[1]], key=key, token=token, sas=sas, ...) + storage_container(endpoint, lst[[2]]) +} + + +# create container + +#' @rdname generics +#' @export +create_storage_container <- function(endpoint, ...) +UseMethod("create_storage_container") + +#' @rdname generics +#' @export +create_storage_container.blob_endpoint <- function(endpoint, name, ...) +create_blob_container(endpoint, name, ...) 
+ +#' @rdname generics +#' @export +create_storage_container.file_endpoint <- function(endpoint, name, ...) +create_file_share(endpoint, name, ...) + +#' @rdname generics +#' @export +create_storage_container.adls_endpoint <- function(endpoint, name, ...) +create_adls_filesystem(endpoint, name, ...) + +#' @rdname generics +#' @export +create_storage_container.storage_container <- function(endpoint, ...) +create_storage_container(endpoint$endpoint, endpoint$name, ...) + +#' @rdname generics +#' @export +create_storage_container.character <- function(endpoint, key=NULL, token=NULL, sas=NULL, ...) +{ + lst <- parse_storage_url(endpoint) + endpoint <- storage_endpoint(lst[[1]], key=key, token=token, sas=sas, ...) + create_storage_container(endpoint, lst[[2]]) +} + + +# delete container + +#' @rdname generics +#' @export +delete_storage_container <- function(endpoint, ...) +UseMethod("delete_storage_container") + +#' @rdname generics +#' @export +delete_storage_container.blob_endpoint <- function(endpoint, name, ...) +delete_blob_container(endpoint, name, ...) + +#' @rdname generics +#' @export +delete_storage_container.file_endpoint <- function(endpoint, name, ...) +delete_file_share(endpoint, name, ...) + +#' @rdname generics +#' @export +delete_storage_container.adls_endpoint <- function(endpoint, name, ...) +delete_adls_filesystem(endpoint, name, ...) + +#' @rdname generics +#' @export +delete_storage_container.storage_container <- function(endpoint, ...) +delete_storage_container(endpoint$endpoint, endpoint$name, ...) + +#' @rdname generics +#' @export +delete_storage_container.character <- function(endpoint, key=NULL, token=NULL, sas=NULL, confirm=TRUE, ...) +{ + lst <- parse_storage_url(endpoint) + endpoint <- storage_endpoint(lst[[1]], key=key, token=token, sas=sas, ...) + delete_storage_container(endpoint, lst[[2]], confirm=confirm) +} + + +# list containers + +#' @rdname generics +#' @export +list_storage_containers <- function(endpoint, ...) 
+UseMethod("list_storage_containers") + +#' @rdname generics +#' @export +list_storage_containers.blob_endpoint <- function(endpoint, ...) +list_blob_containers(endpoint, ...) + +#' @rdname generics +#' @export +list_storage_containers.file_endpoint <- function(endpoint, ...) +list_file_shares(endpoint, ...) + +#' @rdname generics +#' @export +list_storage_containers.adls_endpoint <- function(endpoint, ...) +list_adls_filesystems(endpoint, ...) + +#' @rdname generics +#' @export +list_storage_containers.character <- function(endpoint, key=NULL, token=NULL, sas=NULL, ...) +{ + lst <- parse_storage_url(endpoint) + endpoint <- storage_endpoint(lst[[1]], key=key, token=token, sas=sas, ...) + list_storage_containers(endpoint, lst[[2]]) +} + + +# list files + +#' @rdname generics +#' @export +list_storage_files <- function(container, ...) +UseMethod("list_storage_files") + +#' @rdname generics +#' @export +list_storage_files.blob_container <- function(container, ...) +list_blobs(container, ...) + +#' @rdname generics +#' @export +list_storage_files.file_share <- function(container, ...) +list_azure_files(container, ...) + +#' @rdname generics +#' @export +list_storage_files.adls_filesystem <- function(container, ...) +list_adls_files(container, ...) + + +# create directory + +#' @rdname generics +#' @export +create_storage_dir <- function(container, ...) +UseMethod("create_storage_dir") + +#' @rdname generics +#' @export +create_storage_dir.blob_container <- function(container, ...) +stop("Blob storage does not support directories") + +#' @rdname generics +#' @export +create_storage_dir.file_share <- function(container, dir, ...) +create_azure_dir(container, dir, ...) + +#' @rdname generics +#' @export +create_storage_dir.adls_filesystem <- function(container, dir, ...) +create_adls_dir(container, dir, ...) + + +# delete directory + +#' @rdname generics +#' @export +delete_storage_dir <- function(container, ...) 
+UseMethod("delete_storage_dir") + +#' @rdname generics +#' @export +delete_storage_dir.blob_container <- function(container, ...) +stop("Blob storage does not support directories") + +#' @rdname generics +#' @export +delete_storage_dir.file_share <- function(container, dir, ...) +delete_azure_dir(container, dir, ...) + +#' @rdname generics +#' @export +delete_storage_dir.adls_filesystem <- function(container, dir, confirm=TRUE, ...) +delete_adls_dir(container, dir, confirm=confirm, ...) + + +# delete file + +#' @rdname generics +#' @export +delete_storage_file <- function(container, ...) +UseMethod("delete_storage_file") + +#' @rdname generics +#' @export +delete_storage_file.blob_container <- function(container, file, ...) +delete_blob(container, file, ...) + +#' @rdname generics +#' @export +delete_storage_file.file_share <- function(container, file, ...) +delete_azure_file(container, file, ...) + +#' @rdname generics +#' @export +delete_storage_file.adls_filesystem <- function(container, file, confirm=TRUE, ...) +delete_adls_file(container, file, confirm=confirm, ...) + diff --git a/R/file_client_funcs.R b/R/file_client_funcs.R index d5e6945..9bbcc7a 100644 --- a/R/file_client_funcs.R +++ b/R/file_client_funcs.R @@ -18,7 +18,8 @@ #' #' For `list_file_shares`, a list of such objects. #' -#' @seealso [storage_endpoint], [az_storage] +#' @seealso +#' [storage_endpoint], [az_storage], [storage_container] #' #' @examples #' \dontrun{ @@ -60,7 +61,7 @@ file_share.character <- function(endpoint, key=NULL, sas=NULL, file_share.file_endpoint <- function(endpoint, name, ...) { obj <- list(name=name, endpoint=endpoint) - class(obj) <- "file_share" + class(obj) <- c("file_share", "storage_container") obj } @@ -215,7 +216,7 @@ delete_file_share.file_endpoint <- function(endpoint, name, confirm=TRUE, ...) #' For `download_azure_file`, if `dest=NULL`, the contents of the downloaded file as a raw vector. 
#' #' @seealso -#' [file_share], [az_storage] +#' [file_share], [az_storage], [storage_download], [call_azcopy] #' #' [AzCopy version 10 on GitHub](https://github.com/Azure/azure-storage-azcopy) #' diff --git a/R/storage_utils.R b/R/storage_utils.R index 01ecf5e..cacc1d4 100644 --- a/R/storage_utils.R +++ b/R/storage_utils.R @@ -66,7 +66,7 @@ add_token <- function(token, headers, api) if(is.null(headers$`x-ms-version`)) headers$`x-ms-version` <- api - if(inherits(token, "R6") && inherits(token, "AzureToken")) + if(AzureRMR::is_azure_token(token) || inherits(token, "Token2.0")) { # if token has expired, renew it if(!token$validate()) @@ -165,13 +165,6 @@ storage_error_message <- function(response, for_httr=TRUE) } -# # keep only the scheme and host parts of a URL -# get_hostroot <- function(url) -# { -# parse_storage_url(url)[1] -# } - - parse_storage_url <- function(url) { url <- httr::parse_url(url) diff --git a/R/transfer_generics.R b/R/transfer_generics.R new file mode 100644 index 0000000..052cb7c --- /dev/null +++ b/R/transfer_generics.R @@ -0,0 +1,165 @@ +#' Upload and download generics +#' +#' @param container A storage container object. +#' @param src,dest The source and destination files to transfer. +#' @param key,token,sas Authentication arguments: an access key, Azure Active Directory (AAD) token or a shared access signature (SAS). If multiple arguments are supplied, a key takes priority over a token, which takes priority over a SAS. For `upload_to_url` and `download_from_url`, you can also provide a SAS as part of the URL itself. +#' @param ... Further arguments to pass to lower-level functions. +#' @param overwrite For downloading, whether to overwrite any destination files that exist. +#' +#' @details +#' These functions allow you to transfer files to and from a storage account. 
+#' +#' `storage_upload`, `storage_download`, `storage_multiupload` and `storage_multidownload` take as first argument a storage container, either for blob storage, file storage, or ADLSgen2. They dispatch to the corresponding file transfer functions for the given storage type. +#' +#' `upload_to_url` and `download_from_url` allow you to transfer a file to or from Azure storage, given the URL of the destination or source respectively. The storage details (endpoint, container name, and so on) are obtained from the URL. +#' +#' @seealso +#' [storage_container], [blob_container], [file_share], [adls_filesystem] +#' +#' [download_blob], [download_azure_file], [download_adls_file], [call_azcopy] +#' +#' @examples +#' \dontrun{ +#' +#' # download from blob storage +#' bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key") +#' cont <- storage_container(bl, "mycontainer") +#' storage_download(cont, "bigfile.zip", "~/bigfile.zip") +#' +#' # same download but directly from the URL +#' download_from_url("https://mystorage.blob.core.windows.net/mycontainer/bigfile.zip", +#' "~/bigfile.zip", +#' key="access_key") +#' +#' # upload to ADLSgen2 +#' ad <- storage_endpoint("https://myadls.dfs.core.windows.net/", token=mytoken) +#' cont <- storage_container(ad, "myfilesystem") +#' create_storage_dir(cont, "newdir") +#' storage_upload(cont, "files.zip", "newdir/files.zip") +#' +#' # same upload but directly to the URL +#' upload_to_url("files.zip", +#' "https://myadls.dfs.core.windows.net/myfilesystem/newdir/files.zip", +#' token=mytoken) +#' +#' } +#' @rdname file_transfer +#' @export +storage_upload <- function(container, ...) +UseMethod("storage_upload") + +#' @rdname file_transfer +#' @export +storage_upload.blob_container <- function(container, src, dest, ...) +upload_blob(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_upload.file_share <- function(container, src, dest, ...) +upload_azure_file(container, src, dest, ...) 
+ +#' @rdname file_transfer +#' @export +storage_upload.adls_filesystem <- function(container, src, dest, ...) +upload_adls_file(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_multiupload <- function(container, ...) +UseMethod("storage_multiupload") + +#' @rdname file_transfer +#' @export +storage_multiupload.blob_container <- function(container, src, dest, ...) +multiupload_blob(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_multiupload.file_share <- function(container, src, dest, ...) +multiupload_azure_file(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_multiupload.adls_filesystem <- function(container, src, dest, ...) +multiupload_adls_file(container, src, dest, ...) + + +# download + +#' @rdname file_transfer +#' @export +storage_download <- function(container, ...) +UseMethod("storage_download") + +#' @rdname file_transfer +#' @export +storage_download.blob_container <- function(container, src, dest, ...) +download_blob(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_download.file_share <- function(container, src, dest, ...) +download_azure_file(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_download.adls_filesystem <- function(container, src, dest, ...) +download_adls_file(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_multidownload <- function(container, ...) +UseMethod("storage_multidownload") + +#' @rdname file_transfer +#' @export +storage_multidownload.blob_container <- function(container, src, dest, ...) +multidownload_blob(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_multidownload.file_share <- function(container, src, dest, ...) +multidownload_azure_file(container, src, dest, ...) + +#' @rdname file_transfer +#' @export +storage_multidownload.adls_filesystem <- function(container, src, dest, ...) +multidownload_adls_file(container, src, dest, ...) 
+ + +#' @rdname file_transfer +#' @export +download_from_url <- function(src, dest, key=NULL, token=NULL, sas=NULL, ..., overwrite=FALSE) +{ + az_path <- parse_storage_url(src) + if(is.null(sas)) + sas <- find_sas(src) + + endpoint <- storage_endpoint(az_path[1], key=key, token=token, sas=sas, ...) + cont <- storage_container(endpoint, az_path[2]) + storage_download(cont, az_path[3], dest, overwrite=overwrite) +} + + +#' @rdname file_transfer +#' @export +upload_to_url <- function(src, dest, key=NULL, token=NULL, sas=NULL, ...) +{ + az_path <- parse_storage_url(dest) + if(is.null(sas)) + sas <- find_sas(dest) + + endpoint <- storage_endpoint(az_path[1], key=key, token=token, sas=sas, ...) + cont <- storage_container(endpoint, az_path[2]) + storage_upload(cont, src, az_path[3]) +} + + +find_sas <- function(url) +{ + querymark <- regexpr("\\?sv", url) + if(querymark == -1) + NULL + else substr(url, querymark + 1, nchar(url)) +} + diff --git a/README.md b/README.md index ea11084..3dc5615 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,18 @@ ![Downloads](https://cranlogs.r-pkg.org/badges/AzureStor) [![Travis Build Status](https://travis-ci.org/cloudyr/AzureStor.png?branch=master)](https://travis-ci.org/cloudyr/AzureStor) -This package implements both an admin- and client-side interface to [Azure Storage Services](https://docs.microsoft.com/en-us/rest/api/storageservices/). The admin interface uses R6 classes and extends the framework provided by [AzureRMR](https://github.com/hong-revo/AzureRMR). The client interface provides easy access to storage via S3 classes and methods. +This package implements both an admin- and client-side interface to [Azure Storage Services](https://docs.microsoft.com/en-us/rest/api/storageservices/). The admin interface uses R6 classes and extends the framework provided by [AzureRMR](https://github.com/hong-revo/AzureRMR). The client interface provides several S3 methods for efficiently managing storage and performing file transfers. 
## Storage endpoints The interface for accessing storage is similar across blobs, files and ADLSGen2. You call the `storage_endpoint` function and provide the endpoint URI, along with your authentication credentials. AzureStor will figure out the type of storage from the URI. AzureStor supports all the different ways you can authenticate with a storage endpoint: -- Blob storage supports authenticating with an access key, shared access signature (SAS), or an Azure Active Directory OAuth token; +- Blob storage supports authenticating with an access key, shared access signature (SAS), or an Azure Active Directory (AAD) OAuth token; - File storage supports access key and SAS; - ADLSgen2 supports access key and AAD token. -In the case of an AAD token, you can also provide an object obtained via `AzureRMR::get_azure_token()`. If you do this, AzureStor can also automatically refresh the token for you when it expires. +In the case of an AAD token, you can also provide an object obtained via `AzureAuth::get_azure_token()` or `httr::oauth2.0_token`. If you do this, AzureStor can also automatically refresh the token for you when it expires. ```r # various endpoints for an account: blob, file, ADLS2 @@ -31,20 +31,33 @@ ad_endp_tok2 <- storage_endpoint("https://mystorage.dfs.core.windows.net", token ## Listing, creating and deleting containers -AzureStor provides several functions for managing containers within a storage endpoint: +AzureStor provides a rich framework for managing storage. 
The following generics allow you to manage storage containers: + +- `storage_container`: get a storage container (blob container, file share or ADLS filesystem) +- `create_storage_container` +- `delete_storage_container` +- `list_storage_containers` + +In turn these dispatch to the following lower-level functions for each type of storage: | Operation | Blob | File | ADLS2 | | --------- | ---- | ---- | ----- | -| list containers | `list_blob_containers` | `list_file_shares` | `list_adls_filesystems` | | get container | `blob_container` | `file_share` | `adls_filesystem` | | create container | `create_blob_container` | `create_file_share` | `create_adls_filesystem` | | delete container | `delete_blob_container` | `delete_file_share` | `delete_adls_filesystem` | +| list containers | `list_blob_containers` | `list_file_shares` | `list_adls_filesystems` | ```r # example of working with containers (blob storage) -list_blob_containers(bl_endp) +list_storage_containers(bl_endp_key) +cont <- storage_container(bl_endp, "mycontainer") +newcont <- create_storage_container(bl_endp, "newcontainer") +delete_storage_container(newcont) + +# you can also call the lower-level functions directly if desired +list_blob_containers(bl_endp_key) cont <- blob_container(bl_endp, "mycontainer") -newcont <- create_blob_container(bl_endp, "newcont") +newcont <- create_blob_container(bl_endp, "newcontainer") delete_blob_container(newcont) ``` @@ -52,6 +65,15 @@ delete_blob_container(newcont) Functions for working with objects within a storage container: +- `list_storage_files`: list files/blobs in a directory (for ADLSgen2 and file storage) or blob container +- `create_storage_dir`: for ADLSgen2 and file storage, create a directory +- `delete_storage_dir`: for ADLSgen2 and file storage, delete a directory +- `delete_storage_file`: delete a file or blob +- `storage_upload`/`storage_download`: transfer a file to or from a storage container +- `storage_multiupload`/`storage_multidownload`: transfer 
multiple files in parallel to or from a storage container + +As above, these dispatch to a family of lower-level functions for each type of storage: + | Operation | Blob | File | ADLS2 | | --------- | ---- | ---- | ----- | | list files | `list_blobs` | `list_azure_files` | `list_adls_files` | @@ -63,9 +85,33 @@ Functions for working with objects within a storage container: | upload multiple files | `multiupload_blob` | `multiupload_azure_file` | `multiupload_adls_file` | | download multiple files | `multidownload_blob` | `multidownload_azure_file` | `multidownload_adls_file` | -### Uploading and downloading +```r +# example of working with files and directories (ADLSgen2) +cont <- storage_container(ad_endp_tok, "myfilesystem") +list_storage_files(cont) +create_storage_dir(cont, "newdir") +storage_download(cont, "/readme.txt", "~/readme.txt") +storage_multiupload(cont, "N:/data/*.*", "newdir") # uploading everything in a directory, in parallel +``` -AzureStor also includes a couple of extra features for uploading and downloading files. First, You can upload an in-memory R object via a _connection_, and similarly, you can download a file to a connection, or return it as a raw vector. This lets you transfer an object without having to create a temporary file as an intermediate step. + +## Uploading and downloading + +AzureStor includes a number of extra features to make transferring files efficient. + +### Parallel file transfers + +First, as noted above, you can transfer multiple files in parallel using the `multiupload_*`/`multidownload_*` functions. These use a pool of background R processes to do the transfers in parallel, which usually results in major speedups when transferring multiple small files. The pool is created the first time a parallel file transfer is performed, and persists for the duration of the R session; this means you don't have to wait for the pool to be (re-)created each time.
+ +```r +# uploading/downloading multiple files at once: use a wildcard to specify files to transfer +multiupload_adls_file(filesystem, src="N:/logfiles/*.zip", dest="/") +multidownload_adls_file(filesystem, src="/monthly/jan*.*", dest="~/data/january") +``` + +### Transfer to and from connections + +Second, you can upload a (single) in-memory R object via a _connection_, and similarly, you can download a file to a connection, or return it as a raw vector. This lets you transfer an object without having to create a temporary file as an intermediate step. ```r # uploading serialized R objects via connections @@ -86,17 +132,27 @@ download_blob(cont, src="iris.rds", dest=con) unserialize(con) ``` -Second, when transferring several files at once, you can transfer them in parallel using the `multiupload_*`/`multidownload_*` functions. These use a pool of background R processes to do the transfers in parallel, which usually results in major speedups when transferring multiple small files. The pool is created the first time a parallel file transfer is performed, and persists for the duration of the R session; this means you don't have to wait for the pool to be (re-)created each time. +### Interface to AzCopy + +Third, AzureStor includes an interface to [AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10), Microsoft's high-performance commandline utility for copying files to and from storage. To take advantage of this, simply include the argument `use_azcopy=TRUE` on any upload or download function. AzureStor will then call AzCopy to perform the file transfer, rather than using its own internal code. In addition, a `call_azcopy` function is provided to let you use AzCopy for any task. 
```r -# uploading/downloading multiple files at once: use a wildcard to specify files to transfer -multiupload_adls_file(filesystem, src="N:/logfiles/*.zip", dest="/") -multidownload_adls_file(filesystem, src="/monthly/jan*.*", dest="~/data/january") +# use azcopy to download +myfs <- storage_container(ad_endp, "myfilesystem") +storage_download(myfs, "/incoming/bigfile.tar.gz", "/data", use_azcopy=TRUE) + +# use azcopy to sync a local and remote dir +call_azcopy('sync c:/local/path "https://mystorage.blob.core.windows.net/mycontainer" --recursive=true') ``` +For more information, see the [AzCopy repo on GitHub](https://github.com/Azure/azure-storage-azcopy). + +**Note that AzureStor uses AzCopy version 10. It is incompatible with versions 8.1 and earlier.** + + ## Admin interface -AzureStor's admin-side interface allows you to easily create and delete resource accounts, as well as obtain access keys and generate a SAS. Here is a sample workflow: +Finally, AzureStor's admin-side interface allows you to easily create and delete resource accounts, as well as obtain access keys and generate a SAS.
Here is a sample workflow: ```r library(AzureRMR) diff --git a/man/adls.Rd b/man/adls.Rd index 203aeae..c1de5e1 100644 --- a/man/adls.Rd +++ b/man/adls.Rd @@ -110,5 +110,5 @@ unserialize(con) } } \seealso{ -\link{adls_filesystem}, \link{az_storage} +\link{adls_filesystem}, \link{az_storage}, \link{storage_download}, \link{call_azcopy} } diff --git a/man/adls_filesystem.Rd b/man/adls_filesystem.Rd index b55aca3..de782b8 100644 --- a/man/adls_filesystem.Rd +++ b/man/adls_filesystem.Rd @@ -108,5 +108,5 @@ delete_adls_filesystem("https://mystorage.dfs.core.windows.net/newfs", key="acce } } \seealso{ -\link{storage_endpoint}, \link{az_storage} +\link{storage_endpoint}, \link{az_storage}, \link{storage_container} } diff --git a/man/az_storage.Rd b/man/az_storage.Rd index dbfa44a..cedebf2 100644 --- a/man/az_storage.Rd +++ b/man/az_storage.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage.R +% Please edit documentation in R/az_storage.R \docType{class} \name{az_storage} \alias{az_storage} diff --git a/man/azcopy.Rd b/man/azcopy.Rd index 80fbd65..9d64cbb 100644 --- a/man/azcopy.Rd +++ b/man/azcopy.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/azcopy.R \name{call_azcopy} \alias{call_azcopy} +\alias{azcopy} \alias{azcopy_login} \title{Call the azcopy file transfer utility} \usage{ diff --git a/man/blob.Rd b/man/blob.Rd index 4ddbb50..6885de3 100644 --- a/man/blob.Rd +++ b/man/blob.Rd @@ -102,7 +102,7 @@ unserialize(con) } } \seealso{ -\link{blob_container}, \link{az_storage} +\link{blob_container}, \link{az_storage}, \link{storage_download}, \link{call_azcopy} \href{https://github.com/Azure/azure-storage-azcopy}{AzCopy version 10 on GitHub} } diff --git a/man/blob_container.Rd b/man/blob_container.Rd index b5fb785..4b4cb49 100644 --- a/man/blob_container.Rd +++ b/man/blob_container.Rd @@ -120,5 +120,5 @@ blob_container("https://mystorage.blob.core.windows.net/mycontainer", token=toke } } \seealso{ 
-\link{storage_endpoint}, \link{az_storage} +\link{storage_endpoint}, \link{az_storage}, \link{storage_container} } diff --git a/man/file.Rd b/man/file.Rd index f87d9bb..bad81fa 100644 --- a/man/file.Rd +++ b/man/file.Rd @@ -107,7 +107,7 @@ unserialize(con) } } \seealso{ -\link{file_share}, \link{az_storage} +\link{file_share}, \link{az_storage}, \link{storage_download}, \link{call_azcopy} \href{https://github.com/Azure/azure-storage-azcopy}{AzCopy version 10 on GitHub} } diff --git a/man/file_share.Rd b/man/file_share.Rd index 17d5377..edc9e48 100644 --- a/man/file_share.Rd +++ b/man/file_share.Rd @@ -100,5 +100,5 @@ delete_file_share("https://mystorage.file.core.windows.net/newshare", key="acces } } \seealso{ -\link{storage_endpoint}, \link{az_storage} +\link{storage_endpoint}, \link{az_storage}, \link{storage_container} } diff --git a/man/file_transfer.Rd b/man/file_transfer.Rd index 3f9f534..76c64d6 100644 --- a/man/file_transfer.Rd +++ b/man/file_transfer.Rd @@ -1,50 +1,112 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/client.R -\name{download_from_azure} -\alias{download_from_azure} -\alias{upload_to_azure} +% Please edit documentation in R/transfer_generics.R +\name{storage_upload} +\alias{storage_upload} +\alias{storage_upload.blob_container} +\alias{storage_upload.file_share} +\alias{storage_upload.adls_filesystem} +\alias{storage_multiupload} +\alias{storage_multiupload.blob_container} +\alias{storage_multiupload.file_share} +\alias{storage_multiupload.adls_filesystem} +\alias{storage_download} +\alias{storage_download.blob_container} +\alias{storage_download.file_share} +\alias{storage_download.adls_filesystem} +\alias{storage_multidownload} +\alias{storage_multidownload.blob_container} +\alias{storage_multidownload.file_share} +\alias{storage_multidownload.adls_filesystem} \alias{download_from_url} \alias{upload_to_url} -\title{Generic upload and download} +\title{Upload and download generics} \usage{ 
-download_from_azure(src, dest, key = NULL, token = NULL, sas = NULL, - ..., overwrite = FALSE) +storage_upload(container, ...) -upload_to_azure(src, dest, key = NULL, token = token, sas = NULL, - ...) +\method{storage_upload}{blob_container}(container, src, dest, ...) + +\method{storage_upload}{file_share}(container, src, dest, ...) + +\method{storage_upload}{adls_filesystem}(container, src, dest, ...) + +storage_multiupload(container, ...) + +\method{storage_multiupload}{blob_container}(container, src, dest, ...) + +\method{storage_multiupload}{file_share}(container, src, dest, ...) + +\method{storage_multiupload}{adls_filesystem}(container, src, dest, ...) + +storage_download(container, ...) + +\method{storage_download}{blob_container}(container, src, dest, ...) + +\method{storage_download}{file_share}(container, src, dest, ...) + +\method{storage_download}{adls_filesystem}(container, src, dest, ...) + +storage_multidownload(container, ...) + +\method{storage_multidownload}{blob_container}(container, src, dest, ...) + +\method{storage_multidownload}{file_share}(container, src, dest, ...) + +\method{storage_multidownload}{adls_filesystem}(container, src, dest, ...) download_from_url(src, dest, key = NULL, token = NULL, sas = NULL, ..., overwrite = FALSE) -upload_to_url(src, dest, key = NULL, token = NULL, sas = NULL, ...) +upload_to_url(src, dest, key = NULL, token = token, sas = NULL, ...) } \arguments{ -\item{src, dest}{The source and destination files/URLs. Paths are allowed.} - -\item{key, token, sas}{Authentication arguments: an access key, Azure Active Directory (AAD) token or a shared access signature (SAS). 
If multiple arguments are supplied, a key takes priority over a token, which takes priority over a SAS.} +\item{container}{A storage container object.} \item{...}{Further arguments to pass to lower-level functions.} +\item{src, dest}{The source and destination files to transfer.} + +\item{key, token, sas}{Authentication arguments: an access key, Azure Active Directory (AAD) token or a shared access signature (SAS). If multiple arguments are supplied, a key takes priority over a token, which takes priority over a SAS. For \code{upload_to_url} and \code{download_from_url}, you can also provide a SAS as part of the URL itself.} + \item{overwrite}{For downloading, whether to overwrite any destination files that exist.} } \description{ -Generic upload and download +Upload and download generics } \details{ -These functions allow you to transfer files to and from a storage account, given the URL of the destination (for uploading) or source (for downloading). They dispatch to \link{upload_azure_file}/\link{download_azure_file} for a file storage URL, \link{upload_blob}/\link{download_blob} for a blob storage URL, and \link{upload_adls_file}/\link{download_adls_file} for an ADLSgen2 URL respectively. +These functions allow you to transfer files to and from a storage account. -You can provide a SAS either as part of the URL itself, or in the \code{sas} argument. +\code{storage_upload}, \code{storage_download}, \code{storage_multiupload} and \code{storage_multidownload} take as first argument a storage container, either for blob storage, file storage, or ADLSgen2. They dispatch to the corresponding file transfer functions for the given storage type. + +\code{upload_to_url} and \code{download_from_url} allow you to transfer a file to or from Azure storage, given the URL of the source or destination. The storage details (endpoint, container name, and so on) are obtained from the URL.
} \examples{ \dontrun{ -# authenticated download with an access key -download_from_azure("https://mystorage.blob.core.windows.net/mycontainer/bigfile.zip", - "~/bigfile.zip", - key="access_key") +# download from blob storage +bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key") +cont <- storage_container(bl, "mycontainer") +storage_download(cont, "bigfile.zip", "~/bigfile.zip") + +# same download but directly from the URL +download_from_url("https://mystorage.blob.core.windows.net/mycontainer/bigfile.zip", + "~/bigfile.zip", + key="access_key") + +# upload to ADLSgen2 +ad <- storage_endpoint("https://myadls.dfs.core.windows.net/", token=mytoken) +cont <- storage_container(ad, "myfilesystem") +create_storage_dir(cont, "newdir") +storage_upload(cont, "files.zip", "newdir/files.zip") + +# same upload but directly to the URL +upload_to_url("files.zip", + "https://myadls.dfs.core.windows.net/myfilesystem/newdir/files.zip", + token=mytoken) } } \seealso{ -\link{download_azure_file}, \link{download_blob}, \link{az_storage} +\link{storage_container}, \link{blob_container}, \link{file_share}, \link{adls_filesystem} + +\link{download_blob}, \link{download_azure_file}, \link{download_adls_file}, \link{call_azcopy} } diff --git a/man/generics.Rd b/man/generics.Rd new file mode 100644 index 0000000..f98a3aa --- /dev/null +++ b/man/generics.Rd @@ -0,0 +1,194 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/client_generics.R +\name{storage_container} +\alias{storage_container} +\alias{storage_generics} +\alias{storage_container.blob_endpoint} +\alias{storage_container.file_endpoint} +\alias{storage_container.adls_endpoint} +\alias{storage_container.character} +\alias{create_storage_container} +\alias{create_storage_container.blob_endpoint} +\alias{create_storage_container.file_endpoint} +\alias{create_storage_container.adls_endpoint} +\alias{create_storage_container.storage_container} 
+\alias{create_storage_container.character} +\alias{delete_storage_container} +\alias{delete_storage_container.blob_endpoint} +\alias{delete_storage_container.file_endpoint} +\alias{delete_storage_container.adls_endpoint} +\alias{delete_storage_container.storage_container} +\alias{delete_storage_container.character} +\alias{list_storage_containers} +\alias{list_storage_containers.blob_endpoint} +\alias{list_storage_containers.file_endpoint} +\alias{list_storage_containers.adls_endpoint} +\alias{list_storage_containers.character} +\alias{list_storage_files} +\alias{list_storage_files.blob_container} +\alias{list_storage_files.file_share} +\alias{list_storage_files.adls_filesystem} +\alias{create_storage_dir} +\alias{create_storage_dir.blob_container} +\alias{create_storage_dir.file_share} +\alias{create_storage_dir.adls_filesystem} +\alias{delete_storage_dir} +\alias{delete_storage_dir.blob_container} +\alias{delete_storage_dir.file_share} +\alias{delete_storage_dir.adls_filesystem} +\alias{delete_storage_file} +\alias{delete_storage_file.blob_container} +\alias{delete_storage_file.file_share} +\alias{delete_storage_file.adls_filesystem} +\title{Storage client generics} +\usage{ +storage_container(endpoint, ...) + +\method{storage_container}{blob_endpoint}(endpoint, name, ...) + +\method{storage_container}{file_endpoint}(endpoint, name, ...) + +\method{storage_container}{adls_endpoint}(endpoint, name, ...) + +\method{storage_container}{character}(endpoint, key = NULL, + token = NULL, sas = NULL, ...) + +create_storage_container(endpoint, ...) + +\method{create_storage_container}{blob_endpoint}(endpoint, name, ...) + +\method{create_storage_container}{file_endpoint}(endpoint, name, ...) + +\method{create_storage_container}{adls_endpoint}(endpoint, name, ...) + +\method{create_storage_container}{storage_container}(endpoint, ...) + +\method{create_storage_container}{character}(endpoint, key = NULL, + token = NULL, sas = NULL, ...) 
+ +delete_storage_container(endpoint, ...) + +\method{delete_storage_container}{blob_endpoint}(endpoint, name, ...) + +\method{delete_storage_container}{file_endpoint}(endpoint, name, ...) + +\method{delete_storage_container}{adls_endpoint}(endpoint, name, ...) + +\method{delete_storage_container}{storage_container}(endpoint, ...) + +\method{delete_storage_container}{character}(endpoint, key = NULL, + token = NULL, sas = NULL, confirm = TRUE, ...) + +list_storage_containers(endpoint, ...) + +\method{list_storage_containers}{blob_endpoint}(endpoint, ...) + +\method{list_storage_containers}{file_endpoint}(endpoint, ...) + +\method{list_storage_containers}{adls_endpoint}(endpoint, ...) + +\method{list_storage_containers}{character}(endpoint, key = NULL, + token = NULL, sas = NULL, ...) + +list_storage_files(container, ...) + +\method{list_storage_files}{blob_container}(container, ...) + +\method{list_storage_files}{file_share}(container, ...) + +\method{list_storage_files}{adls_filesystem}(container, ...) + +create_storage_dir(container, ...) + +\method{create_storage_dir}{blob_container}(container, ...) + +\method{create_storage_dir}{file_share}(container, dir, ...) + +\method{create_storage_dir}{adls_filesystem}(container, dir, ...) + +delete_storage_dir(container, ...) + +\method{delete_storage_dir}{blob_container}(container, ...) + +\method{delete_storage_dir}{file_share}(container, dir, ...) + +\method{delete_storage_dir}{adls_filesystem}(container, dir, + confirm = TRUE, ...) + +delete_storage_file(container, ...) + +\method{delete_storage_file}{blob_container}(container, file, ...) + +\method{delete_storage_file}{file_share}(container, file, ...) + +\method{delete_storage_file}{adls_filesystem}(container, file, + confirm = TRUE, ...) 
+} +\arguments{ +\item{endpoint}{A storage endpoint object, or for the character methods, a string giving the full URL to the container.} + +\item{...}{Further arguments to pass to lower-level functions.} + +\item{name}{For the storage container management methods, a container name.} + +\item{key, token, sas}{For the character methods, authentication credentials for the container: either an access key, an Azure Active Directory (AAD) token, or a SAS. If multiple arguments are supplied, a key takes priority over a token, which takes priority over a SAS.} + +\item{confirm}{For the deletion methods, whether to ask for confirmation first.} + +\item{container}{A storage container object.} + +\item{file, dir}{For the storage object management methods, a file or directory name.} +} +\description{ +Storage client generics +} +\details{ +These methods provide a framework for all storage management tasks supported by AzureStor. They dispatch to the appropriate functions for each type of storage. + +Storage container management methods: +\itemize{ +\item \code{storage_container} dispatches to \code{blob_container}, \code{file_share} or \code{adls_filesystem} +\item \code{create_storage_container} dispatches to \code{create_blob_container}, \code{create_file_share} or \code{create_adls_filesystem} +\item \code{delete_storage_container} dispatches to \code{delete_blob_container}, \code{delete_file_share} or \code{delete_adls_filesystem} +\item \code{list_storage_containers} dispatches to \code{list_blob_containers}, \code{list_file_shares} or \code{list_adls_filesystems} +} + +Storage object management methods: +\itemize{ +\item \code{list_storage_files} dispatches to \code{list_blobs}, \code{list_azure_files} or \code{list_adls_files} +\item \code{create_storage_dir} dispatches to \code{create_azure_dir} or \code{create_adls_dir}; throws an error if passed a blob container +\item \code{delete_storage_dir} dispatches to \code{delete_azure_dir} or \code{delete_adls_dir}; throws 
an error if passed a blob container +\item \code{delete_storage_file} dispatches to \code{delete_blob}, \code{delete_azure_file} or \code{delete_adls_file} +} +} +\examples{ +\dontrun{ + +# storage endpoints for the one account +bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key") +fl <- storage_endpoint("https://mystorage.file.core.windows.net/", key="access_key") + +list_storage_containers(bl) +list_storage_containers(fl) + +# creating containers +cont <- create_storage_container(bl, "newblobcontainer") +fs <- create_storage_container(fl, "newfileshare") + +# creating directories (if possible) +create_storage_dir(cont, "newdir") # will error out +create_storage_dir(fs, "newdir") + +# transfer a file +storage_upload(cont, "~/file.txt", "storage_file.txt") +storage_upload(fs, "~/file.txt", "newdir/storage_file.txt") + +} +} +\seealso{ +\link{storage_endpoint}, \link{blob_container}, \link{file_share}, \link{adls_filesystem} + +\link{list_blobs}, \link{list_azure_files}, \link{list_adls_files} + +Similar generics exist for file transfer methods; see the page for \link{storage_download}.
+} diff --git a/man/storage_endpoint.Rd b/man/storage_endpoint.Rd index 4eadc90..6207a7c 100644 --- a/man/storage_endpoint.Rd +++ b/man/storage_endpoint.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/client.R +% Please edit documentation in R/client_endpoint.R \name{storage_endpoint} \alias{storage_endpoint} \alias{endpoint} diff --git a/tests/testthat/test05_generics.R b/tests/testthat/test05_generics.R new file mode 100644 index 0000000..1f3dde7 --- /dev/null +++ b/tests/testthat/test05_generics.R @@ -0,0 +1,126 @@
+context("Client generics")
+
+tenant <- Sys.getenv("AZ_TEST_TENANT_ID")
+app <- Sys.getenv("AZ_TEST_APP_ID")
+password <- Sys.getenv("AZ_TEST_PASSWORD")
+subscription <- Sys.getenv("AZ_TEST_SUBSCRIPTION")
+
+if(tenant == "" || app == "" || password == "" || subscription == "")  # Sys.getenv returns "" for unset variables
+    skip("Client generics tests skipped: ARM credentials not set")
+
+rgname <- Sys.getenv("AZ_TEST_STORAGE_RG")
+storname1 <- Sys.getenv("AZ_TEST_STORAGE_HNS")  # account used for the ADLSgen2 (dfs) endpoint below
+storname2 <- Sys.getenv("AZ_TEST_STORAGE_NOHNS")  # account used for the blob and file endpoints below
+
+if(rgname == "" || storname1 == "" || storname2 == "")
+    skip("Client generics tests skipped: resource names not set")
+
+sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
+stor1 <- sub$get_resource_group(rgname)$get_storage_account(storname1)
+stor2 <- sub$get_resource_group(rgname)$get_storage_account(storname2)
+
+test_that("Blob dispatch works",
+{
+    endpname <- stor2$properties$primaryEndpoints$blob
+    expect_type(endpname, "character")
+    key <- stor2$list_keys()[[1]]
+
+    contname <- paste(sample(letters, 10, TRUE), collapse="")  # random 10-letter container name
+    dirname <- "newdir"
+    filename <- "iris.csv"
+
+    # working with a container
+    expect_is(endp <- storage_endpoint(endpname, key=key), "blob_endpoint")
+    expect_silent(cont <- storage_container(endp, contname))
+    expect_silent(create_storage_container(cont))
+
+    # working with objects within container
+    expect_silent(list_storage_files(cont))
+    expect_error(create_storage_dir(cont, dirname))  # the directory generics are documented to error for blob containers
+
+    # file transfer
+    expect_silent(storage_upload(cont, file.path("../resources", filename), filename))
+    expect_silent(storage_download(cont, filename, tempfile()))
+
+    # delete the objects
+    expect_silent(delete_storage_file(cont, filename, confirm=FALSE))
+    expect_error(delete_storage_dir(cont, dirname, confirm=FALSE))  # same documented error as create_storage_dir
+    expect_silent(delete_storage_container(cont, confirm=FALSE))
+})
+
+
+test_that("File dispatch works",
+{
+    endpname <- stor2$properties$primaryEndpoints$file
+    expect_type(endpname, "character")
+    key <- stor2$list_keys()[[1]]
+
+    contname <- paste(sample(letters, 10, TRUE), collapse="")  # random 10-letter share name
+    dirname <- "newdir"
+    filename <- "iris.csv"
+
+    # working with a container
+    expect_is(endp <- storage_endpoint(endpname, key=key), "file_endpoint")
+    expect_silent(cont <- storage_container(endp, contname))
+    expect_silent(create_storage_container(cont))
+
+    # working with objects within container
+    expect_silent(create_storage_dir(cont, dirname))
+    expect_silent(list_storage_files(cont, dirname))
+
+    # file transfer
+    expect_silent(storage_upload(cont, file.path("../resources", filename), file.path(dirname, filename)))
+    expect_silent(storage_download(cont, file.path(dirname, filename), tempfile()))
+
+    # delete the objects
+    expect_silent(delete_storage_file(cont, file.path(dirname, filename), confirm=FALSE))
+    expect_silent(delete_storage_dir(cont, dirname, confirm=FALSE))
+    expect_silent(delete_storage_container(cont, confirm=FALSE))
+})
+
+
+test_that("ADLSgen2 dispatch works",
+{
+    endpname <- stor1$properties$primaryEndpoints$dfs
+    expect_type(endpname, "character")
+    key <- stor1$list_keys()[[1]]
+
+    contname <- paste(sample(letters, 10, TRUE), collapse="")  # random 10-letter filesystem name
+    dirname <- "newdir"
+    filename <- "iris.csv"
+
+    # working with a container
+    expect_is(endp <- storage_endpoint(endpname, key=key), "adls_endpoint")
+    expect_silent(cont <- storage_container(endp, contname))
+    expect_silent(create_storage_container(cont))
+
+    # working with objects within container
+    expect_silent(create_storage_dir(cont, dirname))
+    expect_silent(list_storage_files(cont, dirname))
+
+    # file transfer
+    expect_silent(storage_upload(cont, file.path("../resources", filename), file.path(dirname, filename)))
+    expect_silent(storage_download(cont, file.path(dirname, filename), tempfile()))
+
+    # delete the objects
+    expect_silent(delete_storage_file(cont, file.path(dirname, filename), confirm=FALSE))
+    expect_silent(delete_storage_dir(cont, dirname, confirm=FALSE))
+    expect_silent(delete_storage_container(cont, confirm=FALSE))
+})
+
+
+teardown(  # cleanup: deletes every container, share and filesystem listed on the test endpoints, not only those created above
+{
+    bl <- stor2$get_blob_endpoint()
+    blconts <- list_blob_containers(bl)
+    lapply(blconts, delete_blob_container, confirm=FALSE)
+
+    fl <- stor2$get_file_endpoint()
+    flconts <- list_file_shares(fl)
+    lapply(flconts, delete_file_share, confirm=FALSE)
+
+    ad <- stor1$get_adls_endpoint()
+    adconts <- list_adls_filesystems(ad)
+    lapply(adconts, delete_adls_filesystem, confirm=FALSE)
+})
+