зеркало из https://github.com/Azure/AzureStor.git
Родитель
6da1756f06
Коммит
dbabefa0bc
|
@ -23,6 +23,7 @@ Imports:
|
|||
Suggests:
|
||||
knitr,
|
||||
jsonlite,
|
||||
testthat
|
||||
testthat,
|
||||
processx
|
||||
Roxygen: list(markdown=TRUE)
|
||||
RoxygenNote: 6.1.1
|
||||
|
|
|
@ -96,7 +96,6 @@ export(adls_endpoint)
|
|||
export(adls_file_exists)
|
||||
export(adls_filesystem)
|
||||
export(az_storage)
|
||||
export(azcopy_login)
|
||||
export(azure_file_exists)
|
||||
export(blob_container)
|
||||
export(blob_endpoint)
|
||||
|
|
10
NEWS.md
10
NEWS.md
|
@ -1,5 +1,14 @@
|
|||
# AzureStor 3.0.1.9000
|
||||
|
||||
## Significant user-visible changes
|
||||
|
||||
- Enhanced support for AzCopy:
|
||||
- Calling AzCopy from the various upload/download methods can now use existing authentication credentials without needing to log in separately. Note that AzCopy only supports SAS and OAuth authentication, not access key.
|
||||
- `call_azcopy` now uses the processx package under the hood, which is a powerful and flexible framework for running external programs from R. The interface is slightly changed: rather than taking the entire commandline as a single string, `call_azcopy` now expects each AzCopy commandline option to be an individual argument. See `?call_azcopy` for examples of the new interface.
|
||||
- Recursive file transfers with AzCopy are now supported.
|
||||
|
||||
## Other changes
|
||||
|
||||
- New `storage_file_exists` generic to check for file existence, which dispatches to `blob_exists`, `azure_file_exists` and `adls_file_exists` for the individual storage types.
|
||||
- Move AAD token validity check inside the retry loop in `call_storage_endpoint`; this fixes a bug where the token could expire during a long transfer.
|
||||
- Default destination arguments now work for file transfer generics as well.
|
||||
|
@ -11,6 +20,7 @@
|
|||
- In `list_blobs` and `list_adls_files`, check that a field exists before trying to modify it (works around problem of possibly inconsistent response from the endpoint).
|
||||
- Allow passing a SAS with a leading `?` (as generated by the Azure Portal and Storage Explorer) to the client functions.
|
||||
- Fix some bugs in `multidownload_blob`.
|
||||
- The `az_storage$get_*_endpoint()` methods now support passing an AAD token for authentication.
|
||||
|
||||
# AzureStor 3.0.0
|
||||
|
||||
|
|
|
@ -349,7 +349,7 @@ multiupload_adls_file <- function(filesystem, src, dest, recursive=FALSE, blocks
|
|||
max_concurrent_transfers=10)
|
||||
{
|
||||
if(use_azcopy)
|
||||
return(azcopy_upload(filesystem, src, dest, blocksize=blocksize, lease=lease))
|
||||
return(azcopy_upload(filesystem, src, dest, blocksize=blocksize, lease=lease, recursive=recursive))
|
||||
|
||||
multiupload_internal(filesystem, src, dest, recursive=recursive, blocksize=blocksize, lease=lease,
|
||||
max_concurrent_transfers=max_concurrent_transfers)
|
||||
|
@ -373,7 +373,7 @@ multidownload_adls_file <- function(filesystem, src, dest, recursive=FALSE, bloc
|
|||
max_concurrent_transfers=10)
|
||||
{
|
||||
if(use_azcopy)
|
||||
return(azcopy_upload(filesystem, src, dest, overwrite=overwrite))
|
||||
return(azcopy_download(filesystem, src, dest, overwrite=overwrite, recursive=recursive))
|
||||
|
||||
multidownload_internal(filesystem, src, dest, recursive=recursive, blocksize=blocksize, overwrite=overwrite,
|
||||
max_concurrent_transfers=max_concurrent_transfers)
|
||||
|
|
|
@ -98,19 +98,19 @@ public=list(
|
|||
#self$do_operation("listServiceSas", body=parms, encode="json", http_verb="POST")$serviceSasToken
|
||||
#},
|
||||
|
||||
get_blob_endpoint=function(key=self$list_keys()[1], sas=NULL)
|
||||
get_blob_endpoint=function(key=self$list_keys()[1], sas=NULL, token=NULL)
|
||||
{
|
||||
blob_endpoint(self$properties$primaryEndpoints$blob, key=key, sas=sas)
|
||||
blob_endpoint(self$properties$primaryEndpoints$blob, key=key, sas=sas, token=token)
|
||||
},
|
||||
|
||||
get_file_endpoint=function(key=self$list_keys()[1], sas=NULL)
|
||||
get_file_endpoint=function(key=self$list_keys()[1], sas=NULL, token=NULL)
|
||||
{
|
||||
file_endpoint(self$properties$primaryEndpoints$file, key=key, sas=sas)
|
||||
file_endpoint(self$properties$primaryEndpoints$file, key=key, sas=sas, token=token)
|
||||
},
|
||||
|
||||
get_adls_endpoint=function(key=self$list_keys()[1], sas=NULL)
|
||||
get_adls_endpoint=function(key=self$list_keys()[1], sas=NULL, token=NULL)
|
||||
{
|
||||
adls_endpoint(self$properties$primaryEndpoints$dfs, key=key, sas=sas)
|
||||
adls_endpoint(self$properties$primaryEndpoints$dfs, key=key, sas=sas, token=token)
|
||||
},
|
||||
|
||||
regen_key=function(key=1)
|
||||
|
|
243
R/azcopy.R
243
R/azcopy.R
|
@ -1,222 +1,131 @@
|
|||
#' Call the azcopy file transfer utility
|
||||
#'
|
||||
#' @param ... Arguments to pass to AzCopy on the commandline. If no arguments are supplied, a help screen is printed.
|
||||
#' @param force For `azcopy_login`, whether to force AzCopy to relogin. If `FALSE` (the default), and AzureStor has detected that AzCopy has already logged in, this has no effect.
|
||||
#' @param env A named character vector of environment variables to set for AzCopy.
|
||||
#' @param silent Whether to suppress printing the output from AzCopy to the screen. If `TRUE`, a non-zero exit status from AzCopy is also not turned into an R error.
|
||||
#'
|
||||
#' @details
|
||||
#' AzureStor has the ability to use the Microsoft AzCopy commandline utility to transfer files. To enable this, set the argument `use_azcopy=TRUE` in any call to an upload or download function; AzureStor will then call AzCopy to perform the file transfer rather than relying on its own code. You can also call AzCopy directly with the `call_azcopy` function, passing it any arguments as required.
|
||||
#' AzureStor has the ability to use the Microsoft AzCopy commandline utility to transfer files. To enable this, ensure the processx package is installed and set the argument `use_azcopy=TRUE` in any call to an upload or download function; AzureStor will then call AzCopy to perform the file transfer rather than relying on its own code. You can also call AzCopy directly with the `call_azcopy` function.
|
||||
#'
|
||||
#' AzureStor requires version 10 or later of AzCopy. The first time you try to run it, AzureStor will check that the version of AzCopy is correct, and throw an error if it is version 8 or earlier.
|
||||
#'
|
||||
#' The AzCopy utility must be in your path for AzureStor to find it. Note that unlike earlier versions, Azcopy 10 is a single, self-contained binary file that can be placed in any directory.
|
||||
#'
|
||||
#' AzCopy uses its own mechanisms for authenticating with Azure Active Directory, which is independent of the OAuth tokens used by AzureStor. AzureStor will try to ensure that AzCopy has previously authenticated before trying to transfer a file with a token, but this may not always succeed. You can run `azcopy_login(force=TRUE)` to force it to authenticate.
|
||||
#'
|
||||
#' @return
|
||||
#' A list, invisibly, with the following components:
|
||||
#' - `status`: The exit status of the AzCopy command. If this is NA, then the process was killed and had no exit status.
|
||||
#' - `stdout`: The standard output of the command.
|
||||
#' - `stderr`: The standard error of the command.
|
||||
#' - `timeout`: Whether AzCopy was killed because of a timeout.
|
||||
#' @seealso
|
||||
#' [processx::run], [download_blob], [download_azure_file], [download_adls_file]
|
||||
#'
|
||||
#' [AzCopy page on Microsoft Docs](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10)
|
||||
#'
|
||||
#' [AzCopy GitHub repo](https://github.com/Azure/azure-storage-azcopy)
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#'
|
||||
#' endp <- storage_endpoint("https://mystorage.blob.core.windows.net", sas="mysas")
|
||||
#' cont <- storage_container(endp, "mycontainer")
|
||||
#'
|
||||
#' # print various help screens
|
||||
#' call_azcopy("help")
|
||||
#' call_azcopy("help", "copy")
|
||||
#'
|
||||
#' # calling azcopy to download a blob
|
||||
#' storage_download(cont, "myblob.csv", use_azcopy=TRUE)
|
||||
#'
|
||||
#' # calling azcopy directly (must specify the SAS explicitly in the source URL)
|
||||
#' call_azcopy("copy",
|
||||
#' "https://mystorage.blob.core.windows.net/mycontainer/myblob.csv?mysas",
|
||||
#' "myblob.csv")
|
||||
#'
|
||||
#' }
|
||||
#' @aliases azcopy
|
||||
#' @rdname azcopy
|
||||
#' @export
|
||||
call_azcopy <- function(...)
|
||||
call_azcopy <- function(..., env=NULL, silent=FALSE)
|
||||
{
|
||||
azcopy <- get_azcopy_path()
|
||||
args <- paste(sapply(list(...), as.character), collapse=" ")
|
||||
cat("Command: azcopy", args, "\n")
|
||||
system2(azcopy, args)
|
||||
args <- as.character(unlist(list(...)))
|
||||
invisible(processx::run(get_azcopy_path(), args, env=env, echo_cmd=!silent, echo=!silent, error_on_status=!silent))
|
||||
}
|
||||
|
||||
|
||||
#' @rdname azcopy
|
||||
#' @export
|
||||
azcopy_login <- function(force=FALSE)
|
||||
call_azcopy_from_storage <- function(object, ...)
|
||||
{
|
||||
if(exists("azcopy_logged_in", envir=.AzureStor) && isTRUE(.AzureStor$azcopy_logged_in) && !force)
|
||||
return(invisible(NULL))
|
||||
res <- call_azcopy("login")
|
||||
if(res == 0)
|
||||
.AzureStor$azcopy_logged_in <- TRUE
|
||||
invisible(NULL)
|
||||
}
|
||||
if(!requireNamespace("processx"))
|
||||
stop("The processx package must be installed to use azcopy", call.=FALSE)
|
||||
|
||||
|
||||
# azcopy unset/NULL -> not initialized
|
||||
# azcopy = NA -> binary not found, or version < 10 (not usable)
|
||||
# azcopy = path -> usable
|
||||
get_azcopy_path <- function()
|
||||
{
|
||||
if(exists("azcopy", envir=.AzureStor))
|
||||
{
|
||||
if(!is.na(.AzureStor$azcopy))
|
||||
return(.AzureStor$azcopy)
|
||||
else stop("azcopy version 10+ required but not found", call.=FALSE)
|
||||
}
|
||||
else
|
||||
{
|
||||
set_azcopy_path()
|
||||
Recall()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
set_azcopy_path <- function(path="azcopy")
|
||||
{
|
||||
path <- Sys.which(path)
|
||||
if(is.na(path) || path == "")
|
||||
{
|
||||
.AzureStor$azcopy <- NA
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
# both stdout=TRUE and stderr=TRUE could result in jumbled output;
|
||||
# assume only one stream will actually have data for a given invocation
|
||||
ver <- suppressWarnings(system2(path, "--version", stdout=TRUE, stderr=TRUE))
|
||||
if(!grepl("version 1[[:digit:]]", ver, ignore.case=TRUE))
|
||||
{
|
||||
.AzureStor$azcopy <- NA
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
.AzureStor$azcopy <- unname(path)
|
||||
message("Using azcopy binary ", path)
|
||||
invisible(NULL)
|
||||
auth <- azcopy_auth(object)
|
||||
if(auth$login)
|
||||
on.exit(call_azcopy("logout", silent=TRUE))
|
||||
invisible(call_azcopy(..., env=auth$env))
|
||||
}
|
||||
|
||||
|
||||
azcopy_upload <- function(container, src, dest, ...)
|
||||
{
|
||||
UseMethod("azcopy_upload")
|
||||
}
|
||||
|
||||
azcopy_upload.blob_container <- function(container, src, dest, type="BlockBlob", blocksize=2^24, lease=NULL, ...)
|
||||
{
|
||||
opts <- paste("--blobType", type, "--block-size", sprintf("%.0f", blocksize))
|
||||
azcopy_upload_internal(container, src, dest, opts)
|
||||
}
|
||||
|
||||
azcopy_upload.file_share <- function(container, src, dest, blocksize=2^24, ...)
|
||||
{
|
||||
opts <- sprintf("--block-size %.0f", blocksize)
|
||||
azcopy_upload_internal(container, src, dest, opts)
|
||||
}
|
||||
|
||||
azcopy_upload.adls_filesystem <- function(container, src, dest, blocksize=2^24, lease=NULL, ...)
|
||||
{
|
||||
opts <- sprintf("--block-size %.0f", blocksize)
|
||||
azcopy_upload_internal(container, src, dest, opts)
|
||||
}
|
||||
|
||||
azcopy_upload_internal <- function(container, src, dest, opts)
|
||||
{
|
||||
auth <- check_azcopy_auth(container)
|
||||
|
||||
if(attr(auth, "method") == "key")
|
||||
{
|
||||
acctname <- sub("\\..*$", "", httr::parse_url(container$endpoint$url)$host)
|
||||
Sys.setenv(ACCOUNT_NAME=acctname, ACCOUNT_KEY=auth)
|
||||
on.exit(Sys.unsetenv(c("ACCOUNT_NAME", "ACCOUNT_KEY")))
|
||||
}
|
||||
else if(attr(auth, "method") == "token")
|
||||
azcopy_login()
|
||||
else if(attr(auth, "method") == "sas")
|
||||
dest <- paste0(dest, "?", auth)
|
||||
opts <- azcopy_upload_opts(container, ...)
|
||||
|
||||
dest_uri <- httr::parse_url(container$endpoint$url)
|
||||
dest_uri$path <- gsub("//", "/", file.path(container$name, dest))
|
||||
dest <- httr::build_url(dest_uri)
|
||||
dest <- azcopy_add_sas(container$endpoint, httr::build_url(dest_uri))
|
||||
|
||||
call_azcopy("copy", shQuote(src), shQuote(dest), opts)
|
||||
call_azcopy_from_storage(container$endpoint, "copy", src, dest, opts)
|
||||
}
|
||||
|
||||
azcopy_upload_opts <- function(container, ...)
|
||||
{
|
||||
UseMethod("azcopy_upload_opts")
|
||||
}
|
||||
|
||||
azcopy_upload_opts.blob_container <- function(container, type="BlockBlob", blocksize=2^24, recursive=FALSE,
|
||||
lease=NULL, ...)
|
||||
{
|
||||
c("--blob-type", type, "--block-size-mb", sprintf("%.0f", blocksize/1048576), if(recursive) "--recursive")
|
||||
}
|
||||
|
||||
azcopy_upload_opts.file_share <- function(container, blocksize=2^22, recursive=FALSE, ...)
|
||||
{
|
||||
c("--block-size-mb", sprintf("%.0f", blocksize/1048576), if(recursive) "--recursive")
|
||||
}
|
||||
|
||||
azcopy_upload_opts.adls_filesystem <- function(container, blocksize=2^24, recursive=FALSE, lease=NULL, ...)
|
||||
{
|
||||
c("--block-size-mb", sprintf("%.0f", blocksize/1048576), if(recursive) "--recursive")
|
||||
}
|
||||
|
||||
|
||||
azcopy_download <- function(container, src, dest, ...)
|
||||
{
|
||||
UseMethod("azcopy_download")
|
||||
}
|
||||
|
||||
# currently all azcopy_download methods are the same
|
||||
azcopy_download.blob_container <- function(container, src, dest, overwrite=FALSE, ...)
|
||||
{
|
||||
opts <- paste0("--overwrite=", tolower(as.character(overwrite)))
|
||||
azcopy_download_internal(container, src, dest, opts)
|
||||
}
|
||||
|
||||
azcopy_download.file_share <- function(container, src, dest, overwrite=FALSE, ...)
|
||||
{
|
||||
opts <- paste0("--overwrite=", tolower(as.character(overwrite)))
|
||||
azcopy_download_internal(container, src, dest, opts)
|
||||
}
|
||||
|
||||
azcopy_download.adls_filesystem <- function(container, src, dest, overwrite=FALSE, ...)
|
||||
{
|
||||
opts <- paste0("--overwrite=", tolower(as.character(overwrite)))
|
||||
azcopy_download_internal(container, src, dest, opts)
|
||||
}
|
||||
|
||||
azcopy_download_internal <- function(container, src, dest, opts)
|
||||
{
|
||||
auth <- check_azcopy_auth(container)
|
||||
|
||||
if(attr(auth, "method") == "key")
|
||||
{
|
||||
acctname <- sub("\\..*$", "", httr::parse_url(container$endpoint$url)$host)
|
||||
Sys.setenv(ACCOUNT_NAME=acctname, ACCOUNT_KEY=auth)
|
||||
on.exit(Sys.unsetenv(c("ACCOUNT_NAME", "ACCOUNT_KEY")))
|
||||
}
|
||||
else if(attr(auth, "method") == "token")
|
||||
azcopy_login()
|
||||
else if(attr(auth, "method") == "sas")
|
||||
src <- paste0(src, "?", auth)
|
||||
opts <- azcopy_download_opts(container, ...)
|
||||
|
||||
src_uri <- httr::parse_url(container$endpoint$url)
|
||||
src_uri$path <- gsub("//", "/", file.path(container$name, src))
|
||||
src <- httr::build_url(src_uri)
|
||||
src <- azcopy_add_sas(container$endpoint, httr::build_url(src_uri))
|
||||
|
||||
call_azcopy("copy", shQuote(src), shQuote(dest), opts)
|
||||
call_azcopy_from_storage(container$endpoint, "copy", src, dest, opts)
|
||||
}
|
||||
|
||||
|
||||
check_azcopy_auth <- function(container)
|
||||
azcopy_download_opts <- function(container, ...)
|
||||
{
|
||||
UseMethod("check_azcopy_auth")
|
||||
UseMethod("azcopy_download_opts")
|
||||
}
|
||||
|
||||
check_azcopy_auth.blob_container <- function(container)
|
||||
# currently all azcopy_download_opts methods are the same
|
||||
azcopy_download_opts.blob_container <- function(container, overwrite=FALSE, recursive=FALSE, ...)
|
||||
{
|
||||
endpoint <- container$endpoint
|
||||
|
||||
if(!is.null(endpoint$token))
|
||||
return(structure(0, method="token"))
|
||||
if(!is.null(endpoint$sas))
|
||||
return(structure(endpoint$sas, method="sas"))
|
||||
|
||||
warning("No supported authentication method found for blob storage; defaulting to public", call.=FALSE)
|
||||
return(structure(0, method="public"))
|
||||
c(paste0("--overwrite=", tolower(as.character(overwrite))), if(recursive) "--recursive")
|
||||
}
|
||||
|
||||
check_azcopy_auth.file_share <- function(container)
|
||||
azcopy_download_opts.file_share <- function(container, overwrite=FALSE, recursive=FALSE, ...)
|
||||
{
|
||||
endpoint <- container$endpoint
|
||||
|
||||
if(!is.null(endpoint$sas))
|
||||
return(structure(endpoint$sas, method="sas"))
|
||||
stop("No supported authentication method found for file storage", call.=FALSE)
|
||||
c(paste0("--overwrite=", tolower(as.character(overwrite))), if(recursive) "--recursive")
|
||||
}
|
||||
|
||||
check_azcopy_auth.adls_filesystem <- function(container)
|
||||
azcopy_download_opts.adls_filesystem <- function(container, overwrite=FALSE, recursive=FALSE, ...)
|
||||
{
|
||||
endpoint <- container$endpoint
|
||||
|
||||
if(!is.null(endpoint$key))
|
||||
return(structure(endpoint$key, method="key"))
|
||||
if(!is.null(endpoint$token))
|
||||
return(structure(0, method="token"))
|
||||
|
||||
stop("No supported authentication method found for ADLSgen2", call.=FALSE)
|
||||
c(paste0("--overwrite=", tolower(as.character(overwrite))), if(recursive) "--recursive")
|
||||
}
|
||||
|
||||
check_azcopy_auth.default <- function(container)
|
||||
{
|
||||
stop("Unknown or unsupported container type: ", class(container)[1], call.=FALSE)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
# Work out how AzCopy should authenticate against a storage endpoint.
#
# The code path is chosen from the fields of `endpoint`:
# key: not supported by AzCopy; an error is raised
# sas: nothing to do here; the SAS is appended to the URL separately
# token:
# - client creds: run azcopy login, pass client secret in AZCOPY_SPA_CLIENT_SECRET envvar
# - auth code/device code: set AZCOPY_OAUTH_TOKEN_INFO envvar
# - managed: run azcopy login --identity
#
# Returns a list with components:
# - login: TRUE if `azcopy login` was run by this function, FALSE otherwise
# - env: character vector (named when non-empty) of environment variables to set when running AzCopy
azcopy_auth <- function(endpoint)
{
    env <- character(0)
    obj <- list(login=FALSE)

    if(!is.null(endpoint$key))
    {
        stop("AzCopy does not support authentication with a shared key", call.=FALSE)
        # env["ACCOUNT_NAME"] <- sub("\\..*$", "", httr::parse_url(endpoint$url)$hostname)
        # env["ACCOUNT_KEY"] <- unname(endpoint$key)
    }
    else if(!is.null(endpoint$token))
    {
        token <- endpoint$token
        if(inherits(token, "AzureTokenClientCreds"))
        {
            obj$login <- TRUE
            env["AZCOPY_SPA_CLIENT_SECRET"] <- token$client$client_secret
            args <- c("login", "--service-principal", "--tenant-id", token$tenant,
                      "--application-id", token$client$client_id)
            call_azcopy(args, env=env, silent=TRUE)
        }
        else if(inherits(token, c("AzureTokenAuthCode", "AzureTokenDeviceCode")))
        {
            creds <- list(
                access_token=token$credentials$access_token,
                refresh_token=token$credentials$refresh_token,
                expires_in=token$credentials$expires_in,
                expires_on=token$credentials$expires_on,
                not_before=token$credentials$not_before,
                resource=token$credentials$resource,
                token_type=token$credentials$token_type,
                scope=token$credentials$scope,
                `_tenant`=token$tenant,
                `_ad_endpoint`=token$aad_host,
                `_client_id`=token$client$client_id
            )
            # drop fields the token does not carry, so they are omitted from the JSON
            present <- !vapply(creds, is.null, logical(1))
            env["AZCOPY_OAUTH_TOKEN_INFO"] <- jsonlite::toJSON(creds[present], auto_unbox=TRUE)
        }
        else if(inherits(token, "AzureTokenManaged"))
        {
            obj$login <- TRUE
            # env must be passed by name: call_azcopy's first formal is `...`, so a positional
            # value would be treated as a commandline argument rather than the environment
            call_azcopy(c("login", "--identity"), env=env, silent=TRUE)
        }
        else stop(
            "Only client_credentials, authorization_code, device_code and managed_identity flows supported for azcopy",
            call.=FALSE
        )
    }

    obj$env <- env
    obj
}
|
||||
|
||||
|
||||
# Append the endpoint's shared access signature (if any) to a URL as its query string.
#
# endpoint: object whose `sas` field holds the SAS, with or without a leading "?"
# url: the resource URL to decorate
#
# Returns `url` unchanged when the endpoint has no SAS, or the SAS is empty.
azcopy_add_sas <- function(endpoint, url)
{
    sas <- endpoint$sas
    if(is.null(sas))
        return(url)

    # SAS strings generated by the portal and Storage Explorer carry a leading "?"
    sas <- sub("^\\?", "", sas)
    if(length(sas) == 0 || !any(nzchar(sas)))
        return(url)

    # join with "&" if the URL already has a query string, so the result stays a valid URL
    sep <- if(grepl("?", url, fixed=TRUE)) "&" else "?"
    paste0(url, sep, sas)
}
|
|
@ -0,0 +1,40 @@
|
|||
# azcopy unset/NULL -> not initialized
|
||||
# azcopy = NA -> binary not found, or version < 10 (not usable)
|
||||
# azcopy = path -> usable
|
||||
# Return the path of the azcopy binary cached in the .AzureStor environment.
#
# If nothing has been cached yet, set_azcopy_path() is called once to initialise the cache.
# Raises an error if the cached value is NA, or is still missing after initialisation.
get_azcopy_path <- function()
{
    # `$` on an environment looks only in that environment, so an unrelated object named
    # `azcopy` reachable through parent environments cannot be mistaken for the cached value
    path <- .AzureStor$azcopy
    if(is.null(path))
    {
        set_azcopy_path()
        path <- .AzureStor$azcopy
    }

    if(is.null(path) || is.na(path))
        stop("azcopy version 10+ required but not found", call.=FALSE)
    path
}
|
||||
|
||||
|
||||
# Locate the azcopy binary and record the result in .AzureStor$azcopy.
#
# path: name or path of the binary, resolved via Sys.which()
#
# .AzureStor$azcopy is set to NA if the binary cannot be found, cannot be run, or does not
# report a version matching "version 1x"; otherwise it is set to the resolved path.
# Returns NULL (invisibly on success). Raises an error if processx is not installed.
set_azcopy_path <- function(path="azcopy")
{
    path <- Sys.which(path)
    if(is.na(path) || path == "")
    {
        .AzureStor$azcopy <- NA
        return(NULL)
    }

    if(!requireNamespace("processx", quietly=TRUE))
        stop("The processx package must be installed to use azcopy", call.=FALSE)

    # error_on_status=FALSE: a non-zero exit from `--version` should mark the binary as
    # unusable rather than abort; a failure to launch the process is treated the same way
    ver <- tryCatch(
        processx::run(path, "--version", error_on_status=FALSE),
        error=function(e) NULL
    )

    # the version banner may be written to either stream, so check both
    banner <- c(ver$stdout, ver$stderr)
    if(is.null(ver) || !any(grepl("version 1[[:digit:]]", banner, ignore.case=TRUE)))
    {
        .AzureStor$azcopy <- NA
        return(NULL)
    }

    .AzureStor$azcopy <- unname(path)
    message("Using azcopy binary ", path)
    invisible(NULL)
}
|
||||
|
|
@ -380,7 +380,7 @@ multiupload_blob <- function(container, src, dest, recursive=FALSE, type="BlockB
|
|||
max_concurrent_transfers=10)
|
||||
{
|
||||
if(use_azcopy)
|
||||
return(azcopy_upload(container, src, dest, type=type, blocksize=blocksize, lease=lease))
|
||||
return(azcopy_upload(container, src, dest, type=type, blocksize=blocksize, lease=lease, recursive=recursive))
|
||||
|
||||
multiupload_internal(container, src, dest, recursive=recursive, type=type, blocksize=blocksize, lease=lease,
|
||||
max_concurrent_transfers=max_concurrent_transfers)
|
||||
|
@ -403,7 +403,7 @@ multidownload_blob <- function(container, src, dest, recursive=FALSE, blocksize=
|
|||
max_concurrent_transfers=10)
|
||||
{
|
||||
if(use_azcopy)
|
||||
return(azcopy_download(container, src, dest, overwrite=overwrite, lease=lease))
|
||||
return(azcopy_download(container, src, dest, overwrite=overwrite, lease=lease, recursive=recursive))
|
||||
|
||||
multidownload_internal(container, src, dest, recursive=recursive, blocksize=blocksize, overwrite=overwrite,
|
||||
lease=lease, max_concurrent_transfers=max_concurrent_transfers)
|
||||
|
|
|
@ -334,7 +334,7 @@ multiupload_azure_file <- function(share, src, dest, recursive=FALSE, create_dir
|
|||
max_concurrent_transfers=10)
|
||||
{
|
||||
if(use_azcopy)
|
||||
return(azcopy_upload(share, src, dest, blocksize=blocksize))
|
||||
return(azcopy_upload(share, src, dest, blocksize=blocksize, recursive=recursive))
|
||||
|
||||
multiupload_internal(share, src, dest, recursive=recursive, create_dir=create_dir, blocksize=blocksize,
|
||||
max_concurrent_transfers=max_concurrent_transfers)
|
||||
|
@ -356,7 +356,7 @@ multidownload_azure_file <- function(share, src, dest, recursive=FALSE, blocksiz
|
|||
max_concurrent_transfers=10)
|
||||
{
|
||||
if(use_azcopy)
|
||||
return(azcopy_download(share, src, dest, overwrite=overwrite))
|
||||
return(azcopy_download(share, src, dest, overwrite=overwrite, recursive=recursive))
|
||||
|
||||
multidownload_internal(share, src, dest, recursive=recursive, blocksize=blocksize, overwrite=overwrite,
|
||||
max_concurrent_transfers=max_concurrent_transfers)
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
\name{az_storage}
|
||||
\alias{az_storage}
|
||||
\title{Storage account resource class}
|
||||
\format{An object of class \code{R6ClassGenerator} of length 24.}
|
||||
\format{An object of class \code{R6ClassGenerator} of length 25.}
|
||||
\usage{
|
||||
az_storage
|
||||
}
|
||||
|
|
|
@ -3,31 +3,59 @@
|
|||
\name{call_azcopy}
|
||||
\alias{call_azcopy}
|
||||
\alias{azcopy}
|
||||
\alias{azcopy_login}
|
||||
\title{Call the azcopy file transfer utility}
|
||||
\usage{
|
||||
call_azcopy(...)
|
||||
|
||||
azcopy_login(force = FALSE)
|
||||
call_azcopy(..., env = NULL, silent = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{...}{Arguments to pass to AzCopy on the commandline. If no arguments are supplied, a help screen is printed.}
|
||||
|
||||
\item{force}{For \code{azcopy_login}, whether to force AzCopy to relogin. If \code{FALSE} (the default), and AzureStor has detected that AzCopy has already logged in, this has no effect.}
|
||||
\item{env}{A named character vector of environment variables to set for AzCopy.}
|
||||
|
||||
\item{silent}{Whether to suppress printing the output from AzCopy to the screen. If \code{TRUE}, a non-zero exit status from AzCopy is also not turned into an R error.}
|
||||
}
|
||||
\value{
|
||||
A list, invisibly, with the following components:
|
||||
\itemize{
|
||||
\item \code{status}: The exit status of the AzCopy command. If this is NA, then the process was killed and had no exit status.
|
||||
\item \code{stdout}: The standard output of the command.
|
||||
\item \code{stderr}: The standard error of the command.
|
||||
\item \code{timeout}: Whether AzCopy was killed because of a timeout.
|
||||
}
|
||||
}
|
||||
\description{
|
||||
Call the azcopy file transfer utility
|
||||
}
|
||||
\details{
|
||||
AzureStor has the ability to use the Microsoft AzCopy commandline utility to transfer files. To enable this, set the argument \code{use_azcopy=TRUE} in any call to an upload or download function; AzureStor will then call AzCopy to perform the file transfer rather than relying on its own code. You can also call AzCopy directly with the \code{call_azcopy} function, passing it any arguments as required.
|
||||
AzureStor has the ability to use the Microsoft AzCopy commandline utility to transfer files. To enable this, ensure the processx package is installed and set the argument \code{use_azcopy=TRUE} in any call to an upload or download function; AzureStor will then call AzCopy to perform the file transfer rather than relying on its own code. You can also call AzCopy directly with the \code{call_azcopy} function.
|
||||
|
||||
AzureStor requires version 10 or later of AzCopy. The first time you try to run it, AzureStor will check that the version of AzCopy is correct, and throw an error if it is version 8 or earlier.
|
||||
|
||||
The AzCopy utility must be in your path for AzureStor to find it. Note that unlike earlier versions, Azcopy 10 is a single, self-contained binary file that can be placed in any directory.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
AzCopy uses its own mechanisms for authenticating with Azure Active Directory, which is independent of the OAuth tokens used by AzureStor. AzureStor will try to ensure that AzCopy has previously authenticated before trying to transfer a file with a token, but this may not always succeed. You can run \code{azcopy_login(force=TRUE)} to force it to authenticate.
|
||||
endp <- storage_endpoint("https://mystorage.blob.core.windows.net", sas="mysas")
|
||||
cont <- storage_container(endp, "mycontainer")
|
||||
|
||||
# print various help screens
|
||||
call_azcopy("help")
|
||||
call_azcopy("help", "copy")
|
||||
|
||||
# calling azcopy to download a blob
|
||||
storage_download(cont, "myblob.csv", use_azcopy=TRUE)
|
||||
|
||||
# calling azcopy directly (must specify the SAS explicitly in the source URL)
|
||||
call_azcopy("copy",
|
||||
"https://mystorage.blob.core.windows.net/mycontainer/myblob.csv?mysas",
|
||||
"myblob.csv")
|
||||
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
\link[processx:run]{processx::run}, \link{download_blob}, \link{download_azure_file}, \link{download_adls_file}
|
||||
|
||||
\href{https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10}{AzCopy page on Microsoft Docs}
|
||||
|
||||
\href{https://github.com/Azure/azure-storage-azcopy}{AzCopy GitHub repo}
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
context("Blob client interface, extra")
|
||||
|
||||
# credentials for the live tests, read from environment variables
tenant <- Sys.getenv("AZ_TEST_TENANT_ID")
app <- Sys.getenv("AZ_TEST_APP_ID")
cli_app <- Sys.getenv("AZ_TEST_NATIVE_APP_ID")
password <- Sys.getenv("AZ_TEST_PASSWORD")
subscription <- Sys.getenv("AZ_TEST_SUBSCRIPTION")

# cli_app is used to obtain the user token, so it is required along with the other credentials
if(tenant == "" || app == "" || cli_app == "" || password == "" || subscription == "")
    skip("Authentication tests skipped: ARM credentials not set")
|
||||
|
||||
rgname <- Sys.getenv("AZ_TEST_STORAGE_RG")
|
||||
storname <- Sys.getenv("AZ_TEST_STORAGE_NOHNS")
|
||||
sas <- Sys.getenv("AZ_TEST_STORAGE_AZCOPY_SAS")
|
||||
|
||||
if(rgname == "" || storname == "" || sas == "")
|
||||
skip("Azcopy client tests skipped: resource names not set")
|
||||
|
||||
set_azcopy_path()
|
||||
if(is.null(.AzureStor$azcopy) || is.na(.AzureStor$azcopy))
|
||||
skip("Azcopy tests skipped: not detected")
|
||||
|
||||
stor <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$
|
||||
get_subscription(subscription)$
|
||||
get_resource_group(rgname)$
|
||||
get_storage_account(storname)
|
||||
|
||||
token_svc <- AzureRMR::get_azure_token("https://storage.azure.com/", tenant=tenant, app=app, password=password)
|
||||
token_usr <- AzureRMR::get_azure_token("https://storage.azure.com/", tenant=tenant, app=cli_app)
|
||||
key <- stor$list_keys()[1]
|
||||
|
||||
bl_svc <- stor$get_blob_endpoint(key=NULL, sas=NULL, token=token_svc)
|
||||
bl_usr <- stor$get_blob_endpoint(key=NULL, sas=NULL, token=token_usr)
|
||||
bl_sas <- stor$get_blob_endpoint(key=NULL, sas=sas, token=NULL)
|
||||
ad_key <- stor$get_adls_endpoint(key=key, sas=NULL, token=NULL)
|
||||
|
||||
options(azure_storage_progress_bar=FALSE)
|
||||
|
||||
# Compare two sets of files pairwise: TRUE only if every pair has the same size and the same bytes.
files_identical <- function(set1, set2)
{
    same_contents <- function(path1, path2)
    {
        size1 <- file.size(path1)
        size2 <- file.size(path2)
        size1 == size2 && identical(readBin(path1, "raw", size1), readBin(path2, "raw", size2))
    }

    all(mapply(same_contents, set1, set2))
}
|
||||
|
||||
|
||||
test_that("call_azcopy works",
|
||||
{
|
||||
expect_output(azc1 <- call_azcopy())
|
||||
expect_output(azc2 <- call_azcopy("help"))
|
||||
expect_identical(substr(azc1$stdout, 1, 200), substr(azc2$stdout, 1, 200))
|
||||
})
|
||||
|
||||
|
||||
# test_that("azcopy works with key",
|
||||
# {
|
||||
# contname <- paste0(sample(letters, 10, TRUE), collapse="")
|
||||
# destname <- tempfile()
|
||||
# cont <- create_storage_container(ad_key, contname)
|
||||
# storage_upload(cont, "../resources/iris.csv", "iris.csv", use_azcopy=TRUE)
|
||||
# storage_download(cont, "iris.csv", destname, use_azcopy=TRUE)
|
||||
# expect_true(files_identical("../resources/iris.csv", destname))
|
||||
# })
|
||||
|
||||
|
||||
test_that("azcopy works with service token",
|
||||
{
|
||||
contname <- paste0(sample(letters, 10, TRUE), collapse="")
|
||||
destname <- tempfile()
|
||||
cont <- create_storage_container(bl_svc, contname)
|
||||
storage_upload(cont, "../resources/iris.csv", "iris.csv", use_azcopy=TRUE)
|
||||
storage_download(cont, "iris.csv", destname, use_azcopy=TRUE)
|
||||
expect_true(files_identical("../resources/iris.csv", destname))
|
||||
})
|
||||
|
||||
|
||||
test_that("azcopy works with user token",
|
||||
{
|
||||
contname <- paste0(sample(letters, 10, TRUE), collapse="")
|
||||
destname <- tempfile()
|
||||
cont <- create_storage_container(bl_usr, contname)
|
||||
storage_upload(cont, "../resources/iris.csv", "iris.csv", use_azcopy=TRUE)
|
||||
storage_download(cont, "iris.csv", destname, use_azcopy=TRUE)
|
||||
expect_true(files_identical("../resources/iris.csv", destname))
|
||||
})
|
||||
|
||||
|
||||
test_that("azcopy works with sas",
|
||||
{
|
||||
contname <- paste0(sample(letters, 10, TRUE), collapse="")
|
||||
destname <- tempfile()
|
||||
cont <- create_storage_container(bl_sas, contname)
|
||||
storage_upload(cont, "../resources/iris.csv", "iris.csv", use_azcopy=TRUE)
|
||||
storage_download(cont, "iris.csv", destname, use_azcopy=TRUE)
|
||||
expect_true(files_identical("../resources/iris.csv", destname))
|
||||
})
|
||||
|
||||
|
||||
teardown(
|
||||
{
|
||||
conts <- list_blob_containers(bl_svc)
|
||||
lapply(conts, delete_blob_container, confirm=FALSE)
|
||||
conts <- list_adls_filesystems(ad_key)
|
||||
lapply(conts, delete_adls_filesystem, confirm=FALSE)
|
||||
})
|
Загрузка…
Ссылка в новой задаче