зеркало из https://github.com/Azure/AzureStor.git
also display progress bar for uploads
This commit is contained in:
Родитель
2651f51d3b
Коммит
d30ad5306e
1
NEWS.md
1
NEWS.md
|
@ -2,6 +2,7 @@
|
|||
|
||||
- By default, HTTP(S) requests to the storage endpoint will now be retried on encountering a network error. To change the number of retries, call `options(azure_storage_retries=N)` where N >= 0. Setting this option to zero disables retrying.
|
||||
- Downloading now proceeds in blocks, much like uploading. The default block size is set to 16MB for blob and ADLSgen2, and 4MB for file storage. While this reduces the throughput slightly (basically there is at least one extra REST call involved), it allows retrying a failed transfer on a per-block basis rather than having to redownload the entire file.
|
||||
- Also display the progress bar for uploads.
|
||||
|
||||
# AzureStor 2.0.1
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ globalVariables(c("self", "pool"), "AzureStor")
|
|||
{
|
||||
options(azure_storage_api_version="2018-03-28")
|
||||
options(azure_adls_api_version="2018-06-17")
|
||||
options(azure_dl_progress_bar=TRUE)
|
||||
options(azure_storage_progress_bar=TRUE)
|
||||
options(azure_storage_retries=10)
|
||||
|
||||
# all methods extending classes in external package must be run from .onLoad
|
||||
|
|
|
@ -225,7 +225,7 @@ delete_adls_filesystem.adls_endpoint <- function(endpoint, name, confirm=TRUE, .
|
|||
#'
|
||||
#' The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, `src` can be a [textConnection] or [rawConnection] object. For downloading, `dest` can be NULL or a `rawConnection` object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
|
||||
#'
|
||||
#' By default, `download_adls_file` will display a progress bar as it is downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
|
||||
#' By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
|
||||
#'
|
||||
#' @return
|
||||
#' For `list_adls_files`, if `info="name"`, a vector of file/directory names. If `info="all"`, a data frame giving the file size and whether each object is a file or directory.
|
||||
|
|
|
@ -26,28 +26,28 @@ multiupload_adls_file_internal <- function(filesystem, src, dest, blocksize=2^22
|
|||
|
||||
upload_adls_file_internal <- function(filesystem, src, dest, blocksize=2^24, lease=NULL)
|
||||
{
|
||||
con <- if(inherits(src, "textConnection"))
|
||||
rawConnection(charToRaw(paste0(readLines(src), collapse="\n")))
|
||||
else if(inherits(src, "rawConnection"))
|
||||
src
|
||||
else file(src, open="rb")
|
||||
on.exit(close(con))
|
||||
|
||||
# create the file
|
||||
content_type <- if(inherits(src, "connection"))
|
||||
"application/octet-stream"
|
||||
else mime::guess_type(src)
|
||||
|
||||
headers <- list(`x-ms-content-type`=content_type)
|
||||
#if(!is.null(lease))
|
||||
#headers[["x-ms-lease-id"]] <- as.character(lease)
|
||||
if(!is.null(lease))
|
||||
headers[["x-ms-lease-id"]] <- as.character(lease)
|
||||
|
||||
do_container_op(filesystem, dest, options=list(resource="file"), headers=headers, http_verb="PUT")
|
||||
|
||||
src <- normalize_src(src)
|
||||
on.exit(close(src$con))
|
||||
|
||||
bar <- storage_progress_bar$new(src$size, "up")
|
||||
|
||||
# transfer the contents
|
||||
blocklist <- list()
|
||||
pos <- 0
|
||||
repeat
|
||||
{
|
||||
body <- readBin(con, "raw", blocksize)
|
||||
body <- readBin(src$con, "raw", blocksize)
|
||||
thisblock <- length(body)
|
||||
if(thisblock == 0)
|
||||
break
|
||||
|
@ -58,11 +58,15 @@ upload_adls_file_internal <- function(filesystem, src, dest, blocksize=2^24, lea
|
|||
)
|
||||
opts <- list(action="append", position=sprintf("%.0f", pos))
|
||||
|
||||
do_container_op(filesystem, dest, headers=headers, body=body, options=opts, http_verb="PATCH")
|
||||
do_container_op(filesystem, dest, headers=headers, body=body, options=opts, progress=bar$update(),
|
||||
http_verb="PATCH")
|
||||
|
||||
bar$offset <- bar$offset + blocksize
|
||||
pos <- pos + thisblock
|
||||
}
|
||||
|
||||
bar$close()
|
||||
|
||||
# flush contents
|
||||
do_container_op(filesystem, dest,
|
||||
options=list(action="flush", position=sprintf("%.0f", pos)),
|
||||
|
|
|
@ -243,7 +243,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
|
|||
#'
|
||||
#' The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, `src` can be a [textConnection] or [rawConnection] object. For downloading, `dest` can be NULL or a `rawConnection` object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
|
||||
#'
|
||||
#' By default, `download_blob` will display a progress bar as it is downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
|
||||
#' By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
|
||||
#'
|
||||
#' @return
|
||||
#' For `list_blobs`, details on the blobs in the container. For `download_blob`, if `dest=NULL`, the contents of the downloaded blob as a raw vector.
|
||||
|
|
|
@ -41,12 +41,10 @@ upload_blob_internal <- function(container, src, dest, type="BlockBlob", blocksi
|
|||
if(!is.null(lease))
|
||||
headers[["x-ms-lease-id"]] <- as.character(lease)
|
||||
|
||||
con <- if(inherits(src, "textConnection"))
|
||||
rawConnection(charToRaw(paste0(readLines(src), collapse="\n")))
|
||||
else if(inherits(src, "rawConnection"))
|
||||
src
|
||||
else file(src, open="rb")
|
||||
on.exit(close(con))
|
||||
src <- normalize_src(src)
|
||||
on.exit(close(src$con))
|
||||
|
||||
bar <- storage_progress_bar$new(src$size, "up")
|
||||
|
||||
# upload each block
|
||||
blocklist <- list()
|
||||
|
@ -54,7 +52,7 @@ upload_blob_internal <- function(container, src, dest, type="BlockBlob", blocksi
|
|||
i <- 1
|
||||
repeat
|
||||
{
|
||||
body <- readBin(con, "raw", blocksize)
|
||||
body <- readBin(src$con, "raw", blocksize)
|
||||
thisblock <- length(body)
|
||||
if(thisblock == 0)
|
||||
break
|
||||
|
@ -64,12 +62,16 @@ upload_blob_internal <- function(container, src, dest, type="BlockBlob", blocksi
|
|||
id <- openssl::base64_encode(sprintf("%s-%010d", base_id, i))
|
||||
opts <- list(comp="block", blockid=id)
|
||||
|
||||
do_container_op(container, dest, headers=headers, body=body, options=opts, http_verb="PUT")
|
||||
do_container_op(container, dest, headers=headers, body=body, options=opts, progress=bar$update(),
|
||||
http_verb="PUT")
|
||||
|
||||
blocklist <- c(blocklist, list(Latest=list(id)))
|
||||
bar$offset <- bar$offset + blocksize
|
||||
i <- i + 1
|
||||
}
|
||||
|
||||
bar$close()
|
||||
|
||||
# update block list
|
||||
body <- as.character(xml2::as_xml_document(list(BlockList=blocklist)))
|
||||
headers <- list("content-length"=sprintf("%.0f", nchar(body)))
|
||||
|
|
|
@ -210,7 +210,7 @@ delete_file_share.file_endpoint <- function(endpoint, name, confirm=TRUE, ...)
|
|||
#'
|
||||
#' The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, `src` can be a [textConnection] or [rawConnection] object. For downloading, `dest` can be NULL or a `rawConnection` object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
|
||||
#'
|
||||
#' By default, `download_azure_file` will display a progress bar as it is downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
|
||||
#' By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
|
||||
#'
|
||||
#' @return
|
||||
#' For `list_azure_files`, if `info="name"`, a vector of file/directory names. If `info="all"`, a data frame giving the file size and whether each object is a file or directory.
|
||||
|
|
|
@ -30,49 +30,27 @@ upload_azure_file_internal <- function(share, src, dest, blocksize=2^22)
|
|||
"application/octet-stream"
|
||||
else mime::guess_type(src)
|
||||
|
||||
if(inherits(src, "textConnection"))
|
||||
{
|
||||
src <- charToRaw(paste0(readLines(src), collapse="\n"))
|
||||
nbytes <- length(src)
|
||||
con <- rawConnection(src)
|
||||
}
|
||||
else if(inherits(src, "rawConnection"))
|
||||
{
|
||||
con <- src
|
||||
# need to read the data to get object size (!)
|
||||
nbytes <- 0
|
||||
repeat
|
||||
{
|
||||
x <- readBin(con, "raw", n=blocksize)
|
||||
if(length(x) == 0)
|
||||
break
|
||||
nbytes <- nbytes + length(x)
|
||||
}
|
||||
seek(con, 0) # reposition connection after reading
|
||||
}
|
||||
else
|
||||
{
|
||||
con <- file(src, open="rb")
|
||||
nbytes <- file.info(src)$size
|
||||
}
|
||||
on.exit(close(con))
|
||||
src <- normalize_src(src)
|
||||
on.exit(close(src$con))
|
||||
|
||||
# first, create the file
|
||||
# ensure content-length is never exponential notation
|
||||
headers <- list("x-ms-type"="file",
|
||||
"x-ms-content-length"=sprintf("%.0f", nbytes))
|
||||
"x-ms-content-length"=sprintf("%.0f", src$size))
|
||||
do_container_op(share, dest, headers=headers, http_verb="PUT")
|
||||
|
||||
# then write the bytes into it, one block at a time
|
||||
options <- list(comp="range")
|
||||
headers <- list("x-ms-write"="Update")
|
||||
|
||||
bar <- storage_progress_bar$new(src$size, "up")
|
||||
|
||||
# upload each block
|
||||
blocklist <- list()
|
||||
range_begin <- 0
|
||||
while(range_begin < nbytes)
|
||||
while(range_begin < src$size)
|
||||
{
|
||||
body <- readBin(con, "raw", blocksize)
|
||||
body <- readBin(src$con, "raw", blocksize)
|
||||
thisblock <- length(body)
|
||||
if(thisblock == 0) # sanity check
|
||||
break
|
||||
|
@ -81,11 +59,15 @@ upload_azure_file_internal <- function(share, src, dest, blocksize=2^22)
|
|||
headers[["content-length"]] <- sprintf("%.0f", thisblock)
|
||||
headers[["range"]] <- sprintf("bytes=%.0f-%.0f", range_begin, range_begin + thisblock - 1)
|
||||
|
||||
do_container_op(share, dest, headers=headers, body=body, options=options, http_verb="PUT")
|
||||
do_container_op(share, dest, headers=headers, body=body, options=options, progress=bar$update(),
|
||||
http_verb="PUT")
|
||||
|
||||
bar$offset <- bar$offset + blocksize
|
||||
range_begin <- range_begin + thisblock
|
||||
}
|
||||
|
||||
bar$close()
|
||||
|
||||
do_container_op(share, dest, headers=list("x-ms-content-type"=content_type),
|
||||
options=list(comp="properties"),
|
||||
http_verb="PUT")
|
||||
|
|
|
@ -11,7 +11,7 @@ public=list(
|
|||
|
||||
initialize=function(size, direction)
|
||||
{
|
||||
self$display <- isTRUE(getOption("azure_dl_progress_bar"))
|
||||
self$display <- isTRUE(getOption("azure_storage_progress_bar"))
|
||||
if(self$display)
|
||||
{
|
||||
self$direction <- direction
|
||||
|
|
|
@ -226,3 +226,40 @@ retry_transfer <- function(res)
|
|||
}
|
||||
|
||||
|
||||
# Normalise an upload source into an open binary connection plus its size.
#
# Arguments:
#   src: one of
#        - a single character string giving the path of a file to read;
#        - a rawConnection, which is used as-is;
#        - a textConnection, whose lines are read, joined with "\n" and
#          wrapped in a new rawConnection.
#
# Value:
#   A list with components
#     con:  a connection positioned at the start of the data;
#     size: the total number of bytes available from `con`.
#
# The returned connection is left open; this function never closes it. For a
# textConnection source, the original text connection is not closed either.
#
# Errors:
#   Stops with "Invalid source specification" if `src` is none of the above.
normalize_src <- function(src)
{
    # a file source must be exactly one non-missing path, otherwise file()
    # would fail later with a much less helpful message
    file_src <- is.character(src) && length(src) == 1L && !is.na(src)
    raw_src <- inherits(src, "rawConnection")
    txt_src <- inherits(src, "textConnection")
    if(!file_src && !raw_src && !txt_src)
        stop("Invalid source specification", call.=FALSE)

    if(txt_src)
    {
        # materialise the text as bytes so it can be read in binary blocks
        src <- charToRaw(paste0(readLines(src), collapse="\n"))
        nbytes <- length(src)
        con <- rawConnection(src)
    }
    else if(raw_src)
    {
        con <- src
        # A raw connection does not expose its length, so the data has to be
        # read through once to count it. Rewind first: the connection is
        # rewound to offset 0 afterwards, so the count must also start at 0 or
        # the reported size would not match the bytes subsequently read.
        seek(con, 0)
        nbytes <- 0
        repeat
        {
            x <- readBin(con, "raw", n=1e6)
            if(length(x) == 0)
                break
            nbytes <- nbytes + length(x)
        }
        seek(con, 0) # reposition connection after reading
    }
    else
    {
        con <- file(src, open="rb")
        nbytes <- file.info(src)$size
    }
    list(con=con, size=nbytes)
}
|
||||
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#'
|
||||
#' `upload_to_url` and `download_from_url` allow you to transfer a file to or from Azure storage, given the URL of the source or destination. The storage details (endpoint, container name, and so on) are obtained from the URL.
|
||||
#'
|
||||
#' By default, `storage_download` and `download_from_url` will display a progress bar while they are downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
|
||||
#' By default, `storage_download` and `download_from_url` will display a progress bar while they are downloading. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
|
||||
#'
|
||||
#' @seealso
|
||||
#' [storage_container], [blob_container], [file_share], [adls_filesystem]
|
||||
|
|
|
@ -71,7 +71,7 @@ Upload, download, or delete a file; list files in a directory; create or delete
|
|||
|
||||
The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object. For downloading, \code{dest} can be NULL or a \code{rawConnection} object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
|
||||
|
||||
By default, \code{download_adls_file} will display a progress bar as it is downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
|
||||
By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
|
|
@ -66,7 +66,7 @@ Upload, download, or delete a blob; list blobs in a container.
|
|||
|
||||
The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object. For downloading, \code{dest} can be NULL or a \code{rawConnection} object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
|
||||
|
||||
By default, \code{download_blob} will display a progress bar as it is downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
|
||||
By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
|
|
@ -68,7 +68,7 @@ Upload, download, or delete a file; list files in a directory; create or delete
|
|||
|
||||
The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object. For downloading, \code{dest} can be NULL or a \code{rawConnection} object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
|
||||
|
||||
By default, \code{download_azure_file} will display a progress bar as it is downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
|
||||
By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
|
|
@ -79,7 +79,7 @@ These functions allow you to transfer files to and from a storage account.
|
|||
|
||||
\code{upload_to_url} and \code{download_from_url} allow you to transfer a file to or from Azure storage, given the URL of the source or destination. The storage details (endpoint, container name, and so on) are obtained from the URL.
|
||||
|
||||
By default, \code{storage_download} and \code{download_from_url} will display a progress bar while they are downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
|
||||
By default, \code{storage_download} and \code{download_from_url} will display a progress bar while they are downloading. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
|
|
@ -16,7 +16,7 @@ if(rgname == "" || storname == "")
|
|||
|
||||
sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
|
||||
stor <- sub$get_resource_group(rgname)$get_storage_account(storname)
|
||||
options(azure_dl_progress_bar=FALSE)
|
||||
options(azure_storage_progress_bar=FALSE)
|
||||
|
||||
test_that("Blob client interface works",
|
||||
{
|
||||
|
|
|
@ -16,7 +16,7 @@ if(rgname == "" || storname == "")
|
|||
|
||||
sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
|
||||
stor <- sub$get_resource_group(rgname)$get_storage_account(storname)
|
||||
options(azure_dl_progress_bar=FALSE)
|
||||
options(azure_storage_progress_bar=FALSE)
|
||||
|
||||
test_that("File client interface works",
|
||||
{
|
||||
|
|
|
@ -16,7 +16,7 @@ if(rgname == "" || storname == "")
|
|||
|
||||
sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
|
||||
stor <- sub$get_resource_group(rgname)$get_storage_account(storname)
|
||||
options(azure_dl_progress_bar=FALSE)
|
||||
options(azure_storage_progress_bar=FALSE)
|
||||
|
||||
test_that("ADLSgen2 client interface works",
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче