also display progress bar for uploads

This commit is contained in:
Hong Ooi 2019-05-18 09:51:39 +10:00
Родитель 2651f51d3b
Коммит d30ad5306e
18 изменённых файлов: 88 добавлений и 62 удалений

Просмотреть файл

@ -2,6 +2,7 @@
- By default, HTTP(S) requests to the storage endpoint will now be retried on encountering a network error. To change the number of retries, call `options(azure_storage_retries=N)` where N >= 0. Setting this option to zero disables retrying.
- Downloading now proceeds in blocks, much like uploading. The default block size is set to 16MB for blob and ADLSgen2, and 4MB for file storage. While this reduces the throughput slightly (basically there is at least one extra REST call involved), it allows retrying a failed transfer on a per-block basis rather than having to redownload the entire file.
- Also display the progress bar for uploads.
# AzureStor 2.0.1

Просмотреть файл

@ -11,7 +11,7 @@ globalVariables(c("self", "pool"), "AzureStor")
{
options(azure_storage_api_version="2018-03-28")
options(azure_adls_api_version="2018-06-17")
options(azure_dl_progress_bar=TRUE)
options(azure_storage_progress_bar=TRUE)
options(azure_storage_retries=10)
# all methods extending classes in external package must be run from .onLoad

Просмотреть файл

@ -225,7 +225,7 @@ delete_adls_filesystem.adls_endpoint <- function(endpoint, name, confirm=TRUE, .
#'
#' The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, `src` can be a [textConnection] or [rawConnection] object. For downloading, `dest` can be NULL or a `rawConnection` object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
#'
#' By default, `download_adls_file` will display a progress bar as it is downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
#' By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
#'
#' @return
#' For `list_adls_files`, if `info="name"`, a vector of file/directory names. If `info="all"`, a data frame giving the file size and whether each object is a file or directory.

Просмотреть файл

@ -26,28 +26,28 @@ multiupload_adls_file_internal <- function(filesystem, src, dest, blocksize=2^22
upload_adls_file_internal <- function(filesystem, src, dest, blocksize=2^24, lease=NULL)
{
con <- if(inherits(src, "textConnection"))
rawConnection(charToRaw(paste0(readLines(src), collapse="\n")))
else if(inherits(src, "rawConnection"))
src
else file(src, open="rb")
on.exit(close(con))
# create the file
content_type <- if(inherits(src, "connection"))
"application/octet-stream"
else mime::guess_type(src)
headers <- list(`x-ms-content-type`=content_type)
#if(!is.null(lease))
#headers[["x-ms-lease-id"]] <- as.character(lease)
if(!is.null(lease))
headers[["x-ms-lease-id"]] <- as.character(lease)
do_container_op(filesystem, dest, options=list(resource="file"), headers=headers, http_verb="PUT")
src <- normalize_src(src)
on.exit(close(src$con))
bar <- storage_progress_bar$new(src$size, "up")
# transfer the contents
blocklist <- list()
pos <- 0
repeat
{
body <- readBin(con, "raw", blocksize)
body <- readBin(src$con, "raw", blocksize)
thisblock <- length(body)
if(thisblock == 0)
break
@ -58,11 +58,15 @@ upload_adls_file_internal <- function(filesystem, src, dest, blocksize=2^24, lea
)
opts <- list(action="append", position=sprintf("%.0f", pos))
do_container_op(filesystem, dest, headers=headers, body=body, options=opts, http_verb="PATCH")
do_container_op(filesystem, dest, headers=headers, body=body, options=opts, progress=bar$update(),
http_verb="PATCH")
bar$offset <- bar$offset + blocksize
pos <- pos + thisblock
}
bar$close()
# flush contents
do_container_op(filesystem, dest,
options=list(action="flush", position=sprintf("%.0f", pos)),

Просмотреть файл

@ -243,7 +243,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#'
#' The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, `src` can be a [textConnection] or [rawConnection] object. For downloading, `dest` can be NULL or a `rawConnection` object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
#'
#' By default, `download_blob` will display a progress bar as it is downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
#' By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
#'
#' @return
#' For `list_blobs`, details on the blobs in the container. For `download_blob`, if `dest=NULL`, the contents of the downloaded blob as a raw vector.

Просмотреть файл

@ -41,12 +41,10 @@ upload_blob_internal <- function(container, src, dest, type="BlockBlob", blocksi
if(!is.null(lease))
headers[["x-ms-lease-id"]] <- as.character(lease)
con <- if(inherits(src, "textConnection"))
rawConnection(charToRaw(paste0(readLines(src), collapse="\n")))
else if(inherits(src, "rawConnection"))
src
else file(src, open="rb")
on.exit(close(con))
src <- normalize_src(src)
on.exit(close(src$con))
bar <- storage_progress_bar$new(src$size, "up")
# upload each block
blocklist <- list()
@ -54,7 +52,7 @@ upload_blob_internal <- function(container, src, dest, type="BlockBlob", blocksi
i <- 1
repeat
{
body <- readBin(con, "raw", blocksize)
body <- readBin(src$con, "raw", blocksize)
thisblock <- length(body)
if(thisblock == 0)
break
@ -64,12 +62,16 @@ upload_blob_internal <- function(container, src, dest, type="BlockBlob", blocksi
id <- openssl::base64_encode(sprintf("%s-%010d", base_id, i))
opts <- list(comp="block", blockid=id)
do_container_op(container, dest, headers=headers, body=body, options=opts, http_verb="PUT")
do_container_op(container, dest, headers=headers, body=body, options=opts, progress=bar$update(),
http_verb="PUT")
blocklist <- c(blocklist, list(Latest=list(id)))
bar$offset <- bar$offset + blocksize
i <- i + 1
}
bar$close()
# update block list
body <- as.character(xml2::as_xml_document(list(BlockList=blocklist)))
headers <- list("content-length"=sprintf("%.0f", nchar(body)))

Просмотреть файл

@ -210,7 +210,7 @@ delete_file_share.file_endpoint <- function(endpoint, name, confirm=TRUE, ...)
#'
#' The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, `src` can be a [textConnection] or [rawConnection] object. For downloading, `dest` can be NULL or a `rawConnection` object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
#'
#' By default, `download_azure_file` will display a progress bar as it is downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
#' By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
#'
#' @return
#' For `list_azure_files`, if `info="name"`, a vector of file/directory names. If `info="all"`, a data frame giving the file size and whether each object is a file or directory.

Просмотреть файл

@ -30,49 +30,27 @@ upload_azure_file_internal <- function(share, src, dest, blocksize=2^22)
"application/octet-stream"
else mime::guess_type(src)
if(inherits(src, "textConnection"))
{
src <- charToRaw(paste0(readLines(src), collapse="\n"))
nbytes <- length(src)
con <- rawConnection(src)
}
else if(inherits(src, "rawConnection"))
{
con <- src
# need to read the data to get object size (!)
nbytes <- 0
repeat
{
x <- readBin(con, "raw", n=blocksize)
if(length(x) == 0)
break
nbytes <- nbytes + length(x)
}
seek(con, 0) # reposition connection after reading
}
else
{
con <- file(src, open="rb")
nbytes <- file.info(src)$size
}
on.exit(close(con))
src <- normalize_src(src)
on.exit(close(src$con))
# first, create the file
# ensure content-length is never exponential notation
headers <- list("x-ms-type"="file",
"x-ms-content-length"=sprintf("%.0f", nbytes))
"x-ms-content-length"=sprintf("%.0f", src$size))
do_container_op(share, dest, headers=headers, http_verb="PUT")
# then write the bytes into it, one block at a time
options <- list(comp="range")
headers <- list("x-ms-write"="Update")
bar <- storage_progress_bar$new(src$size, "up")
# upload each block
blocklist <- list()
range_begin <- 0
while(range_begin < nbytes)
while(range_begin < src$size)
{
body <- readBin(con, "raw", blocksize)
body <- readBin(src$con, "raw", blocksize)
thisblock <- length(body)
if(thisblock == 0) # sanity check
break
@ -81,11 +59,15 @@ upload_azure_file_internal <- function(share, src, dest, blocksize=2^22)
headers[["content-length"]] <- sprintf("%.0f", thisblock)
headers[["range"]] <- sprintf("bytes=%.0f-%.0f", range_begin, range_begin + thisblock - 1)
do_container_op(share, dest, headers=headers, body=body, options=options, http_verb="PUT")
do_container_op(share, dest, headers=headers, body=body, options=options, progress=bar$update(),
http_verb="PUT")
bar$offset <- bar$offset + blocksize
range_begin <- range_begin + thisblock
}
bar$close()
do_container_op(share, dest, headers=list("x-ms-content-type"=content_type),
options=list(comp="properties"),
http_verb="PUT")

Просмотреть файл

@ -11,7 +11,7 @@ public=list(
initialize=function(size, direction)
{
self$display <- isTRUE(getOption("azure_dl_progress_bar"))
self$display <- isTRUE(getOption("azure_storage_progress_bar"))
if(self$display)
{
self$direction <- direction

Просмотреть файл

@ -226,3 +226,40 @@ retry_transfer <- function(res)
}
# Normalise an upload source into a readable binary connection of known size.
#
# Arguments:
#   src  Either a character value (passed to file() as the path to open), a
#        rawConnection, or a textConnection. Anything else is rejected.
#
# Value:
#   A list with two elements:
#     con   a connection positioned at the first byte of the data to send
#     size  the total number of bytes that can be read from `con`
#
# Errors:
#   Stops with "Invalid source specification" if `src` is none of the
#   supported kinds.
#
# Notes:
#   - For a textConnection, a *new* rawConnection is returned; the original
#     text connection is read to its end but is not closed here.
#   - For a rawConnection, the very same connection object is returned, rewound
#     to offset 0.
normalize_src <- function(src)
{
    is_file <- is.character(src)
    is_raw <- inherits(src, "rawConnection")
    is_text <- inherits(src, "textConnection")

    if(!is_file && !is_raw && !is_text)
        stop("Invalid source specification", call.=FALSE)

    if(is_text)
    {
        # slurp the text and re-expose it as bytes, so that callers can always
        # use readBin() and the byte count is known up front
        bytes <- charToRaw(paste0(readLines(src), collapse="\n"))
        return(list(con=rawConnection(bytes), size=length(bytes)))
    }

    if(is_raw)
    {
        # the size has to be found by reading the connection through to its end.
        # Rewind first: the connection is handed back positioned at offset 0, so
        # the reported size must cover everything from offset 0 as well, even if
        # the caller had already consumed part of the connection.
        seek(src, 0)
        nbytes <- 0
        repeat
        {
            chunk <- readBin(src, "raw", n=1e6)
            if(length(chunk) == 0)
                break
            nbytes <- nbytes + length(chunk)
        }
        seek(src, 0) # reposition connection after reading
        return(list(con=src, size=nbytes))
    }

    # plain file: open in binary mode and take the size from the filesystem
    list(con=file(src, open="rb"), size=file.info(src)$size)
}

Просмотреть файл

@ -13,7 +13,7 @@
#'
#' `upload_to_url` and `download_from_url` allow you to transfer a file to or from Azure storage, given the URL of the source or destination. The storage details (endpoint, container name, and so on) are obtained from the URL.
#'
#' By default, `storage_download` and `download_from_url` will display a progress bar while they are downloading. To turn this off, use `options(azure_dl_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_dl_progress_bar=TRUE)`.
#' By default, `storage_download` and `download_from_url` will display a progress bar while they are downloading. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
#'
#' @seealso
#' [storage_container], [blob_container], [file_share], [adls_filesystem]

Просмотреть файл

@ -71,7 +71,7 @@ Upload, download, or delete a file; list files in a directory; create or delete
The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object. For downloading, \code{dest} can be NULL or a \code{rawConnection} object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
By default, \code{download_adls_file} will display a progress bar as it is downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
}
\examples{
\dontrun{

Просмотреть файл

@ -66,7 +66,7 @@ Upload, download, or delete a blob; list blobs in a container.
The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object. For downloading, \code{dest} can be NULL or a \code{rawConnection} object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
By default, \code{download_blob} will display a progress bar as it is downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
}
\examples{
\dontrun{

Просмотреть файл

@ -68,7 +68,7 @@ Upload, download, or delete a file; list files in a directory; create or delete
The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object. For downloading, \code{dest} can be NULL or a \code{rawConnection} object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
By default, \code{download_azure_file} will display a progress bar as it is downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
}
\examples{
\dontrun{

Просмотреть файл

@ -79,7 +79,7 @@ These functions allow you to transfer files to and from a storage account.
\code{upload_to_url} and \code{download_from_url} allow you to transfer a file to or from Azure storage, given the URL of the source or destination. The storage details (endpoint, container name, and so on) are obtained from the URL.
By default, \code{storage_download} and \code{download_from_url} will display a progress bar while they are downloading. To turn this off, use \code{options(azure_dl_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_dl_progress_bar=TRUE)}.
By default, \code{storage_download} and \code{download_from_url} will display a progress bar while they are downloading. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
}
\examples{
\dontrun{

Просмотреть файл

@ -16,7 +16,7 @@ if(rgname == "" || storname == "")
sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
stor <- sub$get_resource_group(rgname)$get_storage_account(storname)
options(azure_dl_progress_bar=FALSE)
options(azure_storage_progress_bar=FALSE)
test_that("Blob client interface works",
{

Просмотреть файл

@ -16,7 +16,7 @@ if(rgname == "" || storname == "")
sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
stor <- sub$get_resource_group(rgname)$get_storage_account(storname)
options(azure_dl_progress_bar=FALSE)
options(azure_storage_progress_bar=FALSE)
test_that("File client interface works",
{

Просмотреть файл

@ -16,7 +16,7 @@ if(rgname == "" || storname == "")
sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
stor <- sub$get_resource_group(rgname)$get_storage_account(storname)
options(azure_dl_progress_bar=FALSE)
options(azure_storage_progress_bar=FALSE)
test_that("ADLSgen2 client interface works",
{