closes #3
This commit is contained in:
Hong Ooi 2019-07-18 13:29:28 +10:00 коммит произвёл GitHub
Родитель cea1a4733e
Коммит 66c343d363
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
16 изменённых файлов: 106 добавлений и 23 удалений

Просмотреть файл

@ -4,6 +4,7 @@ S3method(adls_filesystem,adls_endpoint)
S3method(adls_filesystem,character)
S3method(blob_container,blob_endpoint)
S3method(blob_container,character)
S3method(copy_url_to_storage,blob_container)
S3method(create_adls_filesystem,adls_endpoint)
S3method(create_adls_filesystem,adls_filesystem)
S3method(create_adls_filesystem,character)
@ -90,6 +91,8 @@ export(blob_endpoint)
export(break_lease)
export(call_azcopy)
export(change_lease)
export(copy_url_to_blob)
export(copy_url_to_storage)
export(create_adls_dir)
export(create_adls_filesystem)
export(create_azure_dir)

Просмотреть файл

@ -10,7 +10,7 @@
#' @section Usage:
#' ```
#' create_storage_account(name, location, kind = "StorageV2", replication = "Standard_LRS",
#' access_tier = "hot"), https_only = TRUE,
#' access_tier = "hot"), https_only = TRUE,
#' hierarchical_namespace_enabled = FALSE, properties = list(), ...)
#' ```
#' @section Arguments:
@ -27,7 +27,7 @@
#' @section Details:
#' This method deploys a new storage account resource, with parameters given by the arguments. A storage account can host multiple types of storage:
#' - blob storage
#' - file storage
#' - file storage
#' - table storage
#' - queue storage
#' - Azure Data Lake Storage Gen2

Просмотреть файл

@ -118,7 +118,7 @@ list_adls_filesystems.character <- function(endpoint, key=NULL, token=NULL, sas=
list_adls_filesystems.adls_endpoint <- function(endpoint, ...)
{
lst <- do_storage_call(endpoint$url, "/", options=list(resource="account"),
key=endpoint$key, token=endpoint$token,, sas=endpoint$sas,
key=endpoint$key, token=endpoint$token, sas=endpoint$sas,
api_version=endpoint$api_version)
sapply(lst$filesystems$name, function(fs) adls_filesystem(endpoint, fs), simplify=FALSE)
@ -209,7 +209,7 @@ delete_adls_filesystem.adls_endpoint <- function(endpoint, name, confirm=TRUE, .
#' @param filesystem An ADLSgen2 filesystem object.
#' @param dir,file A string naming a directory or file respectively.
#' @param info Whether to return names only, or all information in a directory listing.
#' @param src,dest The source and destination files for uploading and downloading. Paths are allowed. For uploading, `src` can also be a [textConnection] or [rawConnection] object to allow transferring in-memory R objects without creating a temporary file.
#' @param src,dest The source and destination paths/files for uploading and downloading. See 'Details' below.
#' @param confirm Whether to ask for confirmation on deleting a file or directory.
#' @param blocksize The number of bytes to upload/download per HTTP(S) request.
#' @param lease The lease for a file, if present.

Просмотреть файл

@ -122,7 +122,7 @@ download_adls_file_internal <- function(filesystem, src, dest, blocksize=2^24, o
}
if(conn_dest)
on.exit(seek(dest, 0))
# get file size (for progress bar)
res <- do_container_op(filesystem, src, headers=headers, http_verb="HEAD", http_status_handler="pass")
httr::stop_for_status(res, storage_error_message(res))

Просмотреть файл

@ -225,7 +225,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#'
#' @param container A blob container object.
#' @param blob A string naming a blob.
#' @param src,dest The source and destination files for uploading and downloading. See 'Details' below.For uploading, `src` can also be a [textConnection] or [rawConnection] object to allow transferring in-memory R objects without creating a temporary file. For downloading,
#' @param src,dest The source and destination files for uploading and downloading. See 'Details' below.
#' @param info For `list_blobs`, level of detail about each blob to return: a vector of names only; the name, size and last-modified date (default); or all information.
#' @param confirm Whether to ask for confirmation on deleting a blob.
#' @param blocksize The number of bytes to upload/download per HTTP(S) request.
@ -287,6 +287,11 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#' download_blob(cont, "iris.rds", con)
#' unserialize(con)
#'
#' # copy from a public URL: Iris data from UCI machine learning repository
#' copy_url_to_blob(cont,
#' "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
#' "iris.csv")
#'
#' }
#' @rdname blob
#' @export

37
R/blob_copyurl.R Normal file
Просмотреть файл

@ -0,0 +1,37 @@
#' @details
#' `copy_url_to_storage` transfers the contents of the file at the specified HTTP\[S\] URL directly to storage, without requiring a temporary local copy to be made. Currently this is only implemented for blob storage.
#' @rdname file_transfer
#' @export
copy_url_to_storage <- function(container, src, dest, ...)
{
    # S3 generic: dispatch on the class of `container`. The name given to
    # UseMethod must match the generic's own name, otherwise methods such as
    # copy_url_to_storage.blob_container are never found.
    UseMethod("copy_url_to_storage")
}
#' @rdname file_transfer
#' @export
copy_url_to_storage.blob_container <- function(container, src, dest, ...)
{
    # Blob containers: hand the request to the blob-specific implementation,
    # forwarding any extra arguments unchanged.
    copy_url_to_blob(container=container, src=src, dest=dest, ...)
}
#' @param async For `copy_url_to_blob`, whether the copy operation should be asynchronous (proceed in the background).
#' @details
#' `copy_url_to_blob` transfers the contents of the file at the specified HTTP\[S\] URL directly to blob storage, without requiring a temporary local copy to be made. This has a current file size limit of 256MB.
#' @rdname blob
#' @export
copy_url_to_blob <- function(container, src, dest, lease=NULL, async=FALSE)
{
    # only URLs are accepted as the copy source
    if(!is_url(src))
        stop("Source must be a HTTP[S] url", call.=FALSE)

    # the lease header is only sent when a lease was supplied
    lease_header <- if(is.null(lease))
        list()
    else list(`x-ms-lease-id`=as.character(lease))

    # a synchronous copy is requested unless async=TRUE
    copy_headers <- list(`x-ms-copy-source`=src, `x-ms-requires-sync`=!async)

    do_container_op(container, dest, headers=c(copy_headers, lease_header), http_verb="PUT")
}

Просмотреть файл

@ -40,7 +40,7 @@ break_lease <- function(container, blob="", period=NULL)
{
headers <- list("x-ms-lease-action"="break")
if(!is_empty(period))
headers=c(headers, list("x-ms-lease-break-period"=period))
headers <- c(headers, list("x-ms-lease-break-period"=period))
do_container_op(container, blob, options=list(comp="lease", restype="container"), headers=headers,
http_verb="PUT")
}

Просмотреть файл

@ -3,7 +3,7 @@
#' Get, list, create, or delete file shares.
#'
#' @param endpoint Either a file endpoint object as created by [storage_endpoint], or a character string giving the URL of the endpoint.
#' @param key,token,sas If an endpoint object is not supplied, authentication credentials: either an access key, an Azure Active Directory (AAD) token, or a SAS, in that order of priority.
#' @param key,token,sas If an endpoint object is not supplied, authentication credentials: either an access key, an Azure Active Directory (AAD) token, or a SAS, in that order of priority.
#' @param api_version If an endpoint object is not supplied, the storage API version to use when interacting with the host. Currently defaults to `"2018-03-28"`.
#' @param name The name of the file share to get, create, or delete.
#' @param confirm For deleting a share, whether to ask for confirmation.
@ -195,7 +195,7 @@ delete_file_share.file_endpoint <- function(endpoint, name, confirm=TRUE, ...)
#' @param share A file share object.
#' @param dir,file A string naming a directory or file respectively.
#' @param info Whether to return names only, or all information in a directory listing.
#' @param src,dest The source and destination files for uploading and downloading. For uploading, `src` can also be a [textConnection] or [rawConnection] object to allow transferring in-memory R objects without creating a temporary file.
#' @param src,dest The source and destination files for uploading and downloading. See 'Details' below.
#' @param confirm Whether to ask for confirmation on deleting a file or directory.
#' @param blocksize The number of bytes to upload/download per HTTP(S) request.
#' @param overwrite When downloading, whether to overwrite an existing destination file.
@ -276,7 +276,7 @@ list_azure_files <- function(share, dir, info=c("all", "name"),
name <- vapply(lst$Entries, function(ent) ent$Name[[1]], FUN.VALUE=character(1))
if(info == "name")
return(name)
type <- if(is_empty(name)) character(0) else names(name)
size <- vapply(lst$Entries,
function(ent) if(is_empty(ent$Properties)) NA_character_

Просмотреть файл

@ -45,7 +45,7 @@ delete_pool <- function()
{
if(!exists("pool", envir=.AzureStor))
return()
message("Deleting background pool")
parallel::stopCluster(.AzureStor$pool)
rm(pool, envir=.AzureStor)

Просмотреть файл

@ -13,7 +13,7 @@
#'
#' `upload_to_url` and `download_to_url` allow you to transfer a file to or from Azure storage, given the URL of the source or destination. The storage details (endpoint, container name, and so on) are obtained from the URL.
#'
#' By default, `storage_download` and `download_from_url` will display a progress bar while they are downloading. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
#' By default, the upload and download functions will display a progress bar while they are transferring data. To turn this off, use `options(azure_storage_progress_bar=FALSE)`. To turn the progress bar back on, use `options(azure_storage_progress_bar=TRUE)`.
#'
#' @seealso
#' [storage_container], [blob_container], [file_share], [adls_filesystem]

Просмотреть файл

@ -42,7 +42,7 @@ delete_adls_dir(filesystem, dir, recursive = FALSE, confirm = TRUE)
\item{recursive}{For \code{list_adls_files}, and \code{delete_adls_dir}, whether the operation should recurse through subdirectories. For \code{delete_adls_dir}, this must be TRUE to delete a non-empty directory.}
\item{src, dest}{The source and destination files for uploading and downloading. Paths are allowed. For uploading, \code{src} can also be a \link{textConnection} or \link{rawConnection} object to allow transferring in-memory R objects without creating a temporary file.}
\item{src, dest}{The source and destination paths/files for uploading and downloading. See 'Details' below.}
\item{blocksize}{The number of bytes to upload/download per HTTP(S) request.}

Просмотреть файл

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/blob_client_funcs.R
% Please edit documentation in R/blob_client_funcs.R, R/blob_copyurl.R
\name{list_blobs}
\alias{list_blobs}
\alias{upload_blob}
@ -7,6 +7,7 @@
\alias{download_blob}
\alias{multidownload_blob}
\alias{delete_blob}
\alias{copy_url_to_blob}
\title{Operations on a blob container or blob}
\usage{
list_blobs(container, info = c("partial", "name", "all"),
@ -27,6 +28,8 @@ multidownload_blob(container, src, dest, blocksize = 2^24,
max_concurrent_transfers = 10)
delete_blob(container, blob, confirm = TRUE)
copy_url_to_blob(container, src, dest, lease = NULL, async = FALSE)
}
\arguments{
\item{container}{A blob container object.}
@ -35,7 +38,7 @@ delete_blob(container, blob, confirm = TRUE)
\item{prefix}{For \code{list_blobs}, filters the result to return only blobs whose name begins with this prefix.}
\item{src, dest}{The source and destination files for uploading and downloading. See 'Details' below.For uploading, \code{src} can also be a \link{textConnection} or \link{rawConnection} object to allow transferring in-memory R objects without creating a temporary file. For downloading,}
\item{src, dest}{The source and destination files for uploading and downloading. See 'Details' below.}
\item{type}{When uploading, the type of blob to create. Currently only block blobs are supported.}
@ -52,6 +55,8 @@ delete_blob(container, blob, confirm = TRUE)
\item{blob}{A string naming a blob.}
\item{confirm}{Whether to ask for confirmation on deleting a blob.}
\item{async}{For \code{copy_url_to_blob}, whether the copy operation should be asynchronous (proceed in the background).}
}
\value{
For \code{list_blobs}, details on the blobs in the container. For \code{download_blob}, if \code{dest=NULL}, the contents of the downloaded blob as a raw vector.
@ -67,6 +72,8 @@ Upload, download, or delete a blob; list blobs in a container.
The file transfer functions also support working with connections to allow transferring R objects without creating temporary files. For uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object. For downloading, \code{dest} can be NULL or a \code{rawConnection} object. In the former case, the downloaded data is returned as a raw vector, and for the latter, it will be placed into the connection. See the examples below.
By default, the upload and download functions will display a progress bar to track the file transfer. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
\code{copy_url_to_blob} transfers the contents of the file at the specified HTTP[S] URL directly to blob storage, without requiring a temporary local copy to be made. This has a current file size limit of 256MB.
}
\examples{
\dontrun{
@ -102,6 +109,11 @@ con <- rawConnection(raw(0), "r+")
download_blob(cont, "iris.rds", con)
unserialize(con)
# copy from a public URL: Iris data from UCI machine learning repository
copy_url_to_blob(cont,
"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
"iris.csv")
}
}
\seealso{

Просмотреть файл

@ -8,7 +8,7 @@ Method for the \link[AzureRMR:az_resource_group]{AzureRMR::az_resource_group} cl
}
\section{Usage}{
\preformatted{create_storage_account(name, location, kind = "StorageV2", replication = "Standard_LRS",
access_tier = "hot"), https_only = TRUE,
access_tier = "hot"), https_only = TRUE,
hierarchical_namespace_enabled = FALSE, properties = list(), ...)
}
}

Просмотреть файл

@ -41,7 +41,7 @@ delete_azure_dir(share, dir, confirm = TRUE)
\item{prefix}{For \code{list_azure_files}, filters the result to return only files and directories whose name begins with this prefix.}
\item{src, dest}{The source and destination files for uploading and downloading. For uploading, \code{src} can also be a \link{textConnection} or \link{rawConnection} object to allow transferring in-memory R objects without creating a temporary file.}
\item{src, dest}{The source and destination files for uploading and downloading. See 'Details' below.}
\item{blocksize}{The number of bytes to upload/download per HTTP(S) request.}

Просмотреть файл

@ -1,6 +1,8 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/transfer_generics.R
\name{storage_upload}
% Please edit documentation in R/blob_copyurl.R, R/transfer_generics.R
\name{copy_url_to_storage}
\alias{copy_url_to_storage}
\alias{copy_url_to_storage.blob_container}
\alias{storage_upload}
\alias{storage_upload.blob_container}
\alias{storage_upload.file_share}
@ -21,6 +23,10 @@
\alias{upload_to_url}
\title{Upload and download generics}
\usage{
copy_url_to_storage(container, src, dest, ...)
\method{copy_url_to_storage}{blob_container}(container, src, dest, ...)
storage_upload(container, ...)
\method{storage_upload}{blob_container}(container, src, dest, ...)
@ -61,10 +67,10 @@ upload_to_url(src, dest, key = NULL, token = NULL, sas = NULL, ...)
\arguments{
\item{container}{A storage container object.}
\item{...}{Further arguments to pass to lower-level functions.}
\item{src, dest}{The source and destination files to transfer.}
\item{...}{Further arguments to pass to lower-level functions.}
\item{key, token, sas}{Authentication arguments: an access key, Azure Active Directory (AAD) token or a shared access signature (SAS). If multiple arguments are supplied, a key takes priority over a token, which takes priority over a SAS. For \code{upload_to_url} and \code{download_to_url}, you can also provide a SAS as part of the URL itself.}
\item{overwrite}{For downloading, whether to overwrite any destination files that exist.}
@ -73,13 +79,15 @@ upload_to_url(src, dest, key = NULL, token = NULL, sas = NULL, ...)
Upload and download generics
}
\details{
\code{copy_url_to_storage} transfers the contents of the file at the specified HTTP[S] URL directly to storage, without requiring a temporary local copy to be made. Currently this is only implemented for blob storage.
These functions allow you to transfer files to and from a storage account.
\code{storage_upload}, \code{storage_download}, \code{storage_multiupload} and \code{storage_multidownload} take as first argument a storage container, either for blob storage, file storage, or ADLSgen2. They dispatch to the corresponding file transfer functions for the given storage type.
\code{upload_to_url} and \code{download_to_url} allow you to transfer a file to or from Azure storage, given the URL of the source or destination. The storage details (endpoint, container name, and so on) are obtained from the URL.
By default, \code{storage_download} and \code{download_from_url} will display a progress bar while they are downloading. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
By default, the upload and download functions will display a progress bar while they are transferring data. To turn this off, use \code{options(azure_storage_progress_bar=FALSE)}. To turn the progress bar back on, use \code{options(azure_storage_progress_bar=TRUE)}.
}
\examples{
\dontrun{

Просмотреть файл

@ -187,7 +187,7 @@ test_that("Blob client interface works",
test_that("AAD authentication works",
{
url <- stor$get_blob_endpoint()$url
url <- stor$get_blob_endpoint()$url
token <- AzureRMR::get_azure_token("https://storage.azure.com/", tenant=tenant, app=app, password=password)
bl <- blob_endpoint(url, token=token)
cont <- create_blob_container(bl, "newcontainer4")
@ -322,6 +322,24 @@ test_that("chunked downloading works",
})
test_that("copy from url works",
{
    # container that will receive the copied blob
    cont <- create_blob_container(stor$get_blob_endpoint(), "urltransfer")

    # source is the copy of iris.csv hosted in the GitHub repo;
    # the reference is the same file in the local test resources
    src_url <- "https://raw.githubusercontent.com/Azure/AzureStor/master/tests/resources/iris.csv"
    reference_file <- "../resources/iris.csv"
    downloaded_file <- tempfile()

    # server-side copy from the URL, then pull the blob back down
    copy_url_to_blob(cont, src_url, "iris.csv", async=FALSE)
    download_blob(cont, "iris.csv", downloaded_file)

    # compare line by line: GitHub auto-translates CRLF -> LF, so a
    # byte-level comparison would not be reliable
    expect_identical(readLines(reference_file), readLines(downloaded_file))
})
teardown(
{
bl <- stor$get_blob_endpoint()