Implement create/delete dirs for blob storage (#59)

* blob create/delete

* implement generics

* works

* update news

* better wording

* also update readme

* update readme again
This commit is contained in:
Hong Ooi 2020-07-01 19:20:41 +10:00 коммит произвёл GitHub
Родитель adb2550407
Коммит 25632dda2e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 106 добавлений и 23 удалений

Просмотреть файл

@ -119,6 +119,7 @@ export(create_adls_dir)
export(create_adls_filesystem) export(create_adls_filesystem)
export(create_azure_dir) export(create_azure_dir)
export(create_blob_container) export(create_blob_container)
export(create_blob_dir)
export(create_file_share) export(create_file_share)
export(create_storage_container) export(create_storage_container)
export(create_storage_dir) export(create_storage_dir)
@ -129,6 +130,7 @@ export(delete_azure_dir)
export(delete_azure_file) export(delete_azure_file)
export(delete_blob) export(delete_blob)
export(delete_blob_container) export(delete_blob_container)
export(delete_blob_dir)
export(delete_file_share) export(delete_file_share)
export(delete_storage_container) export(delete_storage_container)
export(delete_storage_dir) export(delete_storage_dir)

Просмотреть файл

@ -1,7 +1,9 @@
# AzureStor 3.2.2 # AzureStor 3.2.2
- Implement recursive/non-recursive directory listings for `list_blobs`, thanks to @cantpitch. Note that since blob storage doesn't have true directories, there are some warts to be aware of; see the help for `list_blobs` for more details. - Extended support for directories in blob storage. Note that since blob storage doesn't have true directories, there are some warts to be aware of; see `?blob` for more details.
- Fixes to the directory detection logic of `list_blobs`. Again, since blob storage doesn't have true directories, the `isdir` column of the `list_blobs` output should be treated as a best guess. - Implement non-recursive directory listings for `list_blobs`, thanks to @cantpitch.
- Fixes to the directory detection logic of `list_blobs`.
- Implement `create_blob_dir` and `delete_blob_dir`.
- Remove broken implementation for recursive deleting of subdirectory contents for file storage. - Remove broken implementation for recursive deleting of subdirectory contents for file storage.
# AzureStor 3.2.1 # AzureStor 3.2.1

Просмотреть файл

@ -220,7 +220,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#' Operations on a blob container or blob #' Operations on a blob container or blob
#' #'
#' Upload, download, or delete a blob; list blobs in a container; check blob availability. #' Upload, download, or delete a blob; list blobs in a container; create or delete directories; check blob availability.
#' #'
#' @param container A blob container object. #' @param container A blob container object.
#' @param blob A string naming a blob. #' @param blob A string naming a blob.
@ -235,7 +235,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#' @param use_azcopy Whether to use the AzCopy utility from Microsoft to do the transfer, rather than doing it in R. #' @param use_azcopy Whether to use the AzCopy utility from Microsoft to do the transfer, rather than doing it in R.
#' @param max_concurrent_transfers For `multiupload_blob` and `multidownload_blob`, the maximum number of concurrent file transfers. Each concurrent file transfer requires a separate R process, so limit this if you are low on memory. #' @param max_concurrent_transfers For `multiupload_blob` and `multidownload_blob`, the maximum number of concurrent file transfers. Each concurrent file transfer requires a separate R process, so limit this if you are low on memory.
#' @param prefix For `list_blobs`, an alternative way to specify the directory. #' @param prefix For `list_blobs`, an alternative way to specify the directory.
#' @param recursive For the multiupload/download functions, whether to recursively transfer files in subdirectories. For `list_blobs`, whether to include the contents of any subdirectories in the listing. #' @param recursive For the multiupload/download functions, whether to recursively transfer files in subdirectories. For `list_blobs`, whether to include the contents of any subdirectories in the listing. For `delete_blob_dir`, whether to recursively delete subdirectory contents as well (not yet supported).
#' #'
#' @details #' @details
#' `upload_blob` and `download_blob` are the workhorse file transfer functions for blobs. They each take as inputs a _single_ filename as the source for uploading/downloading, and a single filename as the destination. Alternatively, for uploading, `src` can be a [textConnection] or [rawConnection] object; and for downloading, `dest` can be NULL or a `rawConnection` object. If `dest` is NULL, the downloaded data is returned as a raw vector, and if a raw connection, it will be placed into the connection. See the examples below. #' `upload_blob` and `download_blob` are the workhorse file transfer functions for blobs. They each take as inputs a _single_ filename as the source for uploading/downloading, and a single filename as the destination. Alternatively, for uploading, `src` can be a [textConnection] or [rawConnection] object; and for downloading, `dest` can be NULL or a `rawConnection` object. If `dest` is NULL, the downloaded data is returned as a raw vector, and if a raw connection, it will be placed into the connection. See the examples below.
@ -256,8 +256,8 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#' #'
#' - The `isdir` column in the data frame output of `list_blobs` is a best guess as to whether an object represents a file or directory, and may not always be correct. Currently, `list_blobs` assumes that any object with a file size of zero is a directory. #' - The `isdir` column in the data frame output of `list_blobs` is a best guess as to whether an object represents a file or directory, and may not always be correct. Currently, `list_blobs` assumes that any object with a file size of zero is a directory.
#' - Zero-length files can cause problems for the blob storage service as a whole (not just AzureStor). Try to avoid uploading such files. #' - Zero-length files can cause problems for the blob storage service as a whole (not just AzureStor). Try to avoid uploading such files.
#' - The output of `list_blobs(recursive=TRUE)` can vary based on whether the storage account has hierarchical namespaces enabled. #' - `create_blob_dir` and `delete_blob_dir` function as expected only for accounts with hierarchical namespaces enabled. When this feature is disabled, directories do not exist as objects in their own right: to create a directory, simply upload a blob to that directory. To delete a directory, delete all the blobs within it; as far as the blob storage service is concerned, the directory then no longer exists.
#' - `create_storage_dir` and `delete_storage_dir` currently do not have methods for blob containers. #' - Similarly, the output of `list_blobs(recursive=TRUE)` can vary based on whether the storage account has hierarchical namespaces enabled.
#' #'
#' @return #' @return
#' For `list_blobs`, details on the blobs in the container. For `download_blob`, if `dest=NULL`, the contents of the downloaded blob as a raw vector. For `blob_exists` a flag whether the blob exists. #' For `list_blobs`, details on the blobs in the container. For `download_blob`, if `dest=NULL`, the contents of the downloaded blob as a raw vector. For `blob_exists` a flag whether the blob exists.
@ -472,6 +472,45 @@ delete_blob <- function(container, blob, confirm=TRUE)
invisible(do_container_op(container, blob, http_verb="DELETE")) invisible(do_container_op(container, blob, http_verb="DELETE"))
} }
#' @rdname blob
#' @export
create_blob_dir <- function(container, dir)
{
    # Turn off the transfer progress bar while this function runs; the
    # previous option value is put back when the function exits.
    old_opts <- options(azure_storage_progress_bar=FALSE)
    on.exit(options(old_opts), add=TRUE)

    # Workaround: rather than issuing a dedicated "create directory" request,
    # upload a zero-length blob under a random name inside `dir` and then
    # delete that blob again.
    placeholder <- file.path(dir, basename(tempfile()))
    upload_blob(container, rawConnection(raw(0)), placeholder)
    delete_blob(container, placeholder, confirm=FALSE)

    # Called for its side effect only.
    invisible(NULL)
}
#' @rdname blob
#' @export
delete_blob_dir <- function(container, dir, recursive=FALSE, confirm=TRUE)
{
    # Delete a directory object from a blob container.
    #
    # container: a blob container object.
    # dir:       name of the directory to delete.
    # recursive: recursive deletion is not implemented; TRUE raises an error.
    # confirm:   whether to ask for confirmation before deleting.
    #
    # Returns invisible NULL when there is nothing to do or the user declines;
    # otherwise the result of delete_blob(). Raises "Not a directory" when the
    # listing of the parent does not show `dir` as a directory.

    # The container root is never deleted: silently do nothing.
    if(dir %in% c("/", "."))
        return(invisible(NULL))

    # Reject the unsupported option up front, so the user is not asked to
    # confirm a deletion that can never be carried out.
    if(recursive)
        stop("Recursive deleting of subdirectory contents not yet supported", call.=FALSE)

    if(!delete_confirmed(confirm, paste0(container$endpoint$url, container$name, "/", dir), "directory"))
        return(invisible(NULL))

    # List the parent (non-recursively) and look for `dir` among its entries;
    # directory entries are matched by name with a trailing slash.
    parent <- dirname(dir)
    if(parent == ".")
        parent <- "/"
    lst <- list_blobs(container, parent, recursive=FALSE)
    whichrow <- which(lst$name == paste0(dir, "/"))

    # all() keeps the right-hand side of `||` a single logical value even if
    # more than one row matches.
    if(is_empty(whichrow) || !all(lst$isdir[whichrow]))
        stop("Not a directory", call.=FALSE)

    delete_blob(container, dir, confirm=FALSE)
}
#' @rdname blob #' @rdname blob
#' @export #' @export
blob_exists <- function(container, blob) blob_exists <- function(container, blob)
@ -483,3 +522,4 @@ blob_exists <- function(container, blob)
httr::stop_for_status(res, storage_error_message(res)) httr::stop_for_status(res, storage_error_message(res))
return(TRUE) return(TRUE)
} }

Просмотреть файл

@ -222,8 +222,8 @@ UseMethod("create_storage_dir")
#' @rdname generics #' @rdname generics
#' @export #' @export
create_storage_dir.blob_container <- function(container, ...) create_storage_dir.blob_container <- function(container, dir, ...)
stop("Blob storage does not support directories") create_blob_dir(container, dir, ...)
#' @rdname generics #' @rdname generics
#' @export #' @export
@ -245,8 +245,8 @@ UseMethod("delete_storage_dir")
#' @rdname generics #' @rdname generics
#' @export #' @export
delete_storage_dir.blob_container <- function(container, ...) delete_storage_dir.blob_container <- function(container, dir, ...)
stop("Blob storage does not support directories") delete_blob_dir(container, dir, ...)
#' @rdname generics #' @rdname generics
#' @export #' @export

Просмотреть файл

@ -53,9 +53,8 @@ delete_storage_container(newcont)
These functions for working with objects within a storage container: These functions for working with objects within a storage container:
- `list_storage_files`: list files/blobs in a directory (for ADLSgen2 and file storage) or blob container - `list_storage_files`: list files/blobs in a directory (defaults to the root directory)
- `create_storage_dir`: for ADLSgen2 and file storage, create a directory - `create_storage_dir`/`delete_storage_dir`: create or delete a directory
- `delete_storage_dir`: for ADLSgen2 and file storage, delete a directory
- `delete_storage_file`: delete a file or blob - `delete_storage_file`: delete a file or blob
- `storage_file_exists`: check that a file or blob exists - `storage_file_exists`: check that a file or blob exists
- `storage_upload`/`storage_download`: transfer a file to or from a storage container - `storage_upload`/`storage_download`: transfer a file to or from a storage container
@ -65,7 +64,7 @@ These functions for working with objects within a storage container:
```r ```r
# example of working with files and directories (ADLSgen2) # example of working with files and directories (ADLSgen2)
cont <- storage_container(ad_end_tok, "myfilesystem") cont <- storage_container(ad_endp_tok, "myfilesystem")
list_storage_files(cont) list_storage_files(cont)
create_storage_dir(cont, "newdir") create_storage_dir(cont, "newdir")
storage_download(cont, "/readme.txt") storage_download(cont, "/readme.txt")

Просмотреть файл

@ -7,6 +7,8 @@
\alias{download_blob} \alias{download_blob}
\alias{multidownload_blob} \alias{multidownload_blob}
\alias{delete_blob} \alias{delete_blob}
\alias{create_blob_dir}
\alias{delete_blob_dir}
\alias{blob_exists} \alias{blob_exists}
\alias{copy_url_to_blob} \alias{copy_url_to_blob}
\alias{multicopy_url_to_blob} \alias{multicopy_url_to_blob}
@ -31,6 +33,10 @@ multidownload_blob(container, src, dest, recursive = FALSE,
delete_blob(container, blob, confirm = TRUE) delete_blob(container, blob, confirm = TRUE)
create_blob_dir(container, dir)
delete_blob_dir(container, dir, recursive = FALSE, confirm = TRUE)
blob_exists(container, blob) blob_exists(container, blob)
copy_url_to_blob(container, src, dest, lease = NULL, async = FALSE) copy_url_to_blob(container, src, dest, lease = NULL, async = FALSE)
@ -47,7 +53,7 @@ multicopy_url_to_blob(container, src, dest, lease = NULL, async = FALSE,
\item{prefix}{For \code{list_blobs}, an alternative way to specify the directory.} \item{prefix}{For \code{list_blobs}, an alternative way to specify the directory.}
\item{recursive}{For the multiupload/download functions, whether to recursively transfer files in subdirectories. For \code{list_blobs}, whether to include the contents of any subdirectories in the listing.} \item{recursive}{For the multiupload/download functions, whether to recursively transfer files in subdirectories. For \code{list_blobs}, whether to include the contents of any subdirectories in the listing. For \code{delete_blob_dir}, whether to recursively delete subdirectory contents as well (not yet supported).}
\item{src, dest}{The source and destination files for uploading and downloading. See 'Details' below.} \item{src, dest}{The source and destination files for uploading and downloading. See 'Details' below.}
@ -73,7 +79,7 @@ multicopy_url_to_blob(container, src, dest, lease = NULL, async = FALSE,
For \code{list_blobs}, details on the blobs in the container. For \code{download_blob}, if \code{dest=NULL}, the contents of the downloaded blob as a raw vector. For \code{blob_exists} a flag whether the blob exists. For \code{list_blobs}, details on the blobs in the container. For \code{download_blob}, if \code{dest=NULL}, the contents of the downloaded blob as a raw vector. For \code{blob_exists} a flag whether the blob exists.
} }
\description{ \description{
Upload, download, or delete a blob; list blobs in a container; check blob availability. Upload, download, or delete a blob; list blobs in a container; create or delete directories; check blob availability.
} }
\details{ \details{
\code{upload_blob} and \code{download_blob} are the workhorse file transfer functions for blobs. They each take as inputs a \emph{single} filename as the source for uploading/downloading, and a single filename as the destination. Alternatively, for uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object; and for downloading, \code{dest} can be NULL or a \code{rawConnection} object. If \code{dest} is NULL, the downloaded data is returned as a raw vector, and if a raw connection, it will be placed into the connection. See the examples below. \code{upload_blob} and \code{download_blob} are the workhorse file transfer functions for blobs. They each take as inputs a \emph{single} filename as the source for uploading/downloading, and a single filename as the destination. Alternatively, for uploading, \code{src} can be a \link{textConnection} or \link{rawConnection} object; and for downloading, \code{dest} can be NULL or a \code{rawConnection} object. If \code{dest} is NULL, the downloaded data is returned as a raw vector, and if a raw connection, it will be placed into the connection. See the examples below.
@ -101,8 +107,8 @@ Blob storage does not have true directories, instead using filenames containing
\itemize{ \itemize{
\item The \code{isdir} column in the data frame output of \code{list_blobs} is a best guess as to whether an object represents a file or directory, and may not always be correct. Currently, \code{list_blobs} assumes that any object with a file size of zero is a directory. \item The \code{isdir} column in the data frame output of \code{list_blobs} is a best guess as to whether an object represents a file or directory, and may not always be correct. Currently, \code{list_blobs} assumes that any object with a file size of zero is a directory.
\item Zero-length files can cause problems for the blob storage service as a whole (not just AzureStor). Try to avoid uploading such files. \item Zero-length files can cause problems for the blob storage service as a whole (not just AzureStor). Try to avoid uploading such files.
\item The output of \code{list_blobs(recursive=TRUE)} can vary based on whether the storage account has hierarchical namespaces enabled. \item \code{create_blob_dir} and \code{delete_blob_dir} function as expected only for accounts with hierarchical namespaces enabled. When this feature is disabled, directories do not exist as objects in their own right: to create a directory, simply upload a blob to that directory. To delete a directory, delete all the blobs within it; as far as the blob storage service is concerned, the directory then no longer exists.
\item \code{create_storage_dir} and \code{delete_storage_dir} currently do not have methods for blob containers. \item Similarly, the output of \code{list_blobs(recursive=TRUE)} can vary based on whether the storage account has hierarchical namespaces enabled.
} }
} }

Просмотреть файл

@ -101,7 +101,7 @@ list_storage_files(container, ...)
create_storage_dir(container, ...) create_storage_dir(container, ...)
\method{create_storage_dir}{blob_container}(container, ...) \method{create_storage_dir}{blob_container}(container, dir, ...)
\method{create_storage_dir}{file_share}(container, dir, ...) \method{create_storage_dir}{file_share}(container, dir, ...)
@ -109,7 +109,7 @@ create_storage_dir(container, ...)
delete_storage_dir(container, ...) delete_storage_dir(container, ...)
\method{delete_storage_dir}{blob_container}(container, ...) \method{delete_storage_dir}{blob_container}(container, dir, ...)
\method{delete_storage_dir}{file_share}(container, dir, ...) \method{delete_storage_dir}{file_share}(container, dir, ...)

Просмотреть файл

@ -22,7 +22,41 @@ stor2 <- sub$get_resource_group(rgname)$get_storage_account(storname2)
options(azure_storage_progress_bar=FALSE) options(azure_storage_progress_bar=FALSE)
test_that("Blob dispatch works", test_that("Blob dispatch works, HNS",
{
# Runs the storage_* generics end to end against a blob container created on
# the blob endpoint of `stor1`.
# NOTE(review): presumably `stor1` is the account with hierarchical namespaces
# enabled, as the test title suggests -- confirm against the setup code above.
endpname <- stor1$properties$primaryEndpoints$blob
expect_type(endpname, "character")
key <- stor1$list_keys()[[1]]
# random 10-letter container name
contname <- paste(sample(letters, 10, TRUE), collapse="")
dirname <- "newdir"
filename <- "iris.csv"
# working with a container
expect_is(endp <- storage_endpoint(endpname, key=key), "blob_endpoint")
expect_silent(cont <- storage_container(endp, contname))
expect_silent(create_storage_container(cont))
# working with objects within container
expect_silent(list_storage_files(cont))
# directory creation is expected to succeed quietly here
expect_silent(create_storage_dir(cont, dirname))
# file transfer
expect_silent(storage_upload(cont, file.path("../resources", filename), filename))
expect_silent(storage_download(cont, filename, tempfile()))
# file existence
expect_false(storage_file_exists(cont, "nonexistent"))
expect_true(storage_file_exists(cont, filename))
# delete the objects
expect_silent(delete_storage_file(cont, filename, confirm=FALSE))
# the directory is expected to still be deletable after the file is removed
expect_silent(delete_storage_dir(cont, dirname, confirm=FALSE))
expect_silent(delete_storage_container(cont, confirm=FALSE))
})
test_that("Blob dispatch works, no HNS",
{ {
endpname <- stor2$properties$primaryEndpoints$blob endpname <- stor2$properties$primaryEndpoints$blob
expect_type(endpname, "character") expect_type(endpname, "character")
@ -39,7 +73,7 @@ test_that("Blob dispatch works",
# working with objects within container # working with objects within container
expect_silent(list_storage_files(cont)) expect_silent(list_storage_files(cont))
expect_error(create_storage_dir(cont, dirname)) expect_silent(create_storage_dir(cont, dirname))
# file transfer # file transfer
expect_silent(storage_upload(cont, file.path("../resources", filename), filename)) expect_silent(storage_upload(cont, file.path("../resources", filename), filename))
@ -51,7 +85,7 @@ test_that("Blob dispatch works",
# delete the objects # delete the objects
expect_silent(delete_storage_file(cont, filename, confirm=FALSE)) expect_silent(delete_storage_file(cont, filename, confirm=FALSE))
expect_error(delete_storage_dir(cont, dirname, confirm=FALSE)) expect_error(delete_storage_dir(cont, dirname, confirm=FALSE)) # deleting object also deletes dir
expect_silent(delete_storage_container(cont, confirm=FALSE)) expect_silent(delete_storage_container(cont, confirm=FALSE))
}) })