* versions

* tests, docs

* redoc

* fixes

* rm cruft
This commit is contained in:
Hong Ooi 2022-05-22 13:03:08 +10:00 коммит произвёл GitHub
Родитель 90b573efde
Коммит 962edb6ae8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
15 изменённых файлов: 223 добавлений и 25 удалений

Просмотреть файл

@ -44,6 +44,7 @@ S3method(delete_storage_file,adls_filesystem)
S3method(delete_storage_file,blob_container)
S3method(delete_storage_file,file_share)
S3method(delete_storage_snapshot,blob_container)
S3method(delete_storage_version,blob_container)
S3method(file_share,character)
S3method(file_share,file_endpoint)
S3method(get_account_sas,az_storage)
@ -77,6 +78,7 @@ S3method(list_storage_files,adls_filesystem)
S3method(list_storage_files,blob_container)
S3method(list_storage_files,file_share)
S3method(list_storage_snapshots,blob_container)
S3method(list_storage_versions,blob_container)
S3method(multicopy_url_to_storage,blob_container)
S3method(print,adls_endpoint)
S3method(print,adls_filesystem)
@ -146,11 +148,13 @@ export(delete_blob)
export(delete_blob_container)
export(delete_blob_dir)
export(delete_blob_snapshot)
export(delete_blob_version)
export(delete_file_share)
export(delete_storage_container)
export(delete_storage_dir)
export(delete_storage_file)
export(delete_storage_snapshot)
export(delete_storage_version)
export(do_container_op)
export(download_adls_file)
export(download_azure_file)
@ -171,11 +175,13 @@ export(list_adls_filesystems)
export(list_azure_files)
export(list_blob_containers)
export(list_blob_snapshots)
export(list_blob_versions)
export(list_blobs)
export(list_file_shares)
export(list_storage_containers)
export(list_storage_files)
export(list_storage_snapshots)
export(list_storage_versions)
export(multicopy_url_to_blob)
export(multicopy_url_to_storage)
export(multidownload_adls_file)

Просмотреть файл

@ -1,7 +1,8 @@
# AzureStor 3.6.1.9000
- Add support for blob snapshots:
- New generics `create_storage_snapshot`, `list_storage_snapshots` and `delete_storage_snapshots` to manage snapshots. These currently only have methods for blob containers.
- Add support for blob snapshots and versions:
- New functions `create_blob_snapshot`, `list_blob_snapshots` and `delete_blob_snapshot` to manage snapshots, and `list_blob_versions` and `delete_blob_version` to manage versions.
- Add generics `create_storage_snapshot`, `list_storage_snapshots`, `delete_storage_snapshot`, `list_storage_versions` and `delete_storage_version`. These dispatch to the above functions for blob containers.
- Arguments `snapshot` and `version` added to `download_blob`, `get_storage_properties` and `get_storage_metadata` to work with specific snapshots and versions.
- Add missing `directory_depth` argument for the `az_storage$get_service_sas` method (#114)

Просмотреть файл

@ -239,7 +239,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#' @param recursive For the multiupload/download functions, whether to recursively transfer files in subdirectories. For `list_blobs`, whether to include the contents of any subdirectories in the listing. For `delete_blob_dir`, whether to recursively delete subdirectory contents as well.
#' @param put_md5 For uploading, whether to compute the MD5 hash of the blob(s). This will be stored as part of the blob's properties. Only used for block blobs.
#' @param check_md5 For downloading, whether to verify the MD5 hash of the downloaded blob(s). This requires that the blob's `Content-MD5` property is set. If this is TRUE and the `Content-MD5` property is missing, a warning is generated.
#' @param snapshot For `download_blob`, an optional snapshot identifier. This should be a datetime string, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". If omitted, download the base blob.
#' @param snapshot,version For `download_blob`, optional snapshot and version identifiers. These should be datetime strings, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". If omitted, download the base blob.
#'
#' @details
#' `upload_blob` and `download_blob` are the workhorse file transfer functions for blobs. They each take as inputs a _single_ filename as the source for uploading/downloading, and a single filename as the destination. Alternatively, for uploading, `src` can be a [textConnection] or [rawConnection] object; and for downloading, `dest` can be NULL or a `rawConnection` object. If `dest` is NULL, the downloaded data is returned as a raw vector, and if a raw connection, it will be placed into the connection. See the examples below.
@ -276,7 +276,7 @@ delete_blob_container.blob_endpoint <- function(endpoint, name, confirm=TRUE, le
#' For `list_blobs`, details on the blobs in the container. For `download_blob`, if `dest=NULL`, the contents of the downloaded blob as a raw vector. For `blob_exists` a flag whether the blob exists.
#'
#' @seealso
#' [blob_container], [az_storage], [storage_download], [call_azcopy], [list_blob_snapshots]
#' [blob_container], [az_storage], [storage_download], [call_azcopy], [list_blob_snapshots], [list_blob_versions]
#'
#' [AzCopy version 10 on GitHub](https://github.com/Azure/azure-storage-azcopy)
#' [Guide to the different blob types](https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs)
@ -455,12 +455,12 @@ multiupload_blob <- function(container, src, dest, recursive=FALSE, type=c("Bloc
#' @rdname blob
#' @export
download_blob <- function(container, src, dest=basename(src), blocksize=2^24, overwrite=FALSE, lease=NULL,
check_md5=FALSE, use_azcopy=FALSE, snapshot=NULL)
check_md5=FALSE, use_azcopy=FALSE, snapshot=NULL, version=NULL)
{
if(use_azcopy)
azcopy_download(container, src, dest, overwrite=overwrite, lease=lease, check_md5=check_md5)
else download_blob_internal(container, src, dest, blocksize=blocksize, overwrite=overwrite, lease=lease,
check_md5=check_md5, snapshot=snapshot)
check_md5=check_md5, snapshot=snapshot, version=version)
}
#' @rdname blob

Просмотреть файл

@ -14,21 +14,23 @@ upload_blob_internal <- function(container, src, dest, type, blocksize, lease=NU
download_blob_internal <- function(container, src, dest, blocksize=2^24, overwrite=FALSE, lease=NULL,
check_md5=FALSE, snapshot=NULL)
check_md5=FALSE, snapshot=NULL, version=NULL)
{
headers <- list()
if(!is.null(lease))
headers[["x-ms-lease-id"]] <- as.character(lease)
opts <- if(is.null(snapshot))
list()
else list(snapshot=snapshot)
opts <- list()
if(!is.null(snapshot))
opts$snapshot <- snapshot
if(!is.null(version))
opts$versionid <- version
dest <- init_download_dest(dest, overwrite)
on.exit(dispose_download_dest(dest))
# get file size (for progress bar) and MD5 hash
props <- get_storage_properties(container, src, snapshot=snapshot)
props <- get_storage_properties(container, src, snapshot=snapshot, version=version)
size <- as.numeric(props[["content-length"]])
src_md5 <- props[["content-md5"]]

49
R/blob_version.R Normal file
Просмотреть файл

@ -0,0 +1,49 @@
#' List and delete blob versions
#'
#' @param container A blob container.
#' @param blob The path/name of a blob.
#' @param version For `delete_blob_version`, the specific version to delete. This should be a datetime string, in the format `yyyy-mm-ddTHH:MM:SS.SSSSSSSZ`.
#' @param confirm Whether to ask for confirmation on deleting a blob version.
#' @details
#' A version captures the state of a blob at a given point in time. Each version is identified with a version ID. When blob versioning is enabled for a storage account, Azure Storage automatically creates a new version with a unique ID when a blob is first created and each time that the blob is subsequently modified.
#'
#' A version ID can identify the current version or a previous version. A blob can have only one current version at a time.
#'
#' When you create a new blob, a single version exists, and that version is the current version. When you modify an existing blob, the current version becomes a previous version. A new version is created to capture the updated state, and that new version is the current version. When you delete a blob, the current version of the blob becomes a previous version, and there is no longer a current version. Any previous versions of the blob persist.
#'
#' Versions are different to [snapshots][list_blob_snapshots]:
#' - A new snapshot has to be explicitly created via `create_blob_snapshot`. A new blob version is automatically created whenever the base blob is modified (and hence there is no `create_blob_version` function).
#' - Deleting the base blob will also delete all snapshots for that blob, while blob versions will be retained (but will typically be inaccessible).
#' - Snapshots are only available for storage accounts with hierarchical namespaces disabled, while versioning can be used with any storage account.
#'
#' @return
#' For `list_blob_versions`, a vector of datetime strings which are the IDs of each version.
#' @rdname version
#' @export
list_blob_versions <- function(container, blob)
{
    # Normalise once: the name is used both as the server-side prefix filter
    # and for the exact-name comparison further down.
    blob <- as.character(blob)

    # Container-level listing request that asks the service to include versions.
    opts <- list(comp="list", restype="container", include="versions", prefix=blob)
    res <- do_container_op(container, options=opts)
    lst <- res$Blobs

    # Keep requesting pages for as long as the response carries a continuation marker.
    while(length(res$NextMarker) > 0)
    {
        opts$marker <- res$NextMarker[[1]]
        res <- do_container_op(container, options=opts)
        lst <- c(lst, res$Blobs)
    }

    # `prefix` is only a prefix filter, so the listing can also contain other
    # blobs whose names merely start with `blob` (e.g. "file10" when asking for
    # "file1"). Keep the version IDs of exact name matches only.
    # NOTE(review): assumes each listing entry exposes its name as `Name[[1]]`,
    # parallel to how `VersionId[[1]]` is read -- confirm against the parsed response.
    ids <- lapply(lst, function(bl)
    {
        if(identical(as.character(bl$Name[[1]]), blob))
            bl$VersionId[[1]]
    })

    # Entries that did not match contribute NULL, which unlist() drops.
    unname(unlist(ids))
}
#' @rdname version
#' @export
delete_blob_version <- function(container, blob, version, confirm=TRUE)
{
    # Ask (or skip asking, depending on `confirm`) before doing anything destructive.
    proceed <- delete_confirmed(confirm, version, "blob version")
    if(proceed)
    {
        # The version to remove is selected via the `versionid` query option.
        query <- list(versionid=version)
        return(invisible(do_container_op(container, blob, options=query, http_verb="DELETE")))
    }

    # Deletion was declined: nothing is sent and NULL is returned invisibly.
    invisible(NULL)
}

Просмотреть файл

@ -27,6 +27,8 @@
#' - `create_storage_snapshot` dispatches to `create_blob_snapshot`
#' - `list_storage_snapshots` dispatches to `list_blob_snapshots`
#' - `delete_storage_snapshot` dispatches to `delete_blob_snapshot`
#' - `list_storage_versions` dispatches to `list_blob_versions`
#' - `delete_storage_version` dispatches to `delete_blob_version`
#'
#' @seealso
#' [storage_endpoint], [blob_container], [file_share], [adls_filesystem]
@ -364,3 +366,28 @@ UseMethod("delete_storage_snapshot")
#' @export
delete_storage_snapshot.blob_container <- function(container, file, ...)
{
    # Blob containers hand snapshot deletion to the blob-specific function,
    # forwarding any further arguments untouched.
    delete_blob_snapshot(container, file, ...)
}
# versions
#' @rdname generics
#' @export
list_storage_versions <- function(container, ...)
{
    # S3 generic: the method is chosen from the class of `container`.
    UseMethod("list_storage_versions")
}
#' @rdname generics
#' @export
list_storage_versions.blob_container <- function(container, ...)
{
    # Blob containers hand version listing to the blob-specific function,
    # forwarding any further arguments untouched.
    list_blob_versions(container, ...)
}
#' @rdname generics
#' @export
delete_storage_version <- function(container, file, ...)
{
    # S3 generic: the method is chosen from the class of `container`.
    UseMethod("delete_storage_version")
}
#' @rdname generics
#' @export
delete_storage_version.blob_container <- function(container, file, ...)
{
    # Blob containers hand version deletion to the blob-specific function,
    # forwarding any further arguments untouched.
    delete_blob_version(container, file, ...)
}

Просмотреть файл

@ -3,7 +3,7 @@
#' @param object A blob container, file share or ADLS filesystem object.
#' @param blob,file Optionally the name of an individual blob, file or directory within a container.
#' @param isdir For the file share method, whether the `file` argument is a file or directory. If omitted, `get_storage_metadata` will auto-detect the type; however this can be slow, so supply this argument if possible.
#' @param snapshot For the blob method of `get_storage_metadata`, an optional snapshot identifier. This should be a datetime string, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if `blob` is omitted.
#' @param snapshot,version For the blob method of `get_storage_metadata`, optional snapshot and version identifiers. These should be datetime strings, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if `blob` is omitted.
#' @param ... For the metadata setters, name-value pairs to set as metadata for a blob or file.
#' @param keep_existing For the metadata setters, whether to retain existing metadata information.
#' @details
@ -48,7 +48,7 @@ get_storage_metadata <- function(object, ...)
#' @rdname metadata
#' @export
get_storage_metadata.blob_container <- function(object, blob, snapshot=NULL, ...)
get_storage_metadata.blob_container <- function(object, blob, snapshot=NULL, version=NULL, ...)
{
if(missing(blob))
{
@ -60,6 +60,8 @@ get_storage_metadata.blob_container <- function(object, blob, snapshot=NULL, ...
options <- list(comp="metadata")
if(!is.null(snapshot))
options$snapshot <- snapshot
if(!is.null(version))
options$versionid <- version
}
res <- do_container_op(object, blob, options=options, http_verb="HEAD")

Просмотреть файл

@ -4,7 +4,7 @@
#' @param filesystem An ADLS filesystem.
#' @param blob,file Optionally the name of an individual blob, file or directory within a container.
#' @param isdir For the file share method, whether the `file` argument is a file or directory. If omitted, `get_storage_properties` will auto-detect the type; however this can be slow, so supply this argument if possible.
#' @param snapshot For the blob method of `get_storage_properties`, an optional snapshot identifier. This should be a datetime string, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if `blob` is omitted.
#' @param snapshot,version For the blob method of `get_storage_properties`, optional snapshot and version identifiers. These should be datetime strings, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if `blob` is omitted.
#' @param ... For compatibility with the generic.
#' @return
#' `get_storage_properties` returns a list describing the object properties. If the `blob` or `file` argument is present for the container methods, the properties will be for the blob/file specified. If this argument is omitted, the properties will be for the container itself.
@ -45,16 +45,19 @@ get_storage_properties <- function(object, ...)
#' @rdname properties
#' @export
get_storage_properties.blob_container <- function(object, blob, snapshot=NULL, ...)
get_storage_properties.blob_container <- function(object, blob, snapshot=NULL, version=NULL, ...)
{
# properties for container
if(missing(blob))
return(do_container_op(object, options=list(restype="container"), http_verb="HEAD"))
# properties for blob
opts <- if(is.null(snapshot))
list()
else list(snapshot=snapshot)
opts <- list()
if(!is.null(snapshot))
opts$snapshot <- snapshot
if(!is.null(version))
opts$versionid <- version
do_container_op(object, blob, options=opts, http_verb="HEAD")
}

Просмотреть файл

@ -29,7 +29,7 @@ multiupload_blob(container, src, dest, recursive = FALSE,
download_blob(container, src, dest = basename(src), blocksize = 2^24,
overwrite = FALSE, lease = NULL, check_md5 = FALSE,
use_azcopy = FALSE, snapshot = NULL)
use_azcopy = FALSE, snapshot = NULL, version = NULL)
multidownload_blob(container, src, dest, recursive = FALSE,
blocksize = 2^24, overwrite = FALSE, lease = NULL, check_md5 = FALSE,
@ -82,7 +82,7 @@ multicopy_url_to_blob(container, src, dest, lease = NULL, async = FALSE,
\item{check_md5}{For downloading, whether to verify the MD5 hash of the downloaded blob(s). This requires that the blob's \code{Content-MD5} property is set. If this is TRUE and the \code{Content-MD5} property is missing, a warning is generated.}
\item{snapshot}{For \code{download_blob}, an optional snapshot identifier. This should be a datetime string, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". If omitted, download the base blob.}
\item{snapshot, version}{For \code{download_blob}, optional snapshot and version identifiers. These should be datetime strings, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". If omitted, download the base blob.}
\item{blob}{A string naming a blob.}
@ -191,7 +191,7 @@ copy_url_to_blob(cont,
}
}
\seealso{
\link{blob_container}, \link{az_storage}, \link{storage_download}, \link{call_azcopy}, \link{list_blob_snapshots}
\link{blob_container}, \link{az_storage}, \link{storage_download}, \link{call_azcopy}, \link{list_blob_snapshots}, \link{list_blob_versions}
\href{https://github.com/Azure/azure-storage-azcopy}{AzCopy version 10 on GitHub}
\href{https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs}{Guide to the different blob types}

Просмотреть файл

@ -54,6 +54,10 @@
\alias{list_storage_snapshots.blob_container}
\alias{delete_storage_snapshot}
\alias{delete_storage_snapshot.blob_container}
\alias{list_storage_versions}
\alias{list_storage_versions.blob_container}
\alias{delete_storage_version}
\alias{delete_storage_version.blob_container}
\title{Storage client generics}
\usage{
storage_container(endpoint, ...)
@ -160,6 +164,14 @@ list_storage_snapshots(container, ...)
delete_storage_snapshot(container, file, ...)
\method{delete_storage_snapshot}{blob_container}(container, file, ...)
list_storage_versions(container, ...)
\method{list_storage_versions}{blob_container}(container, ...)
delete_storage_version(container, file, ...)
\method{delete_storage_version}{blob_container}(container, file, ...)
}
\arguments{
\item{endpoint}{A storage endpoint object, or for the character methods, a string giving the full URL to the container.}
@ -201,6 +213,8 @@ Storage object management methods:
\item \code{create_storage_snapshot} dispatches to \code{create_blob_snapshot}
\item \code{list_storage_snapshots} dispatches to \code{list_blob_snapshots}
\item \code{delete_storage_snapshot} dispatches to \code{delete_blob_snapshot}
\item \code{list_storage_versions} dispatches to \code{list_blob_versions}
\item \code{delete_storage_version} dispatches to \code{delete_blob_version}
}
}
\examples{

Просмотреть файл

@ -13,7 +13,7 @@
\usage{
get_storage_metadata(object, ...)
\method{get_storage_metadata}{blob_container}(object, blob, snapshot = NULL, ...)
\method{get_storage_metadata}{blob_container}(object, blob, snapshot = NULL, version = NULL, ...)
\method{get_storage_metadata}{file_share}(object, file, isdir, ...)
@ -34,7 +34,7 @@ set_storage_metadata(object, ...)
\item{blob, file}{Optionally the name of an individual blob, file or directory within a container.}
\item{snapshot}{For the blob method of \code{get_storage_metadata}, an optional snapshot identifier. This should be a datetime string, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if \code{blob} is omitted.}
\item{snapshot, version}{For the blob method of \code{get_storage_metadata}, optional snapshot and version identifiers. These should be datetime strings, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if \code{blob} is omitted.}
\item{isdir}{For the file share method, whether the \code{file} argument is a file or directory. If omitted, \code{get_storage_metadata} will auto-detect the type; however this can be slow, so supply this argument if possible.}

Просмотреть файл

@ -11,7 +11,7 @@
\usage{
get_storage_properties(object, ...)
\method{get_storage_properties}{blob_container}(object, blob, snapshot = NULL, ...)
\method{get_storage_properties}{blob_container}(object, blob, snapshot = NULL, version = NULL, ...)
\method{get_storage_properties}{file_share}(object, file, isdir, ...)
@ -28,7 +28,7 @@ get_adls_file_status(filesystem, file)
\item{blob, file}{Optionally the name of an individual blob, file or directory within a container.}
\item{snapshot}{For the blob method of \code{get_storage_properties}, an optional snapshot identifier. This should be a datetime string, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if \code{blob} is omitted.}
\item{snapshot, version}{For the blob method of \code{get_storage_properties}, optional snapshot and version identifiers. These should be datetime strings, in the format "yyyy-mm-ddTHH:MM:SS.SSSSSSSZ". Ignored if \code{blob} is omitted.}
\item{isdir}{For the file share method, whether the \code{file} argument is a file or directory. If omitted, \code{get_storage_properties} will auto-detect the type; however this can be slow, so supply this argument if possible.}

40
man/version.Rd Normal file
Просмотреть файл

@ -0,0 +1,40 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/blob_version.R
\name{list_blob_versions}
\alias{list_blob_versions}
\alias{delete_blob_version}
\title{List and delete blob versions}
\usage{
list_blob_versions(container, blob)
delete_blob_version(container, blob, version, confirm = TRUE)
}
\arguments{
\item{container}{A blob container.}
\item{blob}{The path/name of a blob.}
\item{version}{For \code{delete_blob_version}, the specific version to delete. This should be a datetime string, in the format \code{yyyy-mm-ddTHH:MM:SS.SSSSSSSZ}.}
\item{confirm}{Whether to ask for confirmation on deleting a blob version.}
}
\value{
For \code{list_blob_versions}, a vector of datetime strings which are the IDs of each version.
}
\description{
List and delete blob versions
}
\details{
A version captures the state of a blob at a given point in time. Each version is identified with a version ID. When blob versioning is enabled for a storage account, Azure Storage automatically creates a new version with a unique ID when a blob is first created and each time that the blob is subsequently modified.
A version ID can identify the current version or a previous version. A blob can have only one current version at a time.
When you create a new blob, a single version exists, and that version is the current version. When you modify an existing blob, the current version becomes a previous version. A new version is created to capture the updated state, and that new version is the current version. When you delete a blob, the current version of the blob becomes a previous version, and there is no longer a current version. Any previous versions of the blob persist.
Versions are different to \link[=list_blob_snapshots]{snapshots}:
\itemize{
\item A new snapshot has to be explicitly created via \code{create_blob_snapshot}. A new blob version is automatically created whenever the base blob is modified (and hence there is no \code{create_blob_version} function).
\item Deleting the base blob will also delete all snapshots for that blob, while blob versions will be retained (but will typically be inaccessible).
\item Snapshots are only available for storage accounts with hierarchical namespaces disabled, while versioning can be used with any storage account.
}
}

Просмотреть файл

Просмотреть файл

@ -0,0 +1,54 @@
# Live-service tests for blob versioning (list_blob_versions and the `version`
# argument of download_blob). They are skipped unless the environment variables
# read below are all set.
context("Blob versions")

# Service principal credentials used to log in via AzureRMR.
tenant <- Sys.getenv("AZ_TEST_TENANT_ID")
app <- Sys.getenv("AZ_TEST_APP_ID")
password <- Sys.getenv("AZ_TEST_PASSWORD")
subscription <- Sys.getenv("AZ_TEST_SUBSCRIPTION")

if(tenant == "" || app == "" || password == "" || subscription == "")
    skip("Authentication tests skipped: ARM credentials not set")

# Resource group and storage account that host the test containers.
rgname <- Sys.getenv("AZ_TEST_STORAGE_RG")
storname <- Sys.getenv("AZ_TEST_STORAGE_NOHNS")

if(rgname == "" || storname == "")
    skip("Blob version tests skipped: resource names not set")

sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
stor1 <- sub$get_resource_group(rgname)$get_storage_account(storname)

bl1 <- stor1$get_blob_endpoint()

# Silence progress bars for the duration of the tests; restored in teardown().
opts <- options(azure_storage_progress_bar=FALSE)

# Two source files to upload.
# NOTE(review): write_file(), make_name() and files_identical() are not defined
# in this file -- presumably shared test helpers; confirm.
src1 <- file.path(tempdir(), write_file(tempdir()))
src2 <- file.path(tempdir(), write_file(tempdir()))

test_that("Blob versioning works",
{
    cont <- create_blob_container(bl1, make_name())

    upload_blob(cont, src1, "file1")
    expect_true(blob_exists(cont, "file1"))

    # NOTE(review): the pause presumably keeps the two uploads far enough apart
    # to get distinct version IDs -- confirm.
    Sys.sleep(2)
    upload_blob(cont, src2, "file1")

    # Uploading twice to the same name should leave two versions behind.
    v <- list_blob_versions(cont, "file1")
    expect_type(v, "character")
    expect_identical(length(v), 2L)

    # The first listed version is expected to hold the original upload.
    dest1 <- tempfile()
    download_blob(cont, "file1", dest1, version=v[1])
    expect_true(files_identical(src1, dest1))
})

teardown(
{
    options(opts)
    # Remove every container on the endpoint, not only the one created above.
    conts <- list_blob_containers(bl1)
    lapply(conts, delete_blob_container, confirm=FALSE)
})