Mirror of https://github.com/Azure/AzureStor.git
Add file format helpers (#75)
* change org
* Revert "change org"
This reverts commit 512e64fb94
.
* better cloudyr checking
* Format helpers (#1)
* better cloudyr checking
* add format helpers
* document
* update mds, desc
This commit is contained in:
Parent
6469843eea
Commit
0c7d8d0c15
|
@ -6,7 +6,7 @@ name: R-CMD-check
|
|||
|
||||
jobs:
|
||||
R-CMD-check:
|
||||
if: github.repository_owner == 'Azure'
|
||||
if: github.repository_owner != 'cloudyr'
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
|
||||
name: ${{ matrix.config.os }} (${{ matrix.config.r }})
|
||||
|
@ -29,7 +29,7 @@ jobs:
|
|||
fetch-depth: 0 # required for mirroring, see https://stackoverflow.com/a/64272409/474349
|
||||
|
||||
- name: Copy to Cloudyr
|
||||
if: runner.os == 'Linux' && github.ref == 'refs/heads/master'
|
||||
if: runner.os == 'Linux' && github.ref == 'refs/heads/master' && github.repository_owner == 'Azure'
|
||||
env:
|
||||
token: "${{ secrets.ghPat }}"
|
||||
# git config hack required, see https://stackoverflow.com/q/64270867/474349
|
||||
|
@ -91,7 +91,7 @@ jobs:
|
|||
path: check
|
||||
|
||||
- name: Update Cloudyr drat
|
||||
if: success() && runner.os == 'Linux' && github.ref == 'refs/heads/master'
|
||||
if: success() && runner.os == 'Linux' && github.ref == 'refs/heads/master' && github.repository_owner == 'Azure'
|
||||
env:
|
||||
token: "${{ secrets.ghPat }}"
|
||||
run: |
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
Package: AzureStor
|
||||
Title: Storage Management in 'Azure'
|
||||
Version: 3.3.0
|
||||
Version: 3.3.0.9000
|
||||
Authors@R: c(
|
||||
person("Hong", "Ooi", , "hongooi73@gmail.com", role = c("aut", "cre")),
|
||||
person("Microsoft", role="cph")
|
||||
|
@ -21,6 +21,7 @@ Imports:
|
|||
xml2,
|
||||
AzureRMR (>= 2.3.0)
|
||||
Suggests:
|
||||
readr,
|
||||
knitr,
|
||||
rmarkdown,
|
||||
jsonlite,
|
||||
|
|
10
NAMESPACE
10
NAMESPACE
|
@ -174,9 +174,19 @@ export(storage_container)
|
|||
export(storage_download)
|
||||
export(storage_endpoint)
|
||||
export(storage_file_exists)
|
||||
export(storage_load_rdata)
|
||||
export(storage_load_rds)
|
||||
export(storage_multidownload)
|
||||
export(storage_multiupload)
|
||||
export(storage_read_csv)
|
||||
export(storage_read_csv2)
|
||||
export(storage_read_delim)
|
||||
export(storage_save_rdata)
|
||||
export(storage_save_rds)
|
||||
export(storage_upload)
|
||||
export(storage_write_csv)
|
||||
export(storage_write_csv2)
|
||||
export(storage_write_delim)
|
||||
export(upload_adls_file)
|
||||
export(upload_azure_file)
|
||||
export(upload_blob)
|
||||
|
|
12
NEWS.md
12
NEWS.md
|
@ -1,10 +1,18 @@
|
|||
# AzureStor 3.3.0.9000
|
||||
|
||||
- Add helper functions to transfer data in commonly-used formats. These generally work via connections and so do not create temporary files on disk (the exception is `storage_save_rdata`, which uses a temporary file so that compression is enabled).
|
||||
- `storage_save_rds`/`storage_load_rds`
|
||||
- `storage_save_rdata`/`storage_load_rdata`
|
||||
- `storage_write_delim`/`storage_read_delim` (for tab-delimited files)
|
||||
- `storage_write_csv`/`storage_read_csv`
|
||||
- `storage_write_csv2`/`storage_read_csv2`
|
||||
|
||||
# AzureStor 3.3.0
|
||||
|
||||
- ADLS, file and block blob uploads gain the option to compute and store the MD5 hash of the uploaded file, via the `put_md5` argument to `upload_adls_file`, `upload_azure_file` and `upload_blob`.
|
||||
- Similarly, downloads gain the option to verify the integrity of the downloaded file using the MD5 hash, via the `check_md5` argument to `download_adls_file`, `download_azure_file` and `download_blob`. This requires that the file's `Content-MD5` property is set.
|
||||
- Add support for uploading to [append blobs](https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs), which are a type of blob optimized for append operations. They are useful for data that is constantly growing, but should not be modified once written, such as server logs. See `?upload_blob` for more details.
|
||||
- Add support for the [Azurite](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite) and [Azure SDK](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-emulator) storage emulators. To connect to the endpoint, use the service-specific functions `blob_endpoint` and `queue_endpoint` (the latter from the AzureQstor package), passing the full URL including the account name: `blob_endpoint("http://127.0.0.1:10000/myaccount", key="mykey")`. The warning about an unrecognised endpoint can be ignored. See the linked pages for full details on how to authenticate to the emulator.
|
||||
|
||||
- Add support for the [Azurite](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite) and [Azure SDK](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-emulator) storage emulators. To connect to the endpoint, use the service-specific functions `blob_endpoint` and `queue_endpoint` (the latter from the AzureQstor package), passing the full URL including the account name: `blob_endpoint("http://127.0.0.1:10000/myaccount", key="mykey")`. The warning about an unrecognised endpoint can be ignored. See the linked pages for full details on how to authenticate to the emulator.<br/>
|
||||
Note that the Azure SDK emulator is no longer being actively developed; it's recommended to use Azurite.
|
||||
- Add a 10-second fuzz factor to the default starting datetime for a generated SAS, to allow for differences in clocks.
|
||||
- More fixes to the directory handling of `list_blobs()`.
|
||||
|
|
|
@ -0,0 +1,243 @@
|
|||
#' Save and load R objects to/from a storage account
#'
#' @param object An R object to save to storage.
#' @param container An Azure storage container object.
#' @param file The name of a file in storage.
#' @param envir For `storage_save_rdata` and `storage_load_rdata`, the environment from which to get objects to save, or in which to restore objects, respectively.
#' @param ... Further arguments passed to `serialize`, `unserialize`, `save` and `load` as appropriate.
#' @details
#' These are equivalents to `saveRDS`, `readRDS`, `save` and `load` for saving and loading R objects to a storage account. With the exception of `storage_save_rdata`, they work via connections and so do not create temporary files. `storage_save_rdata` uses a temporary file so that compression of the resulting image is enabled.
#'
#' @seealso
#' [storage_download], [download_blob], [download_azure_file], [download_adls_file], [save], [load], [saveRDS]
#' @examples
#' \dontrun{
#'
#' bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key")
#' cont <- storage_container(bl, "mycontainer")
#'
#' storage_save_rds(iris, cont, "iris.rds")
#' irisnew <- storage_load_rds(cont, "iris.rds")
#' identical(iris, irisnew) # TRUE
#'
#' storage_save_rdata(iris, mtcars, container=cont, file="dataframes.rdata")
#' storage_load_rdata(cont, "dataframes.rdata")
#'
#' }
#' @rdname storage_save
#' @export
storage_save_rds <- function(object, container, file, ...)
{
    # serialize to an in-memory raw vector and upload it via a connection,
    # avoiding a temporary file on disk
    conn <- rawConnection(serialize(object, NULL, ...), open="rb")
    storage_upload(container, conn, file)
}
|
||||
|
||||
|
||||
#' @rdname storage_save
#' @export
storage_load_rds <- function(container, file, ...)
{
    # storage_download with dest=NULL returns the file contents as a raw
    # vector, which unserialize accepts directly
    rawvec <- storage_download(container, file, NULL)
    unserialize(rawvec, ...)
}
|
||||
|
||||
|
||||
#' @rdname storage_save
#' @export
storage_save_rdata <- function(..., container, file, envir=parent.frame())
{
    # saving directly to a connection disables compression, so write the
    # image to a temporary file first and upload that
    localfile <- tempfile(fileext=".rdata")
    on.exit(unlink(localfile))
    save(..., file=localfile, envir=envir)
    storage_upload(container, localfile, file)
}
|
||||
|
||||
|
||||
#' @rdname storage_save
#' @export
storage_load_rdata <- function(container, file, envir=parent.frame(), ...)
{
    # download the raw bytes, then load the image through a connection
    rawvec <- storage_download(container, file, NULL)
    load(rawConnection(rawvec, open="rb"), envir=envir, ...)
}
|
||||
|
||||
|
||||
#' Read and write a data frame to/from a storage account
#'
#' @param object A data frame to write to storage.
#' @param container An Azure storage container object.
#' @param file The name of a file in storage.
#' @param delim For `storage_write_delim` and `storage_read_delim`, the field delimiter. Defaults to `\t` (tab).
#' @param ... Optional arguments passed to the file reading/writing functions. See 'Details'.
#' @details
#' These functions let you read and write data frames to storage. `storage_read_delim` and `storage_write_delim` are for reading and writing arbitrary delimited files. `storage_read_csv` and `storage_write_csv` are for comma-delimited (CSV) files. `storage_read_csv2` and `storage_write_csv2` are for files with the semicolon `;` as delimiter and comma `,` as the decimal point, as used in some European countries.
#'
#' If the readr package is installed, they call down to `readr::read_delim`, `readr::write_delim`, `readr::read_csv2` and `readr::write_csv2`. Otherwise, they use the base functions `read.delim` and `write.table`.
#' @seealso
#' [storage_download], [download_blob], [download_azure_file], [download_adls_file],
#' [write.table], [read.csv], [readr::write_delim], [readr::read_delim]
#' @examples
#' \dontrun{
#'
#' bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key")
#' cont <- storage_container(bl, "mycontainer")
#'
#' storage_write_csv(iris, cont, "iris.csv")
#' # if readr is not installed
#' irisnew <- storage_read_csv(cont, "iris.csv", stringsAsFactors=TRUE)
#' # if readr is installed
#' irisnew <- storage_read_csv(cont, "iris.csv", col_types="nnnnf")
#'
#' all(mapply(identical, iris, irisnew)) # TRUE
#'
#' }
#' @rdname storage_write
#' @export
storage_write_delim <- function(object, container, file, delim="\t", ...)
{
    # fix: the default delimiter was "\t " (tab plus space), which disagreed
    # with storage_read_delim, storage_write_delim_base and the documented
    # default of a plain tab
    func <- if(requireNamespace("readr"))
        storage_write_delim_readr
    else storage_write_delim_base
    func(object, container, file, delim=delim, ...)
}
|
||||
|
||||
|
||||
# readr backend for storage_write_delim: write into an in-memory raw
# connection, rewind, then upload without touching the disk.
# fix: default delim changed from "\t " (tab plus space) to "\t", matching
# storage_write_delim_base and storage_read_delim_readr.
storage_write_delim_readr <- function(object, container, file, delim="\t", ...)
{
    conn <- rawConnection(raw(0), open="r+b")
    readr::write_delim(object, conn, delim=delim, ...)
    seek(conn, 0)
    storage_upload(container, conn, file)
}
|
||||
|
||||
|
||||
# base-R backend for storage_write_delim: serialize the data frame with
# write.table into an in-memory connection, rewind, then upload
storage_write_delim_base <- function(object, container, file, delim="\t", ...)
{
    outconn <- rawConnection(raw(0), open="r+b")
    write.table(object, outconn, sep=delim, ...)
    seek(outconn, 0)
    storage_upload(container, outconn, file)
}
|
||||
|
||||
|
||||
#' @rdname storage_write
#' @export
storage_write_csv <- function(object, container, file, ...)
{
    # use the readr-based writer when readr is available, else fall back
    # to the base write.table implementation
    if(requireNamespace("readr"))
        storage_write_csv_readr(object, container, file, ...)
    else storage_write_csv_base(object, container, file, ...)
}
|
||||
|
||||
|
||||
# readr backend for storage_write_csv: CSV is delim with a comma separator
storage_write_csv_readr <- function(object, container, file, ...)
    storage_write_delim_readr(object, container, file, delim=",", ...)
|
||||
|
||||
|
||||
# base backend for storage_write_csv: CSV is delim with a comma separator
storage_write_csv_base <- function(object, container, file, ...)
    storage_write_delim_base(object, container, file, delim=",", ...)
|
||||
|
||||
|
||||
#' @rdname storage_write
#' @export
storage_write_csv2 <- function(object, container, file, ...)
{
    # use the readr-based writer when readr is available, else fall back
    # to the base write.table implementation
    if(requireNamespace("readr"))
        storage_write_csv2_readr(object, container, file, ...)
    else storage_write_csv2_base(object, container, file, ...)
}
|
||||
|
||||
|
||||
# readr backend for storage_write_csv2: readr::write_csv2 itself applies the
# semicolon-separator/decimal-comma conventions, so no delim argument needed
storage_write_csv2_readr <- function(object, container, file, ...)
{
    outconn <- rawConnection(raw(0), open="r+b")
    readr::write_csv2(object, outconn, ...)
    seek(outconn, 0)
    storage_upload(container, outconn, file)
}
|
||||
|
||||
|
||||
# base backend for storage_write_csv2: semicolon separator, comma decimal
storage_write_csv2_base <- function(object, container, file, ...)
    storage_write_delim_base(object, container, file, delim=";", dec=",", ...)
|
||||
|
||||
|
||||
#' @rdname storage_write
#' @export
storage_read_delim <- function(container, file, delim="\t", ...)
{
    # use the readr-based reader when readr is available, else fall back
    # to the base read.delim implementation
    if(requireNamespace("readr"))
        storage_read_delim_readr(container, file, delim=delim, ...)
    else storage_read_delim_base(container, file, delim=delim, ...)
}
|
||||
|
||||
|
||||
# readr backend for storage_read_delim: storage_download with dest=NULL
# returns a raw vector, which readr::read_delim accepts directly
storage_read_delim_readr <- function(container, file, delim="\t", ...)
{
    rawvec <- storage_download(container, file, NULL)
    readr::read_delim(rawvec, delim=delim, ...)
}
|
||||
|
||||
|
||||
# base backend for storage_read_delim: download the raw bytes and parse
# them as text with read.delim
storage_read_delim_base <- function(container, file, delim="\t", ...)
{
    rawvec <- storage_download(container, file, NULL)
    read.delim(text=rawToChar(rawvec), sep=delim, ...)
}
|
||||
|
||||
|
||||
#' @rdname storage_write
#' @export
storage_read_csv <- function(container, file, ...)
{
    # use the readr-based reader when readr is available, else fall back
    # to the base read.delim implementation
    if(requireNamespace("readr"))
        storage_read_csv_readr(container, file, ...)
    else storage_read_csv_base(container, file, ...)
}
|
||||
|
||||
|
||||
# readr backend for storage_read_csv: CSV is delim with a comma separator
storage_read_csv_readr <- function(container, file, ...)
    storage_read_delim_readr(container, file, delim=",", ...)
|
||||
|
||||
|
||||
# base backend for storage_read_csv: CSV is delim with a comma separator
storage_read_csv_base <- function(container, file, ...)
    storage_read_delim_base(container, file, delim=",", ...)
|
||||
|
||||
|
||||
#' @rdname storage_write
#' @export
storage_read_csv2 <- function(container, file, ...)
{
    # use the readr-based reader when readr is available, else fall back
    # to the base read.delim implementation
    if(requireNamespace("readr"))
        storage_read_csv2_readr(container, file, ...)
    else storage_read_csv2_base(container, file, ...)
}
|
||||
|
||||
|
||||
# readr backend for storage_read_csv2: readr::read_csv2 itself applies the
# semicolon-separator/decimal-comma conventions and accepts a raw vector
storage_read_csv2_readr <- function(container, file, ...)
{
    rawvec <- storage_download(container, file, NULL)
    readr::read_csv2(rawvec, ...)
}
|
||||
|
||||
|
||||
# base backend for storage_read_csv2: semicolon separator, comma decimal
storage_read_csv2_base <- function(container, file, ...)
    storage_read_delim_base(container, file, delim=";", dec=",", ...)
|
||||
|
17
README.md
17
README.md
|
@ -90,6 +90,23 @@ dest <- file.path("data/", src)
|
|||
storage_multiupload(cont, src=src, dest=dest)
|
||||
```
|
||||
|
||||
### File format helpers
|
||||
|
||||
AzureStor includes convenience functions to transfer data in a number of commonly used formats: RDS, RData, TSV (tab-delimited), CSV, and CSV2 (semicolon-delimited). These work via connections and so don't create temporary files on disk.
|
||||
|
||||
```r
|
||||
# save an R object to storage and read it back again
|
||||
obj <- list(n=42L, x=pi, c="foo")
|
||||
storage_save_rds(obj, cont, "obj.rds")
|
||||
objnew <- storage_load_rds(cont, "obj.rds")
|
||||
identical(obj, objnew) # TRUE
|
||||
|
||||
# reading/writing data to CSV format
|
||||
storage_write_csv(mtcars, cont, "mtcars.csv")
|
||||
mtnew <- storage_read_csv(cont, "mtcars.csv")
|
||||
all(mapply(identical, mtcars, mtnew)) # TRUE
|
||||
```
|
||||
|
||||
### Transfer to and from connections
|
||||
|
||||
You can upload a (single) in-memory R object via a _connection_, and similarly, you can download a file to a connection, or return it as a raw vector. This lets you transfer an object without having to create a temporary file as an intermediate step.
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/transfer_format_utils.R
|
||||
\name{storage_save_rds}
|
||||
\alias{storage_save_rds}
|
||||
\alias{storage_load_rds}
|
||||
\alias{storage_save_rdata}
|
||||
\alias{storage_load_rdata}
|
||||
\title{Save and load R objects to/from a storage account}
|
||||
\usage{
|
||||
storage_save_rds(object, container, file, ...)
|
||||
|
||||
storage_load_rds(container, file, ...)
|
||||
|
||||
storage_save_rdata(..., container, file, envir = parent.frame())
|
||||
|
||||
storage_load_rdata(container, file, envir = parent.frame(), ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{An R object to save to storage.}
|
||||
|
||||
\item{container}{An Azure storage container object.}
|
||||
|
||||
\item{file}{The name of a file in storage.}
|
||||
|
||||
\item{...}{Further arguments passed to \code{serialize}, \code{unserialize}, \code{save} and \code{load} as appropriate.}
|
||||
|
||||
\item{envir}{For \code{storage_save_rdata} and \code{storage_load_rdata}, the environment from which to get objects to save, or in which to restore objects, respectively.}
|
||||
}
|
||||
\description{
|
||||
Save and load R objects to/from a storage account
|
||||
}
|
||||
\details{
|
||||
These are equivalents to \code{saveRDS}, \code{readRDS}, \code{save} and \code{load} for saving and loading R objects to a storage account. With the exception of \code{storage_save_rdata}, they work via connections and so do not create temporary files. \code{storage_save_rdata} uses a temporary file so that compression of the resulting image is enabled.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key")
|
||||
cont <- storage_container(bl, "mycontainer")
|
||||
|
||||
storage_save_rds(iris, cont, "iris.rds")
|
||||
irisnew <- storage_load_rds(cont, "iris.rds")
|
||||
identical(iris, irisnew) # TRUE
|
||||
|
||||
storage_save_rdata(iris, mtcars, container=cont, file="dataframes.rdata")
|
||||
storage_load_rdata(cont, "dataframes.rdata")
|
||||
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
\link{storage_download}, \link{download_blob}, \link{download_azure_file}, \link{download_adls_file}, \link{save}, \link{load}, \link{saveRDS}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/transfer_format_utils.R
|
||||
\name{storage_write_delim}
|
||||
\alias{storage_write_delim}
|
||||
\alias{storage_write_csv}
|
||||
\alias{storage_write_csv2}
|
||||
\alias{storage_read_delim}
|
||||
\alias{storage_read_csv}
|
||||
\alias{storage_read_csv2}
|
||||
\title{Read and write a data frame to/from a storage account}
|
||||
\usage{
|
||||
storage_write_delim(object, container, file, delim = "\\t", ...)
|
||||
|
||||
storage_write_csv(object, container, file, ...)
|
||||
|
||||
storage_write_csv2(object, container, file, ...)
|
||||
|
||||
storage_read_delim(container, file, delim = "\\t", ...)
|
||||
|
||||
storage_read_csv(container, file, ...)
|
||||
|
||||
storage_read_csv2(container, file, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{A data frame to write to storage.}
|
||||
|
||||
\item{container}{An Azure storage container object.}
|
||||
|
||||
\item{file}{The name of a file in storage.}
|
||||
|
||||
\item{delim}{For \code{storage_write_delim} and \code{storage_read_delim}, the field delimiter. Defaults to \verb{\\t} (tab).}
|
||||
|
||||
\item{...}{Optional arguments passed to the file reading/writing functions. See 'Details'.}
|
||||
}
|
||||
\description{
|
||||
Read and write a data frame to/from a storage account
|
||||
}
|
||||
\details{
|
||||
These functions let you read and write data frames to storage. \code{storage_read_delim} and \code{storage_write_delim} are for reading and writing arbitrary delimited files. \code{storage_read_csv} and \code{storage_write_csv} are for comma-delimited (CSV) files. \code{storage_read_csv2} and \code{storage_write_csv2} are for files with the semicolon \verb{;} as delimiter and comma \verb{,} as the decimal point, as used in some European countries.
|
||||
|
||||
If the readr package is installed, they call down to \code{read_delim}, \code{write_delim}, \code{read_csv2} and \code{write_csv2}. Otherwise, they use the base functions \code{read.delim} and \code{write.table}.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
bl <- storage_endpoint("https://mystorage.blob.core.windows.net/", key="access_key")
|
||||
cont <- storage_container(bl, "mycontainer")
|
||||
|
||||
storage_write_csv(iris, cont, "iris.csv")
|
||||
# if readr is not installed
|
||||
irisnew <- storage_read_csv(cont, "iris.csv", stringsAsFactors=TRUE)
|
||||
# if readr is installed
|
||||
irisnew <- storage_read_csv(cont, "iris.csv", col_types="nnnnf")
|
||||
|
||||
all(mapply(identical, iris, irisnew)) # TRUE
|
||||
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
\link{storage_download}, \link{download_blob}, \link{download_azure_file}, \link{download_adls_file},
|
||||
\link{write.table}, \link{read.csv}, \link[readr:write_delim]{readr::write_delim}, \link[readr:read_delim]{readr::read_delim}
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
context("File format helpers")

# ARM credentials for the live test subscription; all tests are skipped
# unless these environment variables are set
tenant <- Sys.getenv("AZ_TEST_TENANT_ID")
app <- Sys.getenv("AZ_TEST_APP_ID")
password <- Sys.getenv("AZ_TEST_PASSWORD")
subscription <- Sys.getenv("AZ_TEST_SUBSCRIPTION")

if(tenant == "" || app == "" || password == "" || subscription == "")
    skip("File format tests skipped: ARM credentials not set")

# names of a pre-provisioned resource group and storage account
# (NOHNS presumably means hierarchical namespace disabled -- TODO confirm)
rgname <- Sys.getenv("AZ_TEST_STORAGE_RG")
storname <- Sys.getenv("AZ_TEST_STORAGE_NOHNS")

if(rgname == "" || storname == "")
    skip("File format tests skipped: resource names not set")

sub <- AzureRMR::az_rm$new(tenant=tenant, app=app, password=password)$get_subscription(subscription)
stor <- sub$get_resource_group(rgname)$get_storage_account(storname)

# create a fresh blob container to hold the files written by these tests;
# deleted again in teardown below
bl <- stor$get_blob_endpoint()
cont_name <- make_name()
cont <- create_blob_container(bl, cont_name)

# suppress progress bars for cleaner test output; restored in teardown
opts <- options(azure_storage_progress_bar=FALSE)
|
||||
|
||||
|
||||
# Compare two data frames for equal dimensions, names and column contents.
# fix: the original used `names(df1) == names(df2)` inside an `&&` chain;
# that yields a logical vector of length > 1, which is an error in R >= 4.3
# (and silently compared only the first name before). Compare the whole
# name vectors with identical() instead.
dfs_identical <- function(df1, df2)
{
    identical(dim(df1), dim(df2)) &&
        identical(names(df1), names(df2)) &&
        all(mapply(identical, df1, df2))
}
|
||||
|
||||
|
||||
test_that("read/write RDS works",
{
    # round-trip an assortment of types, including a closure
    obj <- list(c="foo", f=ls, n=42L, x=pi)
    fname <- paste0(make_name(), ".rds")
    expect_silent(storage_save_rds(obj, cont, fname))
    objnew <- storage_load_rds(cont, fname)
    expect_identical(obj, objnew)
})
|
||||
|
||||
|
||||
test_that("read/write RData works",
{
    # keep copies under separate names so we can verify after rm()
    objorig1 <- obj1 <- list(c="foo", f=ls, n=42L, x=pi)
    objorig2 <- obj2 <- mtcars
    fname <- paste0(make_name(), ".rdata")
    expect_silent(storage_save_rdata(obj1, obj2, container=cont, file=fname))
    rm(obj1, obj2)
    # loading should restore obj1 and obj2 into this environment
    storage_load_rdata(cont, fname)
    expect_true(exists("obj1") &&
                exists("obj2") &&
                identical(obj1, objorig1) &&
                identical(obj2, objorig2))
})
|
||||
|
||||
|
||||
test_that("read/write delim works",
{
    # dispatching front-end (resolves to readr or base depending on install)
    fname0 <- paste0(make_name(), ".tsv")
    expect_silent(storage_write_delim(iris, cont, fname0))
    irisnew0 <- storage_read_delim(cont, fname0)
    # Species comes back as character by default; refactor for comparison
    irisnew0$Species <- as.factor(irisnew0$Species)
    expect_true(dfs_identical(iris, irisnew0))

    # readr backend directly
    fname1 <- paste0(make_name(), ".tsv")
    expect_silent(storage_write_delim_readr(iris, cont, fname1))
    irisnew1 <- storage_read_delim_readr(cont, fname1, col_types="nnnnf")
    expect_true(dfs_identical(iris, irisnew1))

    # base backend directly
    fname2 <- paste0(make_name(), ".tsv")
    expect_silent(storage_write_delim_base(iris, cont, fname2))
    irisnew2 <- storage_read_delim_base(cont, fname2, stringsAsFactors=TRUE)
    expect_true(dfs_identical(iris, irisnew2))
})
|
||||
|
||||
|
||||
test_that("read/write CSV works",
{
    # dispatching front-end (the original comment said "readr", but this
    # section exercises storage_write_csv/storage_read_csv, which pick a
    # backend at runtime)
    fname0 <- paste0(make_name(), ".csv")
    expect_silent(storage_write_csv(iris, cont, fname0))
    irisnew0 <- storage_read_csv(cont, fname0)
    # Species comes back as character by default; refactor for comparison
    irisnew0$Species <- as.factor(irisnew0$Species)
    expect_true(dfs_identical(iris, irisnew0))

    # readr backend directly
    fname1 <- paste0(make_name(), ".csv")
    expect_silent(storage_write_csv_readr(iris, cont, fname1))
    irisnew1 <- storage_read_csv_readr(cont, fname1, col_types="nnnnf")
    expect_true(dfs_identical(iris, irisnew1))

    # base backend directly
    fname2 <- paste0(make_name(), ".csv")
    expect_silent(storage_write_csv_base(iris, cont, fname2))
    irisnew2 <- storage_read_csv_base(cont, fname2, stringsAsFactors=TRUE)
    expect_true(dfs_identical(iris, irisnew2))
})
|
||||
|
||||
|
||||
test_that("read/write CSV2 works",
{
    # dispatching front-end (the original comment said "readr", but this
    # section exercises storage_write_csv2/storage_read_csv2, which pick a
    # backend at runtime)
    fname0 <- paste0(make_name(), ".csv2")
    expect_silent(storage_write_csv2(iris, cont, fname0))
    irisnew0 <- storage_read_csv2(cont, fname0)
    # Species comes back as character by default; refactor for comparison
    irisnew0$Species <- as.factor(irisnew0$Species)
    expect_true(dfs_identical(iris, irisnew0))

    # readr backend directly
    fname1 <- paste0(make_name(), ".csv2")
    expect_silent(storage_write_csv2_readr(iris, cont, fname1))
    irisnew1 <- storage_read_csv2_readr(cont, fname1, col_types="nnnnf")
    expect_true(dfs_identical(iris, irisnew1))

    # base backend directly
    fname2 <- paste0(make_name(), ".csv2")
    expect_silent(storage_write_csv2_base(iris, cont, fname2))
    irisnew2 <- storage_read_csv2_base(cont, fname2, stringsAsFactors=TRUE)
    expect_true(dfs_identical(iris, irisnew2))
})
|
||||
|
||||
|
||||
teardown(
{
    # restore the progress-bar option and delete every container in the
    # test account (containers are created with random names above)
    options(opts)
    conts <- list_blob_containers(bl)
    lapply(conts, delete_blob_container, confirm=FALSE)
})
|
||||
|
|
@ -91,6 +91,23 @@ dest <- file.path("data/", src)
|
|||
storage_multiupload(cont, src, dest)
|
||||
```
|
||||
|
||||
### File format helpers
|
||||
|
||||
AzureStor includes convenience functions to transfer data in a number of commonly used formats: RDS, RData, TSV (tab-delimited), CSV, and CSV2 (semicolon-delimited). These work via connections and so don't create temporary files on disk.
|
||||
|
||||
```r
|
||||
# save an R object to storage and read it back again
|
||||
obj <- list(n=42L, x=pi, c="foo")
|
||||
storage_save_rds(obj, cont, "obj.rds")
|
||||
objnew <- storage_load_rds(cont, "obj.rds")
|
||||
identical(obj, objnew) # TRUE
|
||||
|
||||
# reading/writing data to CSV format
|
||||
storage_write_csv(mtcars, cont, "mtcars.csv")
|
||||
mtnew <- storage_read_csv(cont, "mtcars.csv")
|
||||
all(mapply(identical, mtcars, mtnew)) # TRUE
|
||||
```
|
||||
|
||||
### Transfer to and from connections
|
||||
|
||||
You can upload a (single) in-memory R object via a _connection_, and similarly, you can download a file to a connection, or return it as a raw vector. This lets you transfer an object without having to create a temporary file as an intermediate step.
|
||||
|
|
Loading…
Reference in new issue