Merge pull request #97 from omkarksa/master

R connector for ADLS
This commit is contained in:
Andrie de Vries 2017-09-19 12:08:29 +01:00 коммит произвёл GitHub
Родитель b731b844c6 1105323e51
Коммит 3d252c7c20
15 изменённых файлов: 773 добавлений и 3 удалений

Просмотреть файл

@ -18,6 +18,13 @@ export(azureCreateResourceGroup)
export(azureCreateStorageAccount)
export(azureCreateStorageContainer)
export(azureDataConsumption)
export(azureDataLakeAppend)
export(azureDataLakeCreate)
export(azureDataLakeDelete)
export(azureDataLakeGetFileStatus)
export(azureDataLakeListStatus)
export(azureDataLakeMkdirs)
export(azureDataLakeOpen)
export(azureDeleteBatchAccount)
export(azureDeleteBlob)
export(azureDeleteDeploy)

Просмотреть файл

@ -10,7 +10,7 @@
#' @family Azure resource functions
#'
#' @export
azureAuthenticate <- function(azureActiveContext, tenantID, clientID, authKey, verbose = FALSE) {
azureAuthenticate <- function(azureActiveContext, tenantID, clientID, authKey, verbose = FALSE, resource = "https%3A%2F%2Fmanagement.azure.com%2F") {
assert_that(is.azureActiveContext(azureActiveContext))
if (missing(tenantID)) tenantID <- azureActiveContext$tenantID
@ -27,7 +27,7 @@ azureAuthenticate <- function(azureActiveContext, tenantID, clientID, authKey, v
authKeyEncoded <- URLencode(authKey, reserved = TRUE)
bodyGT <- paste0("grant_type=client_credentials&resource=https%3A%2F%2Fmanagement.azure.com%2F&client_id=",
bodyGT <- paste0("grant_type=client_credentials&resource=", resource, "&client_id=",
clientID, "&client_secret=", authKeyEncoded)
r <- httr::POST(URLGT,

368
R/AzureDataLake.R Normal file
Просмотреть файл

@ -0,0 +1,368 @@
#' Azure Data Lake LISTSTATUS for specified relativePath of an azure data lake account.
#'
#' Sends a WebHDFS `LISTSTATUS` request for `relativePath` and converts the
#' JSON response into a data frame.
#'
#' @inheritParams createAzureContext
#' @inheritParams azureAuthenticate
#'
#' @param azureActiveContext Provide an `azureActiveContext` object used for authentication.
#' @param azureDataLakeAccount Provide the name of the Azure Data Lake account.
#' @param relativePath Provide a relative path of the directory. The default `""`
#'   addresses the root of the account. The path is passed through
#'   [utils::URLencode()] before it is placed in the request URL, so characters
#'   such as spaces are percent-encoded.
#' @param verbose Print tracing information (default FALSE).
#'
#' @return Returns a data frame whose columns are prefixed with
#'   `FileStatuses.FileStatus.`. When the response contains no entries, a
#'   zero-row data frame with the same column names is returned.
#'
#' @family Azure Data Lake Store functions
#' @export
azureDataLakeListStatus <- function(azureActiveContext, azureDataLakeAccount, relativePath = "", verbose = FALSE) {
  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
    assert_that(is.azureActiveContext(azureActiveContext))
    azureCheckToken(azureActiveContext)
  }
  assert_that(is_storage_account(azureDataLakeAccount))

  # Percent-encode the path so that characters which are illegal in a URL
  # (e.g. spaces) do not produce a malformed request. URLencode() leaves
  # "/" alone and does not re-encode a path that already contains %XX escapes.
  URL <- paste0(
    "https://", azureDataLakeAccount, ".azuredatalakestore.net/webhdfs/v1/",
    utils::URLencode(relativePath),
    "?op=LISTSTATUS",
    "&api-version=2016-11-01"
  )

  # `verbose` is forwarded as-is; callAzureDataLakeApi() derives the httr
  # verbosity setting itself.
  resHttp <- callAzureDataLakeApi(URL,
                                  azureActiveContext = azureActiveContext,
                                  verbose = verbose)
  stopWithAzureError(resHttp)

  resJsonStr <- content(resHttp, "text", encoding = "UTF-8")
  resJsonObj <- jsonlite::fromJSON(resJsonStr)

  if (length(resJsonObj$FileStatuses$FileStatus) == 0) {
    # Return an empty data frame with the usual column names when the JSON
    # object carries no entries.
    return(
      data.frame(
        FileStatuses.FileStatus.length = character(0),
        FileStatuses.FileStatus.pathSuffix = character(0),
        FileStatuses.FileStatus.type = character(0),
        FileStatuses.FileStatus.blockSize = character(0),
        FileStatuses.FileStatus.accessTime = character(0),
        FileStatuses.FileStatus.modificationTime = character(0),
        FileStatuses.FileStatus.replication = character(0),
        FileStatuses.FileStatus.permission = character(0),
        FileStatuses.FileStatus.owner = character(0),
        FileStatuses.FileStatus.group = character(0),
        FileStatuses.FileStatus.aclBit = character(0)
      )
    )
  }

  as.data.frame(resJsonObj)
}
#' Azure Data Lake GETFILESTATUS for specified relativePath of an azure data lake account.
#'
#' Sends a WebHDFS `GETFILESTATUS` request for `relativePath` and converts the
#' JSON response into a data frame.
#'
#' @inheritParams createAzureContext
#' @inheritParams azureAuthenticate
#'
#' @param azureActiveContext Provide an `azureActiveContext` object used for authentication.
#' @param azureDataLakeAccount Provide the name of the Azure Data Lake account.
#' @param relativePath Provide a relative path of the file or directory. The
#'   default `""` addresses the root of the account. The path is passed through
#'   [utils::URLencode()] before it is placed in the request URL, so characters
#'   such as spaces are percent-encoded.
#' @param verbose Print tracing information (default FALSE).
#'
#' @return Returns a data frame whose columns are prefixed with `FileStatus.`.
#'   Returns `NULL` (with a warning) when the service answers with HTTP 404.
#'
#' @family Azure Data Lake Store functions
#' @export
azureDataLakeGetFileStatus <- function(azureActiveContext, azureDataLakeAccount, relativePath = "", verbose = FALSE) {
  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
    assert_that(is.azureActiveContext(azureActiveContext))
    azureCheckToken(azureActiveContext)
  }
  assert_that(is_storage_account(azureDataLakeAccount))

  # Percent-encode the path so that characters which are illegal in a URL
  # (e.g. spaces) do not produce a malformed request. URLencode() leaves
  # "/" alone and does not re-encode a path that already contains %XX escapes.
  URL <- paste0(
    "https://", azureDataLakeAccount, ".azuredatalakestore.net/webhdfs/v1/",
    utils::URLencode(relativePath),
    "?op=GETFILESTATUS",
    "&api-version=2016-11-01"
  )

  # `verbose` is forwarded as-is; callAzureDataLakeApi() derives the httr
  # verbosity setting itself.
  resHttp <- callAzureDataLakeApi(URL,
                                  azureActiveContext = azureActiveContext,
                                  verbose = verbose)

  # A missing path is reported as a warning plus NULL rather than an error.
  if (status_code(resHttp) == 404) {
    warning("Azure data lake response: resource not found")
    return(NULL)
  }
  stopWithAzureError(resHttp)

  resJsonStr <- content(resHttp, "text", encoding = "UTF-8")
  resJsonObj <- jsonlite::fromJSON(resJsonStr)

  if (length(resJsonObj$FileStatus) == 0) {
    # Return an empty data frame in case of an empty JSON object. The column
    # names carry the `FileStatus.` prefix so they match what
    # as.data.frame() produces for a non-empty GETFILESTATUS response (the
    # previous `FileStatuses.FileStatus.` prefix belongs to LISTSTATUS).
    return(
      data.frame(
        FileStatus.length = character(0),
        FileStatus.pathSuffix = character(0),
        FileStatus.type = character(0),
        FileStatus.blockSize = character(0),
        FileStatus.accessTime = character(0),
        FileStatus.modificationTime = character(0),
        FileStatus.replication = character(0),
        FileStatus.permission = character(0),
        FileStatus.owner = character(0),
        FileStatus.group = character(0),
        FileStatus.aclBit = character(0)
      )
    )
  }

  as.data.frame(resJsonObj)
}
#' Azure Data Lake MKDIRS for specified relativePath of an azure data lake account.
#'
#' Sends a WebHDFS `MKDIRS` request (HTTP PUT) for `relativePath`.
#'
#' @inheritParams createAzureContext
#' @inheritParams azureAuthenticate
#'
#' @param azureActiveContext Provide an `azureActiveContext` object used for authentication.
#' @param azureDataLakeAccount Provide the name of the Azure Data Lake account.
#' @param relativePath Provide a relative path of the directory. The path is
#'   passed through [utils::URLencode()] before it is placed in the request
#'   URL, so characters such as spaces are percent-encoded.
#' @param permission Provide the permission to be set for the directory, as a
#'   three-digit octal string such as `"755"`. When `NULL` (the default) no
#'   `permission` argument is sent.
#' @param verbose Print tracing information (default FALSE).
#'
#' @return Returns the `boolean` field of the service response. Returns `NULL`
#'   (with a warning) when the service answers with HTTP 404.
#'
#' @family Azure Data Lake Store functions
#' @export
azureDataLakeMkdirs <- function(azureActiveContext, azureDataLakeAccount, relativePath, permission = NULL, verbose = FALSE) {
  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
    assert_that(is.azureActiveContext(azureActiveContext))
    azureCheckToken(azureActiveContext)
  }
  assert_that(is_storage_account(azureDataLakeAccount))
  # `permission` defaults to NULL, so a NULL check also covers the
  # "argument not supplied" case.
  if (!is.null(permission)) assert_that(is_permission(permission))

  # Percent-encode the path so that characters which are illegal in a URL
  # (e.g. spaces) do not produce a malformed request. URLencode() leaves
  # "/" alone and does not re-encode a path that already contains %XX escapes.
  URL <- paste0(
    "https://", azureDataLakeAccount, ".azuredatalakestore.net/webhdfs/v1/",
    utils::URLencode(relativePath),
    "?op=MKDIRS",
    "&api-version=2016-11-01"
  )
  if (!is.null(permission)) URL <- paste0(URL, "&permission=", permission)

  # `verbose` is forwarded as-is; callAzureDataLakeApi() derives the httr
  # verbosity setting itself.
  resHttp <- callAzureDataLakeApi(URL, verb = "PUT",
                                  azureActiveContext = azureActiveContext,
                                  verbose = verbose)

  # A missing resource is reported as a warning plus NULL rather than an error.
  if (status_code(resHttp) == 404) {
    warning("Azure data lake response: resource not found")
    return(NULL)
  }
  stopWithAzureError(resHttp)

  resJsonStr <- content(resHttp, "text", encoding = "UTF-8")
  resJsonObj <- jsonlite::fromJSON(resJsonStr)
  resDf <- as.data.frame(resJsonObj)
  resDf$boolean
}
#' Azure Data Lake CREATE for specified relativePath of an azure data lake account.
#'
#' Sends a WebHDFS `CREATE` request (HTTP PUT) that writes `contents` to
#' `relativePath`.
#'
#' @inheritParams createAzureContext
#' @inheritParams azureAuthenticate
#'
#' @param azureActiveContext Provide an `azureActiveContext` object used for authentication.
#' @param azureDataLakeAccount Provide the name of the Azure Data Lake account.
#' @param relativePath Provide a relative path of the file. The path is passed
#'   through [utils::URLencode()] before it is placed in the request URL, so
#'   characters such as spaces are percent-encoded.
#' @param overwrite Overwrite existing files (default FALSE). The argument is
#'   only added to the request when it is supplied explicitly; it is sent in
#'   lower case (`true` / `false`).
#' @param permission Provide the permission to be set for the file, as a
#'   three-digit octal string such as `"755"`. When `NULL` (the default) no
#'   `permission` argument is sent.
#' @param contents Provide contents to write to `relativePath`
#' @param verbose Print tracing information (default FALSE).
#'
#' @return The value returned by `stopWithAzureError()` for a successful
#'   response; the package tests expect this to be `NULL`.
#'
#' @family Azure Data Lake Store functions
#' @export
azureDataLakeCreate <- function(azureActiveContext, azureDataLakeAccount, relativePath, overwrite = FALSE, permission = NULL, contents = "", verbose = FALSE) {
  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
    assert_that(is.azureActiveContext(azureActiveContext))
    azureCheckToken(azureActiveContext)
  }
  assert_that(is_storage_account(azureDataLakeAccount))
  # `permission` defaults to NULL, so a NULL check also covers the
  # "argument not supplied" case.
  if (!is.null(permission)) assert_that(is_permission(permission))
  # TODO: Need a check for contents ?

  # Percent-encode the path so that characters which are illegal in a URL
  # (e.g. spaces) do not produce a malformed request. URLencode() leaves
  # "/" alone and does not re-encode a path that already contains %XX escapes.
  URL <- paste0(
    "https://", azureDataLakeAccount, ".azuredatalakestore.net/webhdfs/v1/",
    utils::URLencode(relativePath),
    "?op=CREATE&write=true",
    "&api-version=2016-11-01"
  )
  if (!missing(overwrite) && !is.null(overwrite)) {
    # R prints logicals as TRUE/FALSE; WebHDFS documents the values as
    # lower-case true/false, so normalise the case before sending.
    URL <- paste0(URL, "&overwrite=", tolower(as.character(overwrite)))
  }
  if (!is.null(permission)) URL <- paste0(URL, "&permission=", permission)

  # `verbose` is forwarded as-is; callAzureDataLakeApi() derives the httr
  # verbosity setting itself.
  resHttp <- callAzureDataLakeApi(URL, verb = "PUT",
                                  azureActiveContext = azureActiveContext,
                                  content = contents, contenttype = "text/plain; charset=UTF-8",
                                  verbose = verbose)
  # The result of stopWithAzureError() is the function's return value.
  stopWithAzureError(resHttp)
}
#' Azure Data Lake APPEND for specified relativePath of an azure data lake account.
#'
#' Sends a WebHDFS `APPEND` request (HTTP POST) that appends `contents` to
#' `relativePath`.
#'
#' @inheritParams createAzureContext
#' @inheritParams azureAuthenticate
#'
#' @param azureActiveContext Provide an `azureActiveContext` object used for authentication.
#' @param azureDataLakeAccount Provide the name of the Azure Data Lake account.
#' @param relativePath Provide a relative path of the file. The path is passed
#'   through [utils::URLencode()] before it is placed in the request URL, so
#'   characters such as spaces are percent-encoded.
#' @param contents Provide contents to write to `relativePath`
#' @param verbose Print tracing information (default FALSE).
#'
#' @return The value returned by `stopWithAzureError()` for a successful
#'   response; the package tests expect this to be `NULL`.
#'
#' @family Azure Data Lake Store functions
#' @export
azureDataLakeAppend <- function(azureActiveContext, azureDataLakeAccount, relativePath, contents = "", verbose = FALSE) {
  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
    assert_that(is.azureActiveContext(azureActiveContext))
    azureCheckToken(azureActiveContext)
  }
  assert_that(is_storage_account(azureDataLakeAccount))
  # TODO: Need a check for contents ?

  # Percent-encode the path so that characters which are illegal in a URL
  # (e.g. spaces) do not produce a malformed request. URLencode() leaves
  # "/" alone and does not re-encode a path that already contains %XX escapes.
  URL <- paste0(
    "https://", azureDataLakeAccount, ".azuredatalakestore.net/webhdfs/v1/",
    utils::URLencode(relativePath),
    "?op=APPEND&append=true",
    "&api-version=2016-11-01"
  )

  # `verbose` is forwarded as-is; callAzureDataLakeApi() derives the httr
  # verbosity setting itself.
  resHttp <- callAzureDataLakeApi(URL, verb = "POST",
                                  azureActiveContext = azureActiveContext,
                                  content = contents, contenttype = "text/plain; charset=UTF-8",
                                  verbose = verbose)
  # The result of stopWithAzureError() is the function's return value.
  stopWithAzureError(resHttp)
}
#' Azure Data Lake OPEN for specified relativePath of an azure data lake account.
#'
#' Sends a WebHDFS `OPEN` request for `relativePath` and returns the response
#' body as text.
#'
#' @inheritParams createAzureContext
#' @inheritParams azureAuthenticate
#'
#' @param azureActiveContext Provide an `azureActiveContext` object used for authentication.
#' @param azureDataLakeAccount Provide the name of the Azure Data Lake account.
#' @param relativePath Provide a relative path of the file. The path is passed
#'   through [utils::URLencode()] before it is placed in the request URL, so
#'   characters such as spaces are percent-encoded.
#' @param offset Provide the offset to read from. Omitted from the request
#'   when not supplied or `NULL`.
#' @param length Provide length of data to read. Omitted from the request
#'   when not supplied or `NULL`.
#' @param verbose Print tracing information (default FALSE).
#'
#' @return Returns the response body as a character string, decoded as UTF-8.
#'
#' @family Azure Data Lake Store functions
#' @export
azureDataLakeOpen <- function(azureActiveContext, azureDataLakeAccount, relativePath, offset, length, verbose = FALSE) {
  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
    assert_that(is.azureActiveContext(azureActiveContext))
    azureCheckToken(azureActiveContext)
  }
  assert_that(is_storage_account(azureDataLakeAccount))

  # Percent-encode the path so that characters which are illegal in a URL
  # (e.g. spaces) do not produce a malformed request. URLencode() leaves
  # "/" alone and does not re-encode a path that already contains %XX escapes.
  URL <- paste0(
    "https://", azureDataLakeAccount, ".azuredatalakestore.net/webhdfs/v1/",
    utils::URLencode(relativePath),
    "?op=OPEN&read=true",
    "&api-version=2016-11-01"
  )

  # Pasting a double directly can yield scientific notation
  # (100000 becomes "1e+05"), which is not a valid query value.
  # format(..., scientific = FALSE) always writes plain digits.
  if (!missing(offset) && !is.null(offset)) {
    URL <- paste0(URL, "&offset=", format(offset, scientific = FALSE, trim = TRUE))
  }
  if (!missing(length) && !is.null(length)) {
    URL <- paste0(URL, "&length=", format(length, scientific = FALSE, trim = TRUE))
  }

  # `verbose` is forwarded as-is; callAzureDataLakeApi() derives the httr
  # verbosity setting itself.
  resHttp <- callAzureDataLakeApi(URL,
                                  azureActiveContext = azureActiveContext,
                                  verbose = verbose)
  stopWithAzureError(resHttp)

  content(resHttp, "text", encoding = "UTF-8")
}
#' Azure Data Lake DELETE for specified relativePath of an azure data lake account.
#'
#' Sends a WebHDFS `DELETE` request (HTTP DELETE) for `relativePath`.
#'
#' @inheritParams createAzureContext
#' @inheritParams azureAuthenticate
#'
#' @param azureActiveContext Provide an `azureActiveContext` object used for authentication.
#' @param azureDataLakeAccount Provide the name of the Azure Data Lake account.
#' @param relativePath Provide a relative path of the file or directory. The
#'   path is passed through [utils::URLencode()] before it is placed in the
#'   request URL, so characters such as spaces are percent-encoded.
#' @param recursive Provide recursive delete option (default FALSE). The
#'   argument is only added to the request when it is supplied explicitly; it
#'   is sent in lower case (`true` / `false`).
#' @param verbose Print tracing information (default FALSE).
#'
#' @return Returns the `boolean` field of the service response. Returns `NULL`
#'   (with a warning) when the service answers with HTTP 404.
#'
#' @family Azure Data Lake Store functions
#' @export
azureDataLakeDelete <- function(azureActiveContext, azureDataLakeAccount, relativePath, recursive = FALSE, verbose = FALSE) {
  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
    assert_that(is.azureActiveContext(azureActiveContext))
    azureCheckToken(azureActiveContext)
  }
  assert_that(is_storage_account(azureDataLakeAccount))

  # Percent-encode the path so that characters which are illegal in a URL
  # (e.g. spaces) do not produce a malformed request. URLencode() leaves
  # "/" alone and does not re-encode a path that already contains %XX escapes.
  URL <- paste0(
    "https://", azureDataLakeAccount, ".azuredatalakestore.net/webhdfs/v1/",
    utils::URLencode(relativePath),
    "?op=DELETE",
    "&api-version=2016-11-01"
  )
  if (!missing(recursive) && !is.null(recursive)) {
    # R prints logicals as TRUE/FALSE; WebHDFS documents the values as
    # lower-case true/false, so normalise the case before sending.
    URL <- paste0(URL, "&recursive=", tolower(as.character(recursive)))
  }

  # `verbose` is forwarded as-is; callAzureDataLakeApi() derives the httr
  # verbosity setting itself.
  resHttp <- callAzureDataLakeApi(URL, verb = "DELETE",
                                  azureActiveContext = azureActiveContext,
                                  verbose = verbose)

  # A missing resource is reported as a warning plus NULL rather than an error.
  if (status_code(resHttp) == 404) {
    warning("Azure data lake response: resource not found")
    return(NULL)
  }
  stopWithAzureError(resHttp)

  resJsonStr <- content(resHttp, "text", encoding = "UTF-8")
  resJsonObj <- jsonlite::fromJSON(resJsonStr)
  resDf <- as.data.frame(resJsonObj)
  resDf$boolean
}

Просмотреть файл

@ -106,6 +106,54 @@ azure_storage_header <- function(shared_key, date = x_ms_date(), content_length
add_headers(.headers = headers)
}
# Send one authenticated HTTP request to the Azure Data Lake Store REST API.
#
# Arguments:
#   url                 Complete request URL, including the query string.
#   verb                HTTP method: "GET", "PUT", "POST" or "DELETE".
#   azureActiveContext  Context object; its `Token` element is sent as the
#                       Authorization header.
#   headers, CMD        Kept in the signature for compatibility with existing
#                       callers; neither is used to build the request.
#   content             Request body for PUT / POST (ignored for GET / DELETE).
#   contenttype         Value of the Content-type header for PUT / POST.
#   verbose             Passed to set_verbosity() to obtain the httr
#                       verbosity configuration.
#
# Returns the httr response object. Stops with an error when `verb` is not one
# of the supported methods (previously the function silently returned NULL,
# which only failed later in the caller).
callAzureDataLakeApi <- function(url, verb = "GET", azureActiveContext,
                                 headers = NULL, CMD,
                                 content = NULL, contenttype = "text/plain; charset=UTF-8",
                                 verbose = FALSE) {
  verbosity <- set_verbosity(verbose)

  switch(verb,
    "GET" = GET(url,
      add_headers(.headers = c(Authorization = azureActiveContext$Token,
                               `Content-Length` = "0"
                               )
      ),
      verbosity
    ),
    "PUT" = PUT(url,
      add_headers(.headers = c(Authorization = azureActiveContext$Token,
                               `Transfer-Encoding` = "chunked",
                               # Content-Length is a byte count, so measure
                               # bytes; character count is wrong for
                               # multi-byte (e.g. UTF-8) content.
                               `Content-Length` = nchar(content, type = "bytes"),
                               `Content-type` = contenttype
                               )
      ),
      body = content,
      verbosity
    ),
    "POST" = POST(url,
      add_headers(.headers = c(Authorization = azureActiveContext$Token,
                               `Transfer-Encoding` = "chunked",
                               # Byte count, see the PUT branch.
                               `Content-Length` = nchar(content, type = "bytes"),
                               `Content-type` = contenttype
                               )
      ),
      body = content,
      verbosity
    ),
    "DELETE" = DELETE(url,
      add_headers(.headers = c(Authorization = azureActiveContext$Token,
                               `Content-Length` = "0"
                               )
      ),
      verbosity
    ),
    # Fail loudly instead of returning NULL for an unknown method.
    stop("Unsupported HTTP verb: ", verb, call. = FALSE)
  )
}
getSig <- function(azureActiveContext, url, verb, key, storageAccount,
headers = NULL, container = NULL, CMD = NULL, size = NULL, contenttype = NULL,
date = x_ms_date(), verbose = FALSE) {

Просмотреть файл

@ -265,3 +265,23 @@ is_ssh_password <- function(x) {
on_failure(is_ssh_password) <- function(call, env) {
"Provide an sshPassword"
}
# --- permission
# Assertion predicate for a permission argument: a single character string
# that also satisfies is_valid_permission(). The nested assert_that() means an
# ill-formed string fails with is_valid_permission's own message.
is_permission <- function(x) {
  if (!is.character(x)) return(FALSE)
  if (length(x) != 1) return(FALSE)
  assert_that(is_valid_permission(x))
}

# Message reported when is_permission() itself evaluates to FALSE.
on_failure(is_permission) <- function(call, env) {
  "Provide a valid octal permission string"
}
# TRUE when `x` is exactly three characters long and consists solely of the
# octal digits 0-7; FALSE otherwise.
is_valid_permission <- function(x) {
  requiredLength <- 3
  octalDigitsOnly <- "^[0-7]*$"
  (nchar(x) == requiredLength) && grepl(octalDigitsOnly, x)
}
# Two-line message reported when is_valid_permission() fails inside an
# assertion.
on_failure(is_valid_permission) <- function(call, env) {
  messageLines <- c(
    "Permission string must be 3 in length",
    "and use numbers between 0 to 7 only."
  )
  paste(messageLines, collapse = "\n")
}

Просмотреть файл

@ -5,7 +5,7 @@
\title{Authenticates against Azure Active directory application.}
\usage{
azureAuthenticate(azureActiveContext, tenantID, clientID, authKey,
verbose = FALSE)
verbose = FALSE, resource = "https\%3A\%2F\%2Fmanagement.azure.com\%2F")
}
\arguments{
\item{azureActiveContext}{A container used for caching variables used by `AzureSMR`, created by [createAzureContext()]}

Просмотреть файл

@ -0,0 +1,31 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AzureDataLake.R
\name{azureDataLakeAppend}
\alias{azureDataLakeAppend}
\title{Azure Data Lake APPEND for specified relativePath of an azure data lake account.}
\usage{
azureDataLakeAppend(azureActiveContext, azureDataLakeAccount, relativePath,
contents = "", verbose = FALSE)
}
\arguments{
\item{azureActiveContext}{Provide an \code{azureActiveContext} object used for authentication.}
\item{azureDataLakeAccount}{Provide the name of the Azure Data Lake account.}
\item{relativePath}{Provide a relative path of the directory.}
\item{contents}{Provide contents to write to \code{relativePath}}
\item{verbose}{Print tracing information (default FALSE).}
}
\description{
Azure Data Lake APPEND for specified relativePath of an azure data lake account.
}
\seealso{
Other Azure Data Lake Store functions: \code{\link{azureDataLakeCreate}},
\code{\link{azureDataLakeDelete}},
\code{\link{azureDataLakeGetFileStatus}},
\code{\link{azureDataLakeListStatus}},
\code{\link{azureDataLakeMkdirs}},
\code{\link{azureDataLakeOpen}}
}

Просмотреть файл

@ -0,0 +1,35 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AzureDataLake.R
\name{azureDataLakeCreate}
\alias{azureDataLakeCreate}
\title{Azure Data Lake CREATE for specified relativePath of an azure data lake account.}
\usage{
azureDataLakeCreate(azureActiveContext, azureDataLakeAccount, relativePath,
overwrite = FALSE, permission = NULL, contents = "", verbose = FALSE)
}
\arguments{
\item{azureActiveContext}{Provide an \code{azureActiveContext} object used for authentication.}
\item{azureDataLakeAccount}{Provide the name of the Azure Data Lake account.}
\item{relativePath}{Provide a relative path of the directory.}
\item{overwrite}{Overwrite existing files (default FALSE).}
\item{permission}{Provide the permission to be set for the directory.}
\item{contents}{Provide contents to write to \code{relativePath}}
\item{verbose}{Print tracing information (default FALSE).}
}
\description{
Azure Data Lake CREATE for specified relativePath of an azure data lake account.
}
\seealso{
Other Azure Data Lake Store functions: \code{\link{azureDataLakeAppend}},
\code{\link{azureDataLakeDelete}},
\code{\link{azureDataLakeGetFileStatus}},
\code{\link{azureDataLakeListStatus}},
\code{\link{azureDataLakeMkdirs}},
\code{\link{azureDataLakeOpen}}
}

Просмотреть файл

@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AzureDataLake.R
\name{azureDataLakeDelete}
\alias{azureDataLakeDelete}
\title{Azure Data Lake DELETE for specified relativePath of an azure data lake account.}
\usage{
azureDataLakeDelete(azureActiveContext, azureDataLakeAccount, relativePath,
recursive = FALSE, verbose = FALSE)
}
\arguments{
\item{azureActiveContext}{Provide an \code{azureActiveContext} object used for authentication.}
\item{azureDataLakeAccount}{Provide the name of the Azure Data Lake account.}
\item{relativePath}{Provide a relative path of the directory.}
\item{recursive}{Provide recursive delete option.}
\item{verbose}{Print tracing information (default FALSE).}
}
\value{
Returns a boolean.
}
\description{
Azure Data Lake DELETE for specified relativePath of an azure data lake account.
}
\seealso{
Other Azure Data Lake Store functions: \code{\link{azureDataLakeAppend}},
\code{\link{azureDataLakeCreate}},
\code{\link{azureDataLakeGetFileStatus}},
\code{\link{azureDataLakeListStatus}},
\code{\link{azureDataLakeMkdirs}},
\code{\link{azureDataLakeOpen}}
}

Просмотреть файл

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AzureDataLake.R
\name{azureDataLakeGetFileStatus}
\alias{azureDataLakeGetFileStatus}
\title{Azure Data Lake GETFILESTATUS for specified relativePath of an azure data lake account.}
\usage{
azureDataLakeGetFileStatus(azureActiveContext, azureDataLakeAccount,
relativePath = "", verbose = FALSE)
}
\arguments{
\item{azureActiveContext}{Provide an \code{azureActiveContext} object used for authentication.}
\item{azureDataLakeAccount}{Provide the name of the Azure Data Lake account.}
\item{relativePath}{Provide a relative path of the directory.}
\item{verbose}{Print tracing information (default FALSE).}
}
\value{
Returns a data frame.
}
\description{
Azure Data Lake GETFILESTATUS for specified relativePath of an azure data lake account.
}
\seealso{
Other Azure Data Lake Store functions: \code{\link{azureDataLakeAppend}},
\code{\link{azureDataLakeCreate}},
\code{\link{azureDataLakeDelete}},
\code{\link{azureDataLakeListStatus}},
\code{\link{azureDataLakeMkdirs}},
\code{\link{azureDataLakeOpen}}
}

Просмотреть файл

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AzureDataLake.R
\name{azureDataLakeListStatus}
\alias{azureDataLakeListStatus}
\title{Azure Data Lake LISTSTATUS for specified relativePath of an azure data lake account.}
\usage{
azureDataLakeListStatus(azureActiveContext, azureDataLakeAccount,
relativePath = "", verbose = FALSE)
}
\arguments{
\item{azureActiveContext}{Provide an \code{azureActiveContext} object used for authentication.}
\item{azureDataLakeAccount}{Provide the name of the Azure Data Lake account.}
\item{relativePath}{Provide a relative path of the directory.}
\item{verbose}{Print tracing information (default FALSE).}
}
\value{
Returns a data frame.
}
\description{
Azure Data Lake LISTSTATUS for specified relativePath of an azure data lake account.
}
\seealso{
Other Azure Data Lake Store functions: \code{\link{azureDataLakeAppend}},
\code{\link{azureDataLakeCreate}},
\code{\link{azureDataLakeDelete}},
\code{\link{azureDataLakeGetFileStatus}},
\code{\link{azureDataLakeMkdirs}},
\code{\link{azureDataLakeOpen}}
}

Просмотреть файл

@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AzureDataLake.R
\name{azureDataLakeMkdirs}
\alias{azureDataLakeMkdirs}
\title{Azure Data Lake MKDIRS for specified relativePath of an azure data lake account.}
\usage{
azureDataLakeMkdirs(azureActiveContext, azureDataLakeAccount, relativePath,
permission = NULL, verbose = FALSE)
}
\arguments{
\item{azureActiveContext}{Provide an \code{azureActiveContext} object used for authentication.}
\item{azureDataLakeAccount}{Provide the name of the Azure Data Lake account.}
\item{relativePath}{Provide a relative path of the directory.}
\item{permission}{Provide the permission to be set for the directory.}
\item{verbose}{Print tracing information (default FALSE).}
}
\value{
Returns a boolean.
}
\description{
Azure Data Lake MKDIRS for specified relativePath of an azure data lake account.
}
\seealso{
Other Azure Data Lake Store functions: \code{\link{azureDataLakeAppend}},
\code{\link{azureDataLakeCreate}},
\code{\link{azureDataLakeDelete}},
\code{\link{azureDataLakeGetFileStatus}},
\code{\link{azureDataLakeListStatus}},
\code{\link{azureDataLakeOpen}}
}

36
man/azureDataLakeOpen.Rd Normal file
Просмотреть файл

@ -0,0 +1,36 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AzureDataLake.R
\name{azureDataLakeOpen}
\alias{azureDataLakeOpen}
\title{Azure Data Lake OPEN for specified relativePath of an azure data lake account.}
\usage{
azureDataLakeOpen(azureActiveContext, azureDataLakeAccount, relativePath,
offset, length, verbose = FALSE)
}
\arguments{
\item{azureActiveContext}{Provide an \code{azureActiveContext} object used for authentication.}
\item{azureDataLakeAccount}{Provide the name of the Azure Data Lake account.}
\item{relativePath}{Provide a relative path of the directory.}
\item{offset}{Provide the offset to read from.}
\item{length}{Provide length of data to read.}
\item{verbose}{Print tracing information (default FALSE).}
}
\value{
Returns the contents read from the file as a character string.
}
\description{
Azure Data Lake OPEN for specified relativePath of an azure data lake account.
}
\seealso{
Other Azure Data Lake Store functions: \code{\link{azureDataLakeAppend}},
\code{\link{azureDataLakeCreate}},
\code{\link{azureDataLakeDelete}},
\code{\link{azureDataLakeGetFileStatus}},
\code{\link{azureDataLakeListStatus}},
\code{\link{azureDataLakeMkdirs}}
}

Просмотреть файл

@ -7,6 +7,7 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
test_check("AzureSMR")
# test_check("AzureSMR", filter = "1-authentication")
# test_check("AzureSMR", filter = "2-resources")
#test_check("AzureSMR", filter = "7-datalake")
} else {
# CRAN
@ -14,4 +15,5 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
test_check("AzureSMR")
# test_check("AzureSMR", filter = "1-authentication")
# test_check("AzureSMR", filter = "2-resources")
#test_check("AzureSMR", filter = "7-datalake")
}

Просмотреть файл

@ -0,0 +1,91 @@
#' ------------------------------------------------------------------------
#' Sample config.json file for ADLS tests to run:
#' Create and place the file in the below specified location
#' ------------------------------------------------------------------------
#'
#' {
#' "tenantID": "72f988bf-blah-41af-blah-2d7cd011blah",
#' "clientID": "1d604733-blah-4b37-blah-98fca981blah",
#' "authKey": "zTw5blah+IN+yIblahrKv2K8dM2/BLah4FogBLAH/ME=",
#' "azureDataLakeAccount": "azuresmrtestadls"
#' }
#'
#' ------------------------------------------------------------------------
# Integration test for the Azure Data Lake Store functions. It talks to a
# live account whose name and credentials are read from config.json.
if(interactive()) library("testthat")
# Locate the config file inside the installed AzureSMR package and load it.
settingsfile <- system.file("tests/testthat/config.json", package = "AzureSMR")
config <- read.AzureSMR.config(settingsfile)
# ------------------------------------------------------------------------
context("Data Lake Store")
# Build an authentication context from the config values and authenticate.
# NOTE(review): this setup runs at file scope, i.e. before
# skip_if_missing_config() inside the test below is reached - confirm it
# degrades gracefully when the config file is absent.
asc <- createAzureContext()
with(config,
setAzureContext(asc, tenantID = tenantID, clientID = clientID, authKey = authKey)
)
azureAuthenticate(asc)
# NOTE: make sure to provide the azureDataLakeAccount name in the config file.
azureDataLakeAccount <- config$azureDataLakeAccount
context(" - data lake store")
# The steps below depend on each other and must run in this order:
# cleanup -> mkdirs -> create -> append -> delete.
test_that("Can create, list, get, update and delete items in an azure data lake account", {
skip_if_missing_config(settingsfile)
# cleanup the account before starting tests!
# (recursive delete of "tempfolder"; the result is not checked)
res <- azureDataLakeDelete(asc, azureDataLakeAccount, "tempfolder", TRUE)
# now start the tests
# LISTSTATUS on the test directory, which was just removed by the cleanup
# above, is expected to raise an error.
expect_error(azureDataLakeListStatus(asc, azureDataLakeAccount, "tempfolder"))
# MKDIRS
res <- azureDataLakeMkdirs(asc, azureDataLakeAccount, "tempfolder")
expect_true(res)
# MKDIRS - check 1 - LISTSTATUS of the account root
# (expects exactly one entry - presumably assumes the test account holds
# nothing besides "tempfolder"; verify before running on a shared account)
res <- azureDataLakeListStatus(asc, azureDataLakeAccount, "")
expect_is(res, "data.frame")
expect_equal(nrow(res), 1)
# MKDIRS - check 2 - GETFILESTATUS of the account root
res <- azureDataLakeGetFileStatus(asc, azureDataLakeAccount, "")
expect_is(res, "data.frame")
expect_equal(nrow(res), 1)
# CREATE two 4-character files; positional arguments are
# overwrite = FALSE, permission = "755", contents.
res <- azureDataLakeCreate(asc, azureDataLakeAccount, "tempfolder/tempfile00.txt", FALSE, "755", "abcd")
expect_null(res)
res <- azureDataLakeCreate(asc, azureDataLakeAccount, "tempfolder/tempfile01.txt", FALSE, "755", "efgh")
expect_null(res)
# CREATE - check: both files are listed with length 4
res <- azureDataLakeListStatus(asc, azureDataLakeAccount, "tempfolder")
expect_is(res, "data.frame")
expect_equal(nrow(res), 2)
expect_equal(res$FileStatuses.FileStatus.pathSuffix, c("tempfile00.txt", "tempfile01.txt"))
expect_equal(res$FileStatuses.FileStatus.length, c(4, 4))
# APPEND four more characters to each file
res <- azureDataLakeAppend(asc, azureDataLakeAccount, "tempfolder/tempfile00.txt", "stuv")
expect_null(res)
res <- azureDataLakeAppend(asc, azureDataLakeAccount, "tempfolder/tempfile01.txt", "wxyz")
expect_null(res)
# APPEND - check: both files have grown to length 8
res <- azureDataLakeListStatus(asc, azureDataLakeAccount, "tempfolder")
expect_is(res, "data.frame")
expect_equal(nrow(res), 2)
expect_equal(res$FileStatuses.FileStatus.pathSuffix, c("tempfile00.txt", "tempfile01.txt"))
expect_equal(res$FileStatuses.FileStatus.length, c(8, 8))
# DELETE the test directory recursively
res <- azureDataLakeDelete(asc, azureDataLakeAccount, "tempfolder", TRUE)
expect_true(res)
# DELETE - check: the account root is expected to be empty again
res <- azureDataLakeListStatus(asc, azureDataLakeAccount, "")
expect_is(res, "data.frame")
expect_equal(nrow(res), 0)
})