This commit is contained in:
hong-revo 2019-03-07 22:40:42 +11:00
Родитель aa87296df0
Коммит fc70ff19a5
11 изменённых файлов: 227 добавлений и 1 удалений

Просмотреть файл

@ -56,6 +56,18 @@
#'
#' @return
#' An object of class `kusto_database_endpoint`.
#'
#' @examples
#' \dontrun{
#'
#' kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
#'
#' # supplying a token obtained previously
#' token <- get_kusto_token("myclust.australiaeast.kusto.windows.net")
#' kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1",
#' .query_token=token)
#'
#' }
#' @seealso
#' [run_query], [az_kusto_database]
#' @rdname database_endpoint

Просмотреть файл

@ -17,6 +17,45 @@
#' - `method="streaming"`: The data is uploaded to the cluster ingestion endpoint. This is the default if the AzureStor package is not present, however be aware that currently (as of February 2019) streaming ingestion is in beta and has to be enabled for a cluster by filing a support ticket.
#' - `method="inline"`: The data is embedded into the command text itself. This is only recommended for testing purposes, or small datasets.
#'
#' Note that the destination table must be created ahead of time for the ingestion to proceed.
#'
#' @examples
#' \dontrun{
#'
#' # ingesting from local:
#'
#' # ingest via Azure storage
#' cont <- AzureStor::storage_container("https://mystorage.blob.core.windows.net/container", sas="mysas")
#' ingest_local(db, "file.csv", "table",
#' method="indirect", staging_container=cont)
#'
#' # ingest by streaming
#' ingest_local(db, "file.csv", "table", method="streaming")
#'
#' # ingest by inlining data into query
#' ingest_local(db, "file.csv", "table", method="inline")
#'
#' # ingesting online data:
#'
#' # a public dataset: Microsoft web data from UCI machine learning repository
#' ingest_url(db,
#' "https://archive.ics.uci.edu/ml/machine-learning-databases/anonymous/anonymous-msweb.data",
#' "table")
#'
#' # from blob storage:
#' ingest_blob(db,
#' "https://mystorage.blob.core.windows.net/container/myblob",
#' "table",
#' sas="mysas")
#'
#' # from ADLSGen2:
#' token <- AzureRMR::get_azure_token("https://storage.azure.com", "mytenant", "myapp", "password")
#' ingest_adls2(db,
#' "abfss://filesystem@myadls2.dfs.core.windows.net/data/myfile",
#' "table",
#' token=token)
#'
#' }
#' @rdname ingest
#' @export
ingest_local <- function(database, src, dest_table, method=NULL, staging_container=NULL,
@ -164,6 +203,9 @@ ingest_indirect <- function(database, src, dest_table, staging_container=NULL, .
if(!requireNamespace("AzureStor", quietly=TRUE))
stop("AzureStor package must be installed to do indirect ingestion", call.=FALSE)
if(is.null(staging_container))
stop("Must provide an Azure storage container object for staging", call.=FALSE)
opts <- utils::modifyList(list(...), list(
key=staging_container$endpoint$key,
token=staging_container$endpoint$token,

Просмотреть файл

@ -24,6 +24,18 @@
#'
#' @seealso
#' [kusto_database_endpoint], [AzureAuth::get_azure_token]
#'
#' @examples
#' \dontrun{
#'
#' get_kusto_token("myclust.australiaeast.kusto.windows.net")
#' get_kusto_token(clustername="myclust", location="australiaeast")
#'
#' # authenticate using client_credentials method: see ?AzureAuth::get_azure_token
#' get_kusto_token("myclust.australiaeast.kusto.windows.net",
#' tenant="mytenant", app="myapp", password="password")
#'
#' }
#' @export
get_kusto_token <- function(server=NULL, clustername, location=NULL, tenant=NULL, app=.kusto_app_id, auth_type=NULL,
...)

Просмотреть файл

@ -17,6 +17,19 @@
#'
#' @seealso
#' [kusto_database_endpoint], [ingest_local], [ingest_url], [ingest_blob], [ingest_adls2]
#'
#' @examples
#' \dontrun{
#'
#' endp <- kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
#'
#' # a command
#' run_query(endp, ".show table iris")
#'
#' # a query
#' run_query(endp, "iris | count")
#'
#' }
#' @export
run_query <- function(database, qry_cmd, ..., .http_status_handler="stop")
{

38
R/tbl.R
Просмотреть файл

@ -99,6 +99,23 @@ ungroup.tbl_kusto_abstract <- function(.data, ...)
#' @param .num_partitions The number of partitions for a shuffle query.
#' @seealso
#' [dplyr::summarise]
#'
#' @examples
#' \dontrun{
#'
#' tbl1 <- tbl_kusto(db, "table1")
#'
#' ## standard dplyr syntax:
#' summarise(tbl1, mx=mean(x))
#'
#' ## Kusto extensions:
#' summarise(tbl1, mx=mean(x), .strategy="broadcast") # a broadcast summarise
#'
#' summarise(tbl1, mx=mean(x), .shufflekeys=c("var1", "var2")) # shuffle summarise with shuffle keys
#'
#' summarise(tbl1, mx=mean(x), .num_partitions=5) # no. of partitions for a shuffle summarise
#' }
#'
#' @rdname summarise
#' @export
summarise.tbl_kusto_abstract <- function(.data, ..., .strategy = NULL, .shufflekeys = NULL, .num_partitions = NULL)
@ -128,7 +145,26 @@ head.tbl_kusto_abstract <- function(x, n = 6L, ...)
#' @param ... Other arguments passed to lower-level functions.
#' @seealso
#' [dplyr::join]
#' @aliases inner_join left_join right_join full_join semi_join anti_join
#'
#' @examples
#' \dontrun{
#'
#' tbl1 <- tbl_kusto(db, "table1")
#' tbl2 <- tbl_kusto(db, "table2")
#'
#' # standard dplyr syntax:
#' left_join(tbl1, tbl2)
#'
#' # Kusto extensions:
#' left_join(tbl1, tbl2, .strategy="broadcast") # a broadcast join
#'
#' left_join(tbl1, tbl2, .shufflekeys=c("var1", "var2")) # shuffle join with shuffle keys
#'
#' left_join(tbl1, tbl2, .num_partitions=5) # no. of partitions for a shuffle join
#' }
#'
#' @aliases inner_join left_join right_join full_join semi_join anti_join
#'
#' @rdname join
#' @export
inner_join.tbl_kusto_abstract <- function(x, y, by = NULL, suffix = c(".x", ".y"),

Просмотреть файл

@ -101,6 +101,18 @@ The way \code{kusto_database_endpoint} obtains an AAD token is as follows.
\item With the \code{user} and \code{pwd} properties if available
\item Otherwise do an interactive authentication and ask for the user credentials using a device code
}
}
}
\examples{
\dontrun{
kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
# supplying a token obtained previously
token <- get_kusto_token("myclust.australiaeast.kusto.windows.net")
kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1",
.query_token=token)
}
}
\seealso{

Просмотреть файл

@ -45,6 +45,18 @@ Manage AAD authentication tokens for Kusto clusters
By default, authentication tokens will be obtained using the main KustoClient Active Directory app. This app can be used to authenticate with any Kusto cluster (assuming, of course, you have the proper credentials).
}
\examples{
\dontrun{
get_kusto_token("myclust.australiaeast.kusto.windows.net")
get_kusto_token(clustername="myclust", location="australiaeast")
# authenticate using client_credentials method: see ?AzureAuth::get_azure_token
get_kusto_token("myclust.australiaeast.kusto.windows.net",
tenant="mytenant", app="myapp", password="password")
}
}
\seealso{
\link{kusto_database_endpoint}, \link[AzureAuth:get_azure_token]{AzureAuth::get_azure_token}
}

Просмотреть файл

@ -54,4 +54,43 @@ There are up to 3 possible ways to ingest a local dataset, specified by the \cod
\item \code{method="streaming"}: The data is uploaded to the cluster ingestion endpoint. This is the default if the AzureStor package is not present, however be aware that currently (as of February 2019) streaming ingestion is in beta and has to be enabled for a cluster by filing a support ticket.
\item \code{method="inline"}: The data is embedded into the command text itself. This is only recommended for testing purposes, or small datasets.
}
Note that the destination table must be created ahead of time for the ingestion to proceed.
}
\examples{
\dontrun{
# ingesting from local:
# ingest via Azure storage
cont <- AzureStor::storage_container("https://mystorage.blob.core.windows.net/container", sas="mysas")
ingest_local(db, "file.csv", "table",
method="indirect", staging_container=cont)
# ingest by streaming
ingest_local(db, "file.csv", "table", method="streaming")
# ingest by inlining data into query
ingest_local(db, "file.csv", "table", method="inline")
# ingesting online data:
# a public dataset: Microsoft web data from UCI machine learning repository
ingest_url(db,
"https://archive.ics.uci.edu/ml/machine-learning-databases/anonymous/anonymous-msweb.data",
"table")
# from blob storage:
ingest_blob(db,
"https://mystorage.blob.core.windows.net/container/myblob",
"table",
sas="mysas")
# from ADLSGen2:
token <- AzureRMR::get_azure_token("https://storage.azure.com", "mytenant", "myapp", "password")
ingest_adls2(db,
"abfss://filesystem@myadls2.dfs.core.windows.net/data/myfile",
"table",
token=token)
}
}

Просмотреть файл

@ -56,6 +56,24 @@
}
\description{
These methods are the same as other joining methods, with the exception of the \code{.strategy}, \code{.shufflekeys} and \code{.num_partitions} optional arguments. They provide hints to the Kusto engine on how to execute the join, and can sometimes be useful to speed up a query. See the Kusto documentation for more details.
}
\examples{
\dontrun{
tbl1 <- tbl_kusto(db, "table1")
tbl2 <- tbl_kusto(db, "table2")
# standard dplyr syntax:
left_join(tbl1, tbl2)
# Kusto extensions:
left_join(tbl1, tbl2, .strategy="broadcast") # a broadcast join
left_join(tbl1, tbl2, .shufflekeys=c("var1", "var2")) # shuffle join with shuffle keys
left_join(tbl1, tbl2, .num_partitions=5) # no. of partitions for a shuffle join
}
}
\seealso{
\link[dplyr:join]{dplyr::join}

Просмотреть файл

@ -21,6 +21,19 @@ Run a query or command against a Kusto database
\details{
This function is the workhorse of the AzureKusto package. It communicates with the Kusto server and returns the query or command results, as data frames.
}
\examples{
\dontrun{
endp <- kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
# a command
run_query(endp, ".show table iris")
# a query
run_query(endp, "iris | count")
}
}
\seealso{
\link{kusto_database_endpoint}, \link{ingest_local}, \link{ingest_url}, \link{ingest_blob}, \link{ingest_adls2}
}

Просмотреть файл

@ -20,6 +20,23 @@
}
\description{
This method is the same as other summarise methods, with the exception of the \code{.strategy}, \code{.shufflekeys} and \code{.num_partitions} optional arguments. They provide hints to the Kusto engine on how to execute the summarisation, and can sometimes be useful to speed up a query. See the Kusto documentation for more details.
}
\examples{
\dontrun{
tbl1 <- tbl_kusto(db, "table1")
## standard dplyr syntax:
summarise(tbl1, mx=mean(x))
## Kusto extensions:
summarise(tbl1, mx=mean(x), .strategy="broadcast") # a broadcast summarise
summarise(tbl1, mx=mean(x), .shufflekeys=c("var1", "var2")) # shuffle summarise with shuffle keys
summarise(tbl1, mx=mean(x), .num_partitions=5) # no. of partitions for a shuffle summarise
}
}
\seealso{
\link[dplyr:summarise]{dplyr::summarise}