зеркало из https://github.com/Azure/AzureKusto.git
add examples to docs
This commit is contained in:
Родитель
aa87296df0
Коммит
fc70ff19a5
12
R/endpoint.R
12
R/endpoint.R
|
@ -56,6 +56,18 @@
|
|||
#'
|
||||
#' @return
|
||||
#' An object of class `kusto_database_endpoint`.
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#'
|
||||
#' kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
|
||||
#'
|
||||
#' # supplying a token obtained previously
|
||||
#' token <- get_kusto_token("myclust.australiaeast.kusto.windows.net")
|
||||
#' kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1",
|
||||
#' .query_token=token)
|
||||
#'
|
||||
#' }
|
||||
#' @seealso
|
||||
#' [run_query], [az_kusto_database]
|
||||
#' @rdname database_endpoint
|
||||
|
|
42
R/ingest.R
42
R/ingest.R
|
@ -17,6 +17,45 @@
|
|||
#' - `method="streaming"`: The data is uploaded to the cluster ingestion endpoint. This is the default if the AzureStor package is not present, however be aware that currently (as of February 2019) streaming ingestion is in beta and has to be enabled for a cluster by filing a support ticket.
|
||||
#' - `method="inline"`: The data is embedded into the command text itself. This is only recommended for testing purposes, or small datasets.
|
||||
#'
|
||||
#' Note that the destination table must be created ahead of time for the ingestion to proceed.
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#'
|
||||
#' # ingesting from local:
|
||||
#'
|
||||
#' # ingest via Azure storage
|
||||
#' cont <- AzureStor::storage_container("https://mystorage.blob.core.windows.net/container", sas="mysas")
|
||||
#' ingest_local(db, "file.csv", "table",
|
||||
#' method="indirect", staging_container=cont)
|
||||
#'
|
||||
#' # ingest by streaming
|
||||
#' ingest_local(db, "file.csv", "table", method="streaming")
|
||||
#'
|
||||
#' # ingest by inlining data into query
|
||||
#' ingest_local(db, "file.csv", "table", method="inline")
|
||||
#'
|
||||
#' # ingesting online data:
|
||||
#'
|
||||
#' # a public dataset: Microsoft web data from UCI machine learning repository
|
||||
#' ingest_url(db,
|
||||
#' "https://archive.ics.uci.edu/ml/machine-learning-databases/anonymous/anonymous-msweb.data",
|
||||
#' "table")
|
||||
#'
|
||||
#' # from blob storage:
|
||||
#' ingest_blob(db,
|
||||
#' "https://mystorage.blob.core.windows.net/container/myblob",
|
||||
#' "table",
|
||||
#' sas="mysas")
|
||||
#'
|
||||
#' # from ADLSGen2:
|
||||
#' token <- AzureRMR::get_azure_token("https://storage.azure.com", "mytenant", "myapp", "password")
|
||||
#' ingest_adls2(db,
|
||||
#' "abfss://filesystem@myadls2.dfs.core.windows.net/data/myfile",
|
||||
#' "table",
|
||||
#' token=token)
|
||||
#'
|
||||
#' }
|
||||
#' @rdname ingest
|
||||
#' @export
|
||||
ingest_local <- function(database, src, dest_table, method=NULL, staging_container=NULL,
|
||||
|
@ -164,6 +203,9 @@ ingest_indirect <- function(database, src, dest_table, staging_container=NULL, .
|
|||
if(!requireNamespace("AzureStor", quietly=TRUE))
|
||||
stop("AzureStor package must be installed to do indirect ingestion", call.=FALSE)
|
||||
|
||||
if(is.null(staging_container))
|
||||
stop("Must provide an Azure storage container object for staging", call.=FALSE)
|
||||
|
||||
opts <- utils::modifyList(list(...), list(
|
||||
key=staging_container$endpoint$key,
|
||||
token=staging_container$endpoint$token,
|
||||
|
|
|
@ -24,6 +24,18 @@
|
|||
#'
|
||||
#' @seealso
|
||||
#' [kusto_database_endpoint], [AzureAuth::get_azure_token]
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#'
|
||||
#' get_kusto_token("myclust.australiaeast.kusto.windows.net")
|
||||
#' get_kusto_token(clustername="myclust", location="australiaeast")
|
||||
#'
|
||||
#' # authenticate using client_credentials method: see ?AzureAuth::get_azure_token
|
||||
#' get_kusto_token("myclust.australiaeast.kusto.windows.net",
|
||||
#' tenant="mytenant", app="myapp", password="password")
|
||||
#'
|
||||
#' }
|
||||
#' @export
|
||||
get_kusto_token <- function(server=NULL, clustername, location=NULL, tenant=NULL, app=.kusto_app_id, auth_type=NULL,
|
||||
...)
|
||||
|
|
13
R/query.R
13
R/query.R
|
@ -17,6 +17,19 @@
|
|||
#'
|
||||
#' @seealso
|
||||
#' [kusto_database_endpoint], [ingest_local], [ingest_url], [ingest_blob], [ingest_adls2]
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#'
|
||||
#' endp <- kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
|
||||
#'
|
||||
#' # a command
|
||||
#' run_query(endp, ".show table iris")
|
||||
#'
|
||||
#' # a query
|
||||
#' run_query(endp, "iris | count")
|
||||
#'
|
||||
#' }
|
||||
#' @export
|
||||
run_query <- function(database, qry_cmd, ..., .http_status_handler="stop")
|
||||
{
|
||||
|
|
38
R/tbl.R
38
R/tbl.R
|
@ -99,6 +99,23 @@ ungroup.tbl_kusto_abstract <- function(.data, ...)
|
|||
#' @param .num_partitions The number of partitions for a shuffle query.
|
||||
#' @seealso
|
||||
#' [dplyr::summarise]
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#'
|
||||
#' tbl1 <- tbl_kusto(db, "table1")
|
||||
#'
|
||||
#' ## standard dplyr syntax:
|
||||
#' summarise(tbl1, mx=mean(x))
|
||||
#'
|
||||
#' ## Kusto extensions:
|
||||
#' summarise(tbl1, mx=mean(x), .strategy="broadcast") # a broadcast summarise
|
||||
#'
|
||||
#' summarise(tbl1, mx=mean(x), .shufflekeys=c("var1", "var2")) # shuffle summarise with shuffle keys
|
||||
#'
|
||||
#' summarise(tbl1, mx=mean(x), .num_partitions=5) # no. of partitions for a shuffle summarise
|
||||
#' }
|
||||
#'
|
||||
#' @rdname summarise
|
||||
#' @export
|
||||
summarise.tbl_kusto_abstract <- function(.data, ..., .strategy = NULL, .shufflekeys = NULL, .num_partitions = NULL)
|
||||
|
@ -128,7 +145,26 @@ head.tbl_kusto_abstract <- function(x, n = 6L, ...)
|
|||
#' @param ... Other arguments passed to lower-level functions.
|
||||
#' @seealso
|
||||
#' [dplyr::join]
|
||||
#' @aliases inner_join left_join right_join full_join semi_join anti_join
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#'
|
||||
#' tbl1 <- tbl_kusto(db, "table1")
|
||||
#' tbl2 <- tbl_kusto(db, "table2")
|
||||
#'
|
||||
#' # standard dplyr syntax:
|
||||
#' left_join(tbl1, tbl2)
|
||||
#'
|
||||
#' # Kusto extensions:
|
||||
#' left_join(tbl1, tbl2, .strategy="broadcast") # a broadcast join
|
||||
#'
|
||||
#' left_join(tbl1, tbl2, .shufflekeys=c("var1", "var2")) # shuffle join with shuffle keys
|
||||
#'
|
||||
#' left_join(tbl1, tbl2, .num_partitions=5) # no. of partitions for a shuffle join
|
||||
#' }
|
||||
#'
|
||||
#' @aliases inner_join left_join right_join full_join semi_join anti_join
|
||||
#'
|
||||
#' @rdname join
|
||||
#' @export
|
||||
inner_join.tbl_kusto_abstract <- function(x, y, by = NULL, suffix = c(".x", ".y"),
|
||||
|
|
|
@ -101,6 +101,18 @@ The way \code{kusto_database_endpoint} obtains an AAD token is as follows.
|
|||
\item With the \code{user} and \code{pwd} properties if available
|
||||
\item Otherwise do an interactive authentication and ask for the user credentials using a device code
|
||||
}
|
||||
}
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
|
||||
|
||||
# supplying a token obtained previously
|
||||
token <- get_kusto_token("myclust.australiaeast.kusto.windows.net")
|
||||
kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1",
|
||||
.query_token=token)
|
||||
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
|
|
|
@ -45,6 +45,18 @@ Manage AAD authentication tokens for Kusto clusters
|
|||
|
||||
By default, authentication tokens will be obtained using the main KustoClient Active Directory app. This app can be used to authenticate with any Kusto cluster (assuming, of course, you have the proper credentials).
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
get_kusto_token("myclust.australiaeast.kusto.windows.net")
|
||||
get_kusto_token(clustername="myclust", location="australiaeast")
|
||||
|
||||
# authenticate using client_credentials method: see ?AzureAuth::get_azure_token
|
||||
get_kusto_token("myclust.australiaeast.kusto.windows.net",
|
||||
tenant="mytenant", app="myapp", password="password")
|
||||
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
\link{kusto_database_endpoint}, \link[AzureAuth:get_azure_token]{AzureAuth::get_azure_token}
|
||||
}
|
||||
|
|
|
@ -54,4 +54,43 @@ There are up to 3 possible ways to ingest a local dataset, specified by the \cod
|
|||
\item \code{method="streaming"}: The data is uploaded to the cluster ingestion endpoint. This is the default if the AzureStor package is not present, however be aware that currently (as of February 2019) streaming ingestion is in beta and has to be enabled for a cluster by filing a support ticket.
|
||||
\item \code{method="inline"}: The data is embedded into the command text itself. This is only recommended for testing purposes, or small datasets.
|
||||
}
|
||||
|
||||
Note that the destination table must be created ahead of time for the ingestion to proceed.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
# ingesting from local:
|
||||
|
||||
# ingest via Azure storage
|
||||
cont <- AzureStor::storage_container("https://mystorage.blob.core.windows.net/container", sas="mysas")
|
||||
ingest_local(db, "file.csv", "table",
|
||||
method="indirect", staging_container=cont)
|
||||
|
||||
# ingest by streaming
ingest_local(db, "file.csv", "table", method="streaming")
|
||||
|
||||
# ingest by inlining data into query
|
||||
ingest_local(db, "file.csv", "table", method="inline")
|
||||
|
||||
# ingesting online data:
|
||||
|
||||
# a public dataset: Microsoft web data from UCI machine learning repository
|
||||
ingest_url(db,
|
||||
"https://archive.ics.uci.edu/ml/machine-learning-databases/anonymous/anonymous-msweb.data",
|
||||
"table")
|
||||
|
||||
# from blob storage:
|
||||
ingest_blob(db,
|
||||
"https://mystorage.blob.core.windows.net/container/myblob",
|
||||
"table",
|
||||
sas="mysas")
|
||||
|
||||
# from ADLSGen2:
|
||||
token <- AzureRMR::get_azure_token("https://storage.azure.com", "mytenant", "myapp", "password")
|
||||
ingest_adls2(db,
|
||||
"abfss://filesystem@myadls2.dfs.core.windows.net/data/myfile",
|
||||
"table",
|
||||
token=token)
|
||||
|
||||
}
|
||||
}
|
||||
|
|
18
man/join.Rd
18
man/join.Rd
|
@ -56,6 +56,24 @@
|
|||
}
|
||||
\description{
|
||||
These methods are the same as other joining methods, with the exception of the \code{.strategy}, \code{.shufflekeys} and \code{.num_partitions} optional arguments. They provide hints to the Kusto engine on how to execute the join, and can sometimes be useful to speed up a query. See the Kusto documentation for more details.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
tbl1 <- tbl_kusto(db, "table1")
|
||||
tbl2 <- tbl_kusto(db, "table2")
|
||||
|
||||
# standard dplyr syntax:
|
||||
left_join(tbl1, tbl2)
|
||||
|
||||
# Kusto extensions:
|
||||
left_join(tbl1, tbl2, .strategy="broadcast") # a broadcast join
|
||||
|
||||
left_join(tbl1, tbl2, .shufflekeys=c("var1", "var2")) # shuffle join with shuffle keys
|
||||
|
||||
left_join(tbl1, tbl2, .num_partitions=5) # no. of partitions for a shuffle join
|
||||
}
|
||||
|
||||
}
|
||||
\seealso{
|
||||
\link[dplyr:join]{dplyr::join}
|
||||
|
|
|
@ -21,6 +21,19 @@ Run a query or command against a Kusto database
|
|||
\details{
|
||||
This function is the workhorse of the AzureKusto package. It communicates with the Kusto server and returns the query or command results, as data frames.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
endp <- kusto_database_endpoint(server="myclust.australiaeast.kusto.windows.net", database="db1")
|
||||
|
||||
# a command
|
||||
run_query(endp, ".show table iris")
|
||||
|
||||
# a query
|
||||
run_query(endp, "iris | count")
|
||||
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
\link{kusto_database_endpoint}, \link{ingest_local}, \link{ingest_url}, \link{ingest_blob}, \link{ingest_adls2}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,23 @@
|
|||
}
|
||||
\description{
|
||||
This method is the same as other summarise methods, with the exception of the \code{.strategy}, \code{.shufflekeys} and \code{.num_partitions} optional arguments. They provide hints to the Kusto engine on how to execute the summarisation, and can sometimes be useful to speed up a query. See the Kusto documentation for more details.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
|
||||
tbl1 <- tbl_kusto(db, "table1")
|
||||
|
||||
## standard dplyr syntax:
|
||||
summarise(tbl1, mx=mean(x))
|
||||
|
||||
## Kusto extensions:
|
||||
summarise(tbl1, mx=mean(x), .strategy="broadcast") # a broadcast summarise
|
||||
|
||||
summarise(tbl1, mx=mean(x), .shufflekeys=c("var1", "var2")) # shuffle summarise with shuffle keys
|
||||
|
||||
summarise(tbl1, mx=mean(x), .num_partitions=5) # no. of partitions for a shuffle summarise
|
||||
}
|
||||
|
||||
}
|
||||
\seealso{
|
||||
\link[dplyr:summarise]{dplyr::summarise}
|
||||
|
|
Загрузка…
Ссылка в новой задаче