From ab456153123b4bd834c1582aea7859e0f347dc25 Mon Sep 17 00:00:00 2001 From: Alex Kyllo Date: Tue, 20 Dec 2022 21:13:22 -0800 Subject: [PATCH] fix and resubmit to CRAN --- DESCRIPTION | 2 +- R/add_methods.R | 6 +- R/az_database.R | 2 +- R/az_kusto.R | 2 +- README.md | 2 +- cran-comments.md | 7 ++ man/az_kusto.Rd | 2 +- man/az_kusto_database.Rd | 2 +- man/create_kusto_cluster.Rd | 2 +- man/delete_kusto_cluster.Rd | 2 +- man/get_kusto_cluster.Rd | 2 +- vignettes/AzureKusto.Rmd | 162 +++++++++++++++++++++++++++--------- 12 files changed, 141 insertions(+), 52 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 336890f..a7d05b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Authors@R: c( person("dbplyr development team", role="cph", comment="Original framework for dplyr/database interface"), person("Microsoft", role = "cph") ) -Description: An interface to 'Azure Data Explorer', also known as 'Kusto', a fast, highly scalable data exploration service from Microsoft: <https://azure.microsoft.com/en-us/services/data-explorer/>. Includes 'DBI' and 'dplyr' interfaces, with the latter modelled after the 'dbplyr' package, whereby queries are translated from R into the native 'KQL' query language and executed lazily. On the admin side, the package extends the object framework provided by 'AzureRMR' to support creation and deletion of databases, and management of database principals. Part of the 'AzureR' family of packages. +Description: An interface to 'Azure Data Explorer', also known as 'Kusto', a fast, highly scalable data exploration service from Microsoft: <https://azure.microsoft.com/en-us/products/data-explorer/>. Includes 'DBI' and 'dplyr' interfaces, with the latter modelled after the 'dbplyr' package, whereby queries are translated from R into the native 'KQL' query language and executed lazily. On the admin side, the package extends the object framework provided by 'AzureRMR' to support creation and deletion of databases, and management of database principals. Part of the 'AzureR' family of packages. 
URL: https://github.com/Azure/AzureKusto https://github.com/Azure/AzureR BugReports: https://github.com/Azure/AzureKusto/issues License: MIT + file LICENSE diff --git a/R/add_methods.R b/R/add_methods.R index a7e629a..082874c 100644 --- a/R/add_methods.R +++ b/R/add_methods.R @@ -27,7 +27,7 @@ #' @seealso #' [get_kusto_cluster], [delete_kusto_cluster], [az_kusto] #' -#' [Kusto/Azure Data Explorer documentation](https://docs.microsoft.com/en-us/azure/data-explorer/) +#' [Kusto/Azure Data Explorer documentation](https://learn.microsoft.com/en-us/azure/data-explorer/) #' #' @examples #' \dontrun{ @@ -66,7 +66,7 @@ NULL #' @seealso #' [create_kusto_cluster], [delete_kusto_cluster], [az_kusto] #' -#' [Kusto/Azure Data Explorer documentation](https://docs.microsoft.com/en-us/azure/data-explorer/) +#' [Kusto/Azure Data Explorer documentation](https://learn.microsoft.com/en-us/azure/data-explorer/) #' #' @examples #' \dontrun{ @@ -104,7 +104,7 @@ NULL #' @seealso #' [create_kusto_cluster], [get_kusto_cluster], [az_kusto] #' -#' [Kusto/Azure Data Explorer documentation](https://docs.microsoft.com/en-us/azure/data-explorer/) +#' [Kusto/Azure Data Explorer documentation](https://learn.microsoft.com/en-us/azure/data-explorer/) #' #' @examples #' \dontrun{ diff --git a/R/az_database.R b/R/az_database.R index 05fb868..f7afb85 100644 --- a/R/az_database.R +++ b/R/az_database.R @@ -32,7 +32,7 @@ #' [az_kusto], [kusto_database_endpoint], #' [create_database], [get_database], [delete_database] #' -#' [Kusto/Azure Data Explorer documentation](https://docs.microsoft.com/en-us/azure/data-explorer/), +#' [Kusto/Azure Data Explorer documentation](https://learn.microsoft.com/en-us/azure/data-explorer/), #' #' @examples #' \dontrun{ diff --git a/R/az_kusto.R b/R/az_kusto.R index 895b5a4..569d686 100644 --- a/R/az_kusto.R +++ b/R/az_kusto.R @@ -34,7 +34,7 @@ #' [create_kusto_cluster], [get_kusto_cluster], [delete_kusto_cluster], #' [get_kusto_token] #' -#' [Kusto/Azure Data Explorer 
documentation](https://docs.microsoft.com/en-us/azure/data-explorer/), +#' [Kusto/Azure Data Explorer documentation](https://learn.microsoft.com/en-us/azure/data-explorer/), #' #' @examples #' \dontrun{ diff --git a/README.md b/README.md index 32c40ff..3dccbd6 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ![R-CMD-check](https://github.com/Azure/AzureKusto/workflows/R-CMD-check/badge.svg) R interface to Kusto, also known as -[Azure Data Explorer](https://azure.microsoft.com/en-us/services/data-explorer/), +[Azure Data Explorer](https://azure.microsoft.com/en-us/products/data-explorer/), a fast and highly scalable data exploration service. ## Installation diff --git a/cran-comments.md b/cran-comments.md index 9b17516..f39c745 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,5 +1,12 @@ # CRAN Comments for AzureKusto +## Resubmission + +This is a resubmission. In this version I have: + +- Fixed hyperlinks in the documentation that pointed to 301 redirects. +- Updated vignettes/AzureKusto.Rmd to match new content in README.md + ## R CMD check results There were no ERRORs, WARNINGs or NOTEs. 
\ No newline at end of file diff --git a/man/az_kusto.Rd b/man/az_kusto.Rd index 457659d..6d3548f 100644 --- a/man/az_kusto.Rd +++ b/man/az_kusto.Rd @@ -72,5 +72,5 @@ kust$get_aad_token() \link{create_kusto_cluster}, \link{get_kusto_cluster}, \link{delete_kusto_cluster}, \link{get_kusto_token} -\href{https://docs.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation}, +\href{https://learn.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation}, } diff --git a/man/az_kusto_database.Rd b/man/az_kusto_database.Rd index 283d7c0..3b78e68 100644 --- a/man/az_kusto_database.Rd +++ b/man/az_kusto_database.Rd @@ -65,5 +65,5 @@ db$get_database_endpoint(use_integer64=FALSE) \link{az_kusto}, \link{kusto_database_endpoint}, \link{create_database}, \link{get_database}, \link{delete_database} -\href{https://docs.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation}, +\href{https://learn.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation}, } diff --git a/man/create_kusto_cluster.Rd b/man/create_kusto_cluster.Rd index d83ad8a..7c16c1c 100644 --- a/man/create_kusto_cluster.Rd +++ b/man/create_kusto_cluster.Rd @@ -50,5 +50,5 @@ rg$create_kusto_cluster("mykusto", node_size="L16") \seealso{ \link{get_kusto_cluster}, \link{delete_kusto_cluster}, \link{az_kusto} -\href{https://docs.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation} +\href{https://learn.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation} } diff --git a/man/delete_kusto_cluster.Rd b/man/delete_kusto_cluster.Rd index 9b30ab3..4e68bdf 100644 --- a/man/delete_kusto_cluster.Rd +++ b/man/delete_kusto_cluster.Rd @@ -45,5 +45,5 @@ rg$delete_kusto_cluster("mycluster") \seealso{ \link{create_kusto_cluster}, \link{get_kusto_cluster}, \link{az_kusto} -\href{https://docs.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation} 
+\href{https://learn.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation} } diff --git a/man/get_kusto_cluster.Rd b/man/get_kusto_cluster.Rd index 72da787..f47c5a3 100644 --- a/man/get_kusto_cluster.Rd +++ b/man/get_kusto_cluster.Rd @@ -47,5 +47,5 @@ rg$get_kusto_cluster("mykusto") \seealso{ \link{create_kusto_cluster}, \link{delete_kusto_cluster}, \link{az_kusto} -\href{https://docs.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation} +\href{https://learn.microsoft.com/en-us/azure/data-explorer/}{Kusto/Azure Data Explorer documentation} } diff --git a/vignettes/AzureKusto.Rmd b/vignettes/AzureKusto.Rmd index 52f99b2..a2c2a89 100644 --- a/vignettes/AzureKusto.Rmd +++ b/vignettes/AzureKusto.Rmd @@ -1,6 +1,6 @@ --- title: "AzureKusto" -date: "2019-04-15" +date: "2022-12-20" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{AzureKusto} @@ -15,9 +15,17 @@ knitr::opts_chunk$set( ) ``` -AzureKusto is the R interface to [Azure Data Explorer](https://azure.microsoft.com/en-us/services/data-explorer/) (internally codenamed "Kusto"), a fast, fully managed data analytics service from Microsoft. +AzureKusto is the R interface to +[Azure Data Explorer](https://azure.microsoft.com/en-us/products/data-explorer/) +(internally codenamed "Kusto"), a fast, fully managed data analytics service +from Microsoft. -AzureKusto provides an interface (including [DBI](https://dbi.r-dbi.org/) compliant methods) for connecting to Kusto clusters and submitting [Kusto Query Language (KQL)](https://docs.microsoft.com/en-us/azure/kusto/query/) statements, as well as a [dbplyr](https://dbplyr.tidyverse.org/) style backend that translates [dplyr](https://dplyr.tidyverse.org/) queries into KQL statements. 
+AzureKusto provides an interface (including [DBI](https://dbi.r-dbi.org/) +compliant methods) for connecting to Kusto clusters and submitting +[Kusto Query Language (KQL)](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/) +statements, as well as a [dbplyr](https://dbplyr.tidyverse.org/) style backend +that translates [dplyr](https://dplyr.tidyverse.org/) queries into KQL +statements. ```{r setup, eval = FALSE} @@ -25,16 +33,22 @@ library(AzureKusto) ## The first time you import AzureKusto, you'll be asked if you'd like to create a directory to cache OAuth2 tokens. ## Connect to an AzureKusto database with (default) device code authentication: -Samples <- kusto_database_endpoint(server="https://help.kusto.windows.net", database="Samples") - -## To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code ######### to authenticate. -## Waiting for device code in browser... -## Press Esc/Ctrl + C to abort -## Authentication complete. +Samples <- kusto_database_endpoint(server = "https://help.kusto.windows.net", database = "Samples") +# (New in 1.1.0) Some other ways to call this that also work: +# Samples <- kusto_database_endpoint(server="help", database="Samples") +# Samples <- kusto_database_endpoint(cluster="help", database="Samples") +# No app ID supplied; using KustoClient app +# Waiting for authentication in browser... +# Press Esc/Ctrl + C to abort +# VSCode WebView only supports showing local http content. +# Opening in external browser... +# Browsing https://login.microsoftonline.com/common/oauth2/v2.0/authorize... +# Authentication complete. ``` -Now you can issue KQL queries to the Kusto database with `run_query()` and get the results back as a data.frame object. +Now you can issue KQL queries to the Kusto database with `run_query()` and get +the results back as a data.frame object. 
```{r run_query, eval = FALSE} res <- run_query(Samples, "StormEvents | summarize EventCount = count() by State | order by State asc") @@ -47,13 +61,14 @@ head(res) ## 4 ARIZONA 340 ## 5 ARKANSAS 1028 ## 6 ATLANTIC NORTH 188 - ``` -`run_query()` also supports query parameters, to allow you to call parameterized Kusto functions. Simply pass your parameters as additional keyword arguments and they will be escaped and interpolated into the query string. +`run_query()` also supports query parameters, to allow you to call parameterized +Kusto functions. Simply pass your parameters as additional keyword arguments and +they will be escaped and interpolated into the query string. ```{r run_query_params, eval = FALSE} -res <- run_query(Samples, "MyFunction(lim)", lim=10L) +res <- run_query(Samples, "MyFunction(lim)", lim = 10L) head(res) ## StartTime EndTime EpisodeId EventId State @@ -63,12 +78,14 @@ head(res) ## 4 2007-12-30 16:00:00 2007-12-30 16:05:00 11749 64588 GEORGIA ## 5 2007-12-20 07:50:00 2007-12-20 07:53:00 12554 68796 MISSISSIPPI ## 6 2007-12-20 10:32:00 2007-12-20 10:36:00 12554 68814 MISSISSIPPI - ``` -`run_query()` can also handle command statements, which begin with a '.' character. Command statements do not accept parameters and cannot be combined together with query statements in the same request. +`run_query()` can also handle command statements, which begin with a '.' +character. Command statements do not accept parameters and cannot be combined +together with query statements in the same request. -Command statements return a list where the first element is the table returned by the command (if any) and the other elements contain command metadata. +Command statements return a list where the first element is the table returned +by the command (if any) and the other elements contain command metadata. 
```{r run_query_commands, eval = FALSE} res <- run_query(Samples, ".show tables | count") @@ -76,12 +93,14 @@ res[[1]] ## Count ## 1 5 - ``` ### dplyr Interface -The package also implements a [dplyr](https://github.com/tidyverse/dplyr)-style interface for building a query upon a `tbl_kusto` object and then running it on the remote Kusto database and returning the result as a regular tibble object with `collect()`. +The package also implements a [dplyr](https://github.com/tidyverse/dplyr)-style +interface for building a query upon a `tbl_kusto` object and then running it on +the remote Kusto database and returning the result as a regular tibble object +with `collect()`. ```{r dplyr, eval = FALSE} library(dplyr) @@ -89,9 +108,9 @@ library(dplyr) StormEvents <- tbl_kusto(Samples, "StormEvents") q <- StormEvents %>% - group_by(State) %>% - summarize(EventCount=n()) %>% - arrange(State) + group_by(State) %>% + summarize(EventCount = n()) %>% + arrange(State) show_query(q) @@ -115,31 +134,94 @@ collect(q) ## 9 COLORADO 1654 ## 10 CONNECTICUT 148 ## # ... with 57 more rows - ``` -`tbl_kusto` also accepts query parameters, in case the Kusto source table is a parameterized function: + +(New in 1.1.0) The `$` operator can be used to access fields in dynamic columns: + +```{r dollar, eval = FALSE} +q <- StormEvents %>% + slice_sample(10) %>% + mutate(Description = as.character(StormSummary$Details$Description)) %>% + select(EventId, Description) + +show_query(q) + +# cluster('https://help.kusto.windows.net').database('Samples').['StormEvents'] +# | sample 10 +# | extend ['Description'] = tostring(['StormSummary'] . ['Details'] . ['Description']) +# | project ['EventId'], ['Description'] + +# # A tibble: 10 × 2 +# EventId Description +# +# 1 61032 A waterspout formed in the Atlantic southeast of Melbourne Beach and briefly moved toward shore. +# 2 60904 As much as 9 inches of rain fell in a 24-hour period across parts of coastal Volusia County. 
+# 3 60913 A tornado touched down in the Town of Eustis at the northern end of West Crooked Lake. The tornado quickly intensified to EF1 strength as it moved north northwest through Eustis. The track was just under two miles long… +# 4 64588 The county dispatch reported several trees were blown down along Quincey Batten Loop near State Road 206. The cost of tree removal was estimated. +# 5 68796 Numerous large trees were blown down with some down on power lines. Damage occurred in eastern Adams county. +# 6 68814 This tornado began as a small, narrow path of minor damage, including a porch being blown off a house. It reached its maximum intensity as it crossed highway 29. Here, a brick home had all of its roof structure blown o… +# 7 68834 Several trees and power lines were blown down along Zetus Road in the Zetus Community. A few of those trees were down on a mobile home which caused significant damage. +# 8 68846 A swath of penny to quarter sized hail fell from just east of French Camp to about 6 miles north of Weir. +# 9 73241 The heavy rain from an active monsoonal trough that had been nearly stationary just to the south of the islands caused widespread flooding across Tutuila. 
Flash Flooding was reported from the Malaeimi Valley to the Ba… +# 10 64725 State Route 8 and Rock Run Road were flooded and impassable +``` + +`tbl_kusto` also accepts query parameters, in case the Kusto source table is a +parameterized function: ```{r tbl_kusto_params, eval = FALSE} -MyFunctionDate <- tbl_kusto(Samples, "MyFunctionDate(dt)", dt=as.Date("2019-01-01")) +MyFunctionDate <- tbl_kusto(Samples, "MyFunctionDate(dt)", dt = as.Date("2019-01-01")) MyFunctionDate %>% - select(StartTime, EndTime, EpisodeId, EventId, State) %>% - head() %>% - collect() + select(StartTime, EndTime, EpisodeId, EventId, State) %>% + head() %>% + collect() ## # A tibble: 6 x 5 -## StartTime EndTime EpisodeId EventId State -## +## StartTime EndTime EpisodeId EventId State +## ## 1 2007-09-29 08:11:00 2007-09-29 08:11:00 11091 61032 ATLANTIC SOUTH -## 2 2007-09-18 20:00:00 2007-09-19 18:00:00 11074 60904 FLORIDA -## 3 2007-09-20 21:57:00 2007-09-20 22:05:00 11078 60913 FLORIDA -## 4 2007-12-30 16:00:00 2007-12-30 16:05:00 11749 64588 GEORGIA -## 5 2007-12-20 07:50:00 2007-12-20 07:53:00 12554 68796 MISSISSIPPI -## 6 2007-12-20 10:32:00 2007-12-20 10:36:00 12554 68814 MISSISSIPPI - +## 2 2007-09-18 20:00:00 2007-09-19 18:00:00 11074 60904 FLORIDA +## 3 2007-09-20 21:57:00 2007-09-20 22:05:00 11078 60913 FLORIDA +## 4 2007-12-30 16:00:00 2007-12-30 16:05:00 11749 64588 GEORGIA +## 5 2007-12-20 07:50:00 2007-12-20 07:53:00 12554 68796 MISSISSIPPI +## 6 2007-12-20 10:32:00 2007-12-20 10:36:00 12554 68814 MISSISSIPPI ``` + +### Exporting to storage + +(New in 1.1.0) The function `export()` enables you to export a query result to +Azure Storage in one step. 
+ +```{r exporting, eval = FALSE} +export( + database = Samples, + storage_uri = "https://mystorage.blob.core.windows.net/StormEvents", + query = "StormEvents | summarize EventCount = count() by State | order by State", + name_prefix = "events", + format = "parquet" +) + +# Path NumRecords SizeInBytes +# 1 https://mystorage.blob.core.windows.net/StormEvents/events/events_1.snappy.parquet 67 1511 + +library(dplyr) +StormEvents <- tbl_kusto(Samples, "StormEvents") +q <- StormEvents %>% + group_by(State) %>% + summarize(EventCount = n()) %>% + arrange(State) %>% + export("https://mystorage.blob.core.windows.net/StormEvents") + +# # A tibble: 1 × 3 +# Path NumRecords SizeInBytes +# +# 1 https://mystorage.blob.core.windows.net/StormEvents/export/export_1.snappy.parquet 50 59284 +``` + + ### DBI interface AzureKusto implements a subset of the DBI specification for interfacing with databases in R. @@ -161,21 +243,21 @@ Azure Data Explorer is quite different to the SQL databases that DBI targets, wh library(DBI) Samples <- dbConnect(AzureKusto(), - server="https://help.kusto.windows.net", - database="Samples") + server = "https://help.kusto.windows.net", + database = "Samples" +) dbListTables(Samples) -## [1] "StormEvents" "demo_make_series1" "demo_series2" +## [1] "StormEvents" "demo_make_series1" "demo_series2" ## [4] "demo_series3" "demo_many_series1" dbExistsTable(Samples, "StormEvents") -##[1] TRUE +## [1] TRUE dbGetQuery(Samples, "StormEvents | summarize ct = count()") ## ct ## 1 59066 - ```