Merge pull request #32 from microsoft/feature/network-community-detection

Feature: add Community Detection capability for network plots
2021-01-06 12:13:28 +00:00 · 2021-01-06 12:13:28 +00:00 · 2118a61a1a
--- a/3
+++ b/3
@ -63,5 +63,6 @@ RoxygenNote: 7.1.1
 Roxygen: list(markdown = TRUE)
 VignetteBuilder: knitr
 Suggests: 
-    extrafont
+    extrafont,
    leiden
 Language: en-US
--- a/8
+++ b/8
@ -49,6 +49,7 @@ export(create_line)
 export(create_line_asis)
 export(create_period_scatter)
 export(create_rank)
 export(create_sankey)
 export(create_scatter)
 export(create_stacked)
 export(create_trend)
@ -105,7 +106,10 @@ export(meetingtype_sum)
 export(meetingtype_summary)
 export(mgrcoatt_dist)
 export(mgrrel_matrix)
 export(network_describe)
 export(network_g2g)
 export(network_leiden)
 export(network_louvain)
 export(network_p2p)
 export(one2one_dist)
 export(one2one_fizz)
@ -150,16 +154,18 @@ export(workpatterns_rank)
 export(wrap)
 import(DT)
 import(Information)
 import(data.table)
 import(dplyr)
 import(ggplot2)
 import(ggraph)
 import(reshape2)
 import(scales)
 import(tidyr)
 import(tidyselect)
 importFrom(data.table,"%between%")
 importFrom(data.table,"%like%")
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)
 importFrom(data.table,rbindlist)
 importFrom(dplyr,`%>%`)
 importFrom(dplyr,mutate_if)
 importFrom(grDevices,rainbow)
--- a/R/create_sankey.R
+++ b/R/create_sankey.R
@ -0,0 +1,72 @@
 # --------------------------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License. See LICENSE.txt in the project root for license information.
 # --------------------------------------------------------------------------------------------
 #' @title Create a sankey chart from a two-column count table
 #'
 #' @description
 #' Create a networkD3 style sankey chart based on a long count table
 #' with two variables. The input data should have three columns, where
 #' each row is a unique group:
 #'   1. Variable 1
 #'   2. Variable 2
 #'   3. Count
 #'
 #' @param data Data frame of the long count table.
 #' @param var1 String containing the name of the variable to be shown on the left.
 #' @param var2 String containing the name of the variable to be shown on the right.
 #' @param count String containing the name of the count variable. Defaults to "n".
 #'
 #' @return
 #' The object returned by `networkD3::sankeyNetwork()`.
 #'
 #' @import dplyr
 #'
 #' @examples
 #' \donttest{
 #' sq_data %>%
 #'   dplyr::count(Organization, FunctionType) %>%
 #'   create_sankey(var1 = "Organization", var2 = "FunctionType")
 #' }
 #'
 #' @export
 create_sankey <- function(data, var1, var2, count = "n"){

  ## Fail early with a readable message if a referenced column is absent
  missing_cols <- setdiff(c(var1, var2, count), names(data))
  if(length(missing_cols) > 0){
    stop("The following columns are missing from `data`: ",
         paste(missing_cols, collapse = ", "))
  }

  ## Extract the labels by name with `[[`, rather than copying them into
  ## temporary `pre_group` / `group` columns, so that no input column can be
  ## overwritten and the `count` string is never mistaken for a column that
  ## happens to be called `count`.
  source_labels <- as.character(data[[var1]])
  ## A trailing space keeps right-hand labels distinct from left-hand labels
  ## when the same value occurs on both sides
  target_labels <- paste0(data[[var2]], " ")

  source_names <- unique(source_labels)
  target_names <- unique(target_labels)

  ## Set up `nodes`: left-hand nodes first, then right-hand nodes,
  ## with a zero-based index in `node`
  nodes <- data.frame(name = c(source_names, target_names),
                      stringsAsFactors = FALSE)
  nodes$node <- seq_len(nrow(nodes)) - 1L # Safe when there are no rows

  ## Set up `links`: IDs are looked up within each side separately, so a
  ## left-hand label can never be resolved to a right-hand node or vice versa
  links <- data.frame(source = source_labels,
                      target = target_labels,
                      value = data[[count]],
                      stringsAsFactors = FALSE)
  links$IDsource <- match(links$source, source_names) - 1L
  links$IDtarget <- length(source_names) + match(links$target, target_names) - 1L

  networkD3::sankeyNetwork(Links = links,
                           Nodes = nodes,
                           Source = 'IDsource', # Zero-based index of the left-hand node
                           Target = 'IDtarget', # Zero-based index of the right-hand node
                           Value = 'value',
                           NodeID = 'name',
                           units="count",
                           sinksRight = FALSE)
 }
--- a/R/network_describe.R
+++ b/R/network_describe.R
@ -0,0 +1,125 @@
 # --------------------------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License. See LICENSE.txt in the project root for license information.
 # --------------------------------------------------------------------------------------------
 #' @title Uncover HR attributes which best represent a population for a Person to Person query
 #'
 #' @author Tannaz Sattari Tabrizi <Tannaz.Sattari@microsoft.com>
 #'
 #' @description
 #' Returns a data frame that gives a percentage of the group combinations that best represent
 #' the population provided. Uses a person to person query.
 #'
 #' @details
 #' The rows of `data` are first de-duplicated on all columns starting with
 #' `TieOrigin_`. For every ordered selection of one, two and three of the
 #' requested attributes, the most frequent value combination is identified
 #' and its share of the de-duplicated rows is reported.
 #'
 #' @param data Data frame for a person to person query.
 #' @param hrvar Character vector of length 3 containing the HR attributes to be used.
 #' Each attribute must be present in `data` as a column prefixed with `TieOrigin_`.
 #'
 #' @return
 #' A data frame with one row per ordered attribute selection. The columns
 #' `feature_1` to `feature_3` hold the attribute column names, the columns
 #' `feature_1_value` to `feature_3_value` hold the most frequent values, and
 #' `Percentage` holds the share (between 0 and 1) of that combination.
 #'
 #' @import dplyr
 #' @import tidyr
 #'
 #' @export
 network_describe <- function(data, hrvar = c("Organization", "LevelDesignation", "FunctionType")){

  if(!is.character(hrvar) || length(hrvar) != 3){
    stop("Please provide a character vector of length 3 for `hrvar`")
  }

  ## Fail early with a readable message if an attribute column is absent
  wanted_cols <- paste0("TieOrigin_", hrvar)
  missing_cols <- setdiff(wanted_cols, names(data))
  if(length(missing_cols) > 0){
    stop("The following columns are missing from `data`: ",
         paste(missing_cols, collapse = ", "))
  }

  ## De-duplicated data containing only TieOrigins
  filtered_data <- unique(select(data, starts_with("TieOrigin_")))

  ## Select features
  features <- select(filtered_data, all_of(unique(wanted_cols)))
  feature_cols <- colnames(features)

  ## Most frequent value combination for the columns named in `cols`,
  ## returned as a single row (or no rows when `features` is empty)
  top_combination <- function(cols){
    top <-
      features %>%
      group_by(across(all_of(cols))) %>%
      summarise(count = n(), .groups = "drop") %>%
      mutate(percentage = count / sum(count, na.rm = TRUE)) %>%
      arrange(desc(percentage)) %>%
      slice(1) # Extract first row

    out <- list()
    for(i in seq_along(cols)){
      out[[paste0("feature_", i)]] <- rep(cols[[i]], nrow(top))
      out[[paste0("feature_", i, "_value")]] <- top[[cols[[i]]]]
    }
    out[["Percentage"]] <- top[["percentage"]]
    as_tibble(out)
  }

  ## All ordered selections of `k` distinct feature columns. Both orderings of
  ## a pair (and all orderings of a triple) are kept, so the same combination
  ## appears more than once in the output with its columns permuted.
  ordered_sets <- function(k){
    grid <- expand.grid(rep(list(feature_cols), k), stringsAsFactors = FALSE)
    sets <- lapply(seq_len(nrow(grid)),
                   function(i) unlist(grid[i, ], use.names = FALSE))
    Filter(function(cols) anyDuplicated(cols) == 0L, sets)
  }

  ## Feature sets of size 1, 2 and 3, in that order
  c(ordered_sets(1), ordered_sets(2), ordered_sets(3)) %>%
    lapply(top_combination) %>%
    bind_rows() %>%
    select(starts_with("feature"), Percentage)
 }
--- a/R/network_leiden.R
+++ b/R/network_leiden.R
@ -0,0 +1,225 @@
 # --------------------------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License. See LICENSE.txt in the project root for license information.
 # --------------------------------------------------------------------------------------------
 #' @title Implement the Leiden community detection on a Person to Person network query
 #'
 #' @description
 #' Take a P2P network query and implement the Leiden community detection method. To run
 #' this function, you will require all the pre-requisites of the **leiden** package installed,
 #' which includes Python and **reticulate**.
 #'
 #' @param data Data frame containing a Person to Person query. The columns
 #' `TieOrigin_PersonId`, `TieDestination_PersonId` and `StrongTieScore` are used, as well as the
 #' `TieOrigin_` and `TieDestination_` columns for `hrvar`.
 #' @param hrvar String containing the HR attribute to be matched in the dataset.
 #' @param bg_fill String to specify background fill colour.
 #' @param font_col String to specify font and link colour.
 #' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
 #' @param path File path for saving the PDF output. Defaults to "network_p2p_leiden".
 #' Since the network outputs are computationally intensive, the default behaviour is to save time by
 #' saving the plot output directly as a PDF in the specified path. To override this behaviour and return
 #' a plot object instead, you can pass `NULL` to `path`. What is passed to `path` makes no difference
 #' if returning anything other than "plot-leiden" or "plot-hrvar".
 #'
 #' @param algorithm String to specify the node placement algorithm to be used. Defaults to "mds" to perform
 #' a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
 #' See <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full list of options.
 #'
 #' @param res Resolution parameter to be passed to `leiden::leiden()`. Defaults to 0.5.
 #' @param desc_hrvar Character vector of length 3 containing the HR attributes to use when returning the
 #' "describe" output. See `network_describe()`.
 #' @param return String specifying what output to return. Valid return options include:
 #'   - 'plot-leiden': return a network plot coloured by leiden communities.
 #'   - 'plot-hrvar': return a network plot coloured by HR attribute.
 #'   - 'plot-sankey': return a sankey plot combining communities and HR attribute.
 #'   - 'table': return a vertex summary table with counts in communities and HR attribute.
 #'   - 'data': return a vertex data file that matches vertices with communities and HR attributes.
 #'   - 'describe': return a list of data frames which describe each of the identified communities.
 #'   - 'network': return igraph object.
 #'
 #' @return
 #' Depends on `return`, see above. For the two network plot options, the plot is saved
 #' with `ggsave()` as a PDF named from `path` followed by the output of `tstamp()` unless
 #' `path` is `NULL`, in which case the plot object itself is returned.
 #'
 #' @import dplyr
 #'
 #' @export
 network_leiden <- function(data,
                           hrvar,
                           bg_fill = "#000000",
                           font_col = "#FFFFFF",
                           algorithm = "mds",
                           path = "network_p2p_leiden",
                           node_alpha = 0.8,
                           res = 0.5,
                           desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
                           return){

  ## Validate `return` first so that an invalid or missing value fails
  ## before the computationally intensive clustering is run
  valid_returns <- c("plot-leiden",
                     "plot-hrvar",
                     "plot-sankey",
                     "table",
                     "data",
                     "describe",
                     "network")

  if(missing(return) ||
     !is.character(return) ||
     length(return) != 1 ||
     !(return %in% valid_returns)){
    stop("Please enter a valid input for `return`.")
  }

  ## leiden is an optional (suggested) dependency
  if(!requireNamespace("leiden", quietly = TRUE)){
    stop("Package \"leiden\" is required to run `network_leiden()`. Please install it.")
  }

  ## Set variables
  TO_hrvar <- paste0("TieOrigin_", hrvar)
  TD_hrvar <- paste0("TieDestination_", hrvar)

  ## Set edges df
  edges <-
    data %>%
    select(from = "TieOrigin_PersonId",
           to = "TieDestination_PersonId",
           weight = "StrongTieScore")

  ## Vertices data frame to provide meta-data: one row per person and
  ## HR attribute value, origins first and destinations second
  vert_ft <-
    rbind(
      # TieOrigin
      data %>%
        select(node = "TieOrigin_PersonId", !!sym(hrvar) := all_of(TO_hrvar)),

      # TieDestination
      data %>%
        select(node = "TieDestination_PersonId", !!sym(hrvar) := all_of(TD_hrvar))
    )

  ## Create igraph object
  g_raw <-
    igraph::graph_from_data_frame(edges,
                                  directed = TRUE, # Directed, but FALSE for visualization
                                  vertices = unique(vert_ft)) # remove duplicates

  ## Return a numeric vector of partitions / clusters / modules
  ## Set a low resolution parameter to have fewer groups
  ld <- leiden::leiden(g_raw, resolution_parameter = res) # create partitions

  ## Add cluster
  g <-
    g_raw %>%
    # Add leiden partitions to graph object
    igraph::set_vertex_attr("cluster", value = as.character(ld)) %>%
    igraph::simplify()

  ## Create vertex table
  vertex_tb <-
    g %>%
    igraph::vertex_attr() %>%
    as_tibble()

  ## Build a network plot coloured by the vertex attribute `colour_var`.
  ## The layout is computed here, i.e. only when a network plot is requested.
  ## Saves a PDF unless `path` is NULL, in which case the plot is returned.
  plot_network <- function(colour_var, plot_title, plot_subtitle){

    plot_output <-
      ggraph::ggraph(g, layout = "igraph", algorithm = algorithm) +
      ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
      ggraph::geom_node_point(aes(colour = !!sym(colour_var)),
                              alpha = node_alpha,
                              pch = 16) +
      theme_void() +
      theme(legend.position = "bottom",
            legend.background = element_rect(fill = bg_fill),
            plot.background = element_rect(fill = bg_fill),
            text = element_text(colour = font_col),
            axis.line = element_blank()) +
      labs(title = plot_title,
           subtitle = plot_subtitle,
           y = "",
           x = "")

    # Default PDF output unless NULL supplied to path
    if(is.null(path)){
      plot_output
    } else {
      ggsave(paste0(path, tstamp(), ".pdf"),
             plot = plot_output,
             width = 16,
             height = 9)
    }
  }

  ## Return
  if(return == "plot-leiden"){

    plot_network(colour_var = "cluster",
                 plot_title = "Person to person collaboration with Community Detection",
                 plot_subtitle = "Based on Leiden algorithm and Strong Tie Score")

  } else if(return == "plot-hrvar"){

    plot_network(colour_var = hrvar,
                 plot_title = "Person to person collaboration",
                 plot_subtitle = paste0("Showing ", hrvar))

  } else if(return == "table"){

    vertex_tb %>%
      count(!!sym(hrvar), cluster)

  } else if(return == "data"){

    vertex_tb

  } else if(return == "network"){

    g

  } else if(return == "plot-sankey"){

    create_sankey(data = vertex_tb %>% count(!!sym(hrvar), cluster),
                  var1 = hrvar,
                  var2 = "cluster",
                  count = "n")

  } else if(return == "describe"){

    describe_tb <-
      vertex_tb %>%
      left_join(select(data, starts_with("TieOrigin_")),
                by = c("name" = "TieOrigin_PersonId")) %>%
      # The join drops the `TieOrigin_PersonId` key column. Restore it so that
      # `network_describe()` de-duplicates per person rather than per
      # combination of HR attribute values.
      mutate(TieOrigin_PersonId = name)

    desc_str <-
      describe_tb %>%
      pull(cluster) %>%
      unique()

    desc_str %>%
      purrr::map(function(x){
        describe_tb %>%
          filter(cluster == x) %>%
          network_describe(hrvar = desc_hrvar)
      }) %>%
      setNames(nm = desc_str)
  }
 }
--- a/R/network_louvain.R
+++ b/R/network_louvain.R
@ -0,0 +1,222 @@
 # --------------------------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License. See LICENSE.txt in the project root for license information.
 # --------------------------------------------------------------------------------------------
 #' @title Implement the Louvain community detection on a Person to Person network query
 #'
 #' @description
 #' Take a P2P network query and implement the Louvain community detection method. The
 #' **igraph** implementation of the Louvain method is used.
 #'
 #' @param data Data frame containing a Person to Person query. The columns
 #' `TieOrigin_PersonId`, `TieDestination_PersonId` and `StrongTieScore` are used, as well as the
 #' `TieOrigin_` and `TieDestination_` columns for `hrvar`.
 #' @param hrvar String containing the HR attribute to be matched in the dataset.
 #' @param bg_fill String to specify background fill colour.
 #' @param font_col String to specify font and link colour.
 #' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
 #' @param algorithm String to specify the node placement algorithm to be used. Defaults to "mds" to perform
 #' a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
 #' See <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full list of options.
 #' @param path File path for saving the PDF output. Defaults to "network_p2p_louvain".
 #' Since the network outputs are computationally intensive, the default behaviour is to save time by
 #' saving the plot output directly as a PDF in the specified path. To override this behaviour and return
 #' a plot object instead, you can pass `NULL` to `path`. What is passed to `path` makes no difference
 #' if returning anything other than "plot-louvain" or "plot-hrvar".
 #' @param desc_hrvar Character vector of length 3 containing the HR attributes to use when returning the
 #' "describe" output. See `network_describe()`.
 #'
 #' @param return String specifying what output to return. Valid return options include:
 #'   - 'plot-louvain': return a network plot coloured by louvain communities.
 #'   - 'plot-hrvar': return a network plot coloured by HR attribute.
 #'   - 'plot-sankey': return a sankey plot combining communities and HR attribute.
 #'   - 'table': return a vertex summary table with counts in communities and HR attribute.
 #'   - 'data': return a vertex data file that matches vertices with communities and HR attributes.
 #'   - 'describe': returns a list of data frames which describe each of the identified communities.
 #'   - 'network': return igraph object.
 #'
 #' @return
 #' Depends on `return`, see above. For the two network plot options, the plot is saved
 #' with `ggsave()` as a PDF named from `path` followed by the output of `tstamp()` unless
 #' `path` is `NULL`, in which case the plot object itself is returned.
 #'
 #' @import ggraph
 #' @import dplyr
 #'
 #' @export
 network_louvain <- function(data,
                            hrvar,
                            bg_fill = "#000000",
                            font_col = "#FFFFFF",
                            node_alpha = 0.8,
                            algorithm = "mds",
                            path = "network_p2p_louvain",
                            desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
                            return){

  ## Validate `return` first so that an invalid or missing value fails
  ## before the computationally intensive clustering is run
  valid_returns <- c("plot-louvain",
                     "plot-hrvar",
                     "plot-sankey",
                     "table",
                     "data",
                     "describe",
                     "network")

  if(missing(return) ||
     !is.character(return) ||
     length(return) != 1 ||
     !(return %in% valid_returns)){
    stop("Please enter a valid input for `return`.")
  }

  ## Set variables
  TO_hrvar <- paste0("TieOrigin_", hrvar)
  TD_hrvar <- paste0("TieDestination_", hrvar)

  ## Set edges df
  edges <-
    data %>%
    select(from = "TieOrigin_PersonId",
           to = "TieDestination_PersonId",
           weight = "StrongTieScore")

  ## Vertices data frame to provide meta-data: one row per person and
  ## HR attribute value, origins first and destinations second
  vert_ft <-
    rbind(
      # TieOrigin
      data %>%
        select(node = "TieOrigin_PersonId", !!sym(hrvar) := all_of(TO_hrvar)),

      # TieDestination
      data %>%
        select(node = "TieDestination_PersonId", !!sym(hrvar) := all_of(TD_hrvar))
    )

  ## Create igraph object
  g_raw <-
    igraph::graph_from_data_frame(edges,
                                  directed = FALSE, # Set to undirected for clustering
                                  vertices = unique(vert_ft)) # remove duplicates

  ## Run the igraph implementation of the Louvain method on the raw graph
  lc <- igraph::cluster_louvain(g_raw)

  ## Add cluster
  g <-
    g_raw %>%
    # Add louvain partitions to graph object
    igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
    igraph::simplify()

  ## Create vertex table
  vertex_tb <-
    g %>%
    igraph::vertex_attr() %>%
    as_tibble()

  ## Build a network plot coloured by the vertex attribute `colour_var`.
  ## The layout is computed here, i.e. only when a network plot is requested.
  ## Saves a PDF unless `path` is NULL, in which case the plot is returned.
  plot_network <- function(colour_var, plot_title, plot_subtitle){

    plot_output <-
      ggraph::ggraph(g, layout = "igraph", algorithm = algorithm) +
      ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
      ggraph::geom_node_point(aes(colour = !!sym(colour_var)),
                              alpha = node_alpha,
                              pch = 16) +
      theme_void() +
      theme(legend.position = "bottom",
            legend.background = element_rect(fill = bg_fill),
            plot.background = element_rect(fill = bg_fill),
            text = element_text(colour = font_col),
            axis.line = element_blank()) +
      labs(title = plot_title,
           subtitle = plot_subtitle,
           y = "",
           x = "")

    # Default PDF output unless NULL supplied to path
    if(is.null(path)){
      plot_output
    } else {
      ggsave(paste0(path, tstamp(), ".pdf"),
             plot = plot_output,
             width = 16,
             height = 9)
    }
  }

  ## Return
  if(return == "plot-louvain"){

    plot_network(colour_var = "cluster",
                 plot_title = "Person to person collaboration with Community Detection",
                 plot_subtitle = "Based on Louvain algorithm and Strong Tie Score")

  } else if(return == "plot-hrvar"){

    plot_network(colour_var = hrvar,
                 plot_title = "Person to person collaboration",
                 plot_subtitle = paste0("Showing ", hrvar))

  } else if(return == "table"){

    vertex_tb %>%
      count(!!sym(hrvar), cluster)

  } else if(return == "data"){

    vertex_tb

  } else if(return == "network"){

    g

  } else if(return == "plot-sankey"){

    create_sankey(data = vertex_tb %>% count(!!sym(hrvar), cluster),
                  var1 = hrvar,
                  var2 = "cluster",
                  count = "n")

  } else if(return == "describe"){

    describe_tb <-
      vertex_tb %>%
      left_join(select(data, starts_with("TieOrigin_")),
                by = c("name" = "TieOrigin_PersonId")) %>%
      # The join drops the `TieOrigin_PersonId` key column. Restore it so that
      # `network_describe()` de-duplicates per person rather than per
      # combination of HR attribute values.
      mutate(TieOrigin_PersonId = name)

    desc_str <-
      describe_tb %>%
      pull(cluster) %>%
      unique()

    desc_str %>%
      purrr::map(function(x){
        describe_tb %>%
          filter(cluster == x) %>%
          network_describe(hrvar = desc_hrvar)
      }) %>%
      setNames(nm = desc_str)
  }
 }
--- a/R/pairwise_count.R
+++ b/R/pairwise_count.R
@ -5,16 +5,25 @@
 #' within `tm_cooc()`.
 #'
 #' @param data Data frame output from `tm_clean()`.
-#' @param id String to represent the id variable. Defaults to "word".
+#' @param id String to represent the id variable. Defaults to "line".
 #' @param word String to represent the word variable. Defaults to "word".
 #'
-#' @import data.table
+#' @importFrom data.table ":=" "%like%" "%between%" rbindlist as.data.table
 #'
 #' @examples
 #' td <- data.frame(line = c(1, 1, 2, 2),
 #'                  word = c("work", "meeting", "catch", "up"))
 #'
 #' pairwise_count(td, id = "line", word = "word")
 #'
 #' @export
 pairwise_count <- function(data,
                           id = "line",
                           word = "word"){
  # Make sure data.table knows we know we're using it
  .datatable.aware = TRUE
  data <-
    data %>%
    dplyr::rename(word := !!sym(word),
--- a/man/create_sankey.Rd
+++ b/man/create_sankey.Rd
@ -0,0 +1,35 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/create_sankey.R
 \name{create_sankey}
 \alias{create_sankey}
 \title{Create a sankey chart from a two-column count table}
 \usage{
 create_sankey(data, var1, var2, count = "n")
 }
 \arguments{
 \item{data}{Data frame of the long count table.}
 \item{var1}{String containing the name of the variable to be shown on the left.}
 \item{var2}{String containing the name of the variable to be shown on the right.}
 \item{count}{String containing the name of the count variable.}
 }
 \description{
 Create a networkD3 style sankey chart based on a long count table
 with two variables. The input data should have three columns, where
 each row is a unique group:
 \enumerate{
 \item Variable 1
 \item Variable 2
 \item Count
 }
 }
 \examples{
 \donttest{
 sq_data \%>\%
  dplyr::count(Organization, FunctionType) \%>\%
  create_sankey(var1 = "Organization", var2 = "FunctionType")
 }
 }
--- a/man/network_describe.Rd
+++ b/man/network_describe.Rd
@ -0,0 +1,23 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/network_describe.R
 \name{network_describe}
 \alias{network_describe}
 \title{Uncover HR attributes which best represent a population for a Person to Person query}
 \usage{
 network_describe(
  data,
  hrvar = c("Organization", "LevelDesignation", "FunctionType")
 )
 }
 \arguments{
 \item{data}{Data frame for a person to person query.}
 \item{hrvar}{Character vector of length 3 containing the HR attributes to be used.}
 }
 \description{
 Returns a data frame that gives a percentage of the group combinations that best represent
 the population provided. Uses a person to person query.
 }
 \author{
 Tannaz Sattari Tabrizi \href{mailto:Tannaz.Sattari@microsoft.com}{Tannaz.Sattari@microsoft.com}
 }
--- a/man/network_leiden.Rd
+++ b/man/network_leiden.Rd
@ -0,0 +1,61 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/network_leiden.R
 \name{network_leiden}
 \alias{network_leiden}
 \title{Implement the Leiden community detection on a Person to Person network query}
 \usage{
 network_leiden(
  data,
  hrvar,
  bg_fill = "#000000",
  font_col = "#FFFFFF",
  algorithm = "mds",
  path = "network_p2p_leiden",
  node_alpha = 0.8,
  res = 0.5,
  desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
  return
 )
 }
 \arguments{
 \item{data}{Data frame containing a Person to Person query.}
 \item{hrvar}{String containing the HR attribute to be matched in the dataset.}
 \item{bg_fill}{String to specify background fill colour.}
 \item{font_col}{String to specify font and link colour.}
 \item{algorithm}{String to specify the node placement algorithm to be used. Defaults to "mds" to perform
 a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
 See \url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full list of options.}
 \item{path}{File path for saving the PDF output. Defaults to "network_p2p_leiden".
 Since the network outputs are computationally intensive, the default behaviour is to save time by
 saving the plot output directly as a PDF in the specified path. To override this behaviour and return
 a plot object instead, you can pass \code{NULL} to \code{path}. What is passed to \code{path} makes no difference
 if returning anything other than "plot-leiden" or "plot-hrvar".}
 \item{node_alpha}{A numeric value between 0 and 1 to specify the transparency of the nodes.}
 \item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults to 0.5.}
 \item{desc_hrvar}{Character vector of length 3 containing the HR attributes to use when returning the
 "describe" output. See \code{network_describe()}.}
 \item{return}{String specifying what output to return. Valid return options include:
 \itemize{
 \item 'plot-leiden': return a network plot coloured by leiden communities.
 \item 'plot-hrvar': return a network plot coloured by HR attribute.
 \item 'plot-sankey': return a sankey plot combining communities and HR attribute.
 \item 'table': return a vertex summary table with counts in communities and HR attribute.
 \item 'data': return a vertex data file that matches vertices with communities and HR attributes.
 \item 'describe': return a list of data frames which describe each of the identified communities.
 \item 'network': return igraph object.
 }}
 }
 \description{
 Take a P2P network query and implement the Leiden community detection method. To run
 this function, you will require all the pre-requisites of the \strong{leiden} package installed,
 which includes Python and \strong{reticulate}.
 }
--- a/man/network_louvain.Rd
+++ b/man/network_louvain.Rd
@ -0,0 +1,57 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/network_louvain.R
 \name{network_louvain}
 \alias{network_louvain}
 \title{Implement the Louvain community detection on a Person to Person network query}
 \usage{
 network_louvain(
  data,
  hrvar,
  bg_fill = "#000000",
  font_col = "#FFFFFF",
  node_alpha = 0.8,
  algorithm = "mds",
  path = "network_p2p_louvain",
  desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
  return
 )
 }
 \arguments{
 \item{data}{Data frame containing a Person to Person query.}
 \item{hrvar}{String containing the HR attribute to be matched in the dataset.}
 \item{bg_fill}{String to specify background fill colour.}
 \item{font_col}{String to specify font and link colour.}
 \item{node_alpha}{A numeric value between 0 and 1 to specify the transparency of the nodes.}
 \item{algorithm}{String to specify the node placement algorithm to be used. Defaults to "mds" to perform
 a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
 See \url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full list of options.}
 \item{path}{File path for saving the PDF output. Defaults to "network_p2p_louvain".
 Since the network outputs are computationally intensive, the default behaviour is to save time by
 saving the plot output directly as a PDF in the specified path. To override this behaviour and return
 a plot object instead, you can pass \code{NULL} to \code{path}. What is passed to \code{path} makes no difference
 if returning anything other than "plot-louvain" or "plot-hrvar".}
 \item{desc_hrvar}{Character vector of length 3 containing the HR attributes to use when returning the
 "describe" output. See \code{network_describe()}.}
 \item{return}{String specifying what output to return. Valid return options include:
 \itemize{
 \item 'plot-louvain': return a network plot coloured by louvain communities.
 \item 'plot-hrvar': return a network plot coloured by HR attribute.
 \item 'plot-sankey': return a sankey plot combining communities and HR attribute.
 \item 'table': return a vertex summary table with counts in communities and HR attribute.
 \item 'data': return a vertex data file that matches vertices with communities and HR attributes.
 \item 'describe': returns a list of data frames which describe each of the identified communities.
 \item 'network': return igraph object.
 }}
 }
 \description{
 Take a P2P network query and implement the Louvain community detection method. The
 \strong{igraph} implementation of the Louvain method is used.
 }
--- a/man/pairwise_count.Rd
+++ b/man/pairwise_count.Rd
@ -9,7 +9,7 @@ pairwise_count(data, id = "line", word = "word")
 \arguments{
 \item{data}{Data frame output from \code{tm_clean()}.}
-\item{id}{String to represent the id variable. Defaults to "word".}
+\item{id}{String to represent the id variable. Defaults to "line".}
 \item{word}{String to represent the word variable. Defaults to "word".}
 }
@ -18,3 +18,10 @@ This is a \strong{data.table} implementation that mimics the output of
 \code{widyr::pairwise_count()} to reduce package dependency. This is used internally
 within \code{tm_cooc()}.
 }
 \examples{
 td <- data.frame(line = c(1, 1, 2, 2),
                 word = c("work", "meeting", "catch", "up"))
 pairwise_count(td, id = "line", word = "word")
 }