зеркало из https://github.com/microsoft/wpa.git
feat: refresh network_p2p and remove dependent functions
This commit is contained in:
Родитель
fd68a1d82b
Коммит
3576f05e48
|
@ -211,9 +211,6 @@ importFrom(dplyr,`%>%`)
|
|||
importFrom(dplyr,across)
|
||||
importFrom(dplyr,mutate)
|
||||
importFrom(dplyr,mutate_if)
|
||||
importFrom(grDevices,rainbow)
|
||||
importFrom(graphics,legend)
|
||||
importFrom(graphics,par)
|
||||
importFrom(htmltools,HTML)
|
||||
importFrom(igraph,graph_from_data_frame)
|
||||
importFrom(magrittr,"%>%")
|
||||
|
|
|
@ -1,105 +0,0 @@
|
|||
# --------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
|
||||
# --------------------------------------------------------------------------------------------
|
||||
|
||||
#' @title Implement the Leiden community detection on a Person to Person network
|
||||
#' query
|
||||
#'
|
||||
#' @description
|
||||
#' `r lifecycle::badge('experimental')`
|
||||
#'
|
||||
#' Take a P2P network query and implement the Leiden community detection method.
|
||||
#' To run this function, you will require all the pre-requisites of the
|
||||
#' 'leiden' package installed, which includes Python and 'reticulate'.
|
||||
#'
|
||||
#' @inheritParams network_p2p
|
||||
#'
|
||||
#' @param return
|
||||
#' String specifying what output to return. Defaults to "plot-leiden". Valid
|
||||
#' return options include:
|
||||
#' - `'plot-leiden'`: return a network plot coloured by leiden communities,
|
||||
#' saving a PDF to path.
|
||||
#' - `'plot-hrvar'`: return a network plot coloured by HR attribute, saving a
|
||||
#' PDF to path.
|
||||
#' - `'plot-sankey'`: return a sankey plot combining communities and HR
|
||||
#' attribute.
|
||||
#' - `'table'`: return a vertex summary table with counts in communities and
|
||||
#' HR attribute.
|
||||
#' - `'data'`: return a vertex data file that matches vertices with
|
||||
#' communities and HR attributes.
|
||||
#' - `'describe'`: return a list of data frames which describe each of the
|
||||
#' identified communities. The first data frame is a summary table of all the
|
||||
#' communities.
|
||||
#' - `'network'`: return 'igraph' object.
|
||||
#'
|
||||
#' @return See `return`.
|
||||
#'
|
||||
#' @family Network
|
||||
#'
|
||||
#' @section Simulating and running Leiden Community Detection:
|
||||
#'
|
||||
#' Below is an example on how to simulate a network and run the function.
|
||||
#'
|
||||
#' ```
|
||||
#' # Simulate a small person-to-person dataset
|
||||
#' p2p_data <- p2p_data_sim(size = 50)
|
||||
#'
|
||||
#' # Return leiden, console, plot
|
||||
#' p2p_data %>%
|
||||
#' network_leiden(path = NULL,
|
||||
#' return = "plot")
|
||||
#' ```
|
||||
#'
|
||||
#'
|
||||
#' @export
|
||||
network_leiden <- function(data,
                           hrvar = "Organization",
                           bg_fill = "#000000",
                           font_col = "#FFFFFF",
                           algorithm = "mds",
                           path = "network_p2p_leiden",
                           node_alpha = 0.8,
                           res = 0.5,
                           seed = 1,
                           desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
                           return = "plot-leiden",
                           size_threshold = 5000){

  # Thin wrapper around `network_p2p()` that keeps the legacy `"plot-*"`
  # values of `return` working.

  # Does the requested output use the legacy "plot-" prefix?
  uses_plot_prefix <- grepl(pattern = "plot-", x = return)

  # Strip the prefix when present; otherwise keep the value untouched.
  trimmed <- if (uses_plot_prefix) {
    gsub(pattern = "plot-", replacement = "", x = return)
  } else {
    return
  }

  # "plot-leiden" / "plot-hrvar" select what the plot is coloured by; any
  # other prefixed value (e.g. "plot-sankey") is an output type in itself.
  selects_display <- uses_plot_prefix && trimmed %in% c("leiden", "hrvar")

  # Colouring defaults to the Leiden communities.
  display <- if (selects_display) trimmed else "leiden"
  output_type <- if (selects_display) "plot" else trimmed

  # Delegate to the general person-to-person network function.
  network_p2p(
    data = data,
    hrvar = hrvar,
    display = display,
    return = output_type,
    path = path,
    desc_hrvar = desc_hrvar,
    bg_fill = bg_fill,
    font_col = font_col,
    node_alpha = node_alpha,
    algorithm = algorithm,
    size_threshold = size_threshold,
    res = res,   # Leiden specific
    seed = seed  # Leiden specific
  )
}
|
|
@ -1,95 +0,0 @@
|
|||
# --------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
|
||||
# --------------------------------------------------------------------------------------------
|
||||
|
||||
#' @title Implement the Louvain community detection on a Person to Person
|
||||
#' network query
|
||||
#'
|
||||
#' @description
|
||||
#' `r lifecycle::badge('experimental')`
|
||||
#'
|
||||
#' Take a P2P network query and implement the Louvain community detection
|
||||
#' method. The 'igraph' implementation of the Louvain method is used.
|
||||
#'
|
||||
#' @inheritParams network_p2p
|
||||
#'
|
||||
#' @param return
|
||||
#' String specifying what output to return. Defaults to "plot-louvain". Valid
|
||||
#' return options include:
|
||||
#' - `'plot-louvain'`: return a network plot coloured by Louvain communities,
|
||||
#' saving a PDF to path.
|
||||
#' - `'plot-hrvar'`: return a network plot coloured by HR attribute, saving a
|
||||
#' PDF to path.
|
||||
#' - `'plot-sankey'`: return a sankey plot combining communities and HR
|
||||
#' attribute.
|
||||
#' - `'table'`: return a vertex summary table with counts in communities and
|
||||
#' HR attribute.
|
||||
#' - `'data'`: return a vertex data file that matches vertices with
|
||||
#' communities and HR attributes.
|
||||
#' - `'describe'`: return a list of data frames which describe each of the
|
||||
#' identified communities. The first data frame is a summary table of all the
|
||||
#' communities.
|
||||
#' - `'network'`: return 'igraph' object.
|
||||
#'
|
||||
#' @return See `return`.
|
||||
#'
|
||||
#' @family Network
|
||||
#'
|
||||
#' @examples
|
||||
#' # Simulate a small person-to-person dataset
|
||||
#' p2p_data <- p2p_data_sim(size = 50)
|
||||
#'
|
||||
#' # Return louvain, console, plot
|
||||
#' p2p_data %>%
|
||||
#' network_louvain(path = NULL,
|
||||
#' return = "plot")
|
||||
#'
|
||||
#' @export
|
||||
network_louvain <- function(data,
                            hrvar = "Organization",
                            bg_fill = "#000000",
                            font_col = "#FFFFFF",
                            node_alpha = 0.8,
                            algorithm = "mds",
                            path = "network_p2p_louvain",
                            desc_hrvar = c("Organization",
                                           "LevelDesignation",
                                           "FunctionType"),
                            return = "plot-louvain",
                            size_threshold = 5000){

  # Thin wrapper around `network_p2p()` that keeps the legacy `"plot-*"`
  # values of `return` working.

  # Does the requested output use the legacy "plot-" prefix?
  uses_plot_prefix <- grepl(pattern = "plot-", x = return)

  # Strip the prefix when present; otherwise keep the value untouched.
  trimmed <- if (uses_plot_prefix) {
    gsub(pattern = "plot-", replacement = "", x = return)
  } else {
    return
  }

  # "plot-louvain" / "plot-hrvar" select what the plot is coloured by; any
  # other prefixed value (e.g. "plot-sankey") is an output type in itself.
  selects_display <- uses_plot_prefix && trimmed %in% c("louvain", "hrvar")

  # Colouring defaults to the Louvain communities.
  display <- if (selects_display) trimmed else "louvain"
  output_type <- if (selects_display) "plot" else trimmed

  # Delegate to the general person-to-person network function.
  network_p2p(
    data = data,
    hrvar = hrvar,
    display = display,
    return = output_type,
    path = path,
    desc_hrvar = desc_hrvar,
    bg_fill = bg_fill,
    font_col = font_col,
    node_alpha = node_alpha,
    algorithm = algorithm,
    size_threshold = size_threshold
  )
}
|
851
R/network_p2p.R
851
R/network_p2p.R
|
@ -3,327 +3,364 @@
|
|||
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
|
||||
# --------------------------------------------------------------------------------------------
|
||||
|
||||
#' @title Create a network plot with the person-to-person query
|
||||
#' @title Perform network analysis with the person-to-person query
|
||||
#'
|
||||
#' @description
|
||||
#' `r lifecycle::badge('experimental')`
|
||||
#'
|
||||
#'
|
||||
#' Analyse a person-to-person (P2P) network query, with multiple visualisation
|
||||
#' and analysis output options. Pass a data frame containing a person-to-person
|
||||
#' query and return a network visualization. Options are available for community
|
||||
#' detection using either the Louvain or the Leiden algorithms.
|
||||
#'
|
||||
#'
|
||||
#' @param data Data frame containing a person-to-person query.
|
||||
#' @param hrvar String containing the label for the HR attribute.
|
||||
#' @param display String determining what output to return. Valid values
|
||||
#' include:
|
||||
#' - `"hrvar"` (default): compute analysis or visuals without computing
|
||||
#' communities.
|
||||
#' - `"louvain"`: compute analysis or visuals with community detection, using
|
||||
#' the Louvain algorithm.
|
||||
#' - `"leiden"`: compute analysis or visuals with community detection, using
|
||||
#' the Leiden algorithm. This requires all the pre-requisites of the
|
||||
#' **leiden** package installed, which includes Python and **reticulate**.
|
||||
#'
|
||||
#' @param return String specifying what output to return. This must be one of the
|
||||
#' following strings:
|
||||
#' @param return
|
||||
#' A different output is returned depending on the value passed to the `return`
|
||||
#' argument:
|
||||
#' - `'plot'` (default)
|
||||
#' - `'plot-pdf'`
|
||||
#' - `'sankey'`
|
||||
#' - `'table'`
|
||||
#' - `'data'`
|
||||
#' - `'describe'`
|
||||
#' - `'network'`
|
||||
#' @param centrality String determining which centrality measure is used to
|
||||
#' scale the size of the nodes. All centrality measures are automatically
|
||||
#' calculated when it is set to one of the below values, and reflected in the
|
||||
#' `'network'` and `'data'` outputs.
|
||||
#' Measures include:
|
||||
#' - `betweenness`
|
||||
#' - `closeness`
|
||||
#' - `degree`
|
||||
#' - `eigenvector`
|
||||
#' - `pagerank`
|
||||
#'
|
||||
#' See `Value` for more information.
|
||||
#' When `centrality` is set to NULL, no centrality is calculated in the outputs
|
||||
#' and all the nodes would have the same size.
|
||||
#'
|
||||
#' @param community String determining which community detection algorithms to
|
||||
#' apply. Valid values include:
|
||||
#' - `NULL` (default): compute analysis or visuals without computing
|
||||
#' communities.
|
||||
#' - `"louvain"`
|
||||
#' - `"leiden"`
|
||||
#' - `"edge_betweenness"`
|
||||
#' - `"fast_greedy"`
|
||||
#' - `"fluid_communities"`
|
||||
#' - `"infomap"`
|
||||
#' - `"label_prop"`
|
||||
#' - `"leading_eigen"`
|
||||
#' - `"optimal"`
|
||||
#' - `"spinglass"`
|
||||
#' - `"walk_trap"`
|
||||
#'
|
||||
#' These values map to the community detection algorithms offered by `igraph`.
|
||||
#' For instance, `"leiden"` is based on `igraph::cluster_leiden()`. Please see
|
||||
#' the bottom of <https://igraph.org/r/html/1.3.0/cluster_leiden.html> on all
|
||||
#' applications and parameters of these algorithms.
|
||||
#'
|
||||
#' @param weight String to specify which column to use as weights for the
|
||||
#' network. To create a graph without weights, supply `NULL` to this argument.
|
||||
#' @param comm_args list containing the arguments to be passed through to
|
||||
#' igraph's clustering algorithms. Arguments must be named. See examples
|
||||
#' section on how to supply arguments in a named list.
|
||||
#' @param layout String to specify the node placement algorithm to be used.
|
||||
#' Defaults to `"mds"` for the deterministic multi-dimensional scaling of
|
||||
#' nodes. See
|
||||
#' <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full
|
||||
#' list of options.
|
||||
#' @param path File path for saving the PDF output. Defaults to a timestamped
|
||||
#' path based on current parameters.
|
||||
#' @param desc_hrvar Character vector of length 3 containing the HR attributes
|
||||
#' to use when returning the `"describe"` output. See `network_describe()`.
|
||||
#' @param style String to specify which plotting style to use for the network
|
||||
#' plot. Valid values include:
|
||||
#' - `"igraph"`
|
||||
#' - `"ggraph"`
|
||||
#' @param bg_fill String to specify background fill colour.
|
||||
#' @param font_col String to specify font and link colour.
|
||||
#' @param font_col String to specify font colour.
|
||||
#' @param legend_pos String to specify position of legend. Defaults to
|
||||
#' `"bottom"`. See `ggplot2::theme()`. This is applicable for both the
|
||||
#' `"right"`. See `ggplot2::theme()`. This is applicable for both the
|
||||
#' 'ggraph' and the fast plotting method. Valid inputs include:
|
||||
#' - `"bottom"`
|
||||
#' - `"top"`
|
||||
#' - `"left"`
|
||||
#' - `"right"`
|
||||
#'
|
||||
#' @param palette Function for generating a colour palette with a single
|
||||
#' argument `n`. Uses "rainbow" by default.
|
||||
#' @param palette String specifying the function to generate a colour palette
|
||||
#' with a single argument `n`. Uses `"rainbow"` by default.
|
||||
#' @param node_alpha A numeric value between 0 and 1 to specify the transparency
|
||||
#' of the nodes. Defaults to 0.7.
|
||||
#' @param edge_alpha A numeric value between 0 and 1 to specify the transparency
|
||||
#' of the edges (only for 'ggraph' mode). Defaults to 1.
|
||||
#' @param res Resolution parameter to be passed to `leiden::leiden()`. Defaults
|
||||
#' to 0.5.
|
||||
#' @param edge_col String to specify edge link colour.
|
||||
#' @param node_sizes Numeric vector of length two to specify the range of node
|
||||
#' sizes to rescale to, when `centrality` is set to a non-null value.
|
||||
#' @param seed Seed for the random number generator passed to either
|
||||
#' `set.seed()` when the Louvain algorithm is used, or `leiden::leiden()` when
|
||||
#' the Leiden algorithm is used, to ensure consistency. Only applicable when
|
||||
#' `display` is set to `"louvain"` or `"leiden"`.
|
||||
#' @param algorithm String to specify the node placement algorithm to be used.
|
||||
#' Defaults to `"mds"` for the deterministic multi-dimensional scaling of
|
||||
#' nodes. See
|
||||
#' <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full
|
||||
#' list of options.
|
||||
#' @param size_threshold Numeric value representing the maximum number of edges
|
||||
#' before `network_leiden()` switches to use a more efficient, but less
|
||||
#' elegant plotting method (native igraph). Defaults to 5000. Set as `0` to
|
||||
#' coerce to a fast plotting method every time, and `Inf` to always use the
|
||||
#' default plotting method (with 'ggraph').
|
||||
#' @param weight String to specify which column to use as weights for the
|
||||
#' network. Defaults to `"StrongTieScore"`. To create a graph without weights,
|
||||
#' supply `NULL` to this argument.
|
||||
#' `set.seed()` when the louvain or leiden community detection algorithm is
|
||||
#' used, to ensure consistency. Only applicable when `community` is set to
|
||||
#' one of the valid non-null values.
|
||||
#'
|
||||
#' @return
|
||||
#' A different output is returned depending on the value passed to the `return`
|
||||
#' argument:
|
||||
#' - `'plot'`: return a network plot.
|
||||
#' - `'plot'`: return a network plot, interactively within R.
|
||||
#' - `'plot-pdf'`: save a network plot as PDF. This option is recommended when
|
||||
#' the graph is large, which may take a long time to run if `return = 'plot'`
|
||||
#' is selected. Use this together with `path` to control the save location.
|
||||
#' - `'sankey'`: return a sankey plot combining communities and HR attribute.
|
||||
#' This is only valid if a community detection method is selected at
|
||||
#' `display`.
|
||||
#' `community`.
|
||||
#' - `'table'`: return a vertex summary table with counts in communities and
|
||||
#' HR attribute.
|
||||
#' HR attribute. When `centrality` is non-NULL, the average centrality values
|
||||
#' are calculated per group.
|
||||
#' - `'data'`: return a vertex data file that matches vertices with
|
||||
#' communities and HR attributes.
|
||||
#' - `'describe'`: return a list of data frames which describe each of the
|
||||
#' identified communities. The first data frame is a summary table of all the
|
||||
#' communities. This is only valid if a community detection method is selected
|
||||
#' at `display`.
|
||||
#' - `'network'`: return 'igraph' object.
|
||||
#'
|
||||
#' @family Network
|
||||
#'
|
||||
#' @examples
|
||||
#' # Simulate a small person-to-person dataset
|
||||
#' p2p_data <- p2p_data_sim(size = 50)
|
||||
#' p2p_df <- p2p_data_sim(dim = 1, size = 100)
|
||||
#'
|
||||
#' # Return a network plot to console, coloured by hrvar
|
||||
#' p2p_data %>%
|
||||
#' network_p2p(display = "hrvar",
|
||||
#' path = NULL,
|
||||
#' return = "plot")
|
||||
#' # default - ggraph visual
|
||||
#' network_p2p(data = p2p_df, style = "ggraph")
|
||||
#'
|
||||
#' # Return a network plot to console, coloured by Louvain communities
|
||||
#' p2p_data %>%
|
||||
#' network_p2p(display = "louvain",
|
||||
#' path = NULL,
|
||||
#' return = "plot")
|
||||
#' # return vertex table
|
||||
#' network_p2p(data = p2p_df, return = "table")
|
||||
#'
|
||||
#' # return vertex table with community detection
|
||||
#' network_p2p(data = p2p_df, community = "leiden", return = "table")
|
||||
#'
|
||||
#' # Return a network plot to console
|
||||
#' # Coloured by Leiden communities
|
||||
#' # Using Fruchterman-Reingold force-directed layout algorithm
|
||||
#' # Force the use of fast plotting method
|
||||
#' p2p_data %>%
|
||||
#' network_p2p(display = "hrvar",
|
||||
#' path = NULL,
|
||||
#' return = "plot",
|
||||
#' algorithm = "lgl",
|
||||
#' size_threshold = 0)
|
||||
#' # leiden - igraph style with custom resolution parameters
|
||||
#' network_p2p(data = p2p_df, community = "leiden", comm_args = list("resolution" = 0.1))
|
||||
#'
|
||||
#' # Return a data frame matching HR variable and communities to nodes
|
||||
#' # Using Louvain communities
|
||||
#' p2p_data %>%
|
||||
#' network_p2p(display = "louvain",
|
||||
#' return = "data",
|
||||
#' algorithm = "fr")
|
||||
#' # louvain - ggraph style, using custom palette
|
||||
#' network_p2p(
|
||||
#' data = p2p_df,
|
||||
#' style = "ggraph",
|
||||
#' community = "louvain",
|
||||
#' palette = "heat_colors"
|
||||
#' )
|
||||
#'
|
||||
#' @section Running Leiden communities:
|
||||
#' # leiden - return a sankey visual with custom resolution parameters
|
||||
#' network_p2p(
|
||||
#' data = p2p_df,
|
||||
#' community = "leiden",
|
||||
#' return = "sankey",
|
||||
#' comm_args = list("resolution" = 0.1)
|
||||
#' )
|
||||
#'
|
||||
#' Running Leiden communities requires python dependencies installed.
|
||||
#' You can run the following:
|
||||
#' # using `fluid_communities` algorithm with custom parameters
|
||||
#' network_p2p(
|
||||
#' data = p2p_df,
|
||||
#' community = "fluid_communities",
|
||||
#' comm_args = list("no.of.communities" = 5)
|
||||
#' )
|
||||
#'
|
||||
#' ```R
|
||||
#' # Return a network plot to console, coloured by Leiden communities
|
||||
#' p2p_data %>%
|
||||
#' network_p2p(display = "leiden",
|
||||
#' path = NULL,
|
||||
#' return = "plot")
|
||||
#' ```
|
||||
#' When installing the 'leiden' package, you may be required to install the Python
|
||||
#' libraries 'python-igraph' and 'leidenalg'. You can install them with:
|
||||
#'
|
||||
#' ```R
|
||||
#' reticulate::py_install("python-igraph")
|
||||
#' reticulate::py_install("leidenalg")
|
||||
#' ```
|
||||
#' # Calculate centrality measures and leiden communities, return at node level
|
||||
#' network_p2p(
|
||||
#' data = p2p_df,
|
||||
#' centrality = "betweenness",
|
||||
#' community = "leiden",
|
||||
#' return = "data"
|
||||
#' ) %>%
|
||||
#' dplyr::glimpse()
|
||||
#'
|
||||
#' @import ggplot2
|
||||
#' @import dplyr
|
||||
#' @importFrom grDevices rainbow
|
||||
#' @importFrom graphics legend
|
||||
#' @importFrom graphics par
|
||||
#'
|
||||
#' @export
|
||||
network_p2p <- function(data,
|
||||
hrvar = "Organization",
|
||||
display = "hrvar",
|
||||
return = "plot",
|
||||
path = paste0("network_p2p_", display),
|
||||
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
|
||||
bg_fill = "#FFFFFF",
|
||||
font_col = "grey20",
|
||||
legend_pos = "bottom",
|
||||
palette = "rainbow",
|
||||
node_alpha = 0.7,
|
||||
edge_alpha = 1,
|
||||
res = 0.5,
|
||||
seed = 1,
|
||||
algorithm = "mds",
|
||||
size_threshold = 5000,
|
||||
weight = "StrongTieScore"){
|
||||
|
||||
## Set edges df
|
||||
if(is.null(weight)){
|
||||
network_p2p <-
|
||||
function(
|
||||
data,
|
||||
hrvar = "Organization",
|
||||
return = "plot",
|
||||
centrality = NULL,
|
||||
community = NULL,
|
||||
weight = NULL,
|
||||
comm_args = NULL,
|
||||
layout = "mds",
|
||||
path = paste("p2p", NULL, sep = "_"),
|
||||
style = "igraph",
|
||||
bg_fill = "#FFFFFF",
|
||||
font_col = "grey20",
|
||||
legend_pos = "right",
|
||||
palette = "rainbow",
|
||||
node_alpha = 0.7,
|
||||
edge_alpha = 1,
|
||||
edge_col = "#777777",
|
||||
node_sizes = c(1, 20),
|
||||
seed = 1
|
||||
){
|
||||
|
||||
edges <-
|
||||
data %>%
|
||||
mutate(NoWeight = 1) %>% # No weight
|
||||
select(from = "TieOrigin_PersonId",
|
||||
to = "TieDestination_PersonId",
|
||||
weight = "NoWeight")
|
||||
if(length(node_sizes) != 2){
|
||||
stop("`node_sizes` must be of length 2")
|
||||
}
|
||||
|
||||
} else {
|
||||
## Set data frame for edges
|
||||
if(is.null(weight)){
|
||||
|
||||
edges <-
|
||||
data %>%
|
||||
select(from = "TieOrigin_PersonId",
|
||||
to = "TieDestination_PersonId",
|
||||
weight = weight)
|
||||
edges <-
|
||||
data %>%
|
||||
mutate(NoWeight = 1) %>% # No weight
|
||||
select(from = "TieOrigin_PersonId",
|
||||
to = "TieDestination_PersonId",
|
||||
weight = "NoWeight")
|
||||
|
||||
}
|
||||
} else {
|
||||
|
||||
## Set variables
|
||||
TO_hrvar <- paste0("TieOrigin_", hrvar)
|
||||
TD_hrvar <- paste0("TieDestination_", hrvar)
|
||||
edges <-
|
||||
data %>%
|
||||
select(from = "TieOrigin_PersonId",
|
||||
to = "TieDestination_PersonId",
|
||||
weight = weight)
|
||||
|
||||
## Vertices data frame to provide meta-data
|
||||
vert_ft <-
|
||||
rbind(
|
||||
# TieOrigin
|
||||
edges %>%
|
||||
select(from) %>% # Single column
|
||||
unique() %>% # Remove duplications
|
||||
left_join(select(data, TieOrigin_PersonId, TO_hrvar),
|
||||
by = c("from" = "TieOrigin_PersonId")) %>%
|
||||
select(node = "from", !!sym(hrvar) := TO_hrvar),
|
||||
}
|
||||
|
||||
# TieDestination
|
||||
edges %>%
|
||||
select(to) %>% # Single column
|
||||
unique() %>% # Remove duplications
|
||||
left_join(select(data, TieDestination_PersonId, TD_hrvar),
|
||||
by = c("to" = "TieDestination_PersonId")) %>%
|
||||
select(node = "to", !!sym(hrvar) := TD_hrvar)
|
||||
## Set variables
|
||||
# TieOrigin = PrimaryCollaborator
|
||||
# TieDestination = SecondaryCollaborator
|
||||
TO_hrvar <- paste0("TieOrigin_", hrvar)
|
||||
TD_hrvar <- paste0("TieDestination_", hrvar)
|
||||
|
||||
## Vertices data frame to provide meta-data
|
||||
vert_ft <-
|
||||
rbind(
|
||||
# TieOrigin
|
||||
edges %>%
|
||||
select(from) %>% # Single column
|
||||
unique() %>% # Remove duplications
|
||||
left_join(select(data, TieOrigin_PersonId, TO_hrvar),
|
||||
by = c("from" = "TieOrigin_PersonId")) %>%
|
||||
select(node = "from", !!sym(hrvar) := TO_hrvar),
|
||||
|
||||
# TieDestination
|
||||
edges %>%
|
||||
select(to) %>% # Single column
|
||||
unique() %>% # Remove duplications
|
||||
left_join(select(data, TieDestination_PersonId, TD_hrvar),
|
||||
by = c("to" = "TieDestination_PersonId")) %>%
|
||||
select(node = "to", !!sym(hrvar) := TD_hrvar)
|
||||
)
|
||||
|
||||
|
||||
|
||||
## Create 'igraph' object
|
||||
g_raw <-
|
||||
igraph::graph_from_data_frame(edges,
|
||||
directed = TRUE, # Directed, but FALSE for visualization
|
||||
vertices = unique(vert_ft)) # remove duplicates
|
||||
|
||||
## Assign weights
|
||||
g_raw$weight <- edges$weight
|
||||
|
||||
## allowed `community` values
|
||||
valid_comm <- c(
|
||||
"leiden",
|
||||
"louvain",
|
||||
"edge_betweenness",
|
||||
"fast_greedy",
|
||||
"fluid_communities",
|
||||
"infomap",
|
||||
"label_prop",
|
||||
"leading_eigen",
|
||||
"optimal",
|
||||
"spinglass",
|
||||
"walk_trap"
|
||||
)
|
||||
|
||||
## Finalise `g` object
|
||||
## If community detection is selected, this is where the communities are appended
|
||||
if(is.null(community)){ # no community detection
|
||||
|
||||
## Create 'igraph' object
|
||||
g_raw <-
|
||||
igraph::graph_from_data_frame(edges,
|
||||
directed = TRUE, # Directed, but FALSE for visualization
|
||||
vertices = unique(vert_ft)) # remove duplicates
|
||||
g <- igraph::simplify(g_raw)
|
||||
v_attr <- hrvar # Name of vertex attribute
|
||||
|
||||
## Assign weights
|
||||
g_raw$weight <- edges$weight
|
||||
|
||||
## Finalise `g` object
|
||||
## If community detection is selected, this is where the communities are appended
|
||||
|
||||
if(display == "hrvar"){
|
||||
|
||||
g <- g_raw %>% igraph::simplify()
|
||||
|
||||
## Name of vertex attribute
|
||||
v_attr <- hrvar
|
||||
|
||||
} else if(display == "louvain"){
|
||||
} else if(community %in% valid_comm){
|
||||
|
||||
set.seed(seed = seed)
|
||||
g_ud <- igraph::as.undirected(g_raw) # Convert to undirected
|
||||
|
||||
## Convert to undirected
|
||||
g_ud <- igraph::as.undirected(g_raw)
|
||||
alg_label <- paste0("igraph::cluster_", community)
|
||||
|
||||
## Return a numeric vector of partitions / clusters / modules
|
||||
## Set a low resolution parameter to have fewer groups
|
||||
## weights = NULL means that if the graph has a `weight` edge attribute, this
|
||||
## will be used by default.
|
||||
lc <- igraph::cluster_louvain(g_ud, weights = NULL)
|
||||
# combine arguments to clustering algorithm
|
||||
c_comm_args <- c(list("graph" = g_ud), comm_args)
|
||||
|
||||
# output `communities` object
|
||||
comm_out <- do.call(eval(parse(text = alg_label)), c_comm_args)
|
||||
|
||||
## Add cluster
|
||||
g <-
|
||||
g_ud %>%
|
||||
# Add louvain partitions to graph object
|
||||
igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
|
||||
# Add partitions to graph object
|
||||
# Return membership
|
||||
igraph::set_vertex_attr(
|
||||
"cluster",
|
||||
value = as.character(igraph::membership(comm_out))) %>%
|
||||
igraph::simplify()
|
||||
|
||||
## Name of vertex attribute
|
||||
v_attr <- "cluster"
|
||||
|
||||
} else if(display == "leiden"){
|
||||
} else {
|
||||
|
||||
# Check package installation
|
||||
check_pkg_installed(pkgname = "leiden")
|
||||
stop("Please enter a valid input for `community`.")
|
||||
|
||||
## Return a numeric vector of partitions / clusters / modules
|
||||
## Set a low resolution parameter to have fewer groups
|
||||
ld <- leiden::leiden(
|
||||
g_raw,
|
||||
resolution_parameter = res,
|
||||
seed = seed,
|
||||
weights = g_raw$weight) # create partitions
|
||||
|
||||
## Add cluster
|
||||
g <-
|
||||
g_raw %>%
|
||||
# Add leiden partitions to graph object
|
||||
igraph::set_vertex_attr("cluster", value = as.character(ld)) %>%
|
||||
igraph::simplify()
|
||||
|
||||
## Name of vertex attribute
|
||||
v_attr <- "cluster"
|
||||
|
||||
} else {
|
||||
|
||||
stop("Please enter a valid input for `display`.")
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Common area -------------------------------------------------------------
|
||||
# centrality calculations -------------------------------------------------
|
||||
# attach centrality calculations if `centrality` is not NULL
|
||||
|
||||
## Create vertex table
|
||||
vertex_tb <-
|
||||
g %>%
|
||||
igraph::get.vertex.attribute() %>%
|
||||
as_tibble()
|
||||
if(!is.null(centrality)){
|
||||
|
||||
## Set layout for graph
|
||||
g_layout <-
|
||||
g %>%
|
||||
ggraph::ggraph(layout = "igraph", algorithm = algorithm)
|
||||
g <- network_summary(g, return = "network")
|
||||
|
||||
## Timestamped File Path
|
||||
out_path <- paste0(path, "_", tstamp(), ".pdf")
|
||||
igraph::V(g)$node_size <-
|
||||
igraph::get.vertex.attribute(
|
||||
g,
|
||||
name = centrality # from argument
|
||||
) %>%
|
||||
scales::rescale(to = node_sizes) # min and max value
|
||||
|
||||
# Return ------------------------------------------------------------------
|
||||
} else {
|
||||
|
||||
if(return == "plot"){
|
||||
# all nodes with the same size if centrality is not calculated
|
||||
# adjust for plotting formats
|
||||
if(style == "igraph"){
|
||||
igraph::V(g)$node_size <- rep(3, igraph::vcount(g))
|
||||
} else if(style == "ggraph"){
|
||||
igraph::V(g)$node_size <- rep(2.5, igraph::vcount(g))
|
||||
node_sizes <- c(3, 3) # arbitrarily fix the node size
|
||||
}
|
||||
}
|
||||
|
||||
# Common area -------------------------------------------------------------
|
||||
|
||||
## Create vertex table
|
||||
vertex_tb <-
|
||||
g %>%
|
||||
igraph::get.vertex.attribute() %>%
|
||||
as_tibble() %>%
|
||||
select(-node_size) # never show `node_size` in data output
|
||||
|
||||
## Set layout for graph
|
||||
g_layout <-
|
||||
g %>%
|
||||
ggraph::ggraph(layout = "igraph", algorithm = layout)
|
||||
|
||||
## Timestamped File Path
|
||||
out_path <- paste0(path, "_", tstamp(), ".pdf")
|
||||
|
||||
# Return outputs ----------------------------------------------------------
|
||||
|
||||
## Use fast plotting method
|
||||
|
||||
if(igraph::ecount(g) > size_threshold){
|
||||
|
||||
message("Using fast plot method due to large network size...")
|
||||
if(return %in% c("plot", "plot-pdf")){
|
||||
|
||||
## Set colours
|
||||
colour_tb <-
|
||||
tibble(!!sym(v_attr) := unique(igraph::get.vertex.attribute(g, name = v_attr))) %>%
|
||||
mutate(colour = rainbow(nrow(.))) # No palette choice
|
||||
mutate(colour = eval(parse(text = paste0(palette,"(nrow(.))")))) # palette choice
|
||||
|
||||
## Colour vector
|
||||
colour_v <-
|
||||
|
@ -331,231 +368,217 @@ network_p2p <- function(data,
|
|||
left_join(colour_tb, by = v_attr) %>%
|
||||
pull(colour)
|
||||
|
||||
## Set graph plot colours
|
||||
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
|
||||
igraph::V(g)$frame.color <- NA
|
||||
igraph::E(g)$width <- 1
|
||||
if(style == "igraph"){
|
||||
|
||||
## Internal basic plotting function used inside `network_p2p()`
|
||||
plot_basic_graph <- function(lpos = legend_pos){
|
||||
# message("Using fast plot method due to large network size...")
|
||||
|
||||
old_par <- par(no.readonly = TRUE)
|
||||
on.exit(par(old_par))
|
||||
## Set graph plot colours
|
||||
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
|
||||
igraph::V(g)$frame.color <- NA
|
||||
igraph::E(g)$width <- 1
|
||||
|
||||
par(bg = bg_fill)
|
||||
## Internal basic plotting function used inside `network_p2p()`
|
||||
plot_basic_graph <- function(lpos = legend_pos){
|
||||
|
||||
layout_text <- paste0("igraph::layout_with_", algorithm)
|
||||
old_par <- graphics::par(no.readonly = TRUE)
|
||||
on.exit(graphics::par(old_par))
|
||||
|
||||
## Legend position
|
||||
graphics::par(bg = bg_fill)
|
||||
|
||||
if(lpos == "left"){
|
||||
layout_text <- paste0("igraph::layout_with_", layout)
|
||||
|
||||
leg_x <- -1.5
|
||||
leg_y <- 0.5
|
||||
## Legend position
|
||||
|
||||
} else if(lpos == "right"){
|
||||
if(lpos == "left"){
|
||||
|
||||
leg_x <- 1.5
|
||||
leg_y <- 0.5
|
||||
leg_x <- -1.5
|
||||
leg_y <- 0.5
|
||||
|
||||
} else if(lpos == "top"){
|
||||
} else if(lpos == "right"){
|
||||
|
||||
leg_x <- 0
|
||||
leg_y <- 1.5
|
||||
leg_x <- 1.5
|
||||
leg_y <- 0.5
|
||||
|
||||
} else if(lpos == "bottom"){
|
||||
} else if(lpos == "top"){
|
||||
|
||||
leg_x <- 0
|
||||
leg_y <- -1.0
|
||||
leg_x <- 0
|
||||
leg_y <- 1.5
|
||||
|
||||
} else {
|
||||
} else if(lpos == "bottom"){
|
||||
|
||||
stop("Invalid `legend_pos` input.")
|
||||
leg_x <- 0
|
||||
leg_y <- -1.0
|
||||
|
||||
} else {
|
||||
|
||||
stop("Invalid `legend_pos` input.")
|
||||
|
||||
}
|
||||
|
||||
graphics::plot(
|
||||
g,
|
||||
layout = eval(parse(text = layout_text)),
|
||||
vertex.label = NA,
|
||||
# vertex.size = 3,
|
||||
vertex.size = igraph::V(g)$node_size,
|
||||
edge.arrow.mode = "-",
|
||||
edge.color = "#adadad"
|
||||
)
|
||||
|
||||
graphics::legend(x = leg_x,
|
||||
y = leg_y,
|
||||
legend = colour_tb[[v_attr]], # vertex attribute
|
||||
pch = 21,
|
||||
text.col = font_col,
|
||||
col = edge_col,
|
||||
pt.bg = colour_tb$colour,
|
||||
pt.cex = 2,
|
||||
cex = .8,
|
||||
bty = "n",
|
||||
ncol = 1)
|
||||
}
|
||||
|
||||
## Default PDF output unless NULL supplied to path
|
||||
if(return == "plot"){
|
||||
|
||||
plot_basic_graph()
|
||||
|
||||
} else if(return == "plot-pdf"){
|
||||
|
||||
grDevices::pdf(out_path)
|
||||
|
||||
plot_basic_graph()
|
||||
|
||||
grDevices::dev.off()
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
graphics::plot(g,
|
||||
layout = eval(parse(text = layout_text)),
|
||||
vertex.label = NA,
|
||||
vertex.size = 3,
|
||||
edge.arrow.mode = "-",
|
||||
edge.color = "#adadad")
|
||||
} else if(style == "ggraph"){
|
||||
|
||||
graphics::legend(x = leg_x,
|
||||
y = leg_y,
|
||||
legend = colour_tb[[v_attr]], # vertex attribute
|
||||
pch = 21,
|
||||
text.col = font_col,
|
||||
col = "#777777",
|
||||
pt.bg = colour_tb$colour,
|
||||
pt.cex = 2,
|
||||
cex = .8,
|
||||
bty = "n",
|
||||
ncol = 1)
|
||||
}
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = edge_col,
|
||||
edge_width = 0.05,
|
||||
alpha = edge_alpha)+
|
||||
ggraph::geom_node_point(aes(colour = !!sym(v_attr),
|
||||
size = node_size),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
scale_size_continuous(range = node_sizes) +
|
||||
scale_color_manual(values = unique(colour_v)) +
|
||||
theme_void() +
|
||||
theme(
|
||||
legend.position = legend_pos,
|
||||
legend.background = element_rect(fill = bg_fill, colour = bg_fill),
|
||||
|
||||
## Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank(),
|
||||
panel.grid = element_blank()
|
||||
) +
|
||||
labs(caption = paste0("Person to person collaboration showing ", v_attr, ". "), # spaces intentional
|
||||
y = "",
|
||||
x = "") +
|
||||
guides(size = "none")
|
||||
|
||||
plot_basic_graph()
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(return == "plot"){
|
||||
|
||||
plot_output
|
||||
|
||||
} else if(return == "plot-pdf"){
|
||||
|
||||
ggsave(out_path,
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
grDevices::pdf(out_path)
|
||||
stop("invalid input for `style`")
|
||||
|
||||
plot_basic_graph()
|
||||
}
|
||||
|
||||
grDevices::dev.off()
|
||||
} else if (return == "data"){
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
vertex_tb
|
||||
|
||||
} else if(return == "network"){
|
||||
|
||||
g
|
||||
|
||||
} else if(return == "sankey"){
|
||||
|
||||
if(is.null(community)){
|
||||
|
||||
message("Note: no sankey return option is available if `NULL` is selected at `community`.
|
||||
Please specify a valid community detection algorithm.")
|
||||
|
||||
} else if(community %in% valid_comm){
|
||||
|
||||
create_sankey(
|
||||
data = vertex_tb %>% count(!!sym(hrvar), cluster),
|
||||
var1 = hrvar,
|
||||
var2 = "cluster",
|
||||
count = "n"
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
} else if(return == "table"){
|
||||
|
||||
if(is.null(community)){
|
||||
|
||||
if(is.null(centrality)){
|
||||
|
||||
vertex_tb %>% count(!!sym(hrvar))
|
||||
|
||||
} else {
|
||||
|
||||
# average centrality by group
|
||||
vertex_tb %>%
|
||||
group_by(!!sym(hrvar)) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
betweenness = mean(betweenness, na.rm = TRUE),
|
||||
closeness = mean(closeness, na.rm = TRUE),
|
||||
degree = mean(degree, na.rm = TRUE),
|
||||
eigenvector = mean(eigenvector, na.rm = TRUE),
|
||||
pagerank = mean(pagerank, na.rm = TRUE)
|
||||
)
|
||||
}
|
||||
|
||||
} else if(community %in% valid_comm){
|
||||
|
||||
if(is.null(centrality)){
|
||||
|
||||
vertex_tb %>% count(!!sym(hrvar), cluster)
|
||||
|
||||
} else {
|
||||
|
||||
# average centrality by group
|
||||
vertex_tb %>%
|
||||
group_by(!!sym(hrvar), cluster) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
betweenness = mean(betweenness, na.rm = TRUE),
|
||||
closeness = mean(closeness, na.rm = TRUE),
|
||||
degree = mean(degree, na.rm = TRUE),
|
||||
eigenvector = mean(eigenvector, na.rm = TRUE),
|
||||
pagerank = mean(pagerank, na.rm = TRUE)
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.05, alpha = edge_alpha) +
|
||||
ggraph::geom_node_point(aes(colour = !!sym(v_attr)),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(
|
||||
legend.position = legend_pos,
|
||||
legend.background = element_rect(fill = bg_fill, colour = bg_fill),
|
||||
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank(),
|
||||
panel.grid = element_blank()
|
||||
) +
|
||||
labs(caption = paste0("Person to person collaboration showing ", v_attr, ". "), # spaces intentional
|
||||
y = "",
|
||||
x = "")
|
||||
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_output
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(out_path,
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
stop("invalid input for `return`")
|
||||
|
||||
}
|
||||
|
||||
} else if(return == "table"){
|
||||
|
||||
|
||||
if(display == "hrvar"){
|
||||
|
||||
vertex_tb %>% count(!!sym(hrvar))
|
||||
|
||||
} else if(display %in% c("louvain", "leiden")){
|
||||
|
||||
vertex_tb %>%
|
||||
count(!!sym(hrvar), cluster)
|
||||
|
||||
}
|
||||
|
||||
} else if(return == "data"){
|
||||
|
||||
vertex_tb
|
||||
|
||||
} else if(return == "network"){
|
||||
|
||||
g
|
||||
|
||||
} else if(return == "sankey"){
|
||||
|
||||
if(display == "hrvar"){
|
||||
|
||||
message("Note: no sankey return option is available if `display` is set to 'hrvar'.
|
||||
Please specify either 'louvain' or 'leiden'")
|
||||
|
||||
} else if(display %in% c("louvain", "leiden")){
|
||||
|
||||
create_sankey(data = vertex_tb %>% count(!!sym(hrvar), cluster),
|
||||
var1 = hrvar,
|
||||
var2 = "cluster",
|
||||
count = "n")
|
||||
|
||||
}
|
||||
|
||||
} else if(return == "describe"){
|
||||
|
||||
if(display == "hrvar"){
|
||||
|
||||
message("Note: no describe return option is available if `display` is set to 'hrvar'.
|
||||
Please specify either 'louvain' or 'leiden'")
|
||||
|
||||
} else if(display %in% c("louvain", "leiden")){
|
||||
|
||||
describe_tb <-
|
||||
vertex_tb %>%
|
||||
left_join(select(data, starts_with("TieOrigin_")),
|
||||
by = c("name" = "TieOrigin_PersonId"))
|
||||
|
||||
desc_str <-
|
||||
describe_tb %>%
|
||||
pull(cluster) %>%
|
||||
unique()
|
||||
|
||||
out_list <-
|
||||
desc_str %>%
|
||||
purrr::map(function(x){
|
||||
describe_tb %>%
|
||||
filter(cluster == x) %>%
|
||||
network_describe(hrvar = desc_hrvar)
|
||||
}) %>%
|
||||
setNames(nm = desc_str)
|
||||
|
||||
summaryTable <-
|
||||
list(i = out_list,
|
||||
j = names(out_list)) %>%
|
||||
purrr::pmap(function(i, j){
|
||||
i %>%
|
||||
arrange(desc(Percentage)) %>%
|
||||
# slice(1) %>%
|
||||
mutate_at(vars(starts_with("feature_")), ~tidyr::replace_na(., "")) %>%
|
||||
mutate(Community = j,
|
||||
`Attribute 1` = paste(feature_1, "=", feature_1_value),
|
||||
`Attribute 2` = paste(feature_2, "=", feature_2_value),
|
||||
`Attribute 3` = paste(feature_3, "=", feature_3_value)) %>%
|
||||
select(Community,
|
||||
`Attribute 1`,
|
||||
`Attribute 2`,
|
||||
`Attribute 3`,
|
||||
PercentageExplained = "Percentage") %>%
|
||||
mutate_at(vars(starts_with("Attribute")), ~ifelse(. == " = ", NA, .))
|
||||
}) %>%
|
||||
bind_rows() %>%
|
||||
mutate(sum_na = select(., `Attribute 1`, `Attribute 2`, `Attribute 3`) %>%
|
||||
apply(1, function(x) sum(is.na(x)))) %>%
|
||||
arrange(desc(PercentageExplained)) %>%
|
||||
group_by(Community, sum_na) %>%
|
||||
summarise_all(~first(.)) %>%
|
||||
select(-sum_na)
|
||||
|
||||
c(list("summaryTable" = summaryTable), out_list)
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
stop("Please enter a valid input for `return`.")
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,222 +0,0 @@
|
|||
# --------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
|
||||
# --------------------------------------------------------------------------------------------
|
||||
|
||||
#' @title Create a network plot with the person-to-person query
|
||||
#'
|
||||
#' @description
|
||||
#' `r lifecycle::badge('experimental')`
|
||||
#'
|
||||
#'
|
||||
#' Analyse a person-to-person (P2P) network query, with multiple visualisation
|
||||
#' and analysis output options. Pass a data frame containing a person-to-person
|
||||
#' query and return a network visualization. Options are available for community
|
||||
#' detection using either the Louvain or the Leiden algorithms.
|
||||
#'
|
||||
#'
|
||||
#'
|
||||
#' @family Network
|
||||
#'
|
||||
#' @export
|
||||
network_p2p_test <- function(
|
||||
data,
|
||||
hrvar,
|
||||
centrality = NULL,
|
||||
community = NULL,
|
||||
return,
|
||||
bg_fill = "#FFFFFF",
|
||||
font_col = "grey20",
|
||||
legend_pos = "bottom",
|
||||
palette = "rainbow",
|
||||
node_alpha = 0.7,
|
||||
edge_alpha = 1,
|
||||
res = 0.5,
|
||||
seed = 1,
|
||||
algorithm = "mds",
|
||||
size_threshold = 5000,
|
||||
weight = "StrongTieScore"
|
||||
){
|
||||
|
||||
## valid values for centrality -------------------------------------------
|
||||
|
||||
valid_cen <- c(
|
||||
"betweenness",
|
||||
"closeness",
|
||||
"degree",
|
||||
"eigenvector",
|
||||
"pagerank"
|
||||
)
|
||||
|
||||
## valid values for community --------------------------------------------
|
||||
|
||||
valid_com <- c(
|
||||
"leiden",
|
||||
"louvain"
|
||||
)
|
||||
|
||||
## Set data frame for `edges` --------------------------------------------
|
||||
|
||||
if(is.null(weight)){
|
||||
|
||||
edges <-
|
||||
data %>%
|
||||
mutate(NoWeight = 1) %>% # No weight
|
||||
select(from = "TieOrigin_PersonId",
|
||||
to = "TieDestination_PersonId",
|
||||
weight = "NoWeight")
|
||||
|
||||
} else {
|
||||
|
||||
edges <-
|
||||
data %>%
|
||||
select(from = "TieOrigin_PersonId",
|
||||
to = "TieDestination_PersonId",
|
||||
weight = weight)
|
||||
|
||||
}
|
||||
|
||||
## Set variables ---------------------------------------------------------
|
||||
|
||||
TO_hrvar <- paste0("TieOrigin_", hrvar)
|
||||
TD_hrvar <- paste0("TieDestination_", hrvar)
|
||||
|
||||
## Vertices data frame to provide meta-data ------------------------------
|
||||
|
||||
vert_ft <-
|
||||
rbind(
|
||||
# TieOrigin
|
||||
edges %>%
|
||||
select(from) %>% # Single column
|
||||
unique() %>% # Remove duplications
|
||||
left_join(select(data, TieOrigin_PersonId, TO_hrvar),
|
||||
by = c("from" = "TieOrigin_PersonId")) %>%
|
||||
select(node = "from", !!sym(hrvar) := TO_hrvar),
|
||||
|
||||
# TieDestination
|
||||
edges %>%
|
||||
select(to) %>% # Single column
|
||||
unique() %>% # Remove duplications
|
||||
left_join(select(data, TieDestination_PersonId, TD_hrvar),
|
||||
by = c("to" = "TieDestination_PersonId")) %>%
|
||||
select(node = "to", !!sym(hrvar) := TD_hrvar)
|
||||
)
|
||||
|
||||
## Create 'igraph' object -----------------------------------------------
|
||||
|
||||
g_raw <-
|
||||
igraph::graph_from_data_frame(edges,
|
||||
directed = TRUE, # Directed, but FALSE for visualization
|
||||
vertices = unique(vert_ft)) # remove duplicates
|
||||
|
||||
## Assign weights --------------------------------------------------------
|
||||
|
||||
g_raw$weight <- edges$weight
|
||||
|
||||
## Main algorithm --------------------------------------------------------
|
||||
|
||||
if(is.null(centrality) & is.null(community)){
|
||||
|
||||
# PLOT -> Returns basic plot with HR attribute
|
||||
# PDF -> Exports plot as pdf file
|
||||
# Table -> HR Var count
|
||||
# Data -> Returns person dataset with HR attributes
|
||||
# Network -> Returns network object
|
||||
|
||||
g <- g_raw %>% igraph::simplify()
|
||||
|
||||
## Name of vertex attribute
|
||||
v_attr <- hrvar
|
||||
|
||||
|
||||
} else if(centrality %in% valid_cen & is.null(community)){
|
||||
|
||||
# PLOT -> Returns basic plot with HR attribute AND vertices proportional to centrality
|
||||
# PDF -> Exports plot as pdf file
|
||||
# Table -> HR Var count and average centrality
|
||||
# Data -> Returns person dataset with HR attributes and centrality scores (ALL)
|
||||
# Network -> Returns network object with centrality scores (ALL)
|
||||
|
||||
} else if(is.null(centrality) & community %in% valid_com){
|
||||
|
||||
# PLOT -> Returns basic plot with community (no hrvar)
|
||||
# PDF -> Exports plot as pdf file
|
||||
# Table -> HR Var x community count
|
||||
# Data -> Returns person dataset with HR attributes and community attribute
|
||||
# Network -> Returns network object with community attribute
|
||||
|
||||
|
||||
# TODO - modularise louvain and leiden?
|
||||
if(community == "louvain"){
|
||||
|
||||
set.seed(seed = seed)
|
||||
|
||||
## Convert to undirected
|
||||
g_ud <- igraph::as.undirected(g_raw)
|
||||
|
||||
## Return a numeric vector of partitions / clusters / modules
|
||||
## Set a low resolution parameter to have fewer groups
|
||||
## weights = NULL means that if the graph has a `weight` edge attribute, this
|
||||
## will be used by default.
|
||||
lc <- igraph::cluster_louvain(g_ud, weights = NULL)
|
||||
|
||||
## Add cluster
|
||||
g <-
|
||||
g_ud %>%
|
||||
# Add louvain partitions to graph object
|
||||
igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
|
||||
igraph::simplify()
|
||||
|
||||
## Name of vertex attribute
|
||||
v_attr <- "cluster"
|
||||
|
||||
} else if(community == "leiden"){
|
||||
|
||||
# Check package installation
|
||||
check_pkg_installed(pkgname = "leiden")
|
||||
|
||||
## Return a numeric vector of partitions / clusters / modules
|
||||
## Set a low resolution parameter to have fewer groups
|
||||
ld <- leiden::leiden(
|
||||
g_raw,
|
||||
resolution_parameter = res,
|
||||
seed = seed,
|
||||
weights = g_raw$weight) # create partitions
|
||||
|
||||
## Add cluster
|
||||
g <-
|
||||
g_raw %>%
|
||||
# Add leiden partitions to graph object
|
||||
igraph::set_vertex_attr("cluster", value = as.character(ld)) %>%
|
||||
igraph::simplify()
|
||||
|
||||
## Name of vertex attribute
|
||||
v_attr <- "cluster"
|
||||
|
||||
}
|
||||
|
||||
|
||||
} else if(centrality %in% valid_cen & community %in% valid_com){
|
||||
|
||||
# PLOT -> Returns basic plot with community AND vertices proportional to centrality
|
||||
# PDF -> Exports plot as pdf file
|
||||
# Table -> HR Var x community count and average centrality
|
||||
# Data -> Returns person dataset with HR attributes, community attribute and centrality scores (ALL)
|
||||
# Network -> Returns network object with community attribute and centrality scores (ALL)
|
||||
|
||||
|
||||
} else {
|
||||
|
||||
stop(
|
||||
"Invalid inputs to `centrality` or `community`."
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -8,13 +8,13 @@ keymetrics_scan(
|
|||
data,
|
||||
hrvar = "Organization",
|
||||
mingroup = 5,
|
||||
metrics = c("Workweek_span", "Collaboration_hours",
|
||||
"After_hours_collaboration_hours", "Meetings", "Meeting_hours",
|
||||
"After_hours_meeting_hours", "Low_quality_meeting_hours",
|
||||
"Meeting_hours_with_manager_1_on_1", "Meeting_hours_with_manager", "Emails_sent",
|
||||
"Email_hours", "After_hours_email_hours", "Generated_workload_email_hours",
|
||||
"Total_focus_hours", "Internal_network_size", "Networking_outside_organization",
|
||||
"External_network_size", "Networking_outside_company"),
|
||||
metrics = c("Workweek_span", "Collaboration_hours", "After_hours_collaboration_hours",
|
||||
"Meetings", "Meeting_hours", "After_hours_meeting_hours",
|
||||
"Low_quality_meeting_hours", "Meeting_hours_with_manager_1_on_1",
|
||||
"Meeting_hours_with_manager", "Emails_sent", "Email_hours",
|
||||
"After_hours_email_hours", "Generated_workload_email_hours", "Total_focus_hours",
|
||||
"Internal_network_size", "Networking_outside_organization", "External_network_size",
|
||||
"Networking_outside_company"),
|
||||
return = "plot",
|
||||
low = rgb2hex(7, 111, 161),
|
||||
mid = rgb2hex(241, 204, 158),
|
||||
|
|
|
@ -27,13 +27,7 @@ network_leiden(
|
|||
|
||||
\item{bg_fill}{String to specify background fill colour.}
|
||||
|
||||
\item{font_col}{String to specify font and link colour.}
|
||||
|
||||
\item{algorithm}{String to specify the node placement algorithm to be used.
|
||||
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
|
||||
nodes. See
|
||||
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
|
||||
list of options.}
|
||||
\item{font_col}{String to specify font colour.}
|
||||
|
||||
\item{path}{File path for saving the PDF output. Defaults to a timestamped
|
||||
path based on current parameters.}
|
||||
|
@ -41,16 +35,10 @@ path based on current parameters.}
|
|||
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency
|
||||
of the nodes. Defaults to 0.7.}
|
||||
|
||||
\item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
|
||||
to 0.5.}
|
||||
|
||||
\item{seed}{Seed for the random number generator passed to either
|
||||
\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
|
||||
the Leiden algorithm is used, to ensure consistency. Only applicable when
|
||||
\code{display} is set to \code{"louvain"} or \code{"leiden"}.}
|
||||
|
||||
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes
|
||||
to use when returning the \code{"describe"} output. See \code{network_describe()}.}
|
||||
\code{set.seed()} when the louvain or leiden community detection algorithm is
|
||||
used, to ensure consistency. Only applicable when \code{community} is set to
|
||||
one of the valid non-null values.}
|
||||
|
||||
\item{return}{String specifying what output to return. Defaults to "plot-leiden". Valid
|
||||
return options include:
|
||||
|
@ -70,12 +58,6 @@ identified communities. The first data frame is a summary table of all the
|
|||
communities.
|
||||
\item \code{'network'}: return 'igraph' object.
|
||||
}}
|
||||
|
||||
\item{size_threshold}{Numeric value representing the maximum number of edges
|
||||
before \code{network_leiden()} switches to use a more efficient, but less
|
||||
elegant plotting method (native igraph). Defaults to 5000. Set as \code{0} to
|
||||
coerce to a fast plotting method every time, and \code{Inf} to always use the
|
||||
default plotting method (with 'ggraph').}
|
||||
}
|
||||
\value{
|
||||
See \code{return}.
|
||||
|
|
|
@ -25,23 +25,14 @@ network_louvain(
|
|||
|
||||
\item{bg_fill}{String to specify background fill colour.}
|
||||
|
||||
\item{font_col}{String to specify font and link colour.}
|
||||
\item{font_col}{String to specify font colour.}
|
||||
|
||||
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency
|
||||
of the nodes. Defaults to 0.7.}
|
||||
|
||||
\item{algorithm}{String to specify the node placement algorithm to be used.
|
||||
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
|
||||
nodes. See
|
||||
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
|
||||
list of options.}
|
||||
|
||||
\item{path}{File path for saving the PDF output. Defaults to a timestamped
|
||||
path based on current parameters.}
|
||||
|
||||
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes
|
||||
to use when returning the \code{"describe"} output. See \code{network_describe()}.}
|
||||
|
||||
\item{return}{String specifying what output to return. Defaults to "plot-louvain". Valid
|
||||
return options include:
|
||||
\itemize{
|
||||
|
@ -60,12 +51,6 @@ identified communities. The first data frame is a summary table of all the
|
|||
communities.
|
||||
\item \code{'network'}: return 'igraph' object.
|
||||
}}
|
||||
|
||||
\item{size_threshold}{Numeric value representing the maximum number of edges
|
||||
before \code{network_leiden()} switches to use a more efficient, but less
|
||||
elegant plotting method (native igraph). Defaults to 5000. Set as \code{0} to
|
||||
coerce to a fast plotting method every time, and \code{Inf} to always use the
|
||||
default plotting method (with 'ggraph').}
|
||||
}
|
||||
\value{
|
||||
See \code{return}.
|
||||
|
|
|
@ -2,26 +2,28 @@
|
|||
% Please edit documentation in R/network_p2p.R
|
||||
\name{network_p2p}
|
||||
\alias{network_p2p}
|
||||
\title{Create a network plot with the person-to-person query}
|
||||
\title{Perform network analysis with the person-to-person query}
|
||||
\usage{
|
||||
network_p2p(
|
||||
data,
|
||||
hrvar = "Organization",
|
||||
display = "hrvar",
|
||||
return = "plot",
|
||||
path = paste0("network_p2p_", display),
|
||||
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
|
||||
centrality = NULL,
|
||||
community = NULL,
|
||||
weight = NULL,
|
||||
comm_args = NULL,
|
||||
layout = "mds",
|
||||
path = paste("p2p", NULL, sep = "_"),
|
||||
style = "igraph",
|
||||
bg_fill = "#FFFFFF",
|
||||
font_col = "grey20",
|
||||
legend_pos = "bottom",
|
||||
legend_pos = "right",
|
||||
palette = "rainbow",
|
||||
node_alpha = 0.7,
|
||||
edge_alpha = 1,
|
||||
res = 0.5,
|
||||
seed = 1,
|
||||
algorithm = "mds",
|
||||
size_threshold = 5000,
|
||||
weight = "StrongTieScore"
|
||||
edge_col = "#777777",
|
||||
node_sizes = c(1, 20),
|
||||
seed = 1
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
|
@ -29,43 +31,86 @@ network_p2p(
|
|||
|
||||
\item{hrvar}{String containing the label for the HR attribute.}
|
||||
|
||||
\item{display}{String determining what output to return. Valid values
|
||||
include:
|
||||
\itemize{
|
||||
\item \code{"hrvar"} (default): compute analysis or visuals without computing
|
||||
communities.
|
||||
\item \code{"louvain"}: compute analysis or visuals with community detection, using
|
||||
the Louvain algorithm.
|
||||
\item \code{"leiden"}: compute analysis or visuals with community detection, using
|
||||
the Leiden algorithm. This requires all the pre-requisites of the
|
||||
\strong{leiden} package installed, which includes Python and \strong{reticulate}.
|
||||
}}
|
||||
|
||||
\item{return}{String specifying what output to return. This must be one of the
|
||||
following strings:
|
||||
\item{return}{A different output is returned depending on the value passed to the \code{return}
|
||||
argument:
|
||||
\itemize{
|
||||
\item \code{'plot'} (default)
|
||||
\item \code{'plot-pdf'}
|
||||
\item \code{'sankey'}
|
||||
\item \code{'table'}
|
||||
\item \code{'data'}
|
||||
\item \code{'describe'}
|
||||
\item \code{'network'}
|
||||
}}
|
||||
|
||||
\item{centrality}{String determining which centrality measure is used to
|
||||
scale the size of the nodes. All centrality measures are automatically
|
||||
calculated when it is set to one of the below values, and reflected in the
|
||||
\code{'network'} and \code{'data'} outputs.
|
||||
Measures include:
|
||||
\itemize{
|
||||
\item \code{betweenness}
|
||||
\item \code{closeness}
|
||||
\item \code{degree}
|
||||
\item \code{eigenvector}
|
||||
\item \code{pagerank}
|
||||
}
|
||||
|
||||
See \code{Value} for more information.}
|
||||
When \code{centrality} is set to NULL, no centrality is calculated in the outputs
|
||||
and all the nodes would have the same size.}
|
||||
|
||||
\item{community}{String determining which community detection algorithms to
|
||||
apply. Valid values include:
|
||||
\itemize{
|
||||
\item \code{NULL} (default): compute analysis or visuals without computing
|
||||
communities.
|
||||
\item \code{"louvain"}
|
||||
\item \code{"leiden"}
|
||||
\item \code{"edge_betweenness"}
|
||||
\item \code{"fast_greedy"}
|
||||
\item \code{"fluid_communities"}
|
||||
\item \code{"infomap"}
|
||||
\item \code{"label_prop"}
|
||||
\item \code{"leading_eigen"}
|
||||
\item \code{"optimal"}
|
||||
\item \code{"spinglass"}
|
||||
\item \code{"walk_trap"}
|
||||
}
|
||||
|
||||
These values map to the community detection algorithms offered by \code{igraph}.
|
||||
For instance, \code{"leiden"} is based on \code{igraph::cluster_leiden()}. Please see
|
||||
the bottom of \url{https://igraph.org/r/html/1.3.0/cluster_leiden.html} on all
|
||||
applications and parameters of these algorithms.
|
||||
}
|
||||
|
||||
\item{weight}{String to specify which column to use as weights for the
|
||||
network. To create a graph without weights, supply \code{NULL} to this argument.}
|
||||
|
||||
\item{comm_args}{list containing the arguments to be passed through to
|
||||
igraph's clustering algorithms. Arguments must be named. See examples
|
||||
section on how to supply arguments in a named list.}
|
||||
|
||||
\item{layout}{String to specify the node placement algorithm to be used.
|
||||
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
|
||||
nodes. See
|
||||
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
|
||||
list of options.}
|
||||
|
||||
\item{path}{File path for saving the PDF output. Defaults to a timestamped
|
||||
path based on current parameters.}
|
||||
|
||||
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes
|
||||
to use when returning the \code{"describe"} output. See \code{network_describe()}.}
|
||||
\item{style}{String to specify which plotting style to use for the network
|
||||
plot. Valid values include:
|
||||
\itemize{
|
||||
\item \code{"igraph"}
|
||||
\item \code{"ggraph"}
|
||||
}}
|
||||
|
||||
\item{bg_fill}{String to specify background fill colour.}
|
||||
|
||||
\item{font_col}{String to specify font and link colour.}
|
||||
\item{font_col}{String to specify font colour.}
|
||||
|
||||
\item{legend_pos}{String to specify position of legend. Defaults to
|
||||
\code{"bottom"}. See \code{ggplot2::theme()}. This is applicable for both the
|
||||
\code{"right"}. See \code{ggplot2::theme()}. This is applicable for both the
|
||||
'ggraph' and the fast plotting method. Valid inputs include:
|
||||
\itemize{
|
||||
\item \code{"bottom"}
|
||||
|
@ -74,8 +119,8 @@ to use when returning the \code{"describe"} output. See \code{network_describe()
|
|||
-\code{"right"}
|
||||
}}
|
||||
|
||||
\item{palette}{Function for generating a colour palette with a single
|
||||
argument \code{n}. Uses "rainbow" by default.}
|
||||
\item{palette}{String specifying the function to generate a colour palette
|
||||
with a single argument \code{n}. Uses \code{"rainbow"} by default.}
|
||||
|
||||
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency
|
||||
of the nodes. Defaults to 0.7.}
|
||||
|
@ -83,46 +128,32 @@ of the nodes. Defaults to 0.7.}
|
|||
\item{edge_alpha}{A numeric value between 0 and 1 to specify the transparency
|
||||
of the edges (only for 'ggraph' mode). Defaults to 1.}
|
||||
|
||||
\item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
|
||||
to 0.5.}
|
||||
\item{edge_col}{String to specify edge link colour.}
|
||||
|
||||
\item{node_sizes}{Numeric vector of length two to specify the range of node
|
||||
sizes to rescale to, when \code{centrality} is set to a non-null value.}
|
||||
|
||||
\item{seed}{Seed for the random number generator passed to either
|
||||
\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
|
||||
the Leiden algorithm is used, to ensure consistency. Only applicable when
|
||||
\code{display} is set to \code{"louvain"} or \code{"leiden"}.}
|
||||
|
||||
\item{algorithm}{String to specify the node placement algorithm to be used.
|
||||
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
|
||||
nodes. See
|
||||
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
|
||||
list of options.}
|
||||
|
||||
\item{size_threshold}{Numeric value representing the maximum number of edges
|
||||
before \code{network_leiden()} switches to use a more efficient, but less
|
||||
elegant plotting method (native igraph). Defaults to 5000. Set as \code{0} to
|
||||
coerce to a fast plotting method every time, and \code{Inf} to always use the
|
||||
default plotting method (with 'ggraph').}
|
||||
|
||||
\item{weight}{String to specify which column to use as weights for the
|
||||
network. Defaults to \verb{"StrongTieScore"}. To create a graph without weights,
|
||||
supply \code{NULL} to this argument.}
|
||||
\code{set.seed()} when the louvain or leiden community detection algorithm is
|
||||
used, to ensure consistency. Only applicable when \code{community} is set to
|
||||
one of the valid non-null values.}
|
||||
}
|
||||
\value{
|
||||
A different output is returned depending on the value passed to the \code{return}
|
||||
argument:
|
||||
\itemize{
|
||||
\item \code{'plot'}: return a network plot.
|
||||
\item \code{'plot'}: return a network plot, interactively within R.
|
||||
\item \code{'plot-pdf'}: save a network plot as PDF. This option is recommended when
|
||||
the graph is large, which may take a long time to run if \code{return = 'plot'}
|
||||
is selected. Use this together with \code{path} to control the save location.
|
||||
\item \code{'sankey'}: return a sankey plot combining communities and HR attribute.
|
||||
This is only valid if a community detection method is selected at
|
||||
\code{display}.
|
||||
\code{community}.
|
||||
\item \code{'table'}: return a vertex summary table with counts in communities and
|
||||
HR attribute.
|
||||
HR attribute. When \code{centrality} is non-NULL, the average centrality values
|
||||
are calculated per group.
|
||||
\item \code{'data'}: return a vertex data file that matches vertices with
|
||||
communities and HR attributes.
|
||||
\item \code{'describe'}: return a list of data frames which describe each of the
|
||||
identified communities. The first data frame is a summary table of all the
|
||||
communities. This is only valid if a community detection method is selected
|
||||
at \code{display}.
|
||||
\item \code{'network'}: return 'igraph' object.
|
||||
}
|
||||
}
|
||||
|
@ -134,61 +165,52 @@ and analysis output options. Pass a data frame containing a person-to-person
|
|||
query and return a network visualization. Options are available for community
|
||||
detection using either the Louvain or the Leiden algorithms.
|
||||
}
|
||||
\section{Running Leiden communities}{
|
||||
|
||||
|
||||
Running Leiden communities requires python dependencies installed.
|
||||
You can run the following:
|
||||
|
||||
\if{html}{\out{<div class="sourceCode R">}}\preformatted{# Return a network plot to console, coloured by Leiden communities
|
||||
p2p_data \%>\%
|
||||
network_p2p(display = "leiden",
|
||||
path = NULL,
|
||||
return = "plot")
|
||||
}\if{html}{\out{</div>}}
|
||||
|
||||
When installing the 'leiden' package, you may be required to install the Python
|
||||
libraries 'python-igraph' and 'leidenalg'. You can install them with:
|
||||
|
||||
\if{html}{\out{<div class="sourceCode R">}}\preformatted{reticulate::py_install("python-igraph")
|
||||
reticulate::py_install("leidenalg")
|
||||
}\if{html}{\out{</div>}}
|
||||
}
|
||||
|
||||
\examples{
|
||||
# Simulate a small person-to-person dataset
|
||||
p2p_data <- p2p_data_sim(size = 50)
|
||||
p2p_df <- p2p_data_sim(dim = 1, size = 100)
|
||||
|
||||
# Return a network plot to console, coloured by hrvar
|
||||
p2p_data \%>\%
|
||||
network_p2p(display = "hrvar",
|
||||
path = NULL,
|
||||
return = "plot")
|
||||
# default - ggraph visual
|
||||
network_p2p(data = p2p_df, style = "ggraph")
|
||||
|
||||
# Return a network plot to console, coloured by Louvain communities
|
||||
p2p_data \%>\%
|
||||
network_p2p(display = "louvain",
|
||||
path = NULL,
|
||||
return = "plot")
|
||||
# return vertex table
|
||||
network_p2p(data = p2p_df, return = "table")
|
||||
|
||||
# return vertex table with community detection
|
||||
network_p2p(data = p2p_df, community = "leiden", return = "table")
|
||||
|
||||
# Return a network plot to console
|
||||
# Coloured by Leiden communities
|
||||
# Using Fruchterman-Reingold force-directed layout algorithm
|
||||
# Force the use of fast plotting method
|
||||
p2p_data \%>\%
|
||||
network_p2p(display = "hrvar",
|
||||
path = NULL,
|
||||
return = "plot",
|
||||
algorithm = "lgl",
|
||||
size_threshold = 0)
|
||||
# leiden - igraph style with custom resolution parameters
|
||||
network_p2p(data = p2p_df, community = "leiden", comm_args = list("resolution" = 0.1))
|
||||
|
||||
# Return a data frame matching HR variable and communities to nodes
|
||||
# Using Louvain communities
|
||||
p2p_data \%>\%
|
||||
network_p2p(display = "louvain",
|
||||
return = "data",
|
||||
algorithm = "fr")
|
||||
# louvain - ggraph style, using custom palette
|
||||
network_p2p(
|
||||
data = p2p_df,
|
||||
style = "ggraph",
|
||||
community = "louvain",
|
||||
palette = "heat_colors"
|
||||
)
|
||||
|
||||
# leiden - return a sankey visual with custom resolution parameters
|
||||
network_p2p(
|
||||
data = p2p_df,
|
||||
community = "leiden",
|
||||
return = "sankey",
|
||||
comm_args = list("resolution" = 0.1)
|
||||
)
|
||||
|
||||
# using `fluid_communities` algorithm with custom parameters
|
||||
network_p2p(
|
||||
data = p2p_df,
|
||||
community = "fluid_communities",
|
||||
comm_args = list("no.of.communities" = 5)
|
||||
)
|
||||
|
||||
# Calculate centrality measures and leiden communities, return at node level
|
||||
network_p2p(
|
||||
data = p2p_df,
|
||||
centrality = "betweenness",
|
||||
community = "leiden",
|
||||
return = "data"
|
||||
) \%>\%
|
||||
dplyr::glimpse()
|
||||
|
||||
}
|
||||
\seealso{
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
network_p2p_test(
|
||||
data,
|
||||
hrvar,
|
||||
centrality = FALSE,
|
||||
community = FALSE,
|
||||
centrality = NULL,
|
||||
community = NULL,
|
||||
return,
|
||||
bg_fill = "#FFFFFF",
|
||||
font_col = "grey20",
|
||||
|
|
Загрузка…
Ссылка в новой задаче