feat: refresh network_p2p and remove dependent functions

This commit is contained in:
Martin Chan 2023-08-16 11:39:23 +01:00
Родитель fd68a1d82b
Коммит 3576f05e48
10 изменённых файлов: 582 добавлений и 995 удалений

Просмотреть файл

@ -211,9 +211,6 @@ importFrom(dplyr,`%>%`)
importFrom(dplyr,across)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_if)
importFrom(grDevices,rainbow)
importFrom(graphics,legend)
importFrom(graphics,par)
importFrom(htmltools,HTML)
importFrom(igraph,graph_from_data_frame)
importFrom(magrittr,"%>%")

Просмотреть файл

@ -1,105 +0,0 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Run Leiden community detection on a Person to Person network query
#'
#' @description
#' `r lifecycle::badge('experimental')`
#'
#' Wrapper around `network_p2p()` which applies the Leiden community detection
#' method to a P2P network query. To run this function, you will require all
#' the pre-requisites of the 'leiden' package installed, which includes Python
#' and 'reticulate'.
#'
#' @inheritParams network_p2p
#'
#' @param return
#' String specifying what output to return. Defaults to "plot-leiden". Valid
#' return options include:
#'   - `'plot-leiden'`: return a network plot coloured by leiden communities,
#'   saving a PDF to path.
#'   - `'plot-hrvar'`: return a network plot coloured by HR attribute, saving a
#'   PDF to path.
#'   - `'plot-sankey'`: return a sankey plot combining communities and HR
#'   attribute.
#'   - `'table'`: return a vertex summary table with counts in communities and
#'   HR attribute.
#'   - `'data'`: return a vertex data file that matches vertices with
#'   communities and HR attributes.
#'   - `'describe'`: return a list of data frames which describe each of the
#'   identified communities. The first data frame is a summary table of all
#'   the communities.
#'   - `'network'`: return 'igraph' object.
#'
#' Any `"plot-"` prefix is removed before the value is handed to
#' `network_p2p()`. Values without the prefix (e.g. `"plot"`) are forwarded
#' unchanged, with `display` set to `"leiden"`.
#'
#' @return See `return`.
#'
#' @family Network
#'
#' @section Simulating and running Leiden Community Detection:
#'
#' Below is an example on how to simulate a network and run the function.
#'
#' ```
#' # Simulate a small person-to-person dataset
#' p2p_data <- p2p_data_sim(size = 50)
#'
#' # Return leiden, console, plot
#' p2p_data %>%
#'   network_leiden(path = NULL,
#'                  return = "plot")
#' ```
#'
#'
#' @export
network_leiden <- function(data,
                           hrvar = "Organization",
                           bg_fill = "#000000",
                           font_col = "#FFFFFF",
                           algorithm = "mds",
                           path = "network_p2p_leiden",
                           node_alpha = 0.8,
                           res = 0.5,
                           seed = 1,
                           desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
                           return = "plot-leiden",
                           size_threshold = 5000) {

  # Backward compatibility: legacy `return` values carry a "plot-" marker,
  # which is removed when present.
  legacy_marker <- "plot-"
  has_marker <- grepl(pattern = legacy_marker, x = return)

  output <- if (has_marker) {
    gsub(pattern = legacy_marker, replacement = "", x = return)
  } else {
    return
  }

  # "plot-leiden" / "plot-hrvar" select what the plot is coloured by; any
  # other value keeps the Leiden display and is forwarded as the output type.
  picks_display <- has_marker && output %in% c("leiden", "hrvar")
  display <- if (picks_display) output else "leiden"
  if (picks_display) {
    output <- "plot"
  }

  # Delegate all the work to `network_p2p()`
  network_p2p(
    data = data,
    hrvar = hrvar,
    display = display,
    return = output,
    path = path,
    desc_hrvar = desc_hrvar,
    bg_fill = bg_fill,
    font_col = font_col,
    node_alpha = node_alpha,
    res = res,   # Leiden specific
    seed = seed, # Leiden specific
    algorithm = algorithm,
    size_threshold = size_threshold
  )
}

Просмотреть файл

@ -1,95 +0,0 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Run Louvain community detection on a Person to Person network query
#'
#' @description
#' `r lifecycle::badge('experimental')`
#'
#' Wrapper around `network_p2p()` which applies the Louvain community
#' detection method to a P2P network query. The 'igraph' implementation of the
#' Louvain method is used.
#'
#' @inheritParams network_p2p
#'
#' @param return
#' String specifying what output to return. Defaults to "plot-louvain". Valid
#' return options include:
#'   - `'plot-louvain'`: return a network plot coloured by Louvain communities,
#'   saving a PDF to path.
#'   - `'plot-hrvar'`: return a network plot coloured by HR attribute, saving a
#'   PDF to path.
#'   - `'plot-sankey'`: return a sankey plot combining communities and HR
#'   attribute.
#'   - `'table'`: return a vertex summary table with counts in communities and
#'   HR attribute.
#'   - `'data'`: return a vertex data file that matches vertices with
#'   communities and HR attributes.
#'   - `'describe'`: return a list of data frames which describe each of the
#'   identified communities. The first data frame is a summary table of all
#'   the communities.
#'   - `'network'`: return 'igraph' object.
#'
#' Any `"plot-"` prefix is removed before the value is handed to
#' `network_p2p()`. Values without the prefix (e.g. `"plot"`) are forwarded
#' unchanged, with `display` set to `"louvain"`.
#'
#' @return See `return`.
#'
#' @family Network
#'
#' @examples
#' # Simulate a small person-to-person dataset
#' p2p_data <- p2p_data_sim(size = 50)
#'
#' # Return louvain, console, plot
#' p2p_data %>%
#'   network_louvain(path = NULL,
#'                   return = "plot")
#'
#' @export
network_louvain <- function(data,
                            hrvar = "Organization",
                            bg_fill = "#000000",
                            font_col = "#FFFFFF",
                            node_alpha = 0.8,
                            algorithm = "mds",
                            path = "network_p2p_louvain",
                            desc_hrvar = c("Organization",
                                           "LevelDesignation",
                                           "FunctionType"),
                            return = "plot-louvain",
                            size_threshold = 5000) {

  # Backward compatibility: detect the legacy "plot-" marker in `return`
  is_legacy <- grepl(pattern = "plot-", x = return)

  # Output type with the marker removed (untouched when there is no marker)
  out_type <- return
  if (is_legacy) {
    out_type <- gsub(pattern = "plot-", replacement = "", x = out_type)
  }

  # "plot-louvain" / "plot-hrvar" choose the plot colouring; all other values
  # keep the Louvain display and are forwarded as the output type.
  if (is_legacy && out_type %in% c("louvain", "hrvar")) {
    colour_by <- out_type
    out_type <- "plot"
  } else {
    colour_by <- "louvain"
  }

  # Delegate all the work to `network_p2p()`
  network_p2p(
    data = data,
    hrvar = hrvar,
    display = colour_by,
    return = out_type,
    path = path,
    desc_hrvar = desc_hrvar,
    bg_fill = bg_fill,
    font_col = font_col,
    node_alpha = node_alpha,
    algorithm = algorithm,
    size_threshold = size_threshold
  )
}

Просмотреть файл

@ -3,327 +3,364 @@
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Create a network plot with the person-to-person query
#' @title Perform network analysis with the person-to-person query
#'
#' @description
#' `r lifecycle::badge('experimental')`
#'
#'
#' Analyse a person-to-person (P2P) network query, with multiple visualisation
#' and analysis output options. Pass a data frame containing a person-to-person
#' query and return a network visualization. Options are available for community
#' detection using either the Louvain or the Leiden algorithms.
#'
#'
#' @param data Data frame containing a person-to-person query.
#' @param hrvar String containing the label for the HR attribute.
#' @param display String determining what output to return. Valid values
#' include:
#' - `"hrvar"` (default): compute analysis or visuals without computing
#' communities.
#' - `"louvain"`: compute analysis or visuals with community detection, using
#' the Louvain algorithm.
#' - `"leiden"`: compute analysis or visuals with community detection, using
#' the Leiden algorithm. This requires all the pre-requisites of the
#' **leiden** package installed, which includes Python and **reticulate**.
#'
#' @param return String specifying what output to return. This must be one of the
#' following strings:
#' @param return
#' A different output is returned depending on the value passed to the `return`
#' argument:
#' - `'plot'` (default)
#' - `'plot-pdf'`
#' - `'sankey'`
#' - `'table'`
#' - `'data'`
#' - `'describe'`
#' - `'network'`
#' @param centrality String determining which centrality measure is used to
#' scale the size of the nodes. All centrality measures are automatically
#' calculated when it is set to one of the below values, and reflected in the
#' `'network'` and `'data'` outputs.
#' Measures include:
#' - `betweenness`
#' - `closeness`
#' - `degree`
#' - `eigenvector`
#' - `pagerank`
#'
#' See `Value` for more information.
#' When `centrality` is set to NULL, no centrality is calculated in the outputs
#' and all the nodes would have the same size.
#'
#' @param community String determining which community detection algorithms to
#' apply. Valid values include:
#' - `NULL` (default): compute analysis or visuals without computing
#' communities.
#' - `"louvain"`
#' - `"leiden"`
#' - `"edge_betweenness"`
#' - `"fast_greedy"`
#' - `"fluid_communities"`
#' - `"infomap"`
#' - `"label_prop"`
#' - `"leading_eigen"`
#' - `"optimal"`
#' - `"spinglass"`
#' - `"walk_trap"`
#'
#' These values map to the community detection algorithms offered by `igraph`.
#' For instance, `"leiden"` is based on `igraph::cluster_leiden()`. Please see
#' the bottom of <https://igraph.org/r/html/1.3.0/cluster_leiden.html> on all
#' applications and parameters of these algorithms.
#'
#' @param weight String to specify which column to use as weights for the
#' network. To create a graph without weights, supply `NULL` to this argument.
#' @param comm_args list containing the arguments to be passed through to
#' igraph's clustering algorithms. Arguments must be named. See examples
#' section on how to supply arguments in a named list.
#' @param layout String to specify the node placement algorithm to be used.
#' Defaults to `"mds"` for the deterministic multi-dimensional scaling of
#' nodes. See
#' <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full
#' list of options.
#' @param path File path for saving the PDF output. Defaults to a timestamped
#' path based on current parameters.
#' @param desc_hrvar Character vector of length 3 containing the HR attributes
#' to use when returning the `"describe"` output. See `network_describe()`.
#' @param style String to specify which plotting style to use for the network
#' plot. Valid values include:
#' - `"igraph"`
#' - `"ggraph"`
#' @param bg_fill String to specify background fill colour.
#' @param font_col String to specify font and link colour.
#' @param font_col String to specify font colour.
#' @param legend_pos String to specify position of legend. Defaults to
#' `"bottom"`. See `ggplot2::theme()`. This is applicable for both the
#' `"right"`. See `ggplot2::theme()`. This is applicable for both the
#' 'ggraph' and the fast plotting method. Valid inputs include:
#' - `"bottom"`
#' - `"top"`
#' - `"left"`
#' - `"right"`
#'
#' @param palette Function for generating a colour palette with a single
#' argument `n`. Uses "rainbow" by default.
#' @param palette String specifying the function to generate a colour palette
#' with a single argument `n`. Uses `"rainbow"` by default.
#' @param node_alpha A numeric value between 0 and 1 to specify the transparency
#' of the nodes. Defaults to 0.7.
#' @param edge_alpha A numeric value between 0 and 1 to specify the transparency
#' of the edges (only for 'ggraph' mode). Defaults to 1.
#' @param res Resolution parameter to be passed to `leiden::leiden()`. Defaults
#' to 0.5.
#' @param edge_col String to specify edge link colour.
#' @param node_sizes Numeric vector of length two to specify the range of node
#' sizes to rescale to, when `centrality` is set to a non-null value.
#' @param seed Seed for the random number generator passed to either
#' `set.seed()` when the Louvain algorithm is used, or `leiden::leiden()` when
#' the Leiden algorithm is used, to ensure consistency. Only applicable when
#' `display` is set to `"louvain"` or `"leiden"`.
#' @param algorithm String to specify the node placement algorithm to be used.
#' Defaults to `"mds"` for the deterministic multi-dimensional scaling of
#' nodes. See
#' <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full
#' list of options.
#' @param size_threshold Numeric value representing the maximum number of edges
#' before `network_leiden()` switches to use a more efficient, but less
#' elegant plotting method (native igraph). Defaults to 5000. Set as `0` to
#' coerce to a fast plotting method every time, and `Inf` to always use the
#' default plotting method (with 'ggraph').
#' @param weight String to specify which column to use as weights for the
#' network. Defaults to `"StrongTieScore"`. To create a graph without weights,
#' supply `NULL` to this argument.
#' `set.seed()` when the louvain or leiden community detection algorithm is
#' used, to ensure consistency. Only applicable when `community` is set to
#' one of the valid non-null values.
#'
#' @return
#' A different output is returned depending on the value passed to the `return`
#' argument:
#' - `'plot'`: return a network plot.
#' - `'plot'`: return a network plot, interactively within R.
#' - `'plot-pdf'`: save a network plot as PDF. This option is recommended when
#' the graph is large, which may take a long time to run if `return = 'plot'`
#' is selected. Use this together with `path` to control the save location.
#' - `'sankey'`: return a sankey plot combining communities and HR attribute.
#' This is only valid if a community detection method is selected at
#' `display`.
#' `community`.
#' - `'table'`: return a vertex summary table with counts in communities and
#' HR attribute.
#' HR attribute. When `centrality` is non-NULL, the average centrality values
#' are calculated per group.
#' - `'data'`: return a vertex data file that matches vertices with
#' communities and HR attributes.
#' - `'describe'`: return a list of data frames which describe each of the
#' identified communities. The first data frame is a summary table of all the
#' communities. This is only valid if a community detection method is selected
#' at `display`.
#' - `'network'`: return 'igraph' object.
#'
#' @family Network
#'
#' @examples
#' # Simulate a small person-to-person dataset
#' p2p_data <- p2p_data_sim(size = 50)
#' p2p_df <- p2p_data_sim(dim = 1, size = 100)
#'
#' # Return a network plot to console, coloured by hrvar
#' p2p_data %>%
#' network_p2p(display = "hrvar",
#' path = NULL,
#' return = "plot")
#' # default - ggraph visual
#' network_p2p(data = p2p_df, style = "ggraph")
#'
#' # Return a network plot to console, coloured by Louvain communities
#' p2p_data %>%
#' network_p2p(display = "louvain",
#' path = NULL,
#' return = "plot")
#' # return vertex table
#' network_p2p(data = p2p_df, return = "table")
#'
#' # return vertex table with community detection
#' network_p2p(data = p2p_df, community = "leiden", return = "table")
#'
#' # Return a network plot to console
#' # Coloured by Leiden communities
#' # Using Fruchterman-Reingold force-directed layout algorithm
#' # Force the use of fast plotting method
#' p2p_data %>%
#' network_p2p(display = "hrvar",
#' path = NULL,
#' return = "plot",
#' algorithm = "lgl",
#' size_threshold = 0)
#' # leiden - igraph style with custom resolution parameters
#' network_p2p(data = p2p_df, community = "leiden", comm_args = list("resolution" = 0.1))
#'
#' # Return a data frame matching HR variable and communities to nodes
#' # Using Louvain communities
#' p2p_data %>%
#' network_p2p(display = "louvain",
#' return = "data",
#' algorithm = "fr")
#' # louvain - ggraph style, using custom palette
#' network_p2p(
#' data = p2p_df,
#' style = "ggraph",
#' community = "louvain",
#' palette = "heat_colors"
#' )
#'
#' @section Running Leiden communities:
#' # leiden - return a sankey visual with custom resolution parameters
#' network_p2p(
#' data = p2p_df,
#' community = "leiden",
#' return = "sankey",
#' comm_args = list("resolution" = 0.1)
#' )
#'
#' Running Leiden communities requires python dependencies installed.
#' You can run the following:
#' # using `fluid_communities` algorithm with custom parameters
#' network_p2p(
#' data = p2p_df,
#' community = "fluid_communities",
#' comm_args = list("no.of.communities" = 5)
#' )
#'
#' ```R
#' # Return a network plot to console, coloured by Leiden communities
#' p2p_data %>%
#' network_p2p(display = "leiden",
#' path = NULL,
#' return = "plot")
#' ```
#' When installing the 'leiden' package, you may be required to install the Python
#' libraries 'python-igraph' and 'leidenalg'. You can install them with:
#'
#' ```R
#' reticulate::py_install("python-igraph")
#' reticulate::py_install("leidenalg")
#' ```
#' # Calculate centrality measures and leiden communities, return at node level
#' network_p2p(
#' data = p2p_df,
#' centrality = "betweenness",
#' community = "leiden",
#' return = "data"
#' ) %>%
#' dplyr::glimpse()
#'
#' @import ggplot2
#' @import dplyr
#' @importFrom grDevices rainbow
#' @importFrom graphics legend
#' @importFrom graphics par
#'
#' @export
network_p2p <- function(data,
hrvar = "Organization",
display = "hrvar",
return = "plot",
path = paste0("network_p2p_", display),
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
bg_fill = "#FFFFFF",
font_col = "grey20",
legend_pos = "bottom",
palette = "rainbow",
node_alpha = 0.7,
edge_alpha = 1,
res = 0.5,
seed = 1,
algorithm = "mds",
size_threshold = 5000,
weight = "StrongTieScore"){
## Set edges df
if(is.null(weight)){
network_p2p <-
function(
data,
hrvar = "Organization",
return = "plot",
centrality = NULL,
community = NULL,
weight = NULL,
comm_args = NULL,
layout = "mds",
path = paste("p2p", NULL, sep = "_"),
style = "igraph",
bg_fill = "#FFFFFF",
font_col = "grey20",
legend_pos = "right",
palette = "rainbow",
node_alpha = 0.7,
edge_alpha = 1,
edge_col = "#777777",
node_sizes = c(1, 20),
seed = 1
){
edges <-
data %>%
mutate(NoWeight = 1) %>% # No weight
select(from = "TieOrigin_PersonId",
to = "TieDestination_PersonId",
weight = "NoWeight")
if(length(node_sizes) != 2){
stop("`node_sizes` must be of length 2")
}
} else {
## Set data frame for edges
if(is.null(weight)){
edges <-
data %>%
select(from = "TieOrigin_PersonId",
to = "TieDestination_PersonId",
weight = weight)
edges <-
data %>%
mutate(NoWeight = 1) %>% # No weight
select(from = "TieOrigin_PersonId",
to = "TieDestination_PersonId",
weight = "NoWeight")
}
} else {
## Set variables
TO_hrvar <- paste0("TieOrigin_", hrvar)
TD_hrvar <- paste0("TieDestination_", hrvar)
edges <-
data %>%
select(from = "TieOrigin_PersonId",
to = "TieDestination_PersonId",
weight = weight)
## Vertices data frame to provide meta-data
vert_ft <-
rbind(
# TieOrigin
edges %>%
select(from) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieOrigin_PersonId, TO_hrvar),
by = c("from" = "TieOrigin_PersonId")) %>%
select(node = "from", !!sym(hrvar) := TO_hrvar),
}
# TieDestination
edges %>%
select(to) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieDestination_PersonId, TD_hrvar),
by = c("to" = "TieDestination_PersonId")) %>%
select(node = "to", !!sym(hrvar) := TD_hrvar)
## Set variables
# TieOrigin = PrimaryCollaborator
# TieDestination = SecondaryCollaborator
TO_hrvar <- paste0("TieOrigin_", hrvar)
TD_hrvar <- paste0("TieDestination_", hrvar)
## Vertices data frame to provide meta-data
vert_ft <-
rbind(
# TieOrigin
edges %>%
select(from) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieOrigin_PersonId, TO_hrvar),
by = c("from" = "TieOrigin_PersonId")) %>%
select(node = "from", !!sym(hrvar) := TO_hrvar),
# TieDestination
edges %>%
select(to) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieDestination_PersonId, TD_hrvar),
by = c("to" = "TieDestination_PersonId")) %>%
select(node = "to", !!sym(hrvar) := TD_hrvar)
)
## Create 'igraph' object
g_raw <-
igraph::graph_from_data_frame(edges,
directed = TRUE, # Directed, but FALSE for visualization
vertices = unique(vert_ft)) # remove duplicates
## Assign weights
g_raw$weight <- edges$weight
## allowed `community` values
valid_comm <- c(
"leiden",
"louvain",
"edge_betweenness",
"fast_greedy",
"fluid_communities",
"infomap",
"label_prop",
"leading_eigen",
"optimal",
"spinglass",
"walk_trap"
)
## Finalise `g` object
## If community detection is selected, this is where the communities are appended
if(is.null(community)){ # no community detection
## Create 'igraph' object
g_raw <-
igraph::graph_from_data_frame(edges,
directed = TRUE, # Directed, but FALSE for visualization
vertices = unique(vert_ft)) # remove duplicates
g <- igraph::simplify(g_raw)
v_attr <- hrvar # Name of vertex attribute
## Assign weights
g_raw$weight <- edges$weight
## Finalise `g` object
## If community detection is selected, this is where the communities are appended
if(display == "hrvar"){
g <- g_raw %>% igraph::simplify()
## Name of vertex attribute
v_attr <- hrvar
} else if(display == "louvain"){
} else if(community %in% valid_comm){
set.seed(seed = seed)
g_ud <- igraph::as.undirected(g_raw) # Convert to undirected
## Convert to undirected
g_ud <- igraph::as.undirected(g_raw)
alg_label <- paste0("igraph::cluster_", community)
## Return a numeric vector of partitions / clusters / modules
## Set a low resolution parameter to have fewer groups
## weights = NULL means that if the graph as a `weight` edge attribute, this
## will be used by default.
lc <- igraph::cluster_louvain(g_ud, weights = NULL)
# combine arguments to clustering algorithm
c_comm_args <- c(list("graph" = g_ud), comm_args)
# output `communities` object
comm_out <- do.call(eval(parse(text = alg_label)), c_comm_args)
## Add cluster
g <-
g_ud %>%
# Add louvain partitions to graph object
igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
# Add partitions to graph object
# Return membership
igraph::set_vertex_attr(
"cluster",
value = as.character(igraph::membership(comm_out))) %>%
igraph::simplify()
## Name of vertex attribute
v_attr <- "cluster"
} else if(display == "leiden"){
} else {
# Check package installation
check_pkg_installed(pkgname = "leiden")
stop("Please enter a valid input for `community`.")
## Return a numeric vector of partitions / clusters / modules
## Set a low resolution parameter to have fewer groups
ld <- leiden::leiden(
g_raw,
resolution_parameter = res,
seed = seed,
weights = g_raw$weight) # create partitions
## Add cluster
g <-
g_raw %>%
# Add leiden partitions to graph object
igraph::set_vertex_attr("cluster", value = as.character(ld)) %>%
igraph::simplify()
## Name of vertex attribute
v_attr <- "cluster"
} else {
stop("Please enter a valid input for `display`.")
}
}
# Common area -------------------------------------------------------------
# centrality calculations -------------------------------------------------
# attach centrality calculations if `centrality` is not NULL
## Create vertex table
vertex_tb <-
g %>%
igraph::get.vertex.attribute() %>%
as_tibble()
if(!is.null(centrality)){
## Set layout for graph
g_layout <-
g %>%
ggraph::ggraph(layout = "igraph", algorithm = algorithm)
g <- network_summary(g, return = "network")
## Timestamped File Path
out_path <- paste0(path, "_", tstamp(), ".pdf")
igraph::V(g)$node_size <-
igraph::get.vertex.attribute(
g,
name = centrality # from argument
) %>%
scales::rescale(to = node_sizes) # min and max value
# Return ------------------------------------------------------------------
} else {
if(return == "plot"){
# all nodes with the same size if centrality is not calculated
# adjust for plotting formats
if(style == "igraph"){
igraph::V(g)$node_size <- rep(3, igraph::vcount(g))
} else if(style == "ggraph"){
igraph::V(g)$node_size <- rep(2.5, igraph::vcount(g))
node_sizes <- c(3, 3) # arbitrarily fix the node size
}
}
# Common area -------------------------------------------------------------
## Create vertex table
vertex_tb <-
g %>%
igraph::get.vertex.attribute() %>%
as_tibble() %>%
select(-node_size) # never show `node_size` in data output
## Set layout for graph
g_layout <-
g %>%
ggraph::ggraph(layout = "igraph", algorithm = layout)
## Timestamped File Path
out_path <- paste0(path, "_", tstamp(), ".pdf")
# Return outputs ----------------------------------------------------------
## Use fast plotting method
if(igraph::ecount(g) > size_threshold){
message("Using fast plot method due to large network size...")
if(return %in% c("plot", "plot-pdf")){
## Set colours
colour_tb <-
tibble(!!sym(v_attr) := unique(igraph::get.vertex.attribute(g, name = v_attr))) %>%
mutate(colour = rainbow(nrow(.))) # No palette choice
mutate(colour = eval(parse(text = paste0(palette,"(nrow(.))")))) # palette choice
## Colour vector
colour_v <-
@ -331,231 +368,217 @@ network_p2p <- function(data,
left_join(colour_tb, by = v_attr) %>%
pull(colour)
## Set graph plot colours
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
igraph::V(g)$frame.color <- NA
igraph::E(g)$width <- 1
if(style == "igraph"){
## Internal basic plotting function used inside `network_p2p()`
plot_basic_graph <- function(lpos = legend_pos){
# message("Using fast plot method due to large network size...")
old_par <- par(no.readonly = TRUE)
on.exit(par(old_par))
## Set graph plot colours
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
igraph::V(g)$frame.color <- NA
igraph::E(g)$width <- 1
par(bg = bg_fill)
## Internal basic plotting function used inside `network_p2p()`
plot_basic_graph <- function(lpos = legend_pos){
layout_text <- paste0("igraph::layout_with_", algorithm)
old_par <- graphics::par(no.readonly = TRUE)
on.exit(graphics::par(old_par))
## Legend position
graphics::par(bg = bg_fill)
if(lpos == "left"){
layout_text <- paste0("igraph::layout_with_", layout)
leg_x <- -1.5
leg_y <- 0.5
## Legend position
} else if(lpos == "right"){
if(lpos == "left"){
leg_x <- 1.5
leg_y <- 0.5
leg_x <- -1.5
leg_y <- 0.5
} else if(lpos == "top"){
} else if(lpos == "right"){
leg_x <- 0
leg_y <- 1.5
leg_x <- 1.5
leg_y <- 0.5
} else if(lpos == "bottom"){
} else if(lpos == "top"){
leg_x <- 0
leg_y <- -1.0
leg_x <- 0
leg_y <- 1.5
} else {
} else if(lpos == "bottom"){
stop("Invalid `legend_pos` input.")
leg_x <- 0
leg_y <- -1.0
} else {
stop("Invalid `legend_pos` input.")
}
graphics::plot(
g,
layout = eval(parse(text = layout_text)),
vertex.label = NA,
# vertex.size = 3,
vertex.size = igraph::V(g)$node_size,
edge.arrow.mode = "-",
edge.color = "#adadad"
)
graphics::legend(x = leg_x,
y = leg_y,
legend = colour_tb[[v_attr]], # vertex attribute
pch = 21,
text.col = font_col,
col = edge_col,
pt.bg = colour_tb$colour,
pt.cex = 2,
cex = .8,
bty = "n",
ncol = 1)
}
## Default PDF output unless NULL supplied to path
if(return == "plot"){
plot_basic_graph()
} else if(return == "plot-pdf"){
grDevices::pdf(out_path)
plot_basic_graph()
grDevices::dev.off()
message(paste0("Saved to ", out_path, "."))
}
graphics::plot(g,
layout = eval(parse(text = layout_text)),
vertex.label = NA,
vertex.size = 3,
edge.arrow.mode = "-",
edge.color = "#adadad")
} else if(style == "ggraph"){
graphics::legend(x = leg_x,
y = leg_y,
legend = colour_tb[[v_attr]], # vertex attribute
pch = 21,
text.col = font_col,
col = "#777777",
pt.bg = colour_tb$colour,
pt.cex = 2,
cex = .8,
bty = "n",
ncol = 1)
}
plot_output <-
g_layout +
ggraph::geom_edge_link(colour = edge_col,
edge_width = 0.05,
alpha = edge_alpha)+
ggraph::geom_node_point(aes(colour = !!sym(v_attr),
size = node_size),
alpha = node_alpha,
pch = 16) +
scale_size_continuous(range = node_sizes) +
scale_color_manual(values = unique(colour_v)) +
theme_void() +
theme(
legend.position = legend_pos,
legend.background = element_rect(fill = bg_fill, colour = bg_fill),
## Default PDF output unless NULL supplied to path
if(is.null(path)){
text = element_text(colour = font_col),
axis.line = element_blank(),
panel.grid = element_blank()
) +
labs(caption = paste0("Person to person collaboration showing ", v_attr, ". "), # spaces intentional
y = "",
x = "") +
guides(size = "none")
plot_basic_graph()
# Default PDF output unless NULL supplied to path
if(return == "plot"){
plot_output
} else if(return == "plot-pdf"){
ggsave(out_path,
plot = plot_output,
width = 16,
height = 9)
message(paste0("Saved to ", out_path, "."))
}
} else {
grDevices::pdf(out_path)
stop("invalid input for `style`")
plot_basic_graph()
}
grDevices::dev.off()
} else if (return == "data"){
message(paste0("Saved to ", out_path, "."))
vertex_tb
} else if(return == "network"){
g
} else if(return == "sankey"){
if(is.null(community)){
message("Note: no sankey return option is available if `NULL` is selected at `community`.
Please specify a valid community detection algorithm.")
} else if(community %in% valid_comm){
create_sankey(
data = vertex_tb %>% count(!!sym(hrvar), cluster),
var1 = hrvar,
var2 = "cluster",
count = "n"
)
}
} else if(return == "table"){
if(is.null(community)){
if(is.null(centrality)){
vertex_tb %>% count(!!sym(hrvar))
} else {
# average centrality by group
vertex_tb %>%
group_by(!!sym(hrvar)) %>%
summarise(
n = n(),
betweenness = mean(betweenness, na.rm = TRUE),
closeness = mean(closeness, na.rm = TRUE),
degree = mean(degree, na.rm = TRUE),
eigenvector = mean(eigenvector, na.rm = TRUE),
pagerank = mean(pagerank, na.rm = TRUE)
)
}
} else if(community %in% valid_comm){
if(is.null(centrality)){
vertex_tb %>% count(!!sym(hrvar), cluster)
} else {
# average centrality by group
vertex_tb %>%
group_by(!!sym(hrvar), cluster) %>%
summarise(
n = n(),
betweenness = mean(betweenness, na.rm = TRUE),
closeness = mean(closeness, na.rm = TRUE),
degree = mean(degree, na.rm = TRUE),
eigenvector = mean(eigenvector, na.rm = TRUE),
pagerank = mean(pagerank, na.rm = TRUE)
)
}
}
} else {
plot_output <-
g_layout +
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.05, alpha = edge_alpha) +
ggraph::geom_node_point(aes(colour = !!sym(v_attr)),
alpha = node_alpha,
pch = 16) +
theme_void() +
theme(
legend.position = legend_pos,
legend.background = element_rect(fill = bg_fill, colour = bg_fill),
text = element_text(colour = font_col),
axis.line = element_blank(),
panel.grid = element_blank()
) +
labs(caption = paste0("Person to person collaboration showing ", v_attr, ". "), # spaces intentional
y = "",
x = "")
# Default PDF output unless NULL supplied to path
if(is.null(path)){
plot_output
} else {
ggsave(out_path,
plot = plot_output,
width = 16,
height = 9)
message(paste0("Saved to ", out_path, "."))
}
stop("invalid input for `return`")
}
} else if(return == "table"){
if(display == "hrvar"){
vertex_tb %>% count(!!sym(hrvar))
} else if(display %in% c("louvain", "leiden")){
vertex_tb %>%
count(!!sym(hrvar), cluster)
}
} else if(return == "data"){
vertex_tb
} else if(return == "network"){
g
} else if(return == "sankey"){
if(display == "hrvar"){
message("Note: no sankey return option is available if `display` is set to 'hrvar'.
Please specify either 'louvain' or 'leiden'")
} else if(display %in% c("louvain", "leiden")){
create_sankey(data = vertex_tb %>% count(!!sym(hrvar), cluster),
var1 = hrvar,
var2 = "cluster",
count = "n")
}
} else if(return == "describe"){
if(display == "hrvar"){
message("Note: no describe return option is available if `display` is set to 'hrvar'.
Please specify either 'louvain' or 'leiden'")
} else if(display %in% c("louvain", "leiden")){
describe_tb <-
vertex_tb %>%
left_join(select(data, starts_with("TieOrigin_")),
by = c("name" = "TieOrigin_PersonId"))
desc_str <-
describe_tb %>%
pull(cluster) %>%
unique()
out_list <-
desc_str %>%
purrr::map(function(x){
describe_tb %>%
filter(cluster == x) %>%
network_describe(hrvar = desc_hrvar)
}) %>%
setNames(nm = desc_str)
summaryTable <-
list(i = out_list,
j = names(out_list)) %>%
purrr::pmap(function(i, j){
i %>%
arrange(desc(Percentage)) %>%
# slice(1) %>%
mutate_at(vars(starts_with("feature_")), ~tidyr::replace_na(., "")) %>%
mutate(Community = j,
`Attribute 1` = paste(feature_1, "=", feature_1_value),
`Attribute 2` = paste(feature_2, "=", feature_2_value),
`Attribute 3` = paste(feature_3, "=", feature_3_value)) %>%
select(Community,
`Attribute 1`,
`Attribute 2`,
`Attribute 3`,
PercentageExplained = "Percentage") %>%
mutate_at(vars(starts_with("Attribute")), ~ifelse(. == " = ", NA, .))
}) %>%
bind_rows() %>%
mutate(sum_na = select(., `Attribute 1`, `Attribute 2`, `Attribute 3`) %>%
apply(1, function(x) sum(is.na(x)))) %>%
arrange(desc(PercentageExplained)) %>%
group_by(Community, sum_na) %>%
summarise_all(~first(.)) %>%
select(-sum_na)
c(list("summaryTable" = summaryTable), out_list)
}
} else {
stop("Please enter a valid input for `return`.")
}
}

Просмотреть файл

@ -1,222 +0,0 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Create a network plot with the person-to-person query
#'
#' @description
#' `r lifecycle::badge('experimental')`
#'
#' Analyse a person-to-person (P2P) network query, with multiple visualisation
#' and analysis output options. Pass a data frame containing a person-to-person
#' query and return a network visualization. Options are available for community
#' detection using either the Louvain or the Leiden algorithms.
#'
#' @details
#' This function is a work in progress. At present it builds the edge and
#' vertex tables, creates the 'igraph' object, and (optionally) attaches a
#' `cluster` vertex attribute. The branches that involve `centrality` are
#' placeholders, and `return` as well as the styling arguments are accepted
#' but not yet used.
#'
#' @param data Data frame containing a person-to-person query. The columns
#'   `TieOrigin_PersonId`, `TieDestination_PersonId`, `TieOrigin_<hrvar>`,
#'   `TieDestination_<hrvar>` and (unless `weight` is `NULL`) the column named
#'   by `weight` are read.
#' @param hrvar String containing the label for the HR attribute.
#' @param centrality `NULL` (default), or one of `"betweenness"`,
#'   `"closeness"`, `"degree"`, `"eigenvector"`, `"pagerank"`.
#' @param community `NULL` (default), or one of `"leiden"`, `"louvain"`.
#' @param return String specifying what output to return. Not yet used.
#' @param bg_fill,font_col,legend_pos,palette,node_alpha,edge_alpha,algorithm,size_threshold
#'   Plot styling and layout arguments. Not yet used.
#' @param res Resolution parameter passed to `leiden::leiden()`. Defaults to
#'   0.5. Only used when `community = "leiden"`.
#' @param seed Seed passed to `set.seed()` when `community = "louvain"`, or to
#'   `leiden::leiden()` when `community = "leiden"`.
#' @param weight String to specify which column to use as weights for the
#'   network. Defaults to `"StrongTieScore"`. Supply `NULL` to give every edge
#'   a weight of 1.
#'
#' @return No dedicated output is assembled yet: the graph and the name of the
#'   vertex attribute used for grouping are only built internally. An error is
#'   raised when `centrality` or `community` is not `NULL` and not one of the
#'   valid values.
#'
#' @family Network
#'
#' @export
network_p2p_test <- function(
    data,
    hrvar,
    centrality = NULL,
    community = NULL,
    return,
    bg_fill = "#FFFFFF",
    font_col = "grey20",
    legend_pos = "bottom",
    palette = "rainbow",
    node_alpha = 0.7,
    edge_alpha = 1,
    res = 0.5,
    seed = 1,
    algorithm = "mds",
    size_threshold = 5000,
    weight = "StrongTieScore"
  ){

  ## valid values for centrality -------------------------------------------
  valid_cen <- c(
    "betweenness",
    "closeness",
    "degree",
    "eigenvector",
    "pagerank"
  )

  ## valid values for community --------------------------------------------
  valid_com <- c(
    "leiden",
    "louvain"
  )

  ## Scalar flags describing the requested analysis -------------------------
  ## `NULL %in% x` is `logical(0)`, which makes `if()` fail with "argument is
  ## of length zero". Validate once with short-circuiting `&&` so that every
  ## combination of NULL / valid / invalid input reaches the intended branch.
  cen_null <- is.null(centrality)
  com_null <- is.null(community)
  cen_valid <- !cen_null && isTRUE(centrality %in% valid_cen)
  com_valid <- !com_null && isTRUE(community %in% valid_com)

  ## Set data frame for `edges` --------------------------------------------
  if (is.null(weight)) {

    edges <-
      data %>%
      mutate(NoWeight = 1) %>% # No weight: every tie counts equally
      select(from = "TieOrigin_PersonId",
             to = "TieDestination_PersonId",
             weight = "NoWeight")

  } else {

    # `all_of()` makes explicit that `weight` holds a column *name*; a bare
    # `weight` would silently pick a column literally called "weight" if one
    # existed in `data`.
    edges <-
      data %>%
      select(from = "TieOrigin_PersonId",
             to = "TieDestination_PersonId",
             weight = dplyr::all_of(weight))

  }

  ## Set variables ---------------------------------------------------------
  TO_hrvar <- paste0("TieOrigin_", hrvar)
  TD_hrvar <- paste0("TieDestination_", hrvar)

  ## Vertices data frame to provide meta-data ------------------------------
  ## One row per (node, HR attribute) pair, collected from both ends of the
  ## ties; duplicates are removed when the graph is created below.
  vert_ft <-
    rbind(
      # TieOrigin
      edges %>%
        select(from) %>% # Single column
        unique() %>% # Remove duplications
        left_join(
          select(data, dplyr::all_of(c("TieOrigin_PersonId", TO_hrvar))),
          by = c("from" = "TieOrigin_PersonId")
        ) %>%
        select(node = "from", !!sym(hrvar) := dplyr::all_of(TO_hrvar)),

      # TieDestination
      edges %>%
        select(to) %>% # Single column
        unique() %>% # Remove duplications
        left_join(
          select(data, dplyr::all_of(c("TieDestination_PersonId", TD_hrvar))),
          by = c("to" = "TieDestination_PersonId")
        ) %>%
        select(node = "to", !!sym(hrvar) := dplyr::all_of(TD_hrvar))
    )

  ## Create 'igraph' object -----------------------------------------------
  g_raw <-
    igraph::graph_from_data_frame(edges,
                                  directed = TRUE, # Directed, but FALSE for visualization
                                  vertices = unique(vert_ft)) # remove duplicates

  ## Assign weights --------------------------------------------------------
  ## Read back below as `g_raw$weight` for the Leiden call.
  ## NOTE(review): `$<-` on an igraph object appears to set a graph-level
  ## attribute rather than an edge attribute - confirm this is intended.
  g_raw$weight <- edges$weight

  ## Main algorithm --------------------------------------------------------
  if (cen_null && com_null) {

    # PLOT -> Returns basic plot with HR attribute
    # PDF -> Exports plot as pdf file
    # Table -> HR Var count
    # Data -> Returns person dataset with HR attributes
    # Network -> Returns network object

    g <- g_raw %>% igraph::simplify()

    ## Name of vertex attribute
    v_attr <- hrvar

  } else if (cen_valid && com_null) {

    # Placeholder - not implemented yet.
    # PLOT -> Returns basic plot with HR attribute AND vertices proportional to centrality
    # PDF -> Exports plot as pdf file
    # Table -> HR Var count and average centrality
    # Data -> Returns person dataset with HR attributes and centrality scores (ALL)
    # Network -> Returns network object with centrality scores (ALL)

  } else if (cen_null && com_valid) {

    # PLOT -> Returns basic plot with community (no hrvar)
    # PDF -> Exports plot as pdf file
    # Table -> HR Var x community count
    # Data -> Returns person dataset with HR attributes and community attribute
    # Network -> Returns network object with community attribute

    # TODO - modularise louvain and leiden?

    if (community == "louvain") {

      set.seed(seed = seed)

      ## Convert to undirected
      g_ud <- igraph::as.undirected(g_raw)

      ## Return partitions / clusters / modules.
      ## weights = NULL means that if the graph has a `weight` edge attribute,
      ## this will be used by default. `res` is not applied in this branch.
      lc <- igraph::cluster_louvain(g_ud, weights = NULL)

      ## Add cluster
      g <-
        g_ud %>%
        # Add louvain partitions to graph object
        igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
        igraph::simplify()

      ## Name of vertex attribute
      v_attr <- "cluster"

    } else if (community == "leiden") {

      # Check package installation
      check_pkg_installed(pkgname = "leiden")

      ## Return a numeric vector of partitions / clusters / modules
      ## Set a low resolution parameter to have fewer groups
      ld <- leiden::leiden(
        g_raw,
        resolution_parameter = res,
        seed = seed,
        weights = g_raw$weight) # create partitions

      ## Add cluster
      g <-
        g_raw %>%
        # Add leiden partitions to graph object
        igraph::set_vertex_attr("cluster", value = as.character(ld)) %>%
        igraph::simplify()

      ## Name of vertex attribute
      v_attr <- "cluster"

    }

  } else if (cen_valid && com_valid) {

    # Placeholder - not implemented yet.
    # PLOT -> Returns basic plot with community AND vertices proportional to centrality
    # PDF -> Exports plot as pdf file
    # Table -> HR Var x community count and average centrality
    # Data -> Returns person dataset with HR attributes, community attribute and centrality scores (ALL)
    # Network -> Returns network object with community attribute and centrality scores (ALL)

  } else {

    # Reached when either argument is non-NULL but not a single valid value
    stop(
      "Invalid inputs to `centrality` or `community`."
    )

  }
}

Просмотреть файл

@ -8,13 +8,13 @@ keymetrics_scan(
data,
hrvar = "Organization",
mingroup = 5,
metrics = c("Workweek_span", "Collaboration_hours",
"After_hours_collaboration_hours", "Meetings", "Meeting_hours",
"After_hours_meeting_hours", "Low_quality_meeting_hours",
"Meeting_hours_with_manager_1_on_1", "Meeting_hours_with_manager", "Emails_sent",
"Email_hours", "After_hours_email_hours", "Generated_workload_email_hours",
"Total_focus_hours", "Internal_network_size", "Networking_outside_organization",
"External_network_size", "Networking_outside_company"),
metrics = c("Workweek_span", "Collaboration_hours", "After_hours_collaboration_hours",
"Meetings", "Meeting_hours", "After_hours_meeting_hours",
"Low_quality_meeting_hours", "Meeting_hours_with_manager_1_on_1",
"Meeting_hours_with_manager", "Emails_sent", "Email_hours",
"After_hours_email_hours", "Generated_workload_email_hours", "Total_focus_hours",
"Internal_network_size", "Networking_outside_organization", "External_network_size",
"Networking_outside_company"),
return = "plot",
low = rgb2hex(7, 111, 161),
mid = rgb2hex(241, 204, 158),

Просмотреть файл

@ -27,13 +27,7 @@ network_leiden(
\item{bg_fill}{String to specify background fill colour.}
\item{font_col}{String to specify font and link colour.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
nodes. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}
\item{font_col}{String to specify font colour.}
\item{path}{File path for saving the PDF output. Defaults to a timestamped
path based on current parameters.}
@ -41,16 +35,10 @@ path based on current parameters.}
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency
of the nodes. Defaults to 0.7.}
\item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
to 0.5.}
\item{seed}{Seed for the random number generator passed to either
\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
the Leiden algorithm is used, to ensure consistency. Only applicable when
\code{display} is set to \code{"louvain"} or \code{"leiden"}.}
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes
to use when returning the \code{"describe"} output. See \code{network_describe()}.}
\code{set.seed()} when the louvain or leiden community detection algorithm is
used, to ensure consistency. Only applicable when \code{community} is set to
one of the valid non-null values.}
\item{return}{String specifying what output to return. Defaults to "plot-leiden". Valid
return options include:
@ -70,12 +58,6 @@ identified communities. The first data frame is a summary table of all the
communities.
\item \code{'network'}: return 'igraph' object.
}}
\item{size_threshold}{Numeric value representing the maximum number of edges
before \code{network_leiden()} switches to use a more efficient, but less
elegant plotting method (native igraph). Defaults to 5000. Set as \code{0} to
coerce to a fast plotting method every time, and \code{Inf} to always use the
default plotting method (with 'ggraph').}
}
\value{
See \code{return}.

Просмотреть файл

@ -25,23 +25,14 @@ network_louvain(
\item{bg_fill}{String to specify background fill colour.}
\item{font_col}{String to specify font and link colour.}
\item{font_col}{String to specify font colour.}
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency
of the nodes. Defaults to 0.7.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
nodes. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}
\item{path}{File path for saving the PDF output. Defaults to a timestamped
path based on current parameters.}
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes
to use when returning the \code{"describe"} output. See \code{network_describe()}.}
\item{return}{String specifying what output to return. Defaults to "plot-louvain". Valid
return options include:
\itemize{
@ -60,12 +51,6 @@ identified communities. The first data frame is a summary table of all the
communities.
\item \code{'network'}: return 'igraph' object.
}}
\item{size_threshold}{Numeric value representing the maximum number of edges
before \code{network_leiden()} switches to use a more efficient, but less
elegant plotting method (native igraph). Defaults to 5000. Set as \code{0} to
coerce to a fast plotting method every time, and \code{Inf} to always use the
default plotting method (with 'ggraph').}
}
\value{
See \code{return}.

Просмотреть файл

@ -2,26 +2,28 @@
% Please edit documentation in R/network_p2p.R
\name{network_p2p}
\alias{network_p2p}
\title{Create a network plot with the person-to-person query}
\title{Perform network analysis with the person-to-person query}
\usage{
network_p2p(
data,
hrvar = "Organization",
display = "hrvar",
return = "plot",
path = paste0("network_p2p_", display),
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
centrality = NULL,
community = NULL,
weight = NULL,
comm_args = NULL,
layout = "mds",
path = paste("p2p", NULL, sep = "_"),
style = "igraph",
bg_fill = "#FFFFFF",
font_col = "grey20",
legend_pos = "bottom",
legend_pos = "right",
palette = "rainbow",
node_alpha = 0.7,
edge_alpha = 1,
res = 0.5,
seed = 1,
algorithm = "mds",
size_threshold = 5000,
weight = "StrongTieScore"
edge_col = "#777777",
node_sizes = c(1, 20),
seed = 1
)
}
\arguments{
@ -29,43 +31,86 @@ network_p2p(
\item{hrvar}{String containing the label for the HR attribute.}
\item{display}{String determining what output to return. Valid values
include:
\itemize{
\item \code{"hrvar"} (default): compute analysis or visuals without computing
communities.
\item \code{"louvain"}: compute analysis or visuals with community detection, using
the Louvain algorithm.
\item \code{"leiden"}: compute analysis or visuals with community detection, using
the Leiden algorithm. This requires all the pre-requisites of the
\strong{leiden} package installed, which includes Python and \strong{reticulate}.
}}
\item{return}{String specifying what output to return. This must be one of the
following strings:
\item{return}{A different output is returned depending on the value passed to the \code{return}
argument:
\itemize{
\item \code{'plot'} (default)
\item \code{'plot-pdf'}
\item \code{'sankey'}
\item \code{'table'}
\item \code{'data'}
\item \code{'describe'}
\item \code{'network'}
}}
\item{centrality}{String determining which centrality measure is used to
scale the size of the nodes. All centrality measures are automatically
calculated when it is set to one of the below values, and reflected in the
\code{'network'} and \code{'data'} outputs.
Measures include:
\itemize{
\item \code{betweenness}
\item \code{closeness}
\item \code{degree}
\item \code{eigenvector}
\item \code{pagerank}
}
See \code{Value} for more information.}
When \code{centrality} is set to NULL, no centrality is calculated in the outputs
and all the nodes would have the same size.}
\item{community}{String determining which community detection algorithms to
apply. Valid values include:
\itemize{
\item \code{NULL} (default): compute analysis or visuals without computing
communities.
\item \code{"louvain"}
\item \code{"leiden"}
\item \code{"edge_betweenness"}
\item \code{"fast_greedy"}
\item \code{"fluid_communities"}
\item \code{"infomap"}
\item \code{"label_prop"}
\item \code{"leading_eigen"}
\item \code{"optimal"}
\item \code{"spinglass"}
\item \code{"walk_trap"}
}
These values map to the community detection algorithms offered by \code{igraph}.
For instance, \code{"leiden"} is based on \code{igraph::cluster_leiden()}. Please see
the bottom of \url{https://igraph.org/r/html/1.3.0/cluster_leiden.html} for all
applications and parameters of these algorithms.}
\item{weight}{String to specify which column to use as weights for the
network. To create a graph without weights, supply \code{NULL} to this argument.}
\item{comm_args}{list containing the arguments to be passed through to
igraph's clustering algorithms. Arguments must be named. See examples
section on how to supply arguments in a named list.}
\item{layout}{String to specify the node placement algorithm to be used.
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
nodes. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}
\item{path}{File path for saving the PDF output. Defaults to a timestamped
path based on current parameters.}
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes
to use when returning the \code{"describe"} output. See \code{network_describe()}.}
\item{style}{String to specify which plotting style to use for the network
plot. Valid values include:
\itemize{
\item \code{"igraph"}
\item \code{"ggraph"}
}}
\item{bg_fill}{String to specify background fill colour.}
\item{font_col}{String to specify font and link colour.}
\item{font_col}{String to specify font colour.}
\item{legend_pos}{String to specify position of legend. Defaults to
\code{"bottom"}. See \code{ggplot2::theme()}. This is applicable for both the
\code{"right"}. See \code{ggplot2::theme()}. This is applicable for both the
'ggraph' and the fast plotting method. Valid inputs include:
\itemize{
\item \code{"bottom"}
@ -74,8 +119,8 @@ to use when returning the \code{"describe"} output. See \code{network_describe()
-\code{"right"}
}}
\item{palette}{Function for generating a colour palette with a single
argument \code{n}. Uses "rainbow" by default.}
\item{palette}{String specifying the function to generate a colour palette
with a single argument \code{n}. Uses \code{"rainbow"} by default.}
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency
of the nodes. Defaults to 0.7.}
@ -83,46 +128,32 @@ of the nodes. Defaults to 0.7.}
\item{edge_alpha}{A numeric value between 0 and 1 to specify the transparency
of the edges (only for 'ggraph' mode). Defaults to 1.}
\item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
to 0.5.}
\item{edge_col}{String to specify edge link colour.}
\item{node_sizes}{Numeric vector of length two to specify the range of node
sizes to rescale to, when \code{centrality} is set to a non-null value.}
\item{seed}{Seed for the random number generator passed to either
\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
the Leiden algorithm is used, to ensure consistency. Only applicable when
\code{display} is set to \code{"louvain"} or \code{"leiden"}.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
nodes. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}
\item{size_threshold}{Numeric value representing the maximum number of edges
before \code{network_leiden()} switches to use a more efficient, but less
elegant plotting method (native igraph). Defaults to 5000. Set as \code{0} to
coerce to a fast plotting method every time, and \code{Inf} to always use the
default plotting method (with 'ggraph').}
\item{weight}{String to specify which column to use as weights for the
network. Defaults to \verb{"StrongTieScore"}. To create a graph without weights,
supply \code{NULL} to this argument.}
\code{set.seed()} when the louvain or leiden community detection algorithm is
used, to ensure consistency. Only applicable when \code{community} is set to
one of the valid non-null values.}
}
\value{
A different output is returned depending on the value passed to the \code{return}
argument:
\itemize{
\item \code{'plot'}: return a network plot.
\item \code{'plot'}: return a network plot, interactively within R.
\item \code{'plot-pdf'}: save a network plot as PDF. This option is recommended when
the graph is large, which may take a long time to run if \code{return = 'plot'}
is selected. Use this together with \code{path} to control the save location.
\item \code{'sankey'}: return a sankey plot combining communities and HR attribute.
This is only valid if a community detection method is selected at
\code{display}.
\code{community}.
\item \code{'table'}: return a vertex summary table with counts in communities and
HR attribute.
HR attribute. When \code{centrality} is non-NULL, the average centrality values
are calculated per group.
\item \code{'data'}: return a vertex data file that matches vertices with
communities and HR attributes.
\item \code{'describe'}: return a list of data frames which describe each of the
identified communities. The first data frame is a summary table of all the
communities. This is only valid if a community detection method is selected
at \code{display}.
\item \code{'network'}: return 'igraph' object.
}
}
@ -134,61 +165,52 @@ and analysis output options. Pass a data frame containing a person-to-person
query and return a network visualization. Options are available for community
detection using either the Louvain or the Leiden algorithms.
}
\section{Running Leiden communities}{
Running Leiden communities requires python dependencies installed.
You can run the following:
\if{html}{\out{<div class="sourceCode R">}}\preformatted{# Return a network plot to console, coloured by Leiden communities
p2p_data \%>\%
network_p2p(display = "leiden",
path = NULL,
return = "plot")
}\if{html}{\out{</div>}}
When installing the 'leiden' package, you may be required to install the Python
libraries 'python-igraph' and 'leidenalg'. You can install them with:
\if{html}{\out{<div class="sourceCode R">}}\preformatted{reticulate::py_install("python-igraph")
reticulate::py_install("leidenalg")
}\if{html}{\out{</div>}}
}
\examples{
# Simulate a small person-to-person dataset
p2p_data <- p2p_data_sim(size = 50)
p2p_df <- p2p_data_sim(dim = 1, size = 100)
# Return a network plot to console, coloured by hrvar
p2p_data \%>\%
network_p2p(display = "hrvar",
path = NULL,
return = "plot")
# default - ggraph visual
network_p2p(data = p2p_df, style = "ggraph")
# Return a network plot to console, coloured by Louvain communities
p2p_data \%>\%
network_p2p(display = "louvain",
path = NULL,
return = "plot")
# return vertex table
network_p2p(data = p2p_df, return = "table")
# return vertex table with community detection
network_p2p(data = p2p_df, community = "leiden", return = "table")
# Return a network plot to console
# Coloured by Leiden communities
# Using Fruchterman-Reingold force-directed layout algorithm
# Force the use of fast plotting method
p2p_data \%>\%
network_p2p(display = "hrvar",
path = NULL,
return = "plot",
algorithm = "lgl",
size_threshold = 0)
# leiden - igraph style with custom resolution parameters
network_p2p(data = p2p_df, community = "leiden", comm_args = list("resolution" = 0.1))
# Return a data frame matching HR variable and communities to nodes
# Using Louvain communities
p2p_data \%>\%
network_p2p(display = "louvain",
return = "data",
algorithm = "fr")
# louvain - ggraph style, using custom palette
network_p2p(
data = p2p_df,
style = "ggraph",
community = "louvain",
palette = "heat_colors"
)
# leiden - return a sankey visual with custom resolution parameters
network_p2p(
data = p2p_df,
community = "leiden",
return = "sankey",
comm_args = list("resolution" = 0.1)
)
# using `fluid_communities` algorithm with custom parameters
network_p2p(
data = p2p_df,
community = "fluid_communities",
comm_args = list("no.of.communities" = 5)
)
# Calculate centrality measures and leiden communities, return at node level
network_p2p(
data = p2p_df,
centrality = "betweenness",
community = "leiden",
return = "data"
) \%>\%
dplyr::glimpse()
}
\seealso{

Просмотреть файл

@ -7,8 +7,8 @@
network_p2p_test(
data,
hrvar,
centrality = FALSE,
community = FALSE,
centrality = NULL,
community = NULL,
return,
bg_fill = "#FFFFFF",
font_col = "grey20",