Merge pull request #105 from microsoft/feature/network-enhancements

Feature: introduce enhancements to network functions
This commit is contained in:
Martin Chan 2021-03-30 17:24:36 +01:00 коммит произвёл GitHub
Родитель 2454c175fe 7640958d32
Коммит 87f6d82b19
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 147 добавлений и 19 удалений

Просмотреть файл

@ -5,7 +5,9 @@ New functions:
Significant changes to existing functions:
- `create_rank()` is now able to call `create_rank_combine()` to compute
averages for pairwise combinations. The `mode` and `plot_mode` arguments are also made available to the key metric wrapper functions of `create_rank()`.
averages for pairwise combinations. The `mode` and `plot_mode` arguments are
also made available to the key metric wrapper functions of `create_rank()`.
- `network_g2g()` now has the option to specify algorithm and node colour.
# wpa 1.4.0

Просмотреть файл

@ -7,8 +7,8 @@
#'
#' @description
#' Pass a data frame containing a group-to-group query and return a network
#' plot. Automatically handles "Collaborators_within_group" and
#' "Other_collaborators" within query data.
#' plot. Automatically handles `"Collaborators_within_group"` and
#' `"Other_collaborators"` within query data.
#'
#' @param data Data frame containing a G2G query.
#' @param time_investor String containing the variable name for the Time
@ -17,7 +17,23 @@
#' column.
#' @param metric String containing the variable name for metric. Defaults to
#' `Collaboration_hours`.
#' @param algorithm String to specify the node placement algorithm to be used.
#' Defaults to `"fr"` for the force-directed algorithm of Fruchterman and
#' Reingold. See
#' <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full
#' list of options.
#' @param node_colour String to specify the colour to be used for displaying
#' nodes. Defaults to `"lightblue"`. If `"vary"` is supplied, a different colour
#' is shown for each node at random.
#' @param exc_threshold Exclusion threshold to apply.
#' @param org_count Optional data frame to provide the size of each organization
#' in the `collaborator` attribute. The data frame should contain only two
#' columns:
#' - Name of the `collaborator` attribute excluding any prefixes, e.g.
#' `"Organization"`. Must be of character or factor type.
#' - `"n"`. Must be of numeric type.
#' Defaults to `NULL`, where node sizes will be fixed.
#'
#' @param subtitle String to override default plot subtitle.
#' @param return String specifying what to return. This must be one of the
#' following strings:
@ -52,6 +68,19 @@
#' metric = "Meeting_hours",
#' exc_threshold = 0.05)
#'
#' # Return a network plot with circle layout
#' # Vary node colours and add org sizes
#' org_tb <- hrvar_count(
#' sq_data,
#' hrvar = "Organization",
#' return = "table"
#' )
#'
#' g2g_data %>%
#' network_g2g(algorithm = "circle",
#' node_colour = "vary",
#' org_count = org_tb)
#'
#' # Return an interaction matrix
#' # Minimum arguments specified
#' g2g_data %>%
@ -63,7 +92,10 @@ network_g2g <- function(data,
time_investor = NULL,
collaborator = NULL,
metric = "Collaboration_hours",
algorithm = "fr",
node_colour = "lightblue",
exc_threshold = 0.1,
org_count = NULL,
subtitle = "Collaboration Across Organizations",
return = "plot"){
@ -95,6 +127,11 @@ network_g2g <- function(data,
}
## Get string of HR variable
hrvar_string <- gsub(pattern = "Collaborators_",
replacement = "",
x = collaborator)
## Warn if 'Collaborators Within Group' is not present in data
if(! "Collaborators Within Group" %in% unique(data[[collaborator]])){
@ -145,20 +182,70 @@ network_g2g <- function(data,
mutate(metric_prop = metric_prop * 10) %>%
igraph::graph_from_data_frame(directed = FALSE)
# Org count vary by size -------------------------------------------
if(!is.null(org_count)){
igraph::V(mynet_em)$org_size <-
tibble(id = igraph::get.vertex.attribute(mynet_em)$name) %>%
mutate(id = gsub(pattern = "\n", replacement = " ", x = id)) %>%
left_join(org_count, by = c("id" = hrvar_string)) %>%
pull(n)
} else {
# Imputed size if not specified
igraph::V(mynet_em)$org_size <-
tibble(id = igraph::get.vertex.attribute(mynet_em)$name) %>%
mutate(id = gsub(pattern = "\n", replacement = " ", x = id)) %>%
mutate(n = 20) %>%
pull(n)
}
## Plot object
plot_obj <-
mynet_em %>%
ggraph::ggraph(layout = algorithm) +
ggraph::geom_edge_link(aes(edge_width = metric_prop * 1),
edge_alpha = 0.5,
edge_colour = "grey")
if(return == "network"){
mynet_em # Return igraph object
} else {
## Plot object
mynet_em %>%
ggraph::ggraph(layout = "fr") +
ggraph::geom_edge_link(aes(edge_width = metric_prop * 1), edge_alpha = 0.5, edge_colour = "grey") +
ggraph::geom_node_point(size = 20, colour = "lightblue") +
# Custom node colours ----------------------------------------------
if(node_colour == "vary"){
plot_obj <-
plot_obj +
ggraph::geom_node_point(
aes(color = name,
size = org_size),
alpha = 0.9
)
} else {
plot_obj <-
plot_obj +
ggraph::geom_node_point(
aes(size = org_size),
colour = node_colour,
alpha = 0.9
)
}
plot_obj +
ggraph::geom_node_text(aes(label = name), size = 3, repel = FALSE) +
ggplot2::theme(panel.background = ggplot2::element_rect(fill = 'white'), legend.position = "none") +
ggplot2::theme(
panel.background = ggplot2::element_rect(fill = 'white'),
legend.position = "none") +
theme_wpa_basic() +
scale_size(range = c(1, 30)) +
labs(title = "Group to Group Collaboration",
subtitle = subtitle,
x = "",

Просмотреть файл

@ -58,8 +58,8 @@
#' @param seed Seed for the random number generator passed to `leiden::leiden()`
#' to ensure consistency. Only applicable when `display` is set to `"leiden"`.
#' @param algorithm String to specify the node placement algorithm to be used.
#' Defaults to `"fr"` for the force-directed algorithm of Fruchterman and
#' Reingold. See
#' Defaults to `"mds"` for the deterministic multi-dimensional scaling of
#' nodes. See
#' <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full
#' list of options.
#' @param size_threshold Numeric value representing the maximum number of edges

Просмотреть файл

@ -10,7 +10,10 @@ network_g2g(
time_investor = NULL,
collaborator = NULL,
metric = "Collaboration_hours",
algorithm = "fr",
node_colour = "lightblue",
exc_threshold = 0.1,
org_count = NULL,
subtitle = "Collaboration Across Organizations",
return = "plot"
)
@ -20,7 +23,10 @@ g2g_network(
time_investor = NULL,
collaborator = NULL,
metric = "Collaboration_hours",
algorithm = "fr",
node_colour = "lightblue",
exc_threshold = 0.1,
org_count = NULL,
subtitle = "Collaboration Across Organizations",
return = "plot"
)
@ -37,8 +43,28 @@ column.}
\item{metric}{String containing the variable name for metric. Defaults to
\code{Collaboration_hours}.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"fr"} for the force-directed algorithm of Fruchterman and
Reingold. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}
\item{node_colour}{String to specify the colour to be used for displaying
nodes. Defaults to \code{"lightblue"}. If \code{"vary"} is supplied, a different colour
is shown for each node at random.}
\item{exc_threshold}{Exclusion threshold to apply.}
\item{org_count}{Optional data frame to provide the size of each organization
in the \code{collaborator} attribute. The data frame should contain only two
columns:
\itemize{
\item Name of the \code{collaborator} attribute excluding any prefixes, e.g.
\code{"Organization"}. Must be of character or factor type.
\item \code{"n"}. Must be of numeric type.
Defaults to \code{NULL}, where node sizes will be fixed.
}}
\item{subtitle}{String to override default plot subtitle.}
\item{return}{String specifying what to return. This must be one of the
@ -64,8 +90,8 @@ argument:
}
\description{
Pass a data frame containing a group-to-group query and return a network
plot. Automatically handles "Collaborators_within_group" and
"Other_collaborators" within query data.
plot. Automatically handles \code{"Collaborators_within_group"} and
\code{"Other_collaborators"} within query data.
}
\examples{
# Return a network plot
@ -78,6 +104,19 @@ g2g_data \%>\%
metric = "Meeting_hours",
exc_threshold = 0.05)
# Return a network plot with circle layout
# Vary node colours and add org sizes
org_tb <- hrvar_count(
sq_data,
hrvar = "Organization",
return = "table"
)
g2g_data \%>\%
network_g2g(algorithm = "circle",
node_colour = "vary",
org_count = org_tb)
# Return an interaction matrix
# Minimum arguments specified
g2g_data \%>\%

Просмотреть файл

@ -30,8 +30,8 @@ network_leiden(
\item{font_col}{String to specify font and link colour.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"fr"} for the force-directed algorithm of Fruchterman and
Reingold. See
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
nodes. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}

Просмотреть файл

@ -31,8 +31,8 @@ network_louvain(
of the nodes.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"fr"} for the force-directed algorithm of Fruchterman and
Reingold. See
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
nodes. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}

Просмотреть файл

@ -83,8 +83,8 @@ to 0.5.}
to ensure consistency. Only applicable when \code{display} is set to \code{"leiden"}.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"fr"} for the force-directed algorithm of Fruchterman and
Reingold. See
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
nodes. See
\url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full
list of options.}