зеркало из https://github.com/microsoft/wpa.git
Merge pull request #37 from microsoft/patch/1.3.1-1
Patch: v1.3.1-1 minor changes
This commit is contained in:
Коммит
275e7050e4
|
@ -171,6 +171,7 @@ importFrom(dplyr,mutate_if)
|
|||
importFrom(grDevices,rainbow)
|
||||
importFrom(htmltools,HTML)
|
||||
importFrom(igraph,graph_from_data_frame)
|
||||
importFrom(igraph,plot.igraph)
|
||||
importFrom(magrittr,"%>%")
|
||||
importFrom(markdown,markdownToHTML)
|
||||
importFrom(methods,is)
|
||||
|
|
24
NEWS.md
24
NEWS.md
|
@ -1,3 +1,25 @@
|
|||
# wpa 1.3.1
|
||||
|
||||
New functions, bug fixes, and performance improvements.
|
||||
|
||||
Significant changes to existing functions:
|
||||
- New plot visual is available for `keymetrics_scan()`
|
||||
- `combine_signals()` can now dynamically accept any metrics available in the Hourly Collaboration query.
|
||||
- `pairwise_count()` now uses a **data.table** implementation, instead of dependent on **widyr**.
|
||||
|
||||
New functions:
|
||||
- `network_p2p()`
|
||||
- `network_leiden()`
|
||||
- `network_louvain()`
|
||||
- `network_describe()`
|
||||
- `create_sankey()`
|
||||
- `totals_col()`
|
||||
|
||||
Some package dependencies have been removed (see #36):
|
||||
- **network**
|
||||
- **GGally**
|
||||
- **widyr**
|
||||
|
||||
# wpa 1.3.0
|
||||
|
||||
This is the first version of the **wpa** package to be released open-source on GitHub. If you have been using a previous developmental version, the main difference is that this release omits the more experimental _working patterns_ family of functions. The experimental functions are currently available upon request via mac@microsoft.com.
|
||||
This is the first version of the **wpa** package to be released open-source on GitHub. If you have been using a previous developmental version, the main difference is that this release omits the more experimental _working patterns_ family of functions. The experimental functions are currently available upon request via mac@microsoft.com.
|
||||
|
|
|
@ -26,6 +26,10 @@
|
|||
#' When 'table' is passed, a summary table is returned as a data frame.
|
||||
#'
|
||||
#' @examples
|
||||
#' ## Heatmap plot is returned by default
|
||||
#' keymetrics_scan(sq_data)
|
||||
#'
|
||||
#' ## Return a table
|
||||
#' keymetrics_scan(sq_data, hrvar = "LevelDesignation", return = "table")
|
||||
#'
|
||||
#' @export
|
||||
|
|
|
@ -10,8 +10,10 @@
|
|||
#' this function, you will require all the pre-requisites of the **leiden** package installed,
|
||||
#' which includes Python and **reticulate**.
|
||||
#'
|
||||
#' @param data Data frame containing a Person to Person query.
|
||||
#' @param data Data frame containing a Person to Person Network query. Note that this function is
|
||||
#' computationally intensive and may take a noticeably longer time to process beyond 5000 rows.
|
||||
#' @param hrvar String containing the HR attribute to be matched in the dataset.
|
||||
#' Defaults to "Organization".
|
||||
#' @param bg_fill String to specify background fill colour.
|
||||
#' @param font_col String to specify font and link colour.
|
||||
#' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
|
||||
|
@ -28,20 +30,28 @@
|
|||
#' @param res Resolution parameter to be passed to `leiden::leiden()`. Defaults to 0.5.
|
||||
#' @param desc_hrvar Character vector of length 3 containing the HR attributes to use when returning the
|
||||
#' "describe" output. See `network_describe()`.
|
||||
#' @param return String specifying what output to return. Valid return options include:
|
||||
#' - 'plot-leiden': return a network plot coloured by leiden communities.
|
||||
#' - 'plot-hrvar': return a network plot coloured by HR attribute.
|
||||
#' @param return String specifying what output to return. Defaults to "plot-leiden".
|
||||
#' Valid return options include:
|
||||
#' - 'plot-leiden': return a network plot coloured by leiden communities, saving a PDF to `path`.
|
||||
#' - 'plot-hrvar': return a network plot coloured by HR attribute, saving a PDF to `path`.
|
||||
#' - 'plot-sankey': return a sankey plot combining communities and HR attribute.
|
||||
#' - 'table': return a vertex summary table with counts in communities and HR attribute.
|
||||
#' - 'data': return a vertex data file that matches vertices with communities and HR attributes.
|
||||
#' - 'describe': return a list of data frames which describe each of the identified communities.
|
||||
#' The first data frame is a summary table of all the communities.
|
||||
#' - 'network': return igraph object.
|
||||
#' @param size_threshold Numeric value representing the maximum number of edges before `network_leiden()`
|
||||
#' switches to use a more efficient, but less elegant plotting method (native igraph). Defaults to 5000.
|
||||
#' Set as `0` to co-erce to a fast plotting method every time, and `Inf` to always use the default plotting
|
||||
#' method.
|
||||
#'
|
||||
#' @import dplyr
|
||||
#' @importFrom igraph plot.igraph
|
||||
#' @importFrom igraph layout_with_mds
|
||||
#'
|
||||
#' @export
|
||||
network_leiden <- function(data,
|
||||
hrvar,
|
||||
hrvar = "Organization",
|
||||
bg_fill = "#000000",
|
||||
font_col = "#FFFFFF",
|
||||
algorithm = "mds",
|
||||
|
@ -49,7 +59,8 @@ network_leiden <- function(data,
|
|||
node_alpha = 0.8,
|
||||
res = 0.5,
|
||||
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
|
||||
return){
|
||||
return = "plot-leiden",
|
||||
size_threshold = 5000){
|
||||
|
||||
## Set variables
|
||||
TO_hrvar <- paste0("TieOrigin_", hrvar)
|
||||
|
@ -109,70 +120,205 @@ network_leiden <- function(data,
|
|||
g %>%
|
||||
ggraph::ggraph(layout = "igraph", algorithm = algorithm)
|
||||
|
||||
## Timestamped File Path
|
||||
out_path <- paste0(path, tstamp(), ".pdf")
|
||||
|
||||
## Return
|
||||
if(return == "plot-leiden"){
|
||||
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = cluster),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(title = "Person to person collaboration with Community Detection",
|
||||
subtitle = "Based on Leiden algorithm and Strong Tie Score",
|
||||
y = "",
|
||||
x = "")
|
||||
if(igraph::ecount(g) > size_threshold){
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
message("Using fast plot method due to large network size...")
|
||||
|
||||
plot_output
|
||||
## Set colours
|
||||
colour_tb <-
|
||||
tibble(cluster = unique(igraph::V(g)$cluster)) %>%
|
||||
mutate(colour = rainbow(nrow(.)))
|
||||
|
||||
## Colour vector
|
||||
colour_v <-
|
||||
tibble(cluster = igraph::V(g)$cluster) %>%
|
||||
left_join(colour_tb, by = "cluster") %>%
|
||||
pull(colour)
|
||||
|
||||
|
||||
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
|
||||
igraph::V(g)$frame.color <- NA
|
||||
igraph::E(g)$width <- 1
|
||||
|
||||
plot_cluster <- function(){
|
||||
|
||||
par(bg = bg_fill)
|
||||
|
||||
plot(g,
|
||||
layout = igraph::layout_with_mds,
|
||||
vertex.label = NA,
|
||||
vertex.size = 3,
|
||||
edge.arrow.mode = "-",
|
||||
edge.color = "#adadad")
|
||||
|
||||
legend(x = -1.5,
|
||||
y = 0.5,
|
||||
legend = colour_tb$cluster,
|
||||
pch = 21,
|
||||
text.col = font_col,
|
||||
col = "#777777",
|
||||
pt.bg= colour_tb$colour,
|
||||
pt.cex = 2,
|
||||
cex = .8,
|
||||
bty = "n",
|
||||
ncol = 1)
|
||||
|
||||
}
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_cluster()
|
||||
|
||||
} else {
|
||||
|
||||
grDevices::pdf(out_path)
|
||||
|
||||
plot_cluster()
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(paste0(path, tstamp(), ".pdf"),
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = cluster),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(caption = "Person to person collaboration with Community Detection
|
||||
based on the Leiden algorithm. ",
|
||||
y = "",
|
||||
x = "")
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_output
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(paste0(path, tstamp(), ".pdf"),
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} else if(return == "plot-hrvar"){
|
||||
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = !!sym(hrvar)),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(title = "Person to person collaboration",
|
||||
subtitle = paste0("Showing ", hrvar),
|
||||
y = "",
|
||||
x = "")
|
||||
if(igraph::ecount(g) > size_threshold){
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
message("Using fast plot method due to large network size...")
|
||||
|
||||
plot_output
|
||||
## Set colours
|
||||
colour_tb <-
|
||||
tibble(!!sym(hrvar) := unique(igraph::get.vertex.attribute(g, name = hrvar))) %>%
|
||||
mutate(colour = rainbow(nrow(.)))
|
||||
|
||||
## Colour vector
|
||||
colour_v <-
|
||||
tibble(!!sym(hrvar) := igraph::get.vertex.attribute(g, name = hrvar)) %>%
|
||||
left_join(colour_tb, by = hrvar) %>%
|
||||
pull(colour)
|
||||
|
||||
|
||||
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
|
||||
igraph::V(g)$frame.color <- NA
|
||||
igraph::E(g)$width <- 1
|
||||
|
||||
plot_hrvar <- function(){
|
||||
|
||||
par(bg = bg_fill)
|
||||
|
||||
plot(g,
|
||||
layout = igraph::layout_with_mds,
|
||||
vertex.label = NA,
|
||||
vertex.size = 3,
|
||||
edge.arrow.mode = "-",
|
||||
edge.color = "#adadad")
|
||||
|
||||
legend(x = -1.5,
|
||||
y = 0.5,
|
||||
legend = colour_tb[[hrvar]],
|
||||
pch = 21,
|
||||
text.col = font_col,
|
||||
col = "#777777",
|
||||
pt.bg = colour_tb$colour,
|
||||
pt.cex = 2,
|
||||
cex = .8,
|
||||
bty = "n",
|
||||
ncol = 1)
|
||||
|
||||
}
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_hrvar()
|
||||
|
||||
} else {
|
||||
|
||||
grDevices::pdf(out_path)
|
||||
|
||||
plot_hrvar()
|
||||
|
||||
grDevices::dev.off()
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(paste0(path, tstamp(), ".pdf"),
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = !!sym(hrvar)),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(caption = paste0("Person to person collaboration showing ", hrvar, ". "), # spaces intentional
|
||||
y = "",
|
||||
x = "")
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_output
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(paste0(path, tstamp(), ".pdf"),
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -209,7 +355,8 @@ network_leiden <- function(data,
|
|||
pull(cluster) %>%
|
||||
unique()
|
||||
|
||||
desc_str %>%
|
||||
out_list <-
|
||||
desc_str %>%
|
||||
purrr::map(function(x){
|
||||
describe_tb %>%
|
||||
filter(cluster == x) %>%
|
||||
|
@ -217,6 +364,29 @@ network_leiden <- function(data,
|
|||
}) %>%
|
||||
setNames(nm = desc_str)
|
||||
|
||||
summaryTable <-
|
||||
list(i = out_list,
|
||||
j = names(out_list)) %>%
|
||||
purrr::pmap(function(i, j){
|
||||
i %>%
|
||||
arrange(desc(Percentage)) %>%
|
||||
slice(1) %>%
|
||||
mutate_at(vars(starts_with("feature_")), ~tidyr::replace_na(., "")) %>%
|
||||
mutate(Community = j,
|
||||
`Attribute 1` = paste(feature_1, "=", feature_1_value),
|
||||
`Attribute 2` = paste(feature_2, "=", feature_2_value),
|
||||
`Attribute 3` = paste(feature_3, "=", feature_3_value)) %>%
|
||||
select(Community,
|
||||
`Attribute 1`,
|
||||
`Attribute 2`,
|
||||
`Attribute 3`,
|
||||
PercentageExplained = "Percentage") %>%
|
||||
mutate_at(vars(starts_with("Attribute")), ~ifelse(. == " = ", NA, .))
|
||||
}) %>%
|
||||
bind_rows()
|
||||
|
||||
c(list("summaryTable" = summaryTable), out_list)
|
||||
|
||||
} else {
|
||||
|
||||
stop("Please enter a valid input for `return`.")
|
||||
|
|
|
@ -9,8 +9,10 @@
|
|||
#' Take a P2P network query and implement the Louvain community detection method. The
|
||||
#' **igraph** implementation of the Louvain method is used.
|
||||
#'
|
||||
#' @param data Data frame containing a Person to Person query.
|
||||
#' @param data Data frame containing a Person to Person Network query. Note that this function is
|
||||
#' computationally intensive and may take a noticeably longer time to process beyond 5000 rows.
|
||||
#' @param hrvar String containing the HR attribute to be matched in the dataset.
|
||||
#' Defaults to "Organization".
|
||||
#' @param bg_fill String to specify background fill colour.
|
||||
#' @param font_col String to specify font and link colour.
|
||||
#' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
|
||||
|
@ -25,28 +27,37 @@
|
|||
#' @param desc_hrvar Character vector of length 3 containing the HR attributes to use when returning the
|
||||
#' "describe" output. See `network_describe()`.
|
||||
#'
|
||||
#' @param return String specifying what output to return.Valid return options include:
|
||||
#' - 'plot-louvain': return a network plot coloured by louvain communities.
|
||||
#' - 'plot-hrvar': return a network plot coloured by HR attribute.
|
||||
#' @param return String specifying what output to return. Defaults to "plot-louvain".
|
||||
#' Valid return options include:
|
||||
#' - 'plot-louvain': return a network plot coloured by louvain communities, saving a PDF to `path`.
|
||||
#' - 'plot-hrvar': return a network plot coloured by HR attribute, saving a PDF to `path`.
|
||||
#' - 'plot-sankey': return a sankey plot combining communities and HR attribute.
|
||||
#' - 'table': return a vertex summary table with counts in communities and HR attribute.
|
||||
#' - 'data': return a vertex data file that matches vertices with communities and HR attributes.
|
||||
#' - 'describe': returns a list of data frames which describe each of the identified communities.
|
||||
#' The first data frame is a summary table of all the communities.
|
||||
#' - 'network': return igraph object.
|
||||
#' @param size_threshold Numeric value representing the maximum number of edges before `network_leiden()`
|
||||
#' switches to use a more efficient, but less elegant plotting method (native igraph). Defaults to 5000.
|
||||
#' Set as `0` to co-erce to a fast plotting method every time, and `Inf` to always use the default plotting
|
||||
#' method.
|
||||
#'
|
||||
#' @import ggraph
|
||||
#' @import dplyr
|
||||
#' @importFrom igraph plot.igraph
|
||||
#' @importFrom igraph layout_with_mds
|
||||
#'
|
||||
#' @export
|
||||
network_louvain <- function(data,
|
||||
hrvar,
|
||||
hrvar = "Organization",
|
||||
bg_fill = "#000000",
|
||||
font_col = "#FFFFFF",
|
||||
node_alpha = 0.8,
|
||||
algorithm = "mds",
|
||||
path = "network_p2p_louvain",
|
||||
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
|
||||
return){
|
||||
return = "plot-louvain",
|
||||
size_threshold = 5000){
|
||||
|
||||
## Set variables
|
||||
TO_hrvar <- paste0("TieOrigin_", hrvar)
|
||||
|
@ -106,73 +117,208 @@ network_louvain <- function(data,
|
|||
g %>%
|
||||
ggraph::ggraph(layout = "igraph", algorithm = algorithm)
|
||||
|
||||
## Timestamped File Path
|
||||
out_path <- paste0(path, tstamp(), ".pdf")
|
||||
|
||||
## Return
|
||||
if(return == "plot-louvain"){
|
||||
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = cluster),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(title = "Person to person collaboration with Community Detection",
|
||||
subtitle = "Based on Louvain algorithm and Strong Tie Score",
|
||||
y = "",
|
||||
x = "")
|
||||
if(igraph::ecount(g) > size_threshold){
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
message("Using fast plot method due to large network size...")
|
||||
|
||||
plot_output
|
||||
## Set colours
|
||||
colour_tb <-
|
||||
tibble(cluster = unique(igraph::V(g)$cluster)) %>%
|
||||
mutate(colour = rainbow(nrow(.)))
|
||||
|
||||
## Colour vector
|
||||
colour_v <-
|
||||
tibble(cluster = igraph::V(g)$cluster) %>%
|
||||
left_join(colour_tb, by = "cluster") %>%
|
||||
pull(colour)
|
||||
|
||||
|
||||
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
|
||||
igraph::V(g)$frame.color <- NA
|
||||
igraph::E(g)$width <- 1
|
||||
|
||||
plot_cluster <- function(){
|
||||
|
||||
par(bg = bg_fill)
|
||||
|
||||
plot(g,
|
||||
layout = igraph::layout_with_mds,
|
||||
vertex.label = NA,
|
||||
vertex.size = 3,
|
||||
edge.arrow.mode = "-",
|
||||
edge.color = "#adadad")
|
||||
|
||||
legend(x = -1.5,
|
||||
y = 0.5,
|
||||
legend = colour_tb$cluster,
|
||||
pch = 21,
|
||||
text.col = font_col,
|
||||
col = "#777777",
|
||||
pt.bg= colour_tb$colour,
|
||||
pt.cex = 2,
|
||||
cex = .8,
|
||||
bty = "n",
|
||||
ncol = 1)
|
||||
}
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_cluster()
|
||||
|
||||
} else {
|
||||
|
||||
grDevices::pdf(out_path)
|
||||
|
||||
plot_cluster()
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(paste0(path, tstamp(), ".pdf"),
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = cluster),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(caption = "Person to person collaboration with Community Detection
|
||||
based on the Louvain algorithm. ",
|
||||
y = "",
|
||||
x = "")
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_output
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(out_path,
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else if(return == "plot-hrvar"){
|
||||
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = !!sym(hrvar)),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(title = "Person to person collaboration",
|
||||
subtitle = paste0("Showing ", hrvar),
|
||||
y = "",
|
||||
x = "")
|
||||
if(igraph::ecount(g) > size_threshold){
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
message("Using fast plot method due to large network size...")
|
||||
|
||||
plot_output
|
||||
## Set colours
|
||||
colour_tb <-
|
||||
tibble(!!sym(hrvar) := unique(igraph::get.vertex.attribute(g, name = hrvar))) %>%
|
||||
mutate(colour = rainbow(nrow(.)))
|
||||
|
||||
## Colour vector
|
||||
colour_v <-
|
||||
tibble(!!sym(hrvar) := igraph::get.vertex.attribute(g, name = hrvar)) %>%
|
||||
left_join(colour_tb, by = hrvar) %>%
|
||||
pull(colour)
|
||||
|
||||
|
||||
igraph::V(g)$color <- grDevices::adjustcolor(colour_v, alpha.f = node_alpha)
|
||||
igraph::V(g)$frame.color <- NA
|
||||
igraph::E(g)$width <- 1
|
||||
|
||||
plot_hrvar <- function(){
|
||||
|
||||
par(bg = bg_fill)
|
||||
|
||||
plot(g,
|
||||
layout = igraph::layout_with_mds,
|
||||
vertex.label = NA,
|
||||
vertex.size = 3,
|
||||
edge.arrow.mode = "-",
|
||||
edge.color = "#adadad")
|
||||
|
||||
legend(x = -1.5,
|
||||
y = 0.5,
|
||||
legend = colour_tb[[hrvar]],
|
||||
pch = 21,
|
||||
text.col = font_col,
|
||||
col = "#777777",
|
||||
pt.bg = colour_tb$colour,
|
||||
pt.cex = 2,
|
||||
cex = .8,
|
||||
bty = "n",
|
||||
ncol = 1)
|
||||
}
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_hrvar()
|
||||
|
||||
} else {
|
||||
|
||||
grDevices::pdf(out_path)
|
||||
|
||||
plot_hrvar()
|
||||
|
||||
grDevices::dev.off()
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(paste0(path, tstamp(), ".pdf"),
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
plot_output <-
|
||||
g_layout +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = !!sym(hrvar)),
|
||||
alpha = node_alpha,
|
||||
pch = 16) +
|
||||
theme_void() +
|
||||
theme(legend.position = "bottom",
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
text = element_text(colour = font_col),
|
||||
axis.line = element_blank()) +
|
||||
labs(caption = paste0("Person to person collaboration showing ", hrvar, ". "), # spaces intentional
|
||||
y = "",
|
||||
x = "")
|
||||
|
||||
# Default PDF output unless NULL supplied to path
|
||||
if(is.null(path)){
|
||||
|
||||
plot_output
|
||||
|
||||
} else {
|
||||
|
||||
ggsave(out_path,
|
||||
plot = plot_output,
|
||||
width = 16,
|
||||
height = 9)
|
||||
|
||||
}
|
||||
|
||||
message(paste0("Saved to ", out_path, "."))
|
||||
|
||||
}
|
||||
|
||||
|
||||
} else if(return == "table"){
|
||||
|
||||
vertex_tb %>%
|
||||
|
@ -206,7 +352,8 @@ network_louvain <- function(data,
|
|||
pull(cluster) %>%
|
||||
unique()
|
||||
|
||||
desc_str %>%
|
||||
out_list <-
|
||||
desc_str %>%
|
||||
purrr::map(function(x){
|
||||
describe_tb %>%
|
||||
filter(cluster == x) %>%
|
||||
|
@ -214,6 +361,29 @@ network_louvain <- function(data,
|
|||
}) %>%
|
||||
setNames(nm = desc_str)
|
||||
|
||||
summaryTable <-
|
||||
list(i = out_list,
|
||||
j = names(out_list)) %>%
|
||||
purrr::pmap(function(i, j){
|
||||
i %>%
|
||||
arrange(desc(Percentage)) %>%
|
||||
slice(1) %>%
|
||||
mutate_at(vars(starts_with("feature_")), ~tidyr::replace_na(., "")) %>%
|
||||
mutate(Community = j,
|
||||
`Attribute 1` = paste(feature_1, "=", feature_1_value),
|
||||
`Attribute 2` = paste(feature_2, "=", feature_2_value),
|
||||
`Attribute 3` = paste(feature_3, "=", feature_3_value)) %>%
|
||||
select(Community,
|
||||
`Attribute 1`,
|
||||
`Attribute 2`,
|
||||
`Attribute 3`,
|
||||
PercentageExplained = "Percentage") %>%
|
||||
mutate_at(vars(starts_with("Attribute")), ~ifelse(. == " = ", NA, .))
|
||||
}) %>%
|
||||
bind_rows()
|
||||
|
||||
c(list("summaryTable" = summaryTable), out_list)
|
||||
|
||||
} else {
|
||||
|
||||
stop("Please enter a valid input for `return`.")
|
||||
|
|
|
@ -41,6 +41,10 @@ When 'table' is passed, a summary table is returned as a data frame.
|
|||
Returns a heatmapped table by default, with options to return a table.
|
||||
}
|
||||
\examples{
|
||||
## Heatmap plot is returned by default
|
||||
keymetrics_scan(sq_data)
|
||||
|
||||
## Return a table
|
||||
keymetrics_scan(sq_data, hrvar = "LevelDesignation", return = "table")
|
||||
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
\usage{
|
||||
network_leiden(
|
||||
data,
|
||||
hrvar,
|
||||
hrvar = "Organization",
|
||||
bg_fill = "#000000",
|
||||
font_col = "#FFFFFF",
|
||||
algorithm = "mds",
|
||||
|
@ -14,13 +14,16 @@ network_leiden(
|
|||
node_alpha = 0.8,
|
||||
res = 0.5,
|
||||
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
|
||||
return
|
||||
return = "plot-leiden",
|
||||
size_threshold = 5000
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{Data frame containing a Person to Person query.}
|
||||
\item{data}{Data frame containing a Person to Person Network query. Note that this function is
|
||||
computationally intensive and may take a noticeably longer time to process beyond 5000 rows.}
|
||||
|
||||
\item{hrvar}{String containing the HR attribute to be matched in the dataset.}
|
||||
\item{hrvar}{String containing the HR attribute to be matched in the dataset.
|
||||
Defaults to "Organization".}
|
||||
|
||||
\item{bg_fill}{String to specify background fill colour.}
|
||||
|
||||
|
@ -43,16 +46,23 @@ if returning anything other than "plot-leiden" or "plot-hrvar".}
|
|||
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes to use when returning the
|
||||
"describe" output. See \code{network_describe()}.}
|
||||
|
||||
\item{return}{String specifying what output to return. Valid return options include:
|
||||
\item{return}{String specifying what output to return. Defaults to "plot-leiden".
|
||||
Valid return options include:
|
||||
\itemize{
|
||||
\item 'plot-leiden': return a network plot coloured by leiden communities.
|
||||
\item 'plot-hrvar': return a network plot coloured by HR attribute.
|
||||
\item 'plot-leiden': return a network plot coloured by leiden communities, saving a PDF to \code{path}.
|
||||
\item 'plot-hrvar': return a network plot coloured by HR attribute, saving a PDF to \code{path}.
|
||||
\item 'plot-sankey': return a sankey plot combining communities and HR attribute.
|
||||
\item 'table': return a vertex summary table with counts in communities and HR attribute.
|
||||
\item 'data': return a vertex data file that matches vertices with communities and HR attributes.
|
||||
\item 'describe': return a list of data frames which describe each of the identified communities.
|
||||
The first data frame is a summary table of all the communities.
|
||||
\item 'network': return igraph object.
|
||||
}}
|
||||
|
||||
\item{size_threshold}{Numeric value representing the maximum number of edges before \code{network_leiden()}
|
||||
switches to use a more efficient, but less elegant plotting method (native igraph). Defaults to 5000.
|
||||
Set as \code{0} to co-erce to a fast plotting method every time, and \code{Inf} to always use the default plotting
|
||||
method.}
|
||||
}
|
||||
\description{
|
||||
Take a P2P network query and implement the Leiden community detection method. To run
|
||||
|
|
|
@ -6,20 +6,23 @@
|
|||
\usage{
|
||||
network_louvain(
|
||||
data,
|
||||
hrvar,
|
||||
hrvar = "Organization",
|
||||
bg_fill = "#000000",
|
||||
font_col = "#FFFFFF",
|
||||
node_alpha = 0.8,
|
||||
algorithm = "mds",
|
||||
path = "network_p2p_louvain",
|
||||
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
|
||||
return
|
||||
return = "plot-louvain",
|
||||
size_threshold = 5000
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{Data frame containing a Person to Person query.}
|
||||
\item{data}{Data frame containing a Person to Person Network query. Note that this function is
|
||||
computationally intensive and may take a noticeably longer time to process beyond 5000 rows.}
|
||||
|
||||
\item{hrvar}{String containing the HR attribute to be matched in the dataset.}
|
||||
\item{hrvar}{String containing the HR attribute to be matched in the dataset.
|
||||
Defaults to "Organization".}
|
||||
|
||||
\item{bg_fill}{String to specify background fill colour.}
|
||||
|
||||
|
@ -40,16 +43,23 @@ if returning anything other than "plot-louvain" or "plot-hrvar".}
|
|||
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes to use when returning the
|
||||
"describe" output. See \code{network_describe()}.}
|
||||
|
||||
\item{return}{String specifying what output to return.Valid return options include:
|
||||
\item{return}{String specifying what output to return. Defaults to "plot-louvain".
|
||||
Valid return options include:
|
||||
\itemize{
|
||||
\item 'plot-louvain': return a network plot coloured by louvain communities.
|
||||
\item 'plot-hrvar': return a network plot coloured by HR attribute.
|
||||
\item 'plot-louvain': return a network plot coloured by louvain communities, saving a PDF to \code{path}.
|
||||
\item 'plot-hrvar': return a network plot coloured by HR attribute, saving a PDF to \code{path}.
|
||||
\item 'plot-sankey': return a sankey plot combining communities and HR attribute.
|
||||
\item 'table': return a vertex summary table with counts in communities and HR attribute.
|
||||
\item 'data': return a vertex data file that matches vertices with communities and HR attributes.
|
||||
\item 'describe': returns a list of data frames which describe each of the identified communities.
|
||||
The first data frame is a summary table of all the communities.
|
||||
\item 'network': return igraph object.
|
||||
}}
|
||||
|
||||
\item{size_threshold}{Numeric value representing the maximum number of edges before \code{network_leiden()}
|
||||
switches to use a more efficient, but less elegant plotting method (native igraph). Defaults to 5000.
|
||||
Set as \code{0} to co-erce to a fast plotting method every time, and \code{Inf} to always use the default plotting
|
||||
method.}
|
||||
}
|
||||
\description{
|
||||
Take a P2P network query and implement the Louvain community detection method. The
|
||||
|
|
Загрузка…
Ссылка в новой задаче