Merge pull request #32 from microsoft/feature/network-community-detection

Feature: add Community Detection capability for network plots
This commit is contained in:
Martin Chan 2021-01-06 12:13:28 +00:00 коммит произвёл GitHub
Родитель 95c621af1e e9a60a8c90
Коммит 2118a61a1a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
12 изменённых файлов: 848 добавлений и 5 удалений

Просмотреть файл

@ -63,5 +63,6 @@ RoxygenNote: 7.1.1
Roxygen: list(markdown = TRUE) Roxygen: list(markdown = TRUE)
VignetteBuilder: knitr VignetteBuilder: knitr
Suggests: Suggests:
extrafont extrafont,
leiden
Language: en-US Language: en-US

Просмотреть файл

@ -49,6 +49,7 @@ export(create_line)
export(create_line_asis) export(create_line_asis)
export(create_period_scatter) export(create_period_scatter)
export(create_rank) export(create_rank)
export(create_sankey)
export(create_scatter) export(create_scatter)
export(create_stacked) export(create_stacked)
export(create_trend) export(create_trend)
@ -105,7 +106,10 @@ export(meetingtype_sum)
export(meetingtype_summary) export(meetingtype_summary)
export(mgrcoatt_dist) export(mgrcoatt_dist)
export(mgrrel_matrix) export(mgrrel_matrix)
export(network_describe)
export(network_g2g) export(network_g2g)
export(network_leiden)
export(network_louvain)
export(network_p2p) export(network_p2p)
export(one2one_dist) export(one2one_dist)
export(one2one_fizz) export(one2one_fizz)
@ -150,16 +154,18 @@ export(workpatterns_rank)
export(wrap) export(wrap)
import(DT) import(DT)
import(Information) import(Information)
import(data.table)
import(dplyr) import(dplyr)
import(ggplot2) import(ggplot2)
import(ggraph) import(ggraph)
import(reshape2) import(reshape2)
import(scales) import(scales)
import(tidyr)
import(tidyselect) import(tidyselect)
importFrom(data.table,"%between%") importFrom(data.table,"%between%")
importFrom(data.table,"%like%") importFrom(data.table,"%like%")
importFrom(data.table,":=") importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,rbindlist)
importFrom(dplyr,`%>%`) importFrom(dplyr,`%>%`)
importFrom(dplyr,mutate_if) importFrom(dplyr,mutate_if)
importFrom(grDevices,rainbow) importFrom(grDevices,rainbow)

72
R/create_sankey.R Normal file
Просмотреть файл

@ -0,0 +1,72 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Create a sankey chart from a two-column count table
#'
#' @description
#' Create a networkD3 style sankey chart based on a long count table
#' with two variables. The input data should have three columns, where
#' each row is a unique group:
#' 1. Variable 1
#' 2. Variable 2
#' 3. Count
#'
#' @param data Data frame of the long count table.
#' @param var1 String containing the name of the variable to be shown on the left.
#' @param var2 String containing the name of the variable to be shown on the right.
#' @param count String containing the name of the count variable.
#'
#' @import dplyr
#'
#' @examples
#' \donttest{
#' sq_data %>%
#' dplyr::count(Organization, FunctionType) %>%
#' create_sankey(var1 = "Organization", var2 = "FunctionType")
#' }
#'
#' @export
create_sankey <- function(data, var1, var2, count = "n"){
## Rename
data$pre_group <- data[[var1]]
data$group <- data[[var2]]
## Set up `nodes`
group_source <- unique(data$pre_group)
group_target <- paste0(unique(data$group), " ")
groups <- c(group_source, group_target)
nodes_source <- tibble(name = group_source)
nodes_target <- tibble(name = group_target)
nodes <- rbind(nodes_source, nodes_target) %>% mutate(node = 0:(nrow(.) - 1))
## Set up `links`
links <-
data %>%
mutate(group = paste0(group, " ")) %>%
select(source = "pre_group",
target = "group",
value = count)
nodes_source <- nodes_source %>% select(name) # Make `nodes` a single column data frame
nodes_target <- nodes_target %>% select(name) # Make `nodes` a single column data frame
links <-
links %>%
left_join(nodes %>% rename(IDsource = "node"), by = c("source" = "name")) %>%
left_join(nodes %>% rename(IDtarget = "node"), by = c("target" = "name"))
networkD3::sankeyNetwork(Links = as.data.frame(links),
Nodes = as.data.frame(nodes),
Source = 'IDsource', # Change reference to IDsource
Target = 'IDtarget', # Change reference to IDtarget
Value = 'value',
NodeID = 'name',
units="count",
sinksRight = FALSE)
}

125
R/network_describe.R Normal file
Просмотреть файл

@ -0,0 +1,125 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Uncover HR attributes which best represent a population for a Person to Person query
#'
#' @author Tannaz Sattari Tabrizi <Tannaz.Sattari@microsoft.com>
#'
#' @description
#' Returns a data frame that gives a percentage of the group combinations that best represent
#' the population provided. Uses a person to person query.
#'
#' @param data Data frame for a person to person query.
#' @param hrvar Character vector of length 3 containing the HR attributes to be used.
#'
#' @import dplyr
#' @import tidyr
#'
#' @export
network_describe <- function(data, hrvar = c("Organization", "LevelDesignation", "FunctionType")){
if(length(hrvar) != 3){
stop("Please provide a character vector of length 3 for `hrvar`")
}
## De-duplicated data containing only TieOrigins
filtered_Data <- unique(select(data, starts_with("TieOrigin_")))
## Select features
features <- select(filtered_Data, paste0("TieOrigin_", hrvar))
## Feature set: 1
max_percentages_1f <-
features %>%
colnames() %>%
purrr::map(function(c){
agg <-
features %>%
group_by_at(.vars = vars(c)) %>%
summarise(count = n(), .groups = "drop") %>%
mutate(percentage = count / sum(count, na.rm = TRUE))
agg %>%
arrange(desc(percentage)) %>%
slice(1) %>% # Extract first row
mutate(feature_1 = c,
feature_1_value = !!sym(c)) %>%
select(feature_1, feature_1_value, Percentage = "percentage")
}) %>%
bind_rows()
## Feature set: 2
max_percentages_2f <-
list(c1 = colnames(features),
c2 = colnames(features)) %>%
expand.grid(stringsAsFactors = FALSE) %>%
filter(c1 != c2) %>%
purrr::pmap(function(c1, c2){
agg <-
features %>%
group_by_at(.vars=vars(c1, c2)) %>%
summarise(count = n(), .groups = "drop") %>%
mutate(percentage = count / sum(count, na.rm = TRUE))
agg %>%
arrange(desc(percentage)) %>%
slice(1) %>% # Extract first row
mutate(feature_1 = c1,
feature_1_value = !!sym(as.character(c1)),
feature_2 = c2,
feature_2_value = !!sym(as.character(c2))) %>%
select(feature_1,
feature_1_value,
feature_2,
feature_2_value,
Percentage = "percentage")
}) %>%
bind_rows()
## Feature set: 3
max_percentages_3f <-
list(c1 = colnames(features),
c2 = colnames(features),
c3 = colnames(features)) %>%
expand.grid(stringsAsFactors = FALSE) %>%
filter(c1 != c2,
c2 != c3,
c3 != c1) %>%
purrr::pmap(function(c1, c2, c3){
agg <-
features %>%
group_by_at(.vars=vars(c1, c2, c3)) %>%
summarise(count = n(), .groups = "drop") %>%
mutate(percentage = count / sum(count, na.rm = TRUE))
agg %>%
arrange(desc(percentage)) %>%
slice(1) %>% # Extract first row
mutate(feature_1 = c1,
feature_1_value = !!sym(c1),
feature_2 = c2,
feature_2_value = !!sym(c2),
feature_3 = c3,
feature_3_value = !!sym(c3)) %>%
select(feature_1,
feature_1_value,
feature_2,
feature_2_value,
feature_3,
feature_3_value,
Percentage = "percentage")
}) %>%
bind_rows()
list(max_percentages_1f,
max_percentages_2f,
max_percentages_3f) %>%
bind_rows() %>%
select(starts_with("feature"), Percentage)
}

225
R/network_leiden.R Normal file
Просмотреть файл

@ -0,0 +1,225 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Implement the Leiden community detection on a Person to Person network query
#'
#' @description
#' Take a P2P network query and implement the Leiden community detection method. To run
#' this function, you will require all the pre-requisites of the **leiden** package installed,
#' which includes Python and **reticulate**.
#'
#' @param data Data frame containing a Person to Person query.
#' @param hrvar String containing the HR attribute to be matched in the dataset.
#' @param bg_fill String to specify background fill colour.
#' @param font_col String to specify font and link colour.
#' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
#' @param path File path for saving the PDF output. Defaults to "network_p2p_leiden".
#' Since the network outputs are computationally intensive, the default behaviour is to save time by
#' saving the plot output directly as a PDF in the specified path. To override this behaviour and return
#' a plot object instead, you can pass `NULL` to `path`. What is passed to `path` makes no difference
#' if returning anything other than "plot-leiden" or "plot-hrvar".
#'
#' @param algorithm String to specify the node placement algorithm to be used. Defaults to "mds" to perform
#' a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
#' See <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full list of options.
#'
#' @param res Resolution parameter to be passed to `leiden::leiden()`. Defaults to 0.5.
#' @param desc_hrvar Character vector of length 3 containing the HR attributes to use when returning the
#' "describe" output. See `network_describe()`.
#' @param return String specifying what output to return. Valid return options include:
#' - 'plot-leiden': return a network plot coloured by leiden communities.
#' - 'plot-hrvar': return a network plot coloured by HR attribute.
#' - 'plot-sankey': return a sankey plot combining communities and HR attribute.
#' - 'table': return a vertex summary table with counts in communities and HR attribute.
#' - 'data': return a vertex data file that matches vertices with communities and HR attributes.
#' - 'describe': return a list of data frames which describe each of the identified communities.
#' - 'network': return igraph object.
#'
#' @import dplyr
#'
#' @export
network_leiden <- function(data,
hrvar,
bg_fill = "#000000",
font_col = "#FFFFFF",
algorithm = "mds",
path = "network_p2p_leiden",
node_alpha = 0.8,
res = 0.5,
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
return){
## Set variables
TO_hrvar <- paste0("TieOrigin_", hrvar)
TD_hrvar <- paste0("TieDestination_", hrvar)
## Set edges df
edges <-
data %>%
select(from = "TieOrigin_PersonId",
to = "TieDestination_PersonId",
weight = "StrongTieScore")
## Vertices data frame to provide meta-data
vert_ft <-
rbind(
# TieOrigin
edges %>%
select(from) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieOrigin_PersonId, TO_hrvar),
by = c("from" = "TieOrigin_PersonId")) %>%
select(node = "from", !!sym(hrvar) := TO_hrvar),
# TieDestination
edges %>%
select(to) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieDestination_PersonId, TD_hrvar),
by = c("to" = "TieDestination_PersonId")) %>%
select(node = "to", !!sym(hrvar) := TD_hrvar)
)
## Create igraph object
g_raw <-
igraph::graph_from_data_frame(edges,
directed = TRUE, # Directed, but FALSE for visualization
vertices = unique(vert_ft)) # remove duplicates
## Return a numeric vector of partitions / clusters / modules
## Set a low resolution parameter to have fewer groups
ld <- leiden::leiden(g_raw, resolution_parameter = res) # create partitions
## Add cluster
g <-
g_raw %>%
# Add leiden partitions to graph object
igraph::set_vertex_attr("cluster", value = as.character(ld)) %>%
igraph::simplify()
## Create vertex table
vertex_tb <-
g %>%
igraph::get.vertex.attribute() %>%
as_tibble()
g_layout <-
g %>%
ggraph::ggraph(layout = "igraph", algorithm = algorithm)
## Return
if(return == "plot-leiden"){
plot_output <-
g_layout +
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
ggraph::geom_node_point(aes(colour = cluster),
alpha = node_alpha,
pch = 16) +
theme_void() +
theme(legend.position = "bottom",
legend.background = element_rect(fill = bg_fill),
plot.background = element_rect(fill = bg_fill),
text = element_text(colour = font_col),
axis.line = element_blank()) +
labs(title = "Person to person collaboration with Community Detection",
subtitle = "Based on Leiden algorithm and Strong Tie Score",
y = "",
x = "")
# Default PDF output unless NULL supplied to path
if(is.null(path)){
plot_output
} else {
ggsave(paste0(path, tstamp(), ".pdf"),
plot = plot_output,
width = 16,
height = 9)
}
} else if(return == "plot-hrvar"){
plot_output <-
g_layout +
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
ggraph::geom_node_point(aes(colour = !!sym(hrvar)),
alpha = node_alpha,
pch = 16) +
theme_void() +
theme(legend.position = "bottom",
legend.background = element_rect(fill = bg_fill),
plot.background = element_rect(fill = bg_fill),
text = element_text(colour = font_col),
axis.line = element_blank()) +
labs(title = "Person to person collaboration",
subtitle = paste0("Showing ", hrvar),
y = "",
x = "")
# Default PDF output unless NULL supplied to path
if(is.null(path)){
plot_output
} else {
ggsave(paste0(path, tstamp(), ".pdf"),
plot = plot_output,
width = 16,
height = 9)
}
} else if(return == "table"){
vertex_tb %>%
count(!!sym(hrvar), cluster)
} else if(return == "data"){
vertex_tb
} else if(return == "network"){
g
} else if(return == "plot-sankey"){
create_sankey(data = vertex_tb %>% count(!!sym(hrvar), cluster),
var1 = hrvar,
var2 = "cluster",
count = "n")
} else if(return == "describe"){
describe_tb <-
vertex_tb %>%
left_join(select(data, starts_with("TieOrigin_")),
by = c("name" = "TieOrigin_PersonId"))
desc_str <-
describe_tb %>%
pull(cluster) %>%
unique()
desc_str %>%
purrr::map(function(x){
describe_tb %>%
filter(cluster == x) %>%
network_describe(hrvar = desc_hrvar)
}) %>%
setNames(nm = desc_str)
} else {
stop("Please enter a valid input for `return`.")
}
}

222
R/network_louvain.R Normal file
Просмотреть файл

@ -0,0 +1,222 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Implement the Louvain community detection on a Person to Person network query
#'
#' @description
#' Take a P2P network query and implement the Louvain community detection method. The
#' **igraph** implementation of the Louvain method is used.
#'
#' @param data Data frame containing a Person to Person query.
#' @param hrvar String containing the HR attribute to be matched in the dataset.
#' @param bg_fill String to specify background fill colour.
#' @param font_col String to specify font and link colour.
#' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
#' @param algorithm String to specify the node placement algorithm to be used. Defaults to "mds" to perform
#' a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
#' See <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a full list of options.
#' @param path File path for saving the PDF output. Defaults to "network_p2p_louvain".
#' Since the network outputs are computationally intensive, the default behaviour is to save time by
#' saving the plot output directly as a PDF in the specified path. To override this behaviour and return
#' a plot object instead, you can pass `NULL` to `path`. What is passed to `path` makes no difference
#' if returning anything other than "plot-louvain" or "plot-hrvar".
#' @param desc_hrvar Character vector of length 3 containing the HR attributes to use when returning the
#' "describe" output. See `network_describe()`.
#'
#' @param return String specifying what output to return.Valid return options include:
#' - 'plot-louvain': return a network plot coloured by louvain communities.
#' - 'plot-hrvar': return a network plot coloured by HR attribute.
#' - 'plot-sankey': return a sankey plot combining communities and HR attribute.
#' - 'table': return a vertex summary table with counts in communities and HR attribute.
#' - 'data': return a vertex data file that matches vertices with communities and HR attributes.
#' - 'describe': returns a list of data frames which describe each of the identified communities.
#' - 'network': return igraph object.
#'
#' @import ggraph
#' @import dplyr
#'
#' @export
network_louvain <- function(data,
hrvar,
bg_fill = "#000000",
font_col = "#FFFFFF",
node_alpha = 0.8,
algorithm = "mds",
path = "network_p2p_louvain",
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
return){
## Set variables
TO_hrvar <- paste0("TieOrigin_", hrvar)
TD_hrvar <- paste0("TieDestination_", hrvar)
## Set edges df
edges <-
data %>%
select(from = "TieOrigin_PersonId",
to = "TieDestination_PersonId",
weight = "StrongTieScore")
## Vertices data frame to provide meta-data
vert_ft <-
rbind(
# TieOrigin
edges %>%
select(from) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieOrigin_PersonId, TO_hrvar),
by = c("from" = "TieOrigin_PersonId")) %>%
select(node = "from", !!sym(hrvar) := TO_hrvar),
# TieDestination
edges %>%
select(to) %>% # Single column
unique() %>% # Remove duplications
left_join(select(data, TieDestination_PersonId, TD_hrvar),
by = c("to" = "TieDestination_PersonId")) %>%
select(node = "to", !!sym(hrvar) := TD_hrvar)
)
## Create igraph object
g_raw <-
igraph::graph_from_data_frame(edges,
directed = FALSE, # Set to undirected for clustering
vertices = unique(vert_ft)) # remove duplicates
## Return a numeric vector of partitions / clusters / modules
## Set a low resolution parameter to have fewer groups
lc <- igraph::cluster_louvain(g_raw)
## Add cluster
g <-
g_raw %>%
# Add louvain partitions to graph object
igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
igraph::simplify()
## Create vertex table
vertex_tb <-
g %>%
igraph::get.vertex.attribute() %>%
as_tibble()
g_layout <-
g %>%
ggraph::ggraph(layout = "igraph", algorithm = algorithm)
## Return
if(return == "plot-louvain"){
plot_output <-
g_layout +
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
ggraph::geom_node_point(aes(colour = cluster),
alpha = node_alpha,
pch = 16) +
theme_void() +
theme(legend.position = "bottom",
legend.background = element_rect(fill = bg_fill),
plot.background = element_rect(fill = bg_fill),
text = element_text(colour = font_col),
axis.line = element_blank()) +
labs(title = "Person to person collaboration with Community Detection",
subtitle = "Based on Louvain algorithm and Strong Tie Score",
y = "",
x = "")
# Default PDF output unless NULL supplied to path
if(is.null(path)){
plot_output
} else {
ggsave(paste0(path, tstamp(), ".pdf"),
plot = plot_output,
width = 16,
height = 9)
}
} else if(return == "plot-hrvar"){
plot_output <-
g_layout +
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
ggraph::geom_node_point(aes(colour = !!sym(hrvar)),
alpha = node_alpha,
pch = 16) +
theme_void() +
theme(legend.position = "bottom",
legend.background = element_rect(fill = bg_fill),
plot.background = element_rect(fill = bg_fill),
text = element_text(colour = font_col),
axis.line = element_blank()) +
labs(title = "Person to person collaboration",
subtitle = paste0("Showing ", hrvar),
y = "",
x = "")
# Default PDF output unless NULL supplied to path
if(is.null(path)){
plot_output
} else {
ggsave(paste0(path, tstamp(), ".pdf"),
plot = plot_output,
width = 16,
height = 9)
}
} else if(return == "table"){
vertex_tb %>%
count(!!sym(hrvar), cluster)
} else if(return == "data"){
vertex_tb
} else if(return == "network"){
g
} else if(return == "plot-sankey"){
create_sankey(data = vertex_tb %>% count(!!sym(hrvar), cluster),
var1 = hrvar,
var2 = "cluster",
count = "n")
} else if(return == "describe"){
describe_tb <-
vertex_tb %>%
left_join(select(data, starts_with("TieOrigin_")),
by = c("name" = "TieOrigin_PersonId"))
desc_str <-
describe_tb %>%
pull(cluster) %>%
unique()
desc_str %>%
purrr::map(function(x){
describe_tb %>%
filter(cluster == x) %>%
network_describe(hrvar = desc_hrvar)
}) %>%
setNames(nm = desc_str)
} else {
stop("Please enter a valid input for `return`.")
}
}

Просмотреть файл

@ -5,16 +5,25 @@
#' within `tm_cooc()`. #' within `tm_cooc()`.
#' #'
#' @param data Data frame output from `tm_clean()`. #' @param data Data frame output from `tm_clean()`.
#' @param id String to represent the id variable. Defaults to "word". #' @param id String to represent the id variable. Defaults to "line".
#' @param word String to represent the word variable. Defaults to "word". #' @param word String to represent the word variable. Defaults to "word".
#' #'
#' @import data.table #' @importFrom data.table ":=" "%like%" "%between%" rbindlist as.data.table
#'
#' @examples
#' td <- data.frame(line = c(1, 1, 2, 2),
#' word = c("work", "meeting", "catch", "up"))
#'
#' pairwise_count(td, id = "line", word = "word")
#' #'
#' @export #' @export
pairwise_count <- function(data, pairwise_count <- function(data,
id = "line", id = "line",
word = "word"){ word = "word"){
# Make sure data.table knows we know we're using it
.datatable.aware = TRUE
data <- data <-
data %>% data %>%
dplyr::rename(word := !!sym(word), dplyr::rename(word := !!sym(word),

35
man/create_sankey.Rd Normal file
Просмотреть файл

@ -0,0 +1,35 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/create_sankey.R
\name{create_sankey}
\alias{create_sankey}
\title{Create a sankey chart from a two-column count table}
\usage{
create_sankey(data, var1, var2, count = "n")
}
\arguments{
\item{data}{Data frame of the long count table.}
\item{var1}{String containing the name of the variable to be shown on the left.}
\item{var2}{String containing the name of the variable to be shown on the right.}
\item{count}{String containing the name of the count variable.}
}
\description{
Create a networkD3 style sankey chart based on a long count table
with two variables. The input data should have three columns, where
each row is a unique group:
\enumerate{
\item Variable 1
\item Variable 2
\item Count
}
}
\examples{
\donttest{
sq_data \%>\%
dplyr::count(Organization, FunctionType) \%>\%
create_sankey(var1 = "Organization", var2 = "FunctionType")
}
}

23
man/network_describe.Rd Normal file
Просмотреть файл

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/network_describe.R
\name{network_describe}
\alias{network_describe}
\title{Uncover HR attributes which best represent a population for a Person to Person query}
\usage{
network_describe(
data,
hrvar = c("Organization", "LevelDesignation", "FunctionType")
)
}
\arguments{
\item{data}{Data frame for a person to person query.}
\item{hrvar}{Character vector of length 3 containing the HR attributes to be used.}
}
\description{
Returns a data frame that gives a percentage of the group combinations that best represent
the population provided. Uses a person to person query.
}
\author{
Tannaz Sattari Tabrizi \href{mailto:Tannaz.Sattari@microsoft.com}{Tannaz.Sattari@microsoft.com}
}

61
man/network_leiden.Rd Normal file
Просмотреть файл

@ -0,0 +1,61 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/network_leiden.R
\name{network_leiden}
\alias{network_leiden}
\title{Implement the Leiden community detection on a Person to Person network query}
\usage{
network_leiden(
data,
hrvar,
bg_fill = "#000000",
font_col = "#FFFFFF",
algorithm = "mds",
path = "network_p2p_leiden",
node_alpha = 0.8,
res = 0.5,
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
return
)
}
\arguments{
\item{data}{Data frame containing a Person to Person query.}
\item{hrvar}{String containing the HR attribute to be matched in the dataset.}
\item{bg_fill}{String to specify background fill colour.}
\item{font_col}{String to specify font and link colour.}
\item{algorithm}{String to specify the node placement algorithm to be used. Defaults to "mds" to perform
a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
See \url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full list of options.}
\item{path}{File path for saving the PDF output. Defaults to "network_p2p_leiden".
Since the network outputs are computationally intensive, the default behaviour is to save time by
saving the plot output directly as a PDF in the specified path. To override this behaviour and return
a plot object instead, you can pass \code{NULL} to \code{path}. What is passed to \code{path} makes no difference
if returning anything other than "plot-leiden" or "plot-hrvar".}
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency of the nodes.}
\item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults to 0.5.}
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes to use when returning the
"describe" output. See \code{network_describe()}.}
\item{return}{String specifying what output to return. Valid return options include:
\itemize{
\item 'plot-leiden': return a network plot coloured by leiden communities.
\item 'plot-hrvar': return a network plot coloured by HR attribute.
\item 'plot-sankey': return a sankey plot combining communities and HR attribute.
\item 'table': return a vertex summary table with counts in communities and HR attribute.
\item 'data': return a vertex data file that matches vertices with communities and HR attributes.
\item 'describe': return a list of data frames which describe each of the identified communities.
\item 'network': return igraph object.
}}
}
\description{
Take a P2P network query and implement the Leiden community detection method. To run
this function, you will require all the pre-requisites of the \strong{leiden} package installed,
which includes Python and \strong{reticulate}.
}

57
man/network_louvain.Rd Normal file
Просмотреть файл

@ -0,0 +1,57 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/network_louvain.R
\name{network_louvain}
\alias{network_louvain}
\title{Implement the Louvain community detection on a Person to Person network query}
\usage{
network_louvain(
data,
hrvar,
bg_fill = "#000000",
font_col = "#FFFFFF",
node_alpha = 0.8,
algorithm = "mds",
path = "network_p2p_louvain",
desc_hrvar = c("Organization", "LevelDesignation", "FunctionType"),
return
)
}
\arguments{
\item{data}{Data frame containing a Person to Person query.}
\item{hrvar}{String containing the HR attribute to be matched in the dataset.}
\item{bg_fill}{String to specify background fill colour.}
\item{font_col}{String to specify font and link colour.}
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency of the nodes.}
\item{algorithm}{String to specify the node placement algorithm to be used. Defaults to "mds" to perform
a multidimensional scaling of nodes using a shortest path, which is also a deterministic method.
See \url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a full list of options.}
\item{path}{File path for saving the PDF output. Defaults to "network_p2p_louvain".
Since the network outputs are computationally intensive, the default behaviour is to save time by
saving the plot output directly as a PDF in the specified path. To override this behaviour and return
a plot object instead, you can pass \code{NULL} to \code{path}. What is passed to \code{path} makes no difference
if returning anything other than "plot-louvain" or "plot-hrvar".}
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes to use when returning the
"describe" output. See \code{network_describe()}.}
\item{return}{String specifying what output to return.Valid return options include:
\itemize{
\item 'plot-louvain': return a network plot coloured by louvain communities.
\item 'plot-hrvar': return a network plot coloured by HR attribute.
\item 'plot-sankey': return a sankey plot combining communities and HR attribute.
\item 'table': return a vertex summary table with counts in communities and HR attribute.
\item 'data': return a vertex data file that matches vertices with communities and HR attributes.
\item 'describe': returns a list of data frames which describe each of the identified communities.
\item 'network': return igraph object.
}}
}
\description{
Take a P2P network query and implement the Louvain community detection method. The
\strong{igraph} implementation of the Louvain method is used.
}

Просмотреть файл

@ -9,7 +9,7 @@ pairwise_count(data, id = "line", word = "word")
\arguments{ \arguments{
\item{data}{Data frame output from \code{tm_clean()}.} \item{data}{Data frame output from \code{tm_clean()}.}
\item{id}{String to represent the id variable. Defaults to "word".} \item{id}{String to represent the id variable. Defaults to "line".}
\item{word}{String to represent the word variable. Defaults to "word".} \item{word}{String to represent the word variable. Defaults to "word".}
} }
@ -18,3 +18,10 @@ This is a \strong{data.table} implementation that mimics the output of
\code{widyr::pairwise_count()} to reduce package dependency. This is used internally \code{widyr::pairwise_count()} to reduce package dependency. This is used internally
within \code{tm_cooc()}. within \code{tm_cooc()}.
} }
\examples{
td <- data.frame(line = c(1, 1, 2, 2),
word = c("work", "meeting", "catch", "up"))
pairwise_count(td, id = "line", word = "word")
}