зеркало из https://github.com/microsoft/wpa.git
Merge branch 'main' into feature/network-community-detection
This commit is contained in:
Коммит
2f14a4931e
|
@ -54,11 +54,8 @@ Imports:
|
|||
tidytext,
|
||||
ggraph,
|
||||
igraph,
|
||||
widyr,
|
||||
proxy,
|
||||
rlang,
|
||||
GGally,
|
||||
network,
|
||||
ggwordcloud,
|
||||
methods,
|
||||
data.table
|
||||
|
|
|
@ -118,6 +118,7 @@ export(one2one_rank)
|
|||
export(one2one_sum)
|
||||
export(one2one_summary)
|
||||
export(one2one_trend)
|
||||
export(pairwise_count)
|
||||
export(period_change)
|
||||
export(personas_hclust)
|
||||
export(read_preamble)
|
||||
|
@ -153,10 +154,10 @@ export(workpatterns_rank)
|
|||
export(wrap)
|
||||
import(DT)
|
||||
import(Information)
|
||||
import(data.table)
|
||||
import(dplyr)
|
||||
import(ggplot2)
|
||||
import(ggraph)
|
||||
import(network)
|
||||
import(reshape2)
|
||||
import(scales)
|
||||
import(tidyr)
|
||||
|
@ -196,4 +197,3 @@ importFrom(tidyselect,all_of)
|
|||
importFrom(tidytext,unnest_tokens)
|
||||
importFrom(utils,write.csv)
|
||||
importFrom(utils,write.table)
|
||||
importFrom(widyr,pairwise_count)
|
||||
|
|
|
@ -35,7 +35,6 @@
|
|||
#' # Return a plot
|
||||
#' identify_holidayweeks(sq_data, return = "plot")
|
||||
#'
|
||||
#' @return
|
||||
#'
|
||||
#' @export
|
||||
identify_holidayweeks <- function(data, sd = 1, return = "message"){
|
||||
|
|
|
@ -15,21 +15,21 @@
|
|||
#' @param metric String containing the variable name for metric.
|
||||
#' @param exc_threshold Exclusion threshold to apply.
|
||||
#' @param subtitle String to override default plot subtitle.
|
||||
#' @param ... Additional arguments to pass to `GGally::ggnet2()`
|
||||
#' @param return Character vector specifying what to return, defaults to "plot".
|
||||
#' Valid inputs are "plot" and "table".
|
||||
#'
|
||||
#' Valid inputs include:
|
||||
#' - "plot": return a network plot.
|
||||
#' - "table": return a raw data table used to plot the network.
|
||||
#' - "network": return an **igraph** object
|
||||
#'
|
||||
#' @import ggplot2
|
||||
#' @import dplyr
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' ## Return a network plot
|
||||
#' g2g_data %>%
|
||||
#' network_g2g(time_investor = "TimeInvestors_Organization",
|
||||
#' collaborator = "Collaborators_Organization",
|
||||
#' metric = "Collaboration_hours")
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
network_g2g <- function(data,
|
||||
|
@ -38,8 +38,7 @@ network_g2g <- function(data,
|
|||
metric,
|
||||
exc_threshold = 0.1,
|
||||
subtitle = "Collaboration Across Organizations",
|
||||
return = "plot",
|
||||
...){
|
||||
return = "plot"){
|
||||
|
||||
plot_data <-
|
||||
data %>%
|
||||
|
@ -61,7 +60,7 @@ network_g2g <- function(data,
|
|||
|
||||
plot_data
|
||||
|
||||
} else if(return == "plot"){
|
||||
} else if(return %in% c("plot", "network")){
|
||||
|
||||
## Network object
|
||||
mynet_em <-
|
||||
|
@ -69,27 +68,32 @@ network_g2g <- function(data,
|
|||
filter(metric_prop > exc_threshold) %>%
|
||||
mutate_at(vars(TimeInvestorOrg, CollaboratorOrg), ~sub(pattern = " ", replacement = "\n", x = .)) %>%
|
||||
mutate(metric_prop = metric_prop * 10) %>%
|
||||
network::network(matrix.type = "edgelist",
|
||||
ignore.eval = FALSE,
|
||||
names.eval = "weights")
|
||||
igraph::graph_from_data_frame(directed = FALSE)
|
||||
|
||||
## Plot object
|
||||
mynet_em %>%
|
||||
GGally::ggnet2(size = 12,
|
||||
color = "lightblue",
|
||||
label = TRUE,
|
||||
label.size = 4,
|
||||
label.color = "black",
|
||||
edge.size = "weights",
|
||||
edge.alpha = .5,
|
||||
...) +
|
||||
ggtitle("Group to Group Collaboration",
|
||||
subtitle = subtitle) +
|
||||
xlab(label = "") +
|
||||
ylab(label = "") +
|
||||
theme_wpa_basic() +
|
||||
labs(caption = paste("Displays only collaboration above ", exc_threshold * 100, "% of node's total collaboration", sep = "")) +
|
||||
theme(axis.line = element_blank())
|
||||
if(return == "network"){
|
||||
|
||||
mynet_em # Return igraph object
|
||||
|
||||
} else {
|
||||
|
||||
## Plot object
|
||||
mynet_em %>%
|
||||
ggraph::ggraph(layout = "fr") +
|
||||
ggraph::geom_edge_link(aes(edge_width = metric_prop * 1), edge_alpha = 0.5, edge_colour = "grey") +
|
||||
ggraph::geom_node_point(size = 20, colour = "lightblue") +
|
||||
ggraph::geom_node_text(aes(label = name), size = 3, repel = FALSE) +
|
||||
ggplot2::theme(panel.background = ggplot2::element_rect(fill = 'white'), legend.position = "none") +
|
||||
theme_wpa_basic() +
|
||||
labs(title = "Group to Group Collaboration",
|
||||
subtitle = subtitle,
|
||||
x = "",
|
||||
y = "",
|
||||
caption = paste("Displays only collaboration above ", exc_threshold * 100, "% of node's total collaboration", sep = "")) +
|
||||
theme(axis.line = element_blank(),
|
||||
axis.text = element_blank(),
|
||||
legend.position = "none")
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
|
|
118
R/network_p2p.R
118
R/network_p2p.R
|
@ -12,25 +12,40 @@
|
|||
#' @param data Data frame containing a person-to-person query.
|
||||
#' @param hrvar String containing the label for the HR attribute.
|
||||
#' @param return Character vector specifying what to return, defaults to "pdf".
|
||||
#' Other valid inputs are "plot", "network" and "table". "network" returns the `network`
|
||||
#' object used to generate the network plot. The "pdf" option is highly recommended over the "plot"
|
||||
#' option as the PDF export format has significantly higher performance. The "plot" option is highly
|
||||
#' computationally intensive and is not generally recommended.
|
||||
#' Valid inputs are:
|
||||
#' - "pdf": saves the network plot as a PDF in the specified path. See `path`. This is the recommended
|
||||
#' output format as large networks can be slow in other formats.
|
||||
#' - "plot": returns a ggplot object of the network plot. It is not recommended that you run this without
|
||||
#' assigning the output to an object as plotting to the R console can be slow for large networks.
|
||||
#' - "table": returns the edgelist data frame used in the network.
|
||||
#' - "network": returns the igraph object used to create the network plot.
|
||||
#' @param path File path for saving the PDF output. Defaults to "network_p2p".
|
||||
#' @param bg_fill String to specify background fill colour.
|
||||
#' @param font_col String to specify font and link colour.
|
||||
#' @param legend_pos String to specify position of legend. Defaults to "bottom". See `ggplot2::theme()`.
|
||||
#' @param palette Function for generating a colour palette with a single argument `n`. Uses `rainbow()` by default.
|
||||
#' @param ... Additional arguments to pass to `GGally::ggnet2()`.
|
||||
#' For instance, you may specify the argument `mode` to change the node placement algorithm.
|
||||
#' @param palette Function for generating a colour palette with a single argument `n`. Uses "rainbow" by default.
|
||||
#' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
|
||||
#' @param algorithm String to specify the node placement algorithm to be used. Defaults to "fr" for the force-directed
|
||||
#' algorithm of Fruchterman and Reingold. See <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a
|
||||
#' full list of options.
|
||||
#'
|
||||
#' @details
|
||||
#' For specifying the node placement algorithm, please see the `gplot.layout` documentation for the **sna**
|
||||
#' package, which provides a full list of the valid functions that can be passed into the `mode` argument.
|
||||
#' @examples
|
||||
#' ## Simulate simple P2P network
|
||||
#' sim_net <-
|
||||
#' data.frame(TieOrigin_PersonId = sample(seq(1, 100), size = 100, replace = TRUE),
|
||||
#' TieDestination_PersonId = sample(seq(1, 100), size = 100, replace = TRUE)) %>%
|
||||
#' dplyr::mutate(TieOrigin_Organization = ifelse(TieOrigin_PersonId >= 50, "A", "B"),
|
||||
#' TieDestination_Organization = ifelse(TieDestination_PersonId >= 50, "A", "B"),
|
||||
#' StrongTieScore = rep(1, 100))
|
||||
#'
|
||||
#' ## Run plot
|
||||
#' ## ONLY return 'plot' instead of 'pdf' when data size is small
|
||||
#' network_p2p(data = sim_net,
|
||||
#' hrvar = "Organization",
|
||||
#' return = "plot")
|
||||
#'
|
||||
#' @import ggplot2
|
||||
#' @import dplyr
|
||||
#' @import network
|
||||
#' @importFrom grDevices rainbow
|
||||
#'
|
||||
#' @export
|
||||
|
@ -42,44 +57,52 @@ network_p2p <- function(data,
|
|||
font_col = "#FFFFFF",
|
||||
legend_pos = "bottom",
|
||||
palette = "rainbow",
|
||||
...){
|
||||
|
||||
data <-
|
||||
data %>%
|
||||
filter(StrongTieType > 0)
|
||||
node_alpha = 0.7,
|
||||
algorithm = "fr"){
|
||||
|
||||
## No filtering
|
||||
tieorigin_var <- paste0("TieOrigin_", hrvar)
|
||||
tiedestin_var <- paste0("TieDestination_", hrvar)
|
||||
|
||||
|
||||
## Extract edge list
|
||||
strong_edgelist <-
|
||||
## Set edges df
|
||||
edges <-
|
||||
data %>%
|
||||
select(TieOrigin_PersonId, TieDestination_PersonId)
|
||||
select(from = "TieOrigin_PersonId",
|
||||
to = "TieDestination_PersonId",
|
||||
weight = "StrongTieScore") %>%
|
||||
select(-weight) # Overwrite - no info on edge
|
||||
|
||||
## Create basic network object
|
||||
mynet <-
|
||||
network::network(strong_edgelist, matrix.type="edgelist")
|
||||
## Vertices data frame to provide meta-data
|
||||
vert_ft <-
|
||||
rbind(
|
||||
# TieOrigin
|
||||
edges %>%
|
||||
left_join(select(data, TieOrigin_PersonId, tieorigin_var),
|
||||
by = c("from" = "TieOrigin_PersonId")) %>%
|
||||
select(node = "from", !!sym(hrvar) := tieorigin_var),
|
||||
|
||||
## Extract list of nodes from network object
|
||||
myedges <- data.frame(PersonId = network.vertex.names(mynet))
|
||||
# TieDestination
|
||||
edges %>%
|
||||
left_join(select(data, TieDestination_PersonId, tiedestin_var),
|
||||
by = c("to" = "TieDestination_PersonId")) %>%
|
||||
select(node = "to", !!sym(hrvar) := tiedestin_var)
|
||||
)
|
||||
|
||||
## Extract attributes
|
||||
totalAttributes <-
|
||||
bind_rows(select(data, PersonId = "TieOrigin_PersonId", !!sym(hrvar) := tieorigin_var),
|
||||
select(data, PersonId = "TieOrigin_PersonId", !!sym(hrvar) := tiedestin_var)) %>%
|
||||
unique()
|
||||
|
||||
## Merge list of nodes and attributes
|
||||
myedges <- merge(myedges, totalAttributes, by = "PersonId", all.x = TRUE)
|
||||
|
||||
## Add attributes to network object
|
||||
mynet %v% hrvar <- myedges[[hrvar]]
|
||||
## Create igraph object
|
||||
g <-
|
||||
igraph::graph_from_data_frame(edges,
|
||||
directed = FALSE, # Directed, but FALSE for visualization
|
||||
vertices = unique(vert_ft)) %>% # remove duplicates
|
||||
igraph::simplify()
|
||||
|
||||
## Palette
|
||||
## Create tibble
|
||||
pal <-
|
||||
tibble(!!sym(hrvar) := g %>%
|
||||
igraph::get.vertex.attribute(hrvar) %>%
|
||||
unique())
|
||||
|
||||
pal <- tibble(!!sym(hrvar) := unique(myedges[[hrvar]]))
|
||||
|
||||
## Apply palette function
|
||||
col_pal <- do.call(what = palette, args = list(nrow(pal)))
|
||||
|
||||
## named character vector
|
||||
|
@ -90,24 +113,21 @@ network_p2p <- function(data,
|
|||
|
||||
if(return == "table"){
|
||||
|
||||
data
|
||||
edges
|
||||
|
||||
} else if(return == "network"){
|
||||
|
||||
mynet
|
||||
g
|
||||
|
||||
} else if(return %in% c("plot", "pdf")){
|
||||
|
||||
outputPlot <-
|
||||
mynet %>%
|
||||
GGally::ggnet2(size = 1,
|
||||
color = hrvar,
|
||||
label = FALSE,
|
||||
edge.size = .01,
|
||||
edge.alpha = .15,
|
||||
node.alpha = .8,
|
||||
palette = pal,
|
||||
...) +
|
||||
g %>%
|
||||
ggraph::ggraph(layout = "igraph", algorithm = algorithm) +
|
||||
ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
|
||||
ggraph::geom_node_point(aes(colour = !!sym(hrvar)), alpha = node_alpha) +
|
||||
scale_colour_discrete(type = pal) +
|
||||
theme_void() +
|
||||
theme(legend.position = legend_pos,
|
||||
legend.background = element_rect(fill = bg_fill),
|
||||
plot.background = element_rect(fill = bg_fill),
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
#' @title Perform a pairwise count of words by id
|
||||
#'
|
||||
#' @description This is a **data.table** implementation that mimics the output of
|
||||
#' `widyr::pairwise_count()` to reduce package dependency. This is used internally
|
||||
#' within `tm_cooc()`.
|
||||
#'
|
||||
#' @param data Data frame output from `tm_clean()`.
|
||||
#' @param id String to represent the id variable. Defaults to "word".
|
||||
#' @param word String to represent the word variable. Defaults to "word".
|
||||
#'
|
||||
#' @import data.table
|
||||
#'
|
||||
#' @export
|
||||
pairwise_count <- function(data,
|
||||
id = "line",
|
||||
word = "word"){
|
||||
|
||||
data <-
|
||||
data %>%
|
||||
dplyr::rename(word := !!sym(word),
|
||||
id := !!sym(id))
|
||||
|
||||
|
||||
DT <- data.table::as.data.table(data)
|
||||
|
||||
# convert to character
|
||||
DT[, word := as.character(word)]
|
||||
|
||||
# subset those with >1 per id
|
||||
DT2 <- DT[, N := .N, by = id][N>1]
|
||||
|
||||
# create all combinations of 2
|
||||
# return as a data.table with these as columns `V1` and `V2`
|
||||
# then count the numbers in each id
|
||||
out_data <-
|
||||
DT2[, rbindlist(utils::combn(word,2,
|
||||
FUN = function(x) as.data.table(as.list(x)),
|
||||
simplify = FALSE)), by = id] %>%
|
||||
.[, .N, by = list(V1,V2)]
|
||||
|
||||
# format and sort
|
||||
out_data %>%
|
||||
dplyr::as_tibble() %>%
|
||||
dplyr::rename(item1 = "V1",
|
||||
item2 = "V2",
|
||||
n = "N") %>%
|
||||
dplyr::arrange(desc(n))
|
||||
}
|
10
R/themes.R
10
R/themes.R
|
@ -83,18 +83,18 @@ theme_wpa_basic <- function(font_size = 12){
|
|||
|
||||
text_small_dark <- element_text(size = font_size - 2, colour = text_colour, face = "plain")
|
||||
text_small_light <- element_text(size = font_size - 2, colour = "#FFFFFF", face = "plain")
|
||||
text_normal <- element_text(size = font_size + 2, colour = text_colour, face = "plain")
|
||||
text_italic <- element_text(size = font_size + 2, colour = text_colour, face = "italic")
|
||||
text_normal <- element_text(size = font_size + 0, colour = text_colour, face = "plain")
|
||||
text_italic <- element_text(size = font_size + 0, colour = text_colour, face = "italic")
|
||||
|
||||
text_bold <- element_text(size = font_size + 2, colour = text_colour, face = "bold")
|
||||
text_title <- element_text(size = font_size + 5, colour = text_colour, face = "bold")
|
||||
text_bold <- element_text(size = font_size + 0, colour = text_colour, face = "bold")
|
||||
text_title <- element_text(size = font_size + 2, colour = text_colour, face = "bold")
|
||||
|
||||
theme_minimal() +
|
||||
theme(plot.background = element_blank(),
|
||||
# plot.background = element_rect(fill = bg_colour),
|
||||
text = text_normal,
|
||||
plot.title = text_title,
|
||||
plot.subtitle = text_italic,
|
||||
plot.subtitle = text_normal,
|
||||
|
||||
axis.title = text_normal,
|
||||
axis.text = text_small_dark,
|
||||
|
|
14
R/tm_cooc.R
14
R/tm_cooc.R
|
@ -26,15 +26,16 @@
|
|||
#' @import ggplot2
|
||||
#' @import ggraph
|
||||
#' @importFrom igraph graph_from_data_frame
|
||||
#' @importFrom widyr pairwise_count
|
||||
#' @importFrom tidytext unnest_tokens
|
||||
#'
|
||||
#' @family Text-mining
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' tm_cooc(mt_data,lmult = 0.01)
|
||||
#' }
|
||||
#' # Demo using a subset of `mt_data`
|
||||
#' mt_data %>%
|
||||
#' dplyr::slice(1:20) %>%
|
||||
#' tm_cooc(lmult = 0.01)
|
||||
#'
|
||||
#' @export
|
||||
tm_cooc <- function(data,
|
||||
stopwords = NULL,
|
||||
|
@ -50,10 +51,7 @@ tm_cooc <- function(data,
|
|||
# Calculate frequency of pairs
|
||||
title_word_pairs <-
|
||||
text_df %>%
|
||||
widyr::pairwise_count(word,
|
||||
line,
|
||||
sort = TRUE,
|
||||
upper = FALSE)
|
||||
pairwise_count(id = "line", word = "word")
|
||||
|
||||
# Graph networks
|
||||
set.seed(seed)
|
||||
|
|
|
@ -20,6 +20,9 @@
|
|||
#' @param hrvar HR Variable by which to split metrics, defaults to "Organization"
|
||||
#' but accepts any character vector, e.g. "Organization"
|
||||
#' @param path Pass the file path and the desired file name, _excluding the file extension_.
|
||||
#' @param hrvar_threshold Numeric value determining the maximum number of unique values
|
||||
#' to be allowed to qualify as a HR variable. This is passed directly to the `threshold`
|
||||
#' argument within `hrvar_count_all()`.
|
||||
#' @param timestamp Logical vector specifying whether to include a timestamp in the file name.
|
||||
#' Defaults to TRUE.
|
||||
#'
|
||||
|
@ -41,6 +44,7 @@ validation_report <- function(data,
|
|||
meeting_data = NULL,
|
||||
hrvar = "Organization",
|
||||
path = "validation report",
|
||||
hrvar_threshold = 150,
|
||||
timestamp = TRUE){
|
||||
|
||||
## Create timestamped path (if applicable)
|
||||
|
@ -114,7 +118,7 @@ validation_report <- function(data,
|
|||
|
||||
read_preamble("organizational_data_quality.md"), #13, Header - 2. Organizational Data Quality
|
||||
read_preamble("attributes_available.md"),#14
|
||||
data %>% hrvar_count_all(return = "table"),
|
||||
data %>% hrvar_count_all(return = "table", threshold = hrvar_threshold),
|
||||
|
||||
read_preamble("groups_under_privacy_threshold_1.md"), #16, Header - 2.2 Groups under Privacy Threshold
|
||||
paste(">", data %>% identify_privacythreshold(return="text")),
|
||||
|
|
|
@ -20,9 +20,6 @@ Valid options are:
|
|||
\item when 'data_dirty' is passed, a dataset with only outlier weeks is returned as a dataframe.
|
||||
\item when 'plot' is passed, a pot with holiday weeks highlighted is returned as a dataframe.
|
||||
}}
|
||||
}
|
||||
\value{
|
||||
|
||||
}
|
||||
\description{
|
||||
This function scans a standard query output for weeks where collaboration hours is far outside the mean.
|
||||
|
@ -30,6 +27,15 @@ Returns a list of weeks that appear to be holiday weeks and optionally an edited
|
|||
By default, missing values are excluded.
|
||||
|
||||
As best practice, run this function prior to any analysis to remove atypical collaboration weeks from your dataset.
|
||||
}
|
||||
\examples{
|
||||
# Return a message by default
|
||||
identify_holidayweeks(sq_data)
|
||||
|
||||
# Return a plot
|
||||
identify_holidayweeks(sq_data, return = "plot")
|
||||
|
||||
|
||||
}
|
||||
\seealso{
|
||||
Other Data Validation:
|
||||
|
|
|
@ -12,8 +12,7 @@ network_g2g(
|
|||
metric,
|
||||
exc_threshold = 0.1,
|
||||
subtitle = "Collaboration Across Organizations",
|
||||
return = "plot",
|
||||
...
|
||||
return = "plot"
|
||||
)
|
||||
|
||||
g2g_network(
|
||||
|
@ -23,8 +22,7 @@ g2g_network(
|
|||
metric,
|
||||
exc_threshold = 0.1,
|
||||
subtitle = "Collaboration Across Organizations",
|
||||
return = "plot",
|
||||
...
|
||||
return = "plot"
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
|
@ -41,20 +39,22 @@ g2g_network(
|
|||
\item{subtitle}{String to override default plot subtitle.}
|
||||
|
||||
\item{return}{Character vector specifying what to return, defaults to "plot".
|
||||
Valid inputs are "plot" and "table".}
|
||||
|
||||
\item{...}{Additional arguments to pass to \code{GGally::ggnet2()}}
|
||||
Valid inputs include:
|
||||
\itemize{
|
||||
\item "plot": return a network plot.
|
||||
\item "table": return a raw data table used to plot the network.
|
||||
\item "network": return an \strong{igraph} object
|
||||
}}
|
||||
}
|
||||
\description{
|
||||
Pass a data frame containing a group-to-group query and return a network plot.
|
||||
Automatically handles "Collaborators_within_group" and "Other_collaborators" within query data.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
## Return a network plot
|
||||
g2g_data \%>\%
|
||||
network_g2g(time_investor = "TimeInvestors_Organization",
|
||||
collaborator = "Collaborators_Organization",
|
||||
metric = "Collaboration_hours")
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -13,7 +13,8 @@ network_p2p(
|
|||
font_col = "#FFFFFF",
|
||||
legend_pos = "bottom",
|
||||
palette = "rainbow",
|
||||
...
|
||||
node_alpha = 0.7,
|
||||
algorithm = "fr"
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
|
@ -22,10 +23,15 @@ network_p2p(
|
|||
\item{hrvar}{String containing the label for the HR attribute.}
|
||||
|
||||
\item{return}{Character vector specifying what to return, defaults to "pdf".
|
||||
Other valid inputs are "plot", "network" and "table". "network" returns the \code{network}
|
||||
object used to generate the network plot. The "pdf" option is highly recommended over the "plot"
|
||||
option as the PDF export format has significantly higher performance. The "plot" option is highly
|
||||
computationally intensive and is not generally recommended.}
|
||||
Valid inputs are:
|
||||
\itemize{
|
||||
\item "pdf": saves the network plot as a PDF in the specified path. See \code{path}. This is the recommended
|
||||
output format as large networks can be slow in other formats.
|
||||
\item "plot": returns a ggplot object of the network plot. It is not recommended that you run this without
|
||||
assigning the output to an object as plotting to the R console can be slow for large networks.
|
||||
\item "table": returns the edgelist data frame used in the network.
|
||||
\item "network": returns the igraph object used to create the network plot.
|
||||
}}
|
||||
|
||||
\item{path}{File path for saving the PDF output. Defaults to "network_p2p".}
|
||||
|
||||
|
@ -35,16 +41,31 @@ computationally intensive and is not generally recommended.}
|
|||
|
||||
\item{legend_pos}{String to specify position of legend. Defaults to "bottom". See \code{ggplot2::theme()}.}
|
||||
|
||||
\item{palette}{Function for generating a colour palette with a single argument \code{n}. Uses \code{rainbow()} by default.}
|
||||
\item{palette}{Function for generating a colour palette with a single argument \code{n}. Uses "rainbow" by default.}
|
||||
|
||||
\item{...}{Additional arguments to pass to \code{GGally::ggnet2()}.
|
||||
For instance, you may specify the argument \code{mode} to change the node placement algorithm.}
|
||||
\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency of the nodes.}
|
||||
|
||||
\item{algorithm}{String to specify the node placement algorithm to be used. Defaults to "fr" for the force-directed
|
||||
algorithm of Fruchterman and Reingold. See \url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a
|
||||
full list of options.}
|
||||
}
|
||||
\description{
|
||||
Pass a data frame containing a person-to-person query and save a network
|
||||
plot as a PDF file.
|
||||
}
|
||||
\details{
|
||||
For specifying the node placement algorithm, please see the \code{gplot.layout} documentation for the \strong{sna}
|
||||
package, which provides a full list of the valid functions that can be passed into the \code{mode} argument.
|
||||
\examples{
|
||||
## Simulate simple P2P network
|
||||
sim_net <-
|
||||
data.frame(TieOrigin_PersonId = sample(seq(1, 100), size = 100, replace = TRUE),
|
||||
TieDestination_PersonId = sample(seq(1, 100), size = 100, replace = TRUE)) \%>\%
|
||||
dplyr::mutate(TieOrigin_Organization = ifelse(TieOrigin_PersonId >= 50, "A", "B"),
|
||||
TieDestination_Organization = ifelse(TieDestination_PersonId >= 50, "A", "B"),
|
||||
StrongTieScore = rep(1, 100))
|
||||
|
||||
## Run plot
|
||||
## ONLY return 'plot' instead of 'pdf' when data size is small
|
||||
network_p2p(data = sim_net,
|
||||
hrvar = "Organization",
|
||||
return = "plot")
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/pairwise_count.R
|
||||
\name{pairwise_count}
|
||||
\alias{pairwise_count}
|
||||
\title{Perform a pairwise count of words by id}
|
||||
\usage{
|
||||
pairwise_count(data, id = "line", word = "word")
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{Data frame output from \code{tm_clean()}.}
|
||||
|
||||
\item{id}{String to represent the id variable. Defaults to "word".}
|
||||
|
||||
\item{word}{String to represent the word variable. Defaults to "word".}
|
||||
}
|
||||
\description{
|
||||
This is a \strong{data.table} implementation that mimics the output of
|
||||
\code{widyr::pairwise_count()} to reduce package dependency. This is used internally
|
||||
within \code{tm_cooc()}.
|
||||
}
|
|
@ -29,9 +29,11 @@ There is an option to remove stopwords by passing a data frame into the \code{st
|
|||
argument.
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
tm_cooc(mt_data,lmult = 0.01)
|
||||
}
|
||||
# Demo using a subset of `mt_data`
|
||||
mt_data \%>\%
|
||||
dplyr::slice(1:20) \%>\%
|
||||
tm_cooc(lmult = 0.01)
|
||||
|
||||
}
|
||||
\seealso{
|
||||
Other Text-mining:
|
||||
|
|
|
@ -9,6 +9,7 @@ validation_report(
|
|||
meeting_data = NULL,
|
||||
hrvar = "Organization",
|
||||
path = "validation report",
|
||||
hrvar_threshold = 150,
|
||||
timestamp = TRUE
|
||||
)
|
||||
}
|
||||
|
@ -22,6 +23,10 @@ but accepts any character vector, e.g. "Organization"}
|
|||
|
||||
\item{path}{Pass the file path and the desired file name, \emph{excluding the file extension}.}
|
||||
|
||||
\item{hrvar_threshold}{Numeric value determining the maximum number of unique values
|
||||
to be allowed to qualify as a HR variable. This is passed directly to the \code{threshold}
|
||||
argument within \code{hrvar_count_all()}.}
|
||||
|
||||
\item{timestamp}{Logical vector specifying whether to include a timestamp in the file name.
|
||||
Defaults to TRUE.}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ var config = {
|
|||
lineage: true
|
||||
},
|
||||
coreData: {
|
||||
appId: "JS:JSLLTest"
|
||||
appId: "wpaGitHubSite"
|
||||
}
|
||||
};
|
||||
awa.init(config);
|
||||
awa.init(config);
|
||||
|
|
Загрузка…
Ссылка в новой задаче