Merge branch 'main' into Feature/Sales-Insights

This commit is contained in:
C Morales 2022-06-30 15:12:59 +01:00
Родитель bb43d5c143 6a23f3b569
Коммит c0f2e09cd2
27 изменённых файлов: 55875 добавлений и 266 удалений

Просмотреть файл

@ -1,3 +1,3 @@
Version: 1.6.4
Date: 2022-01-19 15:26:54 UTC
SHA: 3edf7394a94cd59be8b6d83362ab0cfad81c4afa
Version: 1.7.0
Date: 2022-06-06 13:37:15 UTC
SHA: 7aefaa599891ef7c20db669a8f52b59e80ef48ef

Просмотреть файл

@ -1,7 +1,7 @@
Package: wpa
Type: Package
Title: Tools for Analysing and Visualising Viva Insights Data
Version: 1.7.0
Version: 1.7.0.9000
Authors@R: c(
person(given = "Martin", family = "Chan", role = c("aut", "cre"), email = "martin.chan@microsoft.com"),
person(given = "Carlos", family = "Morales", role = "aut", email = "carlos.morales@microsoft.com"),
@ -67,5 +67,7 @@ Suggests:
glue,
flexdashboard,
lmtest,
sandwich
sandwich,
testthat (>= 3.0.0)
Language: en-US
Config/testthat/edition: 3

Просмотреть файл

@ -152,6 +152,7 @@ export(period_change)
export(personas_hclust)
export(plot_WOE)
export(plot_flex_index)
export(plot_hourly_pat)
export(read_preamble)
export(remove_outliers)
export(rgb2hex)

Просмотреть файл

@ -1,3 +1,8 @@
# wpa (development version)
- Updated and improved output and algorithm for `workpatterns_classify()`
- Additional visual options for `workpatterns_classify()` and `flex_index()`
# wpa 1.7.0
- Renamed 'Workplace Analytics' to 'Viva Insights'

Просмотреть файл

@ -14,6 +14,8 @@
#' @param rounding Numeric vector to specify the number of decimal points to display
#' @param freeze Number of columns from the left to 'freeze'. Defaults to 2,
#' which includes the row number column.
#' @param percent Logical value specifying whether to display numeric columns
#' as percentages.
#'
#' @import DT
#' @importFrom dplyr mutate_if
@ -24,14 +26,15 @@
#' Returns an HTML widget displaying rectangular data.
#'
#' @export
create_dt <- function(x, rounding = 1, freeze = 2){
create_dt <- function(x, rounding = 1, freeze = 2, percent = FALSE){
# Round all numeric to "rounding" number of dp
num_cols <- dplyr::select_if(x, is.numeric) %>% names()
if(length(num_cols) == 0){ # No numeric columns
DT::datatable(x,
out <-
DT::datatable(x,
extensions = c('Buttons',
'FixedColumns'),
options = list(dom = 'Blfrtip',
@ -43,7 +46,8 @@ create_dt <- function(x, rounding = 1, freeze = 2){
} else {
DT::datatable(x,
out <-
DT::datatable(x,
extensions = c('Buttons',
'FixedColumns'),
options = list(dom = 'Blfrtip',
@ -54,6 +58,15 @@ create_dt <- function(x, rounding = 1, freeze = 2){
c(10,25,50,"All")))) %>%
DT::formatRound(columns = num_cols, rounding)
if(percent == TRUE){
out <-
out %>%
DT::formatPercentage(columns = num_cols, rounding)
}
}
out
}

Просмотреть файл

@ -94,6 +94,14 @@
#' - `"common"` plots the ten most common working patterns
#' - `"time"` plots the Flexibility Index for the group over time
#'
#' @param mode String specifying aggregation method for plot. Only applicable
#' when `return = "plot"`. Valid options include:
#' - `"binary"`: convert hourly activity into binary blocks. In the plot, each
#' block would display as solid.
#' - `"prop"`: calculate proportion of signals in each hour over total signals
#' across 24 hours, then average across all work weeks. In the plot, each
#' block would display as a heatmap.
#'
#' @return
#' A different output is returned depending on the value passed to the `return`
#' argument:
@ -104,6 +112,7 @@
#' `plot_flex_index()` to recreate visuals found in `flex_index()`.
#' - `"table"`: data frame. A summary table for the metric.
#'
#'
#' @import dplyr
#' @importFrom data.table ":=" "%like%" "%between%"
#'
@ -147,7 +156,8 @@ flex_index <- function(data,
start_hour = "0900",
end_hour = "1700",
return = "plot",
plot_method = "common"){
plot_method = "common",
mode = "binary"){
## Bindings for variables
TakeBreaks <- NULL
@ -202,6 +212,15 @@ flex_index <- function(data,
.[, c("PersonId", "Date")] %>%
cbind(signal_cols)
## Save original `signals_df` before manipulating ------------------------
## Rename `Signals_sent` columns to prevent conflict
signals_df_o <- signals_df %>%
purrr::set_names(
nm = gsub(x = names(.),
replacement = "_ori_",
pattern = "_sent_")
)
## Signal label
sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)
@ -275,6 +294,7 @@ flex_index <- function(data,
WpA_classify %>%
left_join(signals_df, by = c("PersonId","Date")) %>%
left_join(hr_dt, by = c("PersonId","Date")) %>%
left_join(signals_df_o, by = c("PersonId","Date")) %>%
filter(Signals_Total >= 3) %>% # At least 3 signals required
## Additional calculations for Flexibility Index
@ -306,7 +326,8 @@ flex_index <- function(data,
sig_label = sig_label_,
start_hour = start_hour,
end_hour = end_hour,
method = plot_method)
method = plot_method,
mode = mode)
} else if(return == "data"){

Просмотреть файл

@ -60,8 +60,10 @@
#' of the edges (only for 'ggraph' mode). Defaults to 1.
#' @param res Resolution parameter to be passed to `leiden::leiden()`. Defaults
#' to 0.5.
#' @param seed Seed for the random number generator passed to `leiden::leiden()`
#' to ensure consistency. Only applicable when `display` is set to `"leiden"`.
#' @param seed Seed for the random number generator passed to either
#' `set.seed()` when the Louvain algorithm is used, or `leiden::leiden()` when
#' the Leiden algorithm is used, to ensure consistency. Only applicable when
#' `display` is set to `"louvain"` or `"leiden"`.
#' @param algorithm String to specify the node placement algorithm to be used.
#' Defaults to `"mds"` for the deterministic multi-dimensional scaling of
#' nodes. See
@ -241,24 +243,26 @@ network_p2p <- function(data,
} else if(display == "louvain"){
## Convert to undirected
g_ud <- igraph::as.undirected(g_raw)
set.seed(seed = seed)
## Return a numeric vector of partitions / clusters / modules
## Set a low resolution parameter to have fewer groups
## weights = NULL means that if the graph as a `weight` edge attribute, this
## will be used by default.
lc <- igraph::cluster_louvain(g_ud, weights = NULL)
## Convert to undirected
g_ud <- igraph::as.undirected(g_raw)
## Add cluster
g <-
g_ud %>%
# Add louvain partitions to graph object
igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
igraph::simplify()
## Return a numeric vector of partitions / clusters / modules
## Set a low resolution parameter to have fewer groups
## weights = NULL means that if the graph as a `weight` edge attribute, this
## will be used by default.
lc <- igraph::cluster_louvain(g_ud, weights = NULL)
## Name of vertex attribute
v_attr <- "cluster"
## Add cluster
g <-
g_ud %>%
# Add louvain partitions to graph object
igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
igraph::simplify()
## Name of vertex attribute
v_attr <- "cluster"
} else if(display == "leiden"){

Просмотреть файл

@ -12,7 +12,7 @@
#' working patterns; "time" plots the Flexibility Index for the group over time.
#' @param start_hour See `flex_index()`.
#' @param end_hour See `flex_index()`.
#'
#' @param mode See `flex_index()`.
#' @import dplyr
#' @import ggplot2
#' @importFrom data.table ":=" "%like%" "%between%"
@ -41,7 +41,8 @@ plot_flex_index <- function(data,
sig_label = "Signals_sent_",
method = "sample",
start_hour = 9,
end_hour = 17){
end_hour = 17,
mode = "binary"){
## Bindings for variables
TakeBreaks <- NULL
@ -60,12 +61,19 @@ plot_flex_index <- function(data,
dplyr::mutate(FlexibilityIndex = select(., TakeBreaks, ChangeHours, ControlHours) %>% apply(1, mean),
patternRank = 5) # 5 so that it shows right in the middle
## Used for captions
score_tb <-
myTable_legends %>%
dplyr::mutate_at(vars(FlexibilityIndex), ~round(.*100)) %>%
dplyr::mutate_at(vars(TakeBreaks, ChangeHours, ControlHours), ~scales::percent(.))
## Make for pretty printing
myTable_legends <-
myTable_legends %>%
dplyr::mutate(FlexibilityIndex = scales::percent(FlexibilityIndex))
## Main plot
## Different plots if different `method` is specified
@ -77,6 +85,11 @@ plot_flex_index <- function(data,
data %>%
.[sample(nrow(.), size = 10), ]
## Make sure data.table knows we know we're using it
.datatable.aware = TRUE
data_tb <- data.table::as.data.table(data)
plot_title <- "Random sample of 10 Working patterns"
} else if(method == "common"){
@ -88,18 +101,6 @@ plot_flex_index <- function(data,
data_tb <- data.table::as.data.table(data)
input_var <- names(data)[grepl(sig_label_, names(data))]
data_tb <- data_tb[, list(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId)),
by = input_var]
plot_data <-
data_tb %>%
as.data.frame() %>%
dplyr::arrange(desc(WeekCount)) %>%
slice(1:10)
plot_title <- "Top 10 Most Common Working patterns"
} else if(method == "time"){
@ -117,51 +118,152 @@ plot_flex_index <- function(data,
if(method %in% c("sample", "common")){
plot_data %>%
mutate(patternRank = 1:nrow(.)) %>%
dplyr::select(patternRank, dplyr::starts_with(sig_label_)) %>%
purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.))) %>%
tidyr::gather(Hours, Freq, -patternRank) %>%
ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
ggplot2::geom_tile(height=.5) +
ggplot2::ylab("Work Patterns") +
ggplot2::scale_fill_gradient2(low = "white", high = "#1d627e") +
ggplot2::scale_y_reverse(breaks=seq(1,10)) +
wpa::theme_wpa_basic() +
ggplot2::theme(legend.position = "none") +
ggplot2::annotate("text",
y = myTable_legends$patternRank,
x = 26.5,
label = scales::percent(myTable_legends$FlexibilityIndex), size = 3) +
ggplot2::annotate("rect",
xmin = 25,
xmax = 28,
ymin = 0.5,
ymax = 10 + 0.5,
alpha = .2) +
ggplot2::annotate("rect",
xmin = 0.5,
xmax = start_hour + 0.5,
ymin = 0.5,
ymax = 10 + 0.5,
alpha = .1,
fill = "gray50") +
ggplot2::annotate("rect",
xmin = end_hour + 0.5,
xmax = 24.5,
ymin = 0.5,
ymax = 10 + 0.5,
alpha = .1,
fill = "gray50") +
ggplot2::labs(title = "Work Patterns and Flexibility Index",
subtitle = paste0(plot_title,
"\n",
"Group Flexibility Index: ", score_tb$FlexibilityIndex),
caption = paste0("% Taking Breaks: ", score_tb$TakeBreaks, "\n",
"% Change Hours: ", score_tb$ChangeHours, "\n",
"% Keep Hours Under Control: ", score_tb$ControlHours, "\n",
extract_date_range(data, return = "text")))
if(mode == "binary"){
input_var <- names(data)[grepl(sig_label_, names(data))]
data_tb <- data_tb[, list(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId)),
by = input_var]
plot_data <-
data_tb %>%
as.data.frame() %>%
dplyr::arrange(desc(WeekCount)) %>%
slice(1:10)
plot_data_long <-
plot_data %>%
mutate(patternRank = 1:nrow(.)) %>%
dplyr::select(patternRank, dplyr::starts_with(sig_label_)) %>%
purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.)))
} else if(mode == "prop"){
input_var <- names(data)[grepl(sig_label_, names(data))]
sig_label_ <- gsub(
pattern = "_sent_",
replacement = "_ori_",
x = sig_label_
)
## 00, 01, 02, etc.
hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
# Use `mutate()` method
# Will get 10 IDs, not 10 rows
# NOTE: `input_var` is used to identify a distinct work pattern
plot_data <-
data_tb %>%
data.table::as.data.table() %>%
.[, `:=`(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId),
Id = .GRP), # group id assignment
by = input_var] %>%
dplyr::arrange(desc(WeekCount))
plot_data <-
plot_data %>%
dplyr::select(Id, dplyr::contains("_ori_"), WeekCount) %>%
purrr::set_names(nm = gsub(
pattern = ".+_ori_",
replacement = "",
x = names(.)
)) %>%
purrr::set_names(nm = gsub(
pattern = "_.+",
replacement = "",
x = names(.)
)) %>%
# Need aggregation
.[, Signals_Total := rowSums(.SD), .SDcols = hours_col] %>%
.[, c(hours_col) := .SD / Signals_Total, .SDcols = hours_col] %>%
.[, Signals_Total := NULL] %>% # Remove unneeded column
.[, lapply(.SD, mean, na.rm = TRUE), .SDcols = hours_col, by = list(Id, WeekCount)]
plot_data_long <-
plot_data %>%
dplyr::arrange(desc(WeekCount)) %>%
dplyr::mutate(patternRank = 1:nrow(.)) %>%
slice(1:10)
} else {
stop("Invalid value to `mode`")
}
plot_data_long %>%
plot_hourly_pat(
start_hour = start_hour,
end_hour = end_hour,
legend = myTable_legends,
legend_label = "FlexibilityIndex",
legend_text = paste("Observed activity"),
rows = 10, # static
title = "Work Patterns and Flexibility Index",
subtitle = paste0(plot_title,
"\n",
"Group Flexibility Index: ",
score_tb$FlexibilityIndex),
caption = paste0("% Taking Breaks: ", score_tb$TakeBreaks, "\n",
"% Change Hours: ", score_tb$ChangeHours, "\n",
"% Keep Hours Under Control: ", score_tb$ControlHours, "\n",
extract_date_range(data, return = "text")),
ylab = "Work patterns"
)
# plot_data %>%
# mutate(patternRank = 1:nrow(.)) %>%
# dplyr::select(patternRank, dplyr::starts_with(sig_label_)) %>%
# purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
# purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.))) %>%
# tidyr::gather(Hours, Freq, -patternRank) %>%
# ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
# ggplot2::geom_tile(height=.5) +
# ggplot2::ylab("Work Patterns") +
# ggplot2::scale_fill_gradient2(low = "white", high = "#1d627e") +
# ggplot2::scale_y_reverse(breaks=seq(1,10)) +
# wpa::theme_wpa_basic() +
# ggplot2::theme(legend.position = "none") +
# ggplot2::annotate("text",
# y = myTable_legends$patternRank,
# x = 26.5,
# label = scales::percent(myTable_legends$FlexibilityIndex), size = 3) +
# ggplot2::annotate("rect",
# xmin = 25,
# xmax = 28,
# ymin = 0.5,
# ymax = 10 + 0.5,
# alpha = .2) +
# ggplot2::annotate("rect",
# xmin = 0.5,
# xmax = start_hour + 0.5,
# ymin = 0.5,
# ymax = 10 + 0.5,
# alpha = .1,
# fill = "gray50") +
# ggplot2::annotate("rect",
# xmin = end_hour + 0.5,
# xmax = 24.5,
# ymin = 0.5,
# ymax = 10 + 0.5,
# alpha = .1,
# fill = "gray50") +
# ggplot2::labs(title = "Work Patterns and Flexibility Index",
# subtitle = paste0(plot_title,
# "\n",
# "Group Flexibility Index: ", score_tb$FlexibilityIndex),
# caption = paste0("% Taking Breaks: ", score_tb$TakeBreaks, "\n",
# "% Change Hours: ", score_tb$ChangeHours, "\n",
# "% Keep Hours Under Control: ", score_tb$ControlHours, "\n",
# extract_date_range(data, return = "text")))
} else if(method == "time"){

105
R/plot_hourly_pat.R Normal file
Просмотреть файл

@ -0,0 +1,105 @@
#' @title
#' Internal function for plotting the hourly activity patterns.
#'
#' @description
#' This is used within `plot_flex_index()` and `workpatterns_rank()`.
#'
#' @param data Data frame containing three columns:
#' - `patternRank`
#' - `Hours`
#' - `Freq`
#'
#' @param start_hour Numeric value to specify expected start hour.
#' @param end_hour Numeric value to specify expected end hour.
#'
#' @param legend Data frame containing the columns:
#' - `patternRank`
#' - Any column to be used in the grey label box, supplied to `legend_label`
#'
#' @param legend_label String specifying column to display in the grey label
#' box
#'
#' @param legend_text String to be used in the bottom legend label.
#'
#' @param rows Number of rows to show in plot.
#' @noRd
#'
#' @export
plot_hourly_pat <- function(
data,
start_hour,
end_hour,
legend,
legend_label,
legend_text = "Observed activity",
rows,
title,
subtitle,
caption,
ylab = paste("Top", rows, "activity patterns")
){
## 00, 01, 02, etc.
hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
data %>%
utils::head(rows) %>%
tidyr::pivot_longer(
cols = hours_col,
names_to = "Hours",
values_to = "Freq"
) %>%
ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
ggplot2::geom_tile(height = .5) +
ggplot2::ylab(ylab) +
ggplot2::scale_y_reverse(expand = c(0, 0), breaks = seq(1, rows)) +
wpa::theme_wpa_basic() +
ggplot2::scale_x_discrete(position = "top")+
ggplot2::theme(
axis.title.x = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank()
) +
# Not operational if not binary
scale_fill_continuous(
guide = "legend",
low = "white",
high = "#1d627e",
breaks = 0:1,
name = "",
labels = c("", legend_text)
) +
ggplot2::annotate(
"text",
y = legend$patternRank,
x = 26.5,
label = legend[[legend_label]],
size = 3
)+
ggplot2::annotate("rect",
xmin = 25,
xmax = 28,
ymin = 0.5,
ymax = rows + 0.5,
alpha = .2) +
ggplot2::annotate("rect",
xmin = 0.5,
xmax = start_hour + 0.5,
ymin = 0.5,
ymax = rows + 0.5,
alpha = .1,
fill = "gray50") +
ggplot2::annotate("rect",
xmin = end_hour + 0.5,
xmax = 24.5,
ymin = 0.5,
ymax = rows + 0.5,
alpha = .1,
fill = "gray50") +
labs(
title = title,
subtitle = subtitle,
caption = caption
)
}

Просмотреть файл

@ -35,23 +35,35 @@
#'
#' @section Binary Week method:
#'
#' This method classifies each **person-week** into one of the seven
#' This method classifies each **person-week** into one of the eight
#' archetypes:
#' - **0 < 3 hours on**: fewer than 3 hours of active hours
#' - **1 Standard with breaks workday**: active for fewer than _expected
#' hours_, with no activity outside working hours
#' - **2 Standard continuous workday**: number of active hours equal _expected
#' hours_, with no activity outside working hours
#' - **3 Standard flexible workday**: number of active hours are less than or
#' equal to _expected hours_, with some activity outside working hours
#' - **4 Long flexible workday**: number of active hours exceed _expected
#' - **0 Low Activity (< 3 hours on)**: fewer than 3 hours of active hours
#' - **1.1 Standard continuous (expected schedule)**: active hours equal to
#' _expected hours_, with all activity confined within the expected start and
#' end time
#' - **1.2 Standard continuous (shifted schedule)**: active hours equal to
#' _expected hours_, with activity occurring beyond either the expected start
#' or end time.
#' - **2.1 Standard flexible (expected schedule)**: active hours less than or
#' equal to _expected hours_, with all activity confined within the expected
#' start and end time
#' - **2.2 Standard flexible (shifted schedule)**: active hours less than or
#' equal to _expected hours_, with activity occurring beyond either the
#' expected start or end time.
#' - **3 Long flexible workday**: number of active hours exceed _expected
#' hours_, with breaks occurring throughout
#' - **5 Long continuous workday**: number of active hours exceed _expected
#' - **4 Long continuous workday**: number of active hours exceed _expected
#' hours_, with activity happening in a continuous block (no breaks)
#' - **6 Always on (13h+)**: number of active hours greater than or equal to
#' - **5 Always on (13h+)**: number of active hours greater than or equal to
#' 13
#'
#' This is the recommended method over `pav` for several reasons:
#' _Standard_ here denotes the behaviour of not exhibiting total number of
#' active hours which exceed the expected total number of hours, as supplied by
#' `exp_hours`. _Continuous_ refers to the behaviour of _not_ taking breaks,
#' i.e. no inactive hours between the first and last active hours of the day,
#' where _flexible_ refers to the contrary.
#'
#' This is the recommended method over `pav` for several reasons:
#' 1. `bw` ignores _volume effects_, where activity volume can still bias the
#' results towards the 'standard working hours'.
#' 2. It captures the intuition that each individual can have 'light' and
@ -67,11 +79,11 @@
#' In the standard plot output, the archetypes have been abbreviated to show the
#' following:
#' - **Low Activity** - archetype 0
#' - **Standard** - archetype 2
#' - **Flexible** - archetypes 1 and 3
#' - **Long continuous** - archetype 5
#' - **Long flexible** - archetype 4
#' - **Always On** - archetype 6
#' - **Standard** - archetypes 1.1 and 1.2
#' - **Flexible** - archetypes 2.1 and 2.2
#' - **Long continuous** - archetype 4
#' - **Long flexible** - archetype 3
#' - **Always On** - archetype 5
#'
#' @section Person Average method:
#'
@ -148,6 +160,11 @@
#' official hours specifying checking in and 9 AM and checking out at 5 PM,
#' then `"1700"` should be supplied here.
#'
#' @param exp_hours Numeric value representing the number of hours the
#' population is expected to be active for throughout the workday. By default,
#' this uses the difference between `end_hour` and `start_hour`. Only
#' applicable with the 'bw' method.
#'
#' @param mingroup Numeric value setting the privacy threshold / minimum group
#' size. Defaults to 5.
#'
@ -184,7 +201,13 @@
#' em_data %>% workpatterns_classify(method = "bw")
#'
#' # Return an area plot
#' em_data %>% workpatterns_classify(method = "bw", return = "plot-area")
#' # With custom expected hours
#' em_data %>%
#' workpatterns_classify(
#' method = "bw",
#' return = "plot-area",
#' exp_hours = 7
#' )
#'
#' \donttest{
#'
@ -206,6 +229,7 @@ workpatterns_classify <- function(data,
signals = c("email", "IM"),
start_hour = "0900",
end_hour = "1700",
exp_hours = NULL,
mingroup = 5,
active_threshold = 0,
method = "bw",
@ -232,14 +256,17 @@ workpatterns_classify <- function(data,
# Method flow -------------------------------------------------------------
if(method == "bw"){
workpatterns_classify_bw(data = data,
hrvar = hrvar,
signals = signals,
start_hour = start_hour,
end_hour = end_hour,
exp_hours = exp_hours,
mingroup = mingroup,
active_threshold = active_threshold,
return = return)
} else if(method == "pav"){
workpatterns_classify_pav(data = data,

Просмотреть файл

@ -53,6 +53,10 @@
#' official hours specifying checking in and 9 AM and checking out at 5 PM,
#' then `"1700"` should be supplied here.
#'
#' @param exp_hours Numeric value representing the number of hours the population
#' is expected to be active for throughout the workday. By default, this uses
#' the difference between `end_hour` and `start_hour`.
#'
#' @param mingroup Numeric value setting the privacy threshold / minimum group
#' size. Defaults to 5.
#'
@ -82,10 +86,14 @@ workpatterns_classify_bw <- function(data,
start_hour = "0900",
end_hour = "1700",
mingroup = 5,
exp_hours = NULL,
active_threshold = 0,
return = "plot"){
## Handling NULL values passed to hrvar
## set up variable -------------------------------------------------------
Active_Hours <- NULL
## Handling NULL values passed to hrvar ----------------------------------
if(is.null(hrvar)){
data <- totals_col(data)
@ -100,7 +108,7 @@ workpatterns_classify_bw <- function(data,
}
## convert to data.table
## convert to data.table -------------------------------------------------
data2 <-
data %>%
dplyr::mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%
@ -110,49 +118,53 @@ workpatterns_classify_bw <- function(data,
# Make sure data.table knows we know we're using it
.datatable.aware = TRUE
## Save original
start_hour_o <- start_hour
end_hour_o <- end_hour
## Coerce to numeric, remove trailing zeros
start_hour <- as.numeric(gsub(pattern = "00$", replacement = "", x = start_hour))
end_hour <- as.numeric(gsub(pattern = "00$", replacement = "", x = end_hour))
## Calculate hours within working hours
## e.g. if `end_hour` value is 17, then the reference slot should be 16
d <- (end_hour - 1) - start_hour
## Total expected hours --------------------------------------------------
## If `NULL`, use the difference between `end_hour` and `start_hour`
## Warning message
if(d >= 23){
if(is.null(exp_hours)){
exp_hours <- end_hour - start_hour
}
## Warning message for extreme values of `exp_hours` ---------------------
if(exp_hours >= 23){
stop(
glue::glue(
"the total working hours is {d + 1}.
"the total working hours is {exp_hours}.
Please provide a valid range."
)
)
} else if(d >= 11){
} else if(exp_hours >= 12){
message(
glue::glue(
"Note: the total working hours is {d + 1}.
"Note: the total working hours is {exp_hours}.
Output archetypes will be reduced as the total number of hours is greater than or equal to 12."
)
)
} else if(d <= 3){
} else if(exp_hours <= 3){
message(
glue::glue(
"Note: the total working hours is {d + 1}.
"Note: the total working hours is {exp_hours}.
Output archetypes will be reduced as the total number of hours is fewer than or equal to 3."
)
)
}
## Text replacement only for allowed values
## Text replacement only for allowed values ------------------------------
if(any(signals %in% c("email", "IM", "unscheduled_calls", "meetings"))){
signal_set <- gsub(pattern = "email", replacement = "Emails_sent", x = signals) # case-sensitive
@ -166,7 +178,8 @@ workpatterns_classify_bw <- function(data,
}
## Create 24 summed `Signals_sent` columns
## Create 24 summed `Signals_sent` columns -------------------------------
signal_cols <- purrr::map(0:23, ~combine_signals(data, hr = ., signals = signal_set))
signal_cols <- bind_cols(signal_cols)
@ -174,15 +187,20 @@ workpatterns_classify_bw <- function(data,
input_var <- names(signal_cols)
## Signals sent by Person and Date
## Data frame with `PersonId`, `Date`, and the 24 signal columns
signals_df <-
data2 %>%
.[, c("PersonId", "Date")] %>%
cbind(signal_cols)
## Signal label
## Only show as `Signals_sent` if more than one signal, i.e. if there is
## aggregation of multiple signals
sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)
## Create binary variable 0 or 1
## Create binary variable 0 or 1 ----------------------------------------
num_cols <- names(which(sapply(signals_df, is.numeric))) # Get numeric columns
signals_df <-
@ -190,13 +208,15 @@ workpatterns_classify_bw <- function(data,
data.table::as.data.table() %>%
# active_threshold: minimum signals to qualify as active
.[, (num_cols) := lapply(.SD, function(x) ifelse(x > active_threshold, 1, 0)), .SDcols = num_cols] %>%
.[, ("Signals_Total") := apply(.SD, 1, sum), .SDcols = input_var]
.[, ("Active_Hours") := apply(.SD, 1, sum), .SDcols = input_var]
## Classify PersonId-Signal data by time of day
## Classify PersonId-Signal data by time of day --------------------------
## Long format table that classifies each hour of the day on whether it is
## before, within, or after standard hours
WpA_classify <-
signals_df %>%
tidyr::gather(!!sym(sig_label), sent, -PersonId,-Date,-Signals_Total) %>%
tidyr::gather(!!sym(sig_label), sent, -PersonId,-Date,-Active_Hours) %>%
data.table::as.data.table()
WpA_classify[, StartEnd := gsub(pattern = "[^[:digit:]]", replacement = "", x = get(sig_label))]
@ -212,54 +232,112 @@ workpatterns_classify_bw <- function(data,
WpA_classify <-
WpA_classify[, c("PersonId", "Date", "Signals_Total", "HourType", "sent")] %>%
.[, .(sent = sum(sent)), by = c("PersonId", "Date", "Signals_Total", "HourType")] %>%
WpA_classify[, c("PersonId", "Date", "Active_Hours", "HourType", "sent")] %>%
.[, .(sent = sum(sent)), by = c("PersonId", "Date", "Active_Hours", "HourType")] %>%
tidyr::spread(HourType, sent)%>%
left_join(WpA_classify%>% ## Calculate first and last activity for day_span
filter(sent>0)%>%
group_by(PersonId,Date)%>%
summarise(First_signal=min(Start),
Last_signal=max(End)),
by=c("PersonId","Date"))%>%
left_join(WpA_classify %>% ## Calculate first and last activity for day_span
filter(sent > 0)%>%
group_by(PersonId, Date)%>%
summarise(First_signal = min(Start),
Last_signal = max(End)),
by = c("PersonId","Date"))%>%
mutate(Day_Span = Last_signal - First_signal,
Signals_Break_hours = Day_Span - Signals_Total)
Signals_Break_hours = Day_Span - Active_Hours)
## Working patterns classification ---------------------------------------
# # Level 1 with 7 personas
# personas_levels <-
# c("0 < 3 hours on",
# "1 Standard with breaks workday",
# "2 Standard continuous workday",
# "3 Standard flexible workday",
# "4 Long flexible workday",
# "5 Long continuous workday",
# "6 Always on (13h+)")
#
# ptn_data_personas <- data.table::copy(WpA_classify)
# ptn_data_personas[, Personas := "Unclassified"]
# ptn_data_personas[Active_Hours > exp_hours & Active_Hours==Day_Span , Personas := "5 Long continuous workday"]
# ptn_data_personas[Active_Hours > exp_hours & Active_Hours<Day_Span, Personas := "4 Long flexible workday"]
# ptn_data_personas[Active_Hours <= exp_hours & (Before_start>0|After_end>0), Personas := "3 Standard flexible workday"] #do we want to split betwen block and non block?
# ptn_data_personas[Active_Hours == exp_hours & Within_hours == exp_hours , Personas := "2 Standard continuous workday"]
# ptn_data_personas[Active_Hours < exp_hours & Before_start==0 & After_end == 0, Personas := "1 Standard with breaks workday"]
# ptn_data_personas[Active_Hours >= 13, Personas := "6 Always on (13h+)"]
# ptn_data_personas[Active_Hours < 3, Personas := "0 < 3 hours on"]
# ptn_data_personas[, Personas := factor(Personas, levels = personas_levels)]
# Level 2 with 8 personas
personas_levels <-
c("0 < 3 hours on",
"1 Standard with breaks workday",
"2 Standard continuous workday",
"3 Standard flexible workday",
"4 Long flexible workday",
"5 Long continuous workday",
"6 Always on (13h+)")
c(
"0 Low Activity (< 3 hours on)",
"1.1 Standard continuous (expected schedule)",
"1.2 Standard continuous (shifted schedule)",
"2.1 Standard flexible (expected schedule)",
"2.2 Standard flexible (shifted schedule)",
"3 Long flexible workday",
"4 Long continuous workday",
"5 Always on (13h+)"
)
ptn_data_personas <- data.table::copy(WpA_classify)
ptn_data_personas[, Personas := "Unclassified"]
ptn_data_personas[Signals_Total > d & Signals_Total==Day_Span , Personas := "5 Long continuous workday"]
ptn_data_personas[Signals_Total > d & Signals_Total<Day_Span, Personas := "4 Long flexible workday"]
ptn_data_personas[Signals_Total <= d & (Before_start>0|After_end>0), Personas := "3 Standard flexible workday"] #do we want to split betwen block and non block?
ptn_data_personas[Signals_Total == d+1 & Within_hours ==d+1, Personas := "2 Standard continuous workday"]
ptn_data_personas[Signals_Total<= d & Before_start==0 & After_end == 0, Personas := "1 Standard with breaks workday"]
ptn_data_personas[Signals_Total >= 13, Personas := "6 Always on (13h+)"]
ptn_data_personas[Signals_Total < 3, Personas := "0 < 3 hours on"]
ptn_data_personas[Active_Hours > exp_hours & Active_Hours==Day_Span , Personas := "4 Long continuous workday"]
ptn_data_personas[Active_Hours > exp_hours & Active_Hours<Day_Span, Personas := "3 Long flexible workday"]
ptn_data_personas[Active_Hours <= exp_hours & (Before_start>0|After_end>0), Personas := "2.2 Standard flexible (shifted schedule)"]
ptn_data_personas[Active_Hours <= exp_hours & Before_start == 0 & After_end == 0, Personas := "2.1 Standard flexible (expected schedule)"]
ptn_data_personas[Active_Hours == exp_hours & (Before_start > 0 | After_end > 0) & Active_Hours == Day_Span, Personas := "1.2 Standard continuous (shifted schedule)"]
ptn_data_personas[Active_Hours == exp_hours & Before_start == 0 & After_end == 0 & Active_Hours == Day_Span, Personas := "1.1 Standard continuous (expected schedule)"]
ptn_data_personas[Active_Hours >= 13, Personas := "5 Always on (13h+)"]
ptn_data_personas[Active_Hours < 3, Personas := "0 Low Activity (< 3 hours on)"]
ptn_data_personas[, Personas := factor(Personas, levels = personas_levels)]
# bind cut tree to data frame
ptn_data_final <-
ptn_data_personas %>%
left_join(
signals_df %>%
select(-Signals_Total), # Avoid duplication
select(-Active_Hours), # Avoid duplication
by = c("PersonId","Date")) %>%
left_join(
data2 %>%
select(PersonId, Date, hrvar_str), # Avoid duplication
by = c("PersonId","Date"))
## Return-chunks
## Long caption -----------------------------------------------------------
## Parameters used in creating visualization
## Change first character to upper case
firstup <- function(x){
substr(x, start = 1, stop = 1) <- toupper(substr(x, start = 1, stop = 1))
x
}
signals_str <- firstup(paste(signals, collapse = ", "))
cap_long <-
glue::glue(
"Signals used: {signals_str}.
The official hours are {start_hour}:00 and {end_hour}:00, with a total of {exp_hours} expected hours.
\n"
) %>%
paste(extract_date_range(data2, return = "text"))
## Return-chunks ----------------------------------------------------------
return_data <- function(){
dplyr::as_tibble(ptn_data_final)
dplyr::as_tibble(ptn_data_final) %>%
dplyr::mutate(
Start_hour = start_hour,
End_hour = end_hour,
Exp_hours = exp_hours
)
}
# NOW DEFUNCT - NOT USED ---------------------------------------------------
@ -337,7 +415,7 @@ workpatterns_classify_bw <- function(data,
theme_wpa_basic() +
labs(title = "Distribution of Working Patterns over time",
y = "Percentage",
caption = extract_date_range(data2, return = "text")) +
caption = cap_long) +
theme(legend.position = "right") +
scale_y_continuous(labels = scales::percent)
}
@ -361,9 +439,11 @@ workpatterns_classify_bw <- function(data,
ptn_data_final %>%
hrvar_count(hrvar = hrvar, return = "table") %>%
dplyr::filter(n >= mingroup) %>%
dplyr::pull(hrvar)
dplyr::pull(hrvar) %>%
c("Total") # Ensure included in filter
ptn_data_final %>%
totals_bind(target_col = hrvar, target_value = "Total") %>%
data.table::as.data.table() %>%
.[, .(n = .N), by = c("Personas", hrvar)] %>%
dplyr::as_tibble() %>%
@ -386,7 +466,11 @@ workpatterns_classify_bw <- function(data,
} else if(return == "plot"){
plot_workpatterns_classify_bw(ptn_data_final, range = d)
plot_workpatterns_classify_bw(
ptn_data_final,
range = exp_hours,
caption = cap_long
)
} else if(return == "plot-dist"){
@ -399,7 +483,8 @@ workpatterns_classify_bw <- function(data,
} else if(return == "plot-hrvar"){
plot_wp_bw_hrvar(
x = return_table()
x = return_table(),
caption = cap_long
)
} else if (return == "table"){
@ -409,8 +494,15 @@ workpatterns_classify_bw <- function(data,
} else if (return == "list"){
list(data = return_data(),
plot = plot_workpatterns_classify_bw(ptn_data_final, range = d),
plot_hrvar = plot_wp_bw_hrvar(x = return_table()),
plot = plot_workpatterns_classify_bw(
ptn_data_final,
range = exp_hours,
caption = cap_long
),
plot_hrvar = plot_wp_bw_hrvar(
x = return_table(),
caption = cap_long
),
plot_area = return_plot_area(),
table = return_table())
@ -425,25 +517,27 @@ workpatterns_classify_bw <- function(data,
#'
#' @description Internal use only.
#'
#' @param range Numeric. Accepts `d` from the main `workpatterns_classify_bw()`
#' @param range Numeric. Accepts `exp_hours` from the main `workpatterns_classify_bw()`
#' function. Used to update labels on main plot.
#' @param caption String to override plot captions.
#'
#' @noRd
plot_workpatterns_classify_bw <- function(data, range){
plot_workpatterns_classify_bw <- function(data, range, caption){
plot_table <-
data %>%
dplyr::mutate(
PersonasNet =
case_when(
Personas == "0 < 3 hours on" ~ "Low activity",
Personas == "1 Standard with breaks workday" ~ "Flexible",
Personas == "2 Standard continuous workday" ~ "Standard",
Personas == "3 Standard flexible workday" ~ "Flexible",
Personas == "4 Long flexible workday" ~ "Long flexible",
Personas == "5 Long continuous workday" ~ "Long continuous",
Personas == "6 Always on (13h+)" ~ "Always On",
Personas == "0 Low Activity (< 3 hours on)" ~ "Low activity",
Personas == "2.1 Standard flexible (expected schedule)" ~ "Flexible",
Personas == "2.2 Standard flexible (shifted schedule)" ~ "Flexible",
Personas == "1.1 Standard continuous (expected schedule)" ~ "Standard",
Personas == "1.2 Standard continuous (shifted schedule)" ~ "Standard",
Personas == "3 Long flexible workday" ~ "Long flexible",
Personas == "4 Long continuous workday" ~ "Long continuous",
Personas == "5 Always on (13h+)" ~ "Always On",
TRUE ~ NA_character_
)
) %>%
@ -591,7 +685,7 @@ plot_workpatterns_classify_bw <- function(data, range){
subtitle = "Classification of employee-weeks",
x = "Flexibility level (breaks)",
y = "Average activity level",
caption = extract_date_range(data, return = "text")) +
caption = caption) +
theme_wpa_basic() +
theme(
legend.position = "none",
@ -611,7 +705,7 @@ plot_workpatterns_classify_bw <- function(data, range){
#' @import ggplot2
#'
#' @noRd
plot_wp_bw_hrvar <- function(x){
plot_wp_bw_hrvar <- function(x, caption){
x %>%
tidyr::pivot_longer(cols = -Personas) %>%
@ -625,7 +719,8 @@ plot_wp_bw_hrvar <- function(x){
coord_flip() +
scale_y_continuous(labels = scales::percent,
limits = c(NA, 1)) +
theme_wpa_basic()
theme_wpa_basic() +
labs(caption = caption)
}

Просмотреть файл

@ -22,8 +22,17 @@
#' e.g. "`0900"`
#' @param end_hour A character vector specifying starting hours,
#' e.g. `"1700"`
#' @param top number specifying how many top working patterns to display in plot,
#' @param top numeric value specifying how many top working patterns to display in plot,
#' e.g. `"10"`
#'
#' @param mode string specifying aggregation method for plot. Valid
#' options include:
#' - `"binary"`: convert hourly activity into binary blocks. In the plot, each
#' block would display as solid.
#' - `"prop"`: calculate proportion of signals in each hour over total signals
#' across 24 hours, then average across all work weeks. In the plot, each
#' block would display as a heatmap.
#'
#' @param return String specifying what to return. This must be one of the
#' following strings:
#' - `"plot"`
@ -41,6 +50,7 @@
#' @importFrom data.table ":=" "%like%" "%between%"
#'
#' @examples
#' # Plot by default
#' workpatterns_rank(
#' data = em_data,
#' signals = c(
@ -51,6 +61,13 @@
#' )
#' )
#'
#' # Plot with prop / heatmap mode
#' workpatterns_rank(
#' data = em_data,
#' mode = "prop"
#' )
#'
#'
#' @family Visualization
#' @family Working Patterns
#'
@ -60,6 +77,7 @@ workpatterns_rank <- function(data,
start_hour = "0900",
end_hour = "1700",
top = 10,
mode = "binary",
return = "plot"){
# Make sure data.table knows we know we're using it
@ -114,28 +132,86 @@ workpatterns_rank <- function(data,
## Signal label
sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)
## Create binary variable 0 or 1
## This should only pick up `Signals_sent_` prefixed columns
## This is run on `signals_df`
num_cols <- names(which(sapply(signals_df, is.numeric))) # Get numeric columns
signals_df <-
signals_df %>%
data.table::as.data.table() %>%
.[, (num_cols) := lapply(.SD, function(x) ifelse(x > 0, 1, 0)), .SDcols = num_cols]
if(mode == "binary"){
signals_df <- signals_df[, list(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId)), by = input_var]
## Summarized table performed on `signals_df` ----------------------------
## Section ignoring `signals_df_o`
myTable_return <- data.table::setorder(signals_df, -PersonCount)
signals_df <-
signals_df %>%
data.table::as.data.table() %>%
.[, (num_cols) := lapply(.SD, function(x) ifelse(x > 0, 1, 0)), .SDcols = num_cols] %>%
.[, list(WeekCount = .N, PersonCount = dplyr::n_distinct(PersonId)), by = input_var]
if(return == "plot"){
myTable_return <- data.table::setorder(signals_df, -PersonCount)
} else if(mode == "prop"){
## Save original `signals_df` before manipulating ------------------------
## Rename `Signals_sent` columns to prevent conflict
signals_df_o <- signals_df %>%
purrr::set_names(
nm = gsub(x = names(.),
replacement = "_ori_",
pattern = "_sent_")
) %>%
cbind(select(signals_df, num_cols)) %>% # duplicate signals
# Convert `Signals_sent_` prefixed to binary. `Signals_ori_` are intact
# Create binary variable 0 or 1
.[, (num_cols) := lapply(.SD, function(x) ifelse(x > 0, 1, 0)), .SDcols = num_cols] %>%
# Use `mutate()` method
.[, `:=`(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId),
Id = .GRP), # group id assignment
by = num_cols]
## 00, 01, 02, etc.
hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
# Wide table showing proportion of signals by hour
# Ranked descending by `WeekCount`
wp_prop_tb <-
signals_df_o %>%
arrange(desc(WeekCount)) %>%
dplyr::select(Id, dplyr::contains("_ori_"), WeekCount) %>%
purrr::set_names(nm = gsub(
pattern = ".+_ori_",
replacement = "",
x = names(.)
)) %>%
purrr::set_names(nm = gsub(
pattern = "_.+",
replacement = "",
x = names(.)
)) %>%
# Need aggregation
.[, Signals_Total := rowSums(.SD), .SDcols = hours_col] %>%
.[, c(hours_col) := .SD / Signals_Total, .SDcols = hours_col] %>%
.[, Signals_Total := NULL] %>% # Remove unneeded column
.[, lapply(.SD, mean, na.rm = TRUE), .SDcols = hours_col, by = list(Id, WeekCount)]
} else {
stop("invalid value to `mode`.")
}
if(return == "plot" & mode == "binary"){
## Plot return
sig_label_ <- paste0(sig_label, "_")
myTable_return <-
myTable_return %>%
arrange(desc(WeekCount)) %>%
mutate(patternRank= 1:nrow(.))
myTable_return <-
myTable_return %>%
arrange(desc(WeekCount)) %>%
mutate(patternRank= 1:nrow(.))
## Table for annotation
myTable_legends <-
@ -163,66 +239,85 @@ workpatterns_rank <- function(data,
replacement = "",
x = names(.)
)) %>%
utils::head(top) %>%
tidyr::gather(Hours, Freq, -patternRank) %>%
ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
ggplot2::geom_tile(height = .5) +
ggplot2::ylab(paste("Top", top, "activity patterns")) +
#ggplot2::scale_fill_gradient2(low = "white", high = "#1d627e") +
ggplot2::scale_y_reverse(expand = c(0, 0), breaks = seq(1, top)) +
theme_wpa_basic() +
ggplot2::scale_x_discrete(position = "top")+
ggplot2::theme(
axis.title.x = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank()
) +
scale_fill_continuous(
guide = "legend",
low = "white",
high = "#1d627e",
breaks = 0:1,
name = "",
labels = c("", paste("Observed", subtitle_signal, "activity"))
) +
ggplot2::annotate(
"text",
y = myTable_legends$patternRank,
x = 26.5,
label = myTable_legends$WeekCount,
size = 3
)+
ggplot2::annotate("rect",
xmin = 25,
xmax = 28,
ymin = 0.5,
ymax = length(myTable_legends$patternRank) + 0.5,
alpha = .2) +
ggplot2::annotate("rect",
xmin = 0.5,
xmax = start_hour + 0.5,
ymin = 0.5,
ymax = length(myTable_legends$patternRank) + 0.5,
alpha = .1,
fill = "gray50") +
ggplot2::annotate("rect",
xmin = end_hour + 0.5,
xmax = 24.5,
ymin = 0.5,
ymax = length(myTable_legends$patternRank) + 0.5,
alpha = .1,
fill = "gray50") +
labs(
title = "Patterns of digital activity",
subtitle = paste("Hourly activity based on", subtitle_signal ,"sent over a week"),
caption = paste(
"Top", top, "patterns represent", coverage, "of workweeks.", extract_date_range(data, return = "text"))
plot_hourly_pat(
start_hour = start_hour,
end_hour = end_hour,
legend = myTable_legends,
legend_label = "WeekCount",
legend_text = paste("Observed", subtitle_signal, "activity"),
rows = top,
title = "Patterns of digital activity",
subtitle = paste(
"Hourly activity based on",
subtitle_signal,
"sent over a week"),
caption = paste(
"Top",
top,
"patterns represent",
coverage,
"of workweeks.\n",
extract_date_range(data, return = "text")
),
ylab = paste("Top", top, "activity patterns")
)
} else if(return == "plot" & mode == "prop"){
## Table for annotation
myTable_legends <-
wp_prop_tb %>%
arrange(desc(WeekCount)) %>%
mutate(patternRank= 1:nrow(.)) %>%
dplyr::select(patternRank, WeekCount) %>%
dplyr::mutate(WeekPercentage = WeekCount / sum(WeekCount, na.rm = TRUE),
WeekCount = paste0(scales::percent(WeekPercentage, accuracy = 0.1))) %>%
utils::head(top)
## Coverage
coverage <-
myTable_legends %>%
summarize(total = sum(WeekPercentage)) %>%
pull(1) %>%
scales::percent(accuracy = 0.1)
## Run plot
wp_prop_tb %>%
dplyr::mutate(patternRank = 1:nrow(.)) %>%
plot_hourly_pat(
start_hour = start_hour,
end_hour = end_hour,
legend = myTable_legends,
legend_label = "WeekCount",
legend_text = paste("Observed", subtitle_signal, "activity"),
rows = top,
title = "Patterns of digital activity",
subtitle = paste(
"Hourly activity based on",
subtitle_signal,
"sent over a week"),
caption = paste(
"Top",
top,
"patterns represent",
coverage,
"of workweeks.\n",
extract_date_range(data, return = "text")
),
ylab = paste("Top", top, "activity patterns")
)
} else if(return == "table"){
dplyr::as_tibble(myTable_return)
} else if(return == "test"){
signals_df_o
} else {
stop("Invalid `return`")

Просмотреть файл

@ -13,6 +13,8 @@
#' @param signals See `workpatterns_classify()`.
#' @param start_hour See `workpatterns_classify()`.
#' @param end_hour See `workpatterns_classify()`.
#' @param exp_hours See `workpatterns_classify()`.
#'
#' @param path Pass the file path and the desired file name, _excluding the file
#' extension_. For example, `"scope report"`.
#' @param timestamp Logical vector specifying whether to include a timestamp in
@ -32,6 +34,7 @@ workpatterns_report <- function(data,
signals = c("email", "IM"),
start_hour = "0900",
end_hour = "1700",
exp_hours = NULL,
path = "workpatterns report",
timestamp = TRUE){
@ -52,6 +55,7 @@ workpatterns_report <- function(data,
signals = signals,
start_hour = start_hour,
end_hour = end_hour,
exp_hours = exp_hours,
return = "list")
## plot for `workpatterns_rank`
@ -132,8 +136,10 @@ workpatterns_report <- function(data,
plot_rank_list[[4]],
plot_rank_list[[5]],
plot_rank_list[[6]],
plot_rank_list[[7]]) %>% # Expand objects to this list
purrr::map_if(is.data.frame, wpa::create_dt, rounding = 2) %>%
plot_rank_list[[7]],
plot_rank_list[[8]]
) %>% # Expand objects to this list
purrr::map_if(is.data.frame, wpa::create_dt, rounding = 1, percent = TRUE) %>%
purrr::map_if(is.character, md2html)
## Set header titles
@ -150,7 +156,9 @@ workpatterns_report <- function(data,
paste(names(plot_table_list)[[4]]),
paste(names(plot_table_list)[[5]]),
paste(names(plot_table_list)[[6]]),
paste(names(plot_table_list)[[7]]))
paste(names(plot_table_list)[[7]]),
paste(names(plot_table_list)[[8]])
)
## Set header levels
## Makes use of level/header system for Markdown syntax

Просмотреть файл

@ -7,6 +7,10 @@
0 errors | 0 warnings | 0 note
## Submission 1.7.0
Bug fixes, new features, and removal of archived dependency 'portes'
## Submission 1.6.4
Minor bug fixes and refactoring.

Просмотреть файл

@ -4,7 +4,7 @@
\alias{create_dt}
\title{Create interactive tables in HTML with 'download' buttons.}
\usage{
create_dt(x, rounding = 1, freeze = 2)
create_dt(x, rounding = 1, freeze = 2, percent = FALSE)
}
\arguments{
\item{x}{Data frame to be passed through.}
@ -13,6 +13,9 @@ create_dt(x, rounding = 1, freeze = 2)
\item{freeze}{Number of columns from the left to 'freeze'. Defaults to 2,
which includes the row number column.}
\item{percent}{Logical value specifying whether to display numeric columns
as percentages.}
}
\value{
Returns an HTML widget displaying rectangular data.

Просмотреть файл

@ -12,7 +12,8 @@ flex_index(
start_hour = "0900",
end_hour = "1700",
return = "plot",
plot_method = "common"
plot_method = "common",
mode = "binary"
)
}
\arguments{
@ -54,6 +55,16 @@ See \code{Value} for more information.}
\item \code{"common"} plots the ten most common working patterns
\item \code{"time"} plots the Flexibility Index for the group over time
}}
\item{mode}{String specifying aggregation method for plot. Only applicable
when \code{return = "plot"}. Valid options include:
\itemize{
\item \code{"binary"}: convert hourly activity into binary blocks. In the plot, each
block would display as solid.
\item \code{"prop"}: calculate proportion of signals in each hour over total signals
across 24 hours, then average across all work weeks. In the plot, each
block would display as a heatmap.
}}
}
\value{
A different output is returned depending on the value passed to the \code{return}

Просмотреть файл

@ -44,8 +44,10 @@ of the nodes. Defaults to 0.7.}
\item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
to 0.5.}
\item{seed}{Seed for the random number generator passed to \code{leiden::leiden()}
to ensure consistency. Only applicable when \code{display} is set to \code{"leiden"}.}
\item{seed}{Seed for the random number generator passed to either
\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
the Leiden algorithm is used, to ensure consistency. Only applicable when
\code{display} is set to \code{"louvain"} or \code{"leiden"}.}
\item{desc_hrvar}{Character vector of length 3 containing the HR attributes
to use when returning the \code{"describe"} output. See \code{network_describe()}.}

Просмотреть файл

@ -86,8 +86,10 @@ of the edges (only for 'ggraph' mode). Defaults to 1.}
\item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
to 0.5.}
\item{seed}{Seed for the random number generator passed to \code{leiden::leiden()}
to ensure consistency. Only applicable when \code{display} is set to \code{"leiden"}.}
\item{seed}{Seed for the random number generator passed to either
\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
the Leiden algorithm is used, to ensure consistency. Only applicable when
\code{display} is set to \code{"louvain"} or \code{"leiden"}.}
\item{algorithm}{String to specify the node placement algorithm to be used.
Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of

Просмотреть файл

@ -9,7 +9,8 @@ plot_flex_index(
sig_label = "Signals_sent_",
method = "sample",
start_hour = 9,
end_hour = 17
end_hour = 17,
mode = "binary"
)
}
\arguments{
@ -25,6 +26,8 @@ working patterns; "time" plots the Flexibility Index for the group over time.}
\item{start_hour}{See \code{flex_index()}.}
\item{end_hour}{See \code{flex_index()}.}
\item{mode}{See \code{flex_index()}.}
}
\value{
ggplot object. See \code{method}.

Просмотреть файл

@ -11,6 +11,7 @@ workpatterns_classify(
signals = c("email", "IM"),
start_hour = "0900",
end_hour = "1700",
exp_hours = NULL,
mingroup = 5,
active_threshold = 0,
method = "bw",
@ -48,6 +49,11 @@ Note that this currently only supports \strong{hourly} increments. If the
official hours specifying checking in and 9 AM and checking out at 5 PM,
then \code{"1700"} should be supplied here.}
\item{exp_hours}{Numeric value representing the number of hours the
population is expected to be active for throughout the workday. By default,
this uses the difference between \code{end_hour} and \code{start_hour}. Only
applicable with the 'bw' method.}
\item{mingroup}{Numeric value setting the privacy threshold / minimum group
size. Defaults to 5.}
@ -122,24 +128,36 @@ implementations.
\section{Binary Week method}{
This method classifies each \strong{person-week} into one of the seven
This method classifies each \strong{person-week} into one of the eight
archetypes:
\itemize{
\item \strong{0 < 3 hours on}: fewer than 3 hours of active hours
\item \strong{1 Standard with breaks workday}: active for fewer than \emph{expected
hours}, with no activity outside working hours
\item \strong{2 Standard continuous workday}: number of active hours equal \emph{expected
hours}, with no activity outside working hours
\item \strong{3 Standard flexible workday}: number of active hours are less than or
equal to \emph{expected hours}, with some activity outside working hours
\item \strong{4 Long flexible workday}: number of active hours exceed \emph{expected
\item \strong{0 Low Activity (< 3 hours on)}: fewer than 3 hours of active hours
\item \strong{1.1 Standard continuous (expected schedule)}: active hours equal to
\emph{expected hours}, with all activity confined within the expected start and
end time
\item \strong{1.2 Standard continuous (shifted schedule)}: active hours equal to
\emph{expected hours}, with activity occurring beyond either the expected start
or end time.
\item \strong{2.1 Standard flexible (expected schedule)}: active hours less than or
equal to \emph{expected hours}, with all activity confined within the expected
start and end time
\item \strong{2.2 Standard flexible (shifted schedule)}: active hours less than or
equal to \emph{expected hours}, with activity occurring beyond either the
expected start or end time.
\item \strong{3 Long flexible workday}: number of active hours exceed \emph{expected
hours}, with breaks occurring throughout
\item \strong{5 Long continuous workday}: number of active hours exceed \emph{expected
\item \strong{4 Long continuous workday}: number of active hours exceed \emph{expected
hours}, with activity happening in a continuous block (no breaks)
\item \strong{6 Always on (13h+)}: number of active hours greater than or equal to
\item \strong{5 Always on (13h+)}: number of active hours greater than or equal to
13
}
\emph{Standard} here denotes the behaviour of not exhibiting total number of
active hours which exceed the expected total number of hours, as supplied by
\code{exp_hours}. \emph{Continuous} refers to the behaviour of \emph{not} taking breaks,
i.e. no inactive hours between the first and last active hours of the day,
where \emph{flexible} refers to the contrary.
This is the recommended method over \code{pav} for several reasons:
\enumerate{
\item \code{bw} ignores \emph{volume effects}, where activity volume can still bias the
@ -159,11 +177,11 @@ In the standard plot output, the archetypes have been abbreviated to show the
following:
\itemize{
\item \strong{Low Activity} - archetype 0
\item \strong{Standard} - archetype 2
\item \strong{Flexible} - archetypes 1 and 3
\item \strong{Long continuous} - archetype 5
\item \strong{Long flexible} - archetype 4
\item \strong{Always On} - archetype 6
\item \strong{Standard} - archetypes 1.1 and 1.2
\item \strong{Flexible} - archetypes 2.1 and 2.2
\item \strong{Long continuous} - archetype 4
\item \strong{Long flexible} - archetype 3
\item \strong{Always On} - archetype 5
}
}
@ -206,7 +224,13 @@ Hourly Collaboration Flexible Query.
em_data \%>\% workpatterns_classify(method = "bw")
# Return an area plot
em_data \%>\% workpatterns_classify(method = "bw", return = "plot-area")
# With custom expected hours
em_data \%>\%
workpatterns_classify(
method = "bw",
return = "plot-area",
exp_hours = 7
)
\donttest{

Просмотреть файл

@ -12,6 +12,7 @@ workpatterns_classify_bw(
start_hour = "0900",
end_hour = "1700",
mingroup = 5,
exp_hours = NULL,
active_threshold = 0,
return = "plot"
)
@ -45,6 +46,10 @@ then \code{"1700"} should be supplied here.}
\item{mingroup}{Numeric value setting the privacy threshold / minimum group
size. Defaults to 5.}
\item{exp_hours}{Numeric value representing the number of hours the population
is expected to be active for throughout the workday. By default, this uses
the difference between \code{end_hour} and \code{start_hour}.}
\item{active_threshold}{A numeric value specifying the minimum number of
signals to be greater than in order to qualify as \emph{active}. Defaults to 0.}

Просмотреть файл

@ -10,6 +10,7 @@ workpatterns_rank(
start_hour = "0900",
end_hour = "1700",
top = 10,
mode = "binary",
return = "plot"
)
}
@ -32,9 +33,19 @@ e.g. "\verb{0900"}}
\item{end_hour}{A character vector specifying starting hours,
e.g. \code{"1700"}}
\item{top}{number specifying how many top working patterns to display in plot,
\item{top}{numeric value specifying how many top working patterns to display in plot,
e.g. \code{"10"}}
\item{mode}{string specifying aggregation method for plot. Valid
options include:
\itemize{
\item \code{"binary"}: convert hourly activity into binary blocks. In the plot, each
block would display as solid.
\item \code{"prop"}: calculate proportion of signals in each hour over total signals
across 24 hours, then average across all work weeks. In the plot, each
block would display as a heatmap.
}}
\item{return}{String specifying what to return. This must be one of the
following strings:
\itemize{
@ -59,6 +70,7 @@ table of working patterns, ranked from the most common to the
least.
}
\examples{
# Plot by default
workpatterns_rank(
data = em_data,
signals = c(
@ -69,6 +81,13 @@ workpatterns_rank(
)
)
# Plot with prop / heatmap mode
workpatterns_rank(
data = em_data,
mode = "prop"
)
}
\seealso{
Other Visualization:

Просмотреть файл

@ -10,6 +10,7 @@ workpatterns_report(
signals = c("email", "IM"),
start_hour = "0900",
end_hour = "1700",
exp_hours = NULL,
path = "workpatterns report",
timestamp = TRUE
)
@ -26,6 +27,8 @@ workpatterns_report(
\item{end_hour}{See \code{workpatterns_classify()}.}
\item{exp_hours}{See \code{workpatterns_classify()}.}
\item{path}{Pass the file path and the desired file name, \emph{excluding the file
extension}. For example, \code{"scope report"}.}

12
tests/testthat.R Normal file
Просмотреть файл

@ -0,0 +1,12 @@
# This file is part of the standard setup for testthat.
# It is recommended that you do not modify it.
#
# Where should you do additional test configuration?
# Learn more about the roles of various files in:
# * https://r-pkgs.org/tests.html
# * https://testthat.r-lib.org/reference/test_package.html#special-files
library(testthat)
library(wpa)
test_check("wpa")

Просмотреть файл

@ -0,0 +1,27 @@
test_that(
desc = "flex_index data output returns data frame",
code = {
out <- flex_index(em_data, signals = "IM", return = "data")
expect_s3_class(out, "data.frame")
out <- flex_index(em_data, signals = "unscheduled_calls", return = "data")
expect_s3_class(out, "data.frame")
out <- flex_index(em_data, signals = "meetings", return = "data")
expect_s3_class(out, "data.frame")
}
)
test_that("flex_index plots returns ggplot object",{
p <- flex_index(em_data, signals = "meetings", return = "plot")
expect_s3_class(p, "ggplot")
p <- flex_index(em_data, signals = "IM", return = "plot")
expect_s3_class(p, "ggplot")
p <- flex_index(em_data, signals = "unscheduled_calls", return = "plot")
expect_s3_class(p, "ggplot")
})

Просмотреть файл

@ -0,0 +1,33 @@
test_that(
desc = "workpatterns data output row matches - signals",
code = {
out <- workpatterns_classify(em_data, signals = "IM", return = "data")
expect_equal(
object = nrow(out),
expected = nrow(em_data)
)
out <- workpatterns_classify(em_data, signals = "unscheduled_calls", return = "data")
expect_equal(
object = nrow(out),
expected = nrow(em_data)
)
out <- workpatterns_classify(em_data, signals = "meetings", return = "data")
expect_equal(
object = nrow(out),
expected = nrow(em_data)
)
}
)
test_that("workpatterns plots returns ggplot object",{
p <- workpatterns_classify(em_data, signals = "meetings", return = "plot")
expect_s3_class(p, "ggplot")
p <- workpatterns_classify(em_data, signals = "IM", return = "plot")
expect_s3_class(p, "ggplot")
p <- workpatterns_classify(em_data, signals = "unscheduled_calls", return = "plot")
expect_s3_class(p, "ggplot")
})

Различия файлов скрыты, потому что одна или несколько строк слишком длинны