feat: introduce heatmap for `flex_index()` (#212)

This commit is contained in:
Martin Chan 2022-06-22 17:10:21 +01:00
Родитель e3310b559f
Коммит bffdf33a28
4 изменённых файлов: 121 добавлений и 23 удалений

Просмотреть файл

@ -94,6 +94,14 @@
#' - `"common"` plots the ten most common working patterns
#' - `"time"` plots the Flexibility Index for the group over time
#'
#' @param mode String specifying aggregation method for plot. Only applicable
#' when `return = "plot"`. Valid options include:
#' - `"binary"`: convert hourly activity into binary blocks. In the plot, each
#' block would display as solid.
#' - `"prop"`: calculate proportion of signals in each hour over total signals
#' across 24 hours, then average across all work weeks. In the plot, each
#' block would display as a heatmap.
#'
#' @return
#' A different output is returned depending on the value passed to the `return`
#' argument:
@ -104,6 +112,7 @@
#' `plot_flex_index()` to recreate visuals found in `flex_index()`.
#' - `"table"`: data frame. A summary table for the metric.
#'
#'
#' @import dplyr
#' @importFrom data.table ":=" "%like%" "%between%"
#'
@ -147,7 +156,8 @@ flex_index <- function(data,
start_hour = "0900",
end_hour = "1700",
return = "plot",
plot_method = "common"){
plot_method = "common",
mode = "binary"){
## Bindings for variables
TakeBreaks <- NULL
@ -202,6 +212,15 @@ flex_index <- function(data,
.[, c("PersonId", "Date")] %>%
cbind(signal_cols)
## Save original `signals_df` before manipulating ------------------------
## Rename `Signals_sent` columns to prevent conflict
signals_df_o <- signals_df %>%
purrr::set_names(
nm = gsub(x = names(.),
replacement = "_ori_",
pattern = "_sent_")
)
## Signal label
sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)
@ -275,6 +294,7 @@ flex_index <- function(data,
WpA_classify %>%
left_join(signals_df, by = c("PersonId","Date")) %>%
left_join(hr_dt, by = c("PersonId","Date")) %>%
left_join(signals_df_o, by = c("PersonId","Date")) %>%
filter(Signals_Total >= 3) %>% # At least 3 signals required
## Additional calculations for Flexibility Index
@ -306,7 +326,8 @@ flex_index <- function(data,
sig_label = sig_label_,
start_hour = start_hour,
end_hour = end_hour,
method = plot_method)
method = plot_method,
mode = mode)
} else if(return == "data"){

Просмотреть файл

@ -12,7 +12,7 @@
#' working patterns; "time" plots the Flexibility Index for the group over time.
#' @param start_hour See `flex_index()`.
#' @param end_hour See `flex_index()`.
#'
#' @param mode See `flex_index()`.
#' @import dplyr
#' @import ggplot2
#' @importFrom data.table ":=" "%like%" "%between%"
@ -41,7 +41,8 @@ plot_flex_index <- function(data,
sig_label = "Signals_sent_",
method = "sample",
start_hour = 9,
end_hour = 17){
end_hour = 17,
mode = "binary"){
## Bindings for variables
TakeBreaks <- NULL
@ -95,18 +96,6 @@ plot_flex_index <- function(data,
data_tb <- data.table::as.data.table(data)
input_var <- names(data)[grepl(sig_label_, names(data))]
data_tb <- data_tb[, list(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId)),
by = input_var]
plot_data <-
data_tb %>%
as.data.frame() %>%
dplyr::arrange(desc(WeekCount)) %>%
slice(1:10)
plot_title <- "Top 10 Most Common Working patterns"
} else if(method == "time"){
@ -124,11 +113,85 @@ plot_flex_index <- function(data,
if(method %in% c("sample", "common")){
plot_data %>%
mutate(patternRank = 1:nrow(.)) %>%
dplyr::select(patternRank, dplyr::starts_with(sig_label_)) %>%
purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.))) %>%
if(mode == "binary"){
input_var <- names(data)[grepl(sig_label_, names(data))]
data_tb <- data_tb[, list(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId)),
by = input_var]
plot_data <-
data_tb %>%
as.data.frame() %>%
dplyr::arrange(desc(WeekCount)) %>%
slice(1:10)
plot_data_long <-
plot_data %>%
mutate(patternRank = 1:nrow(.)) %>%
dplyr::select(patternRank, dplyr::starts_with(sig_label_)) %>%
purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.)))
} else if(mode == "prop"){
input_var <- names(data)[grepl(sig_label_, names(data))]
sig_label_ <- gsub(
pattern = "_sent_",
replacement = "_ori_",
x = sig_label_
)
## 00, 01, 02, etc.
hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
# Use `mutate()` method
# Will get 10 IDs, not 10 rows
# NOTE: `input_var` is used to identify a distinct work pattern
plot_data <-
data_tb %>%
data.table::as.data.table() %>%
.[, `:=`(WeekCount = .N,
PersonCount = dplyr::n_distinct(PersonId),
Id = .GRP), # group id assignment
by = input_var] %>%
dplyr::arrange(desc(WeekCount))
plot_data <-
plot_data %>%
dplyr::select(Id, dplyr::contains("_ori_"), WeekCount) %>%
purrr::set_names(nm = gsub(
pattern = ".+_ori_",
replacement = "",
x = names(.)
)) %>%
purrr::set_names(nm = gsub(
pattern = "_.+",
replacement = "",
x = names(.)
)) %>%
# Need aggregation
.[, Signals_Total := rowSums(.SD), .SDcols = hours_col] %>%
.[, c(hours_col) := .SD / Signals_Total, .SDcols = hours_col] %>%
.[, Signals_Total := NULL] %>% # Remove unneeded column
.[, lapply(.SD, mean, na.rm = TRUE), .SDcols = hours_col, by = list(Id, WeekCount)]
plot_data_long <-
plot_data %>%
dplyr::arrange(desc(WeekCount)) %>%
dplyr::mutate(patternRank = 1:nrow(.)) %>%
slice(1:10)
} else {
stop("Invalid value to `mode`")
}
plot_data_long %>%
plot_hourly_pat(
start_hour = start_hour,
end_hour = end_hour,

Просмотреть файл

@ -12,7 +12,8 @@ flex_index(
start_hour = "0900",
end_hour = "1700",
return = "plot",
plot_method = "common"
plot_method = "common",
mode = "binary"
)
}
\arguments{
@ -54,6 +55,16 @@ See \code{Value} for more information.}
\item \code{"common"} plots the ten most common working patterns
\item \code{"time"} plots the Flexibility Index for the group over time
}}
\item{mode}{String specifying aggregation method for plot. Only applicable
when \code{return = "plot"}. Valid options include:
\itemize{
\item \code{"binary"}: convert hourly activity into binary blocks. In the plot, each
block would display as solid.
\item \code{"prop"}: calculate proportion of signals in each hour over total signals
across 24 hours, then average across all work weeks. In the plot, each
block would display as a heatmap.
}}
}
\value{
A different output is returned depending on the value passed to the \code{return}

Просмотреть файл

@ -9,7 +9,8 @@ plot_flex_index(
sig_label = "Signals_sent_",
method = "sample",
start_hour = 9,
end_hour = 17
end_hour = 17,
mode = "binary"
)
}
\arguments{
@ -25,6 +26,8 @@ working patterns; "time" plots the Flexibility Index for the group over time.}
\item{start_hour}{See \code{flex_index()}.}
\item{end_hour}{See \code{flex_index()}.}
\item{mode}{See \code{flex_index()}.}
}
\value{
ggplot object. See \code{method}.