2021-02-09 02:44:48 +03:00
|
|
|
# --------------------------------------------------------------------------------------------
|
|
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
|
|
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
|
|
|
|
# --------------------------------------------------------------------------------------------
|
|
|
|
|
2021-01-27 19:16:08 +03:00
|
|
|
#' @title Compute a Flexibility Index based on the Hourly Collaboration Query
|
|
|
|
#'
|
2021-02-09 02:44:48 +03:00
|
|
|
#' @description
|
|
|
|
#' `r lifecycle::badge('experimental')`
|
|
|
|
#'
|
|
|
|
#' Pass an Hourly Collaboration query and compute a Flexibility Index for the
|
|
|
|
#' entire population. The Flexibility Index is a quantitative measure of the
|
|
|
|
#' freedom for employees to work at a time of their choice.
|
|
|
|
#'
|
|
|
|
#' @details
|
2021-05-10 20:32:06 +03:00
|
|
|
#' The **Flexibility Index** is a metric that has been developed to quantify and
|
2022-04-05 17:37:13 +03:00
|
|
|
#' measure flexibility using behavioural data from Viva Insights. Flexibility
|
|
|
|
#' here refers to the freedom of employees to adopt a working arrangement of
|
|
|
|
#' their own choice, and more specifically refers to **time** flexibility
|
|
|
|
#' (_whenever_ I want) as opposed to **geographical** flexibility (_wherever_ I
|
|
|
|
#' want).
|
2021-05-10 20:32:06 +03:00
|
|
|
#'
|
2021-02-09 02:44:48 +03:00
|
|
|
#' The **Flexibility Index** is a score between 0 and 1, and is calculated based
|
|
|
|
#' on three component measures:
|
|
|
|
#'
|
|
|
|
#' - `ChangeHours`: this represents the freedom to define work start and end
|
|
|
|
#' time. Teams that embrace flexibility allow members to start and end their
|
|
|
|
#' workday at different times.
|
|
|
|
#'
|
|
|
|
#' - `TakeBreaks`: this represents the freedom define one's own schedule. In
|
|
|
|
#' teams that embrace flexibility, some members will choose to organize / split
|
|
|
|
#' their day in different ways (e.g. take a long lunch-break, disconnect in the
|
|
|
|
#' afternoon and reconnect in the evening, etc.).
|
|
|
|
#'
|
|
|
|
#' - `ControlHours`: this represents the freedom to switch off. Members who
|
|
|
|
#' choose alternative arrangements should be able to maintain a workload that is
|
|
|
|
#' broadly equivalent to those that follow standard arrangements.
|
|
|
|
#'
|
|
|
|
#' The **Flexibility Index** returns with one single score for each person-week,
|
|
|
|
#' plus the **three** sub-component binary variables (`TakeBreaks`,
|
|
|
|
#' `ChangeHours`, `ControlHours`). At the person-week level, each score can only
|
|
|
|
#' have the values 0, 0.33, 0.66, and 1. The Flexibility Index should only be
|
|
|
|
#' interpreted as a **group** of person-weeks, e.g. the average Flexibility
|
|
|
|
#' Index of a team of 6 over time, where the possible values would range from 0
|
|
|
|
#' to 1.
|
|
|
|
#'
|
2021-02-09 21:26:05 +03:00
|
|
|
#' @section Context:
|
|
|
|
#' The central feature of flexible working arrangements is
|
2021-02-09 02:44:48 +03:00
|
|
|
#' that it is the employee rather the employer who chooses the working
|
|
|
|
#' arrangement. _Observed flexibility_ serves as a proxy to assess whether a
|
|
|
|
#' flexible working arrangement are in place. The Flexibility Index is an
|
|
|
|
#' attempt to create such a proxy for quantifying and measuring flexibility,
|
2022-04-05 17:37:13 +03:00
|
|
|
#' using behavioural data from Viva Insights.
|
2021-01-27 19:16:08 +03:00
|
|
|
#'
|
2021-06-17 17:18:34 +03:00
|
|
|
#' @section Recurring disconnection time:
|
|
|
|
#' The key component of `TakeBreaks` in the Flexibility Index is best
|
|
|
|
#' interpreted as 'recurring disconnection time'. This denotes an hourly block
|
|
|
|
#' where there is consistently no activity occurring throughout the week. Note
|
|
|
|
#' that this applies a stricter criterion compared to the common definition of
|
|
|
|
#' a break, which is simply a time interval where no active work is being
|
|
|
|
#' done, and thus the more specific terminology 'recurring disconnection time'
|
|
|
|
#' is preferred.
|
|
|
|
#'
|
2021-01-27 19:16:08 +03:00
|
|
|
#' @param data Hourly Collaboration query to be passed through as data frame.
|
2021-02-09 02:44:48 +03:00
|
|
|
#'
|
2021-01-27 19:16:08 +03:00
|
|
|
#' @param hrvar A string specifying the HR attribute to cut the data by.
|
2021-02-09 02:44:48 +03:00
|
|
|
#' Defaults to NULL. This only affects the function when "table" is returned.
|
|
|
|
#'
|
|
|
|
#' @param signals Character vector to specify which collaboration metrics to
|
2021-09-03 13:45:18 +03:00
|
|
|
#' use:
|
|
|
|
#' - a combination of signals, such as `c("email", "IM")` (default)
|
|
|
|
#' - `"email"` for emails only
|
|
|
|
#' - `"IM"` for Teams messages only
|
|
|
|
#' - `"unscheduled_calls"` for Unscheduled Calls only
|
|
|
|
#' - `"meetings"` for Meetings only
|
2021-02-09 02:44:48 +03:00
|
|
|
#'
|
|
|
|
#' @param active_threshold A numeric value specifying the minimum number of
|
|
|
|
#' signals to be greater than in order to qualify as _active_. Defaults to 0.
|
|
|
|
#'
|
2021-08-25 17:44:28 +03:00
|
|
|
#' @param start_hour A character vector specifying starting hours, e.g. `"0900"`
|
2021-02-09 02:44:48 +03:00
|
|
|
#'
|
2021-08-25 17:44:28 +03:00
|
|
|
#' @param end_hour A character vector specifying end hours, e.g. `"1700"`
|
2021-02-09 02:44:48 +03:00
|
|
|
#'
|
2021-02-09 21:26:05 +03:00
|
|
|
#' @param return String specifying what to return. This must be one of the
|
|
|
|
#' following strings:
|
|
|
|
#' - `"plot"`
|
|
|
|
#' - `"data"`
|
|
|
|
#' - `"table"`
|
|
|
|
#'
|
|
|
|
#' See `Value` for more information.
|
2021-02-09 02:44:48 +03:00
|
|
|
#'
|
2021-01-27 19:16:08 +03:00
|
|
|
#' @param plot_method Character string for determining which plot to return.
|
2021-02-09 21:26:05 +03:00
|
|
|
#' - `"sample"` plots a sample of ten working pattern
|
|
|
|
#' - `"common"` plots the ten most common working patterns
|
|
|
|
#' - `"time"` plots the Flexibility Index for the group over time
|
|
|
|
#'
|
2022-06-22 19:10:21 +03:00
|
|
|
#' @param mode String specifying aggregation method for plot. Only applicable
|
|
|
|
#' when `return = "plot"`. Valid options include:
|
|
|
|
#' - `"binary"`: convert hourly activity into binary blocks. In the plot, each
|
|
|
|
#' block would display as solid.
|
|
|
|
#' - `"prop"`: calculate proportion of signals in each hour over total signals
|
|
|
|
#' across 24 hours, then average across all work weeks. In the plot, each
|
|
|
|
#' block would display as a heatmap.
|
|
|
|
#'
|
2021-02-09 21:26:05 +03:00
|
|
|
#' @return
|
|
|
|
#' A different output is returned depending on the value passed to the `return`
|
|
|
|
#' argument:
|
2021-03-30 19:40:16 +03:00
|
|
|
#' - `"plot"`: 'ggplot' object. A random of ten working patterns are displayed,
|
2021-02-09 21:26:05 +03:00
|
|
|
#' with diagnostic data and the Flexibility Index shown on the plot.
|
|
|
|
#' - `"data"`: data frame. The original input data appended with the
|
2021-03-05 01:19:07 +03:00
|
|
|
#' Flexibility Index and the component scores. Can be used with
|
|
|
|
#' `plot_flex_index()` to recreate visuals found in `flex_index()`.
|
2021-02-09 21:26:05 +03:00
|
|
|
#' - `"table"`: data frame. A summary table for the metric.
|
2021-01-27 19:16:08 +03:00
|
|
|
#'
|
2022-06-22 19:10:21 +03:00
|
|
|
#'
|
2021-01-27 19:16:08 +03:00
|
|
|
#' @import dplyr
|
|
|
|
#' @importFrom data.table ":=" "%like%" "%between%"
|
|
|
|
#'
|
|
|
|
#' @examples
|
2021-10-15 17:05:42 +03:00
|
|
|
#' # Create a sample small dataset
|
|
|
|
#' orgs <- c("Customer Service", "Financial Planning", "Biz Dev")
|
|
|
|
#' em_data <- em_data[em_data$Organization %in% orgs, ]
|
|
|
|
#'
|
2021-01-27 19:16:08 +03:00
|
|
|
#' # Examples of how to test the plotting options individually
|
|
|
|
#' # Sample of 10 work patterns
|
|
|
|
#' em_data %>%
|
|
|
|
#' flex_index(return = "plot", plot_method = "sample")
|
|
|
|
#'
|
|
|
|
#' # 10 most common work patterns
|
|
|
|
#' em_data %>%
|
|
|
|
#' flex_index(return = "plot", plot_method = "common")
|
|
|
|
#'
|
|
|
|
#' # Plot Flexibility Index over time
|
2023-08-17 11:49:20 +03:00
|
|
|
#' \donttest{
|
2021-01-27 19:16:08 +03:00
|
|
|
#' em_data %>%
|
|
|
|
#' flex_index(return = "plot", plot_method = "time")
|
2023-08-17 11:49:20 +03:00
|
|
|
#' }
|
2021-01-27 19:16:08 +03:00
|
|
|
#'
|
2021-02-03 20:56:41 +03:00
|
|
|
#' # Return a summary table with the computed Flexibility Index
|
2023-08-17 11:49:20 +03:00
|
|
|
#' \donttest{
|
2021-02-03 20:56:41 +03:00
|
|
|
#' em_data %>%
|
|
|
|
#' flex_index(hrvar = "Organization", return = "table")
|
2023-08-17 11:49:20 +03:00
|
|
|
#' }
|
2021-02-03 20:56:41 +03:00
|
|
|
#'
|
2021-08-27 17:40:00 +03:00
|
|
|
#' @section Returning the raw data:
|
|
|
|
#' The raw data containing the computed Flexibility Index can be returned with
|
|
|
|
#' the following:
|
|
|
|
#' ```
|
2021-01-27 19:16:08 +03:00
|
|
|
#' em_data %>%
|
|
|
|
#' flex_index(return = "data")
|
2021-08-27 17:40:00 +03:00
|
|
|
#' ```
|
2021-03-05 01:19:07 +03:00
|
|
|
#'
|
|
|
|
#' @family Working Patterns
|
2021-01-27 19:16:08 +03:00
|
|
|
#'
|
|
|
|
#' @export
|
|
|
|
flex_index <- function(data,
|
|
|
|
hrvar = NULL,
|
|
|
|
signals = c("email", "IM"),
|
|
|
|
active_threshold = 0,
|
|
|
|
start_hour = "0900",
|
|
|
|
end_hour = "1700",
|
|
|
|
return = "plot",
|
2022-06-22 19:10:21 +03:00
|
|
|
plot_method = "common",
|
|
|
|
mode = "binary"){
|
2021-01-27 19:16:08 +03:00
|
|
|
|
2021-02-03 15:46:28 +03:00
|
|
|
## Bindings for variables
|
|
|
|
TakeBreaks <- NULL
|
|
|
|
ChangeHours <- NULL
|
|
|
|
ControlHours <- NULL
|
|
|
|
FlexibilityIndex <- NULL
|
2021-02-03 20:15:40 +03:00
|
|
|
Signals_Break_hours <- NULL
|
2021-02-03 15:46:28 +03:00
|
|
|
|
2021-01-27 19:16:08 +03:00
|
|
|
## Make sure data.table knows we know we're using it
|
|
|
|
.datatable.aware = TRUE
|
|
|
|
|
|
|
|
## Save original
|
|
|
|
start_hour_o <- start_hour
|
|
|
|
end_hour_o <- end_hour
|
|
|
|
|
|
|
|
## Coerce to numeric, remove trailing zeros
|
|
|
|
start_hour <- as.numeric(gsub(pattern = "00$", replacement = "", x = start_hour))
|
|
|
|
end_hour <- as.numeric(gsub(pattern = "00$", replacement = "", x = end_hour))
|
|
|
|
norm_span <- end_hour - start_hour
|
|
|
|
|
|
|
|
## convert to data.table
|
|
|
|
data2 <-
|
|
|
|
data %>%
|
|
|
|
dplyr::mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%
|
|
|
|
data.table::as.data.table() %>%
|
|
|
|
data.table::copy()
|
|
|
|
|
2021-09-03 13:45:18 +03:00
|
|
|
## Text replacement only for allowed values
|
|
|
|
if(any(signals %in% c("email", "IM", "unscheduled_calls", "meetings"))){
|
2021-01-27 19:16:08 +03:00
|
|
|
|
2021-09-03 13:45:18 +03:00
|
|
|
signal_set <- gsub(pattern = "email", replacement = "Emails_sent", x = signals) # case-sensitive
|
|
|
|
signal_set <- gsub(pattern = "IM", replacement = "IMs_sent", x = signal_set)
|
|
|
|
signal_set <- gsub(pattern = "unscheduled_calls", replacement = "Unscheduled_calls", x = signal_set)
|
|
|
|
signal_set <- gsub(pattern = "meetings", replacement = "Meetings", x = signal_set)
|
2021-01-27 19:16:08 +03:00
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
stop("Invalid input for `signals`.")
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2021-09-03 13:45:18 +03:00
|
|
|
## Create 24 summed `Signals_sent` columns
|
|
|
|
signal_cols <- purrr::map(0:23, ~combine_signals(data, hr = ., signals = signal_set))
|
|
|
|
signal_cols <- bind_cols(signal_cols)
|
|
|
|
|
|
|
|
## Use names for matching
|
|
|
|
input_var <- names(signal_cols)
|
|
|
|
|
|
|
|
## Signals sent by Person and Date
|
|
|
|
signals_df <-
|
|
|
|
data2 %>%
|
|
|
|
.[, c("PersonId", "Date")] %>%
|
|
|
|
cbind(signal_cols)
|
|
|
|
|
2022-06-22 19:10:21 +03:00
|
|
|
## Save original `signals_df` before manipulating ------------------------
|
|
|
|
## Rename `Signals_sent` columns to prevent conflict
|
|
|
|
signals_df_o <- signals_df %>%
|
|
|
|
purrr::set_names(
|
|
|
|
nm = gsub(x = names(.),
|
|
|
|
replacement = "_ori_",
|
|
|
|
pattern = "_sent_")
|
|
|
|
)
|
|
|
|
|
2021-09-03 13:45:18 +03:00
|
|
|
## Signal label
|
|
|
|
sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)
|
2021-01-27 19:16:08 +03:00
|
|
|
|
|
|
|
## Create binary variable 0 or 1
|
|
|
|
num_cols <- names(which(sapply(signals_df, is.numeric))) # Get numeric columns
|
|
|
|
|
|
|
|
## Create Signals Total and binary values
|
|
|
|
signals_df <-
|
|
|
|
signals_df %>%
|
|
|
|
data.table::as.data.table() %>%
|
|
|
|
# active_threshold: minimum signals to qualify as active
|
|
|
|
.[, (num_cols) := lapply(.SD, function(x) ifelse(x > active_threshold, 1, 0)), .SDcols = num_cols] %>%
|
|
|
|
.[, ("Signals_Total") := apply(.SD, 1, sum), .SDcols = input_var]
|
|
|
|
|
|
|
|
## Classify PersonId-Signal data by time of day
|
|
|
|
|
|
|
|
WpA_classify <-
|
|
|
|
signals_df %>%
|
2021-02-03 15:46:28 +03:00
|
|
|
tidyr::gather(!!sym(sig_label), sent, -PersonId, -Date, -Signals_Total) %>%
|
2021-01-27 19:16:08 +03:00
|
|
|
data.table::as.data.table()
|
|
|
|
|
|
|
|
WpA_classify[, StartEnd := gsub(pattern = "[^[:digit:]]", replacement = "", x = get(sig_label))]
|
|
|
|
WpA_classify[, Start := as.numeric(substr(StartEnd, start = 1, stop = 2))]
|
|
|
|
WpA_classify[, End := as.numeric(substr(StartEnd, start = 3, stop = 4))]
|
|
|
|
WpA_classify[, Before_start := Start < (start_hour)] # Earlier than start hour
|
|
|
|
WpA_classify[, After_end := End > (end_hour)] # Later than start hour
|
|
|
|
WpA_classify[, Within_hours := (Start >= start_hour & End <= end_hour)]
|
|
|
|
WpA_classify[, HourType := NA_character_]
|
|
|
|
WpA_classify[After_end == TRUE, HourType := "After_end"]
|
|
|
|
WpA_classify[Before_start == TRUE, HourType := "Before_start"]
|
|
|
|
WpA_classify[Within_hours == TRUE, HourType := "Within_hours"]
|
|
|
|
|
|
|
|
WpA_classify <-
|
|
|
|
WpA_classify[, c("PersonId", "Date", "Signals_Total", "HourType", "sent")] %>%
|
|
|
|
.[, .(sent = sum(sent)), by = c("PersonId", "Date", "Signals_Total", "HourType")] %>%
|
|
|
|
tidyr::spread(HourType, sent) %>%
|
|
|
|
left_join(WpA_classify %>% ## Calculate first and last activity for day_span
|
|
|
|
filter(sent>0) %>%
|
|
|
|
group_by(PersonId,Date) %>%
|
|
|
|
summarise(First_signal = min(Start),
|
|
|
|
Last_signal = max(End),
|
|
|
|
.groups = "drop_last"),
|
|
|
|
by = c("PersonId","Date")) %>%
|
|
|
|
mutate(Day_Span = Last_signal-First_signal,
|
|
|
|
Signals_Break_hours = Day_Span-Signals_Total) %>%
|
|
|
|
select(-Signals_Total)
|
|
|
|
|
|
|
|
## hrvar treatment
|
|
|
|
if(is.null(hrvar)){
|
|
|
|
|
|
|
|
hr_dt <- data2[, c("PersonId", "Date")]
|
|
|
|
hr_dt <- hr_dt[, Total := "Total"]
|
|
|
|
hrvar <- "Total"
|
|
|
|
hr_dt <- as.data.frame(hr_dt)
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
temp_str <- c("PersonId", "Date", hrvar)
|
|
|
|
|
2021-02-03 15:46:28 +03:00
|
|
|
# hr_dt <- data2[, ..temp_str] # double dot prefix
|
|
|
|
|
|
|
|
hr_dt <-
|
2021-02-03 20:56:41 +03:00
|
|
|
data2 %>%
|
2021-02-03 15:46:28 +03:00
|
|
|
as.data.frame() %>%
|
|
|
|
select(temp_str)
|
2021-01-27 19:16:08 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
## Bind calculated columns with original Signals df
|
|
|
|
calculated_data <-
|
|
|
|
WpA_classify %>%
|
|
|
|
left_join(signals_df, by = c("PersonId","Date")) %>%
|
|
|
|
left_join(hr_dt, by = c("PersonId","Date")) %>%
|
2022-06-22 19:10:21 +03:00
|
|
|
left_join(signals_df_o, by = c("PersonId","Date")) %>%
|
2021-01-27 19:16:08 +03:00
|
|
|
filter(Signals_Total >= 3) %>% # At least 3 signals required
|
|
|
|
|
|
|
|
## Additional calculations for Flexibility Index
|
|
|
|
mutate(TakeBreaks = (Signals_Break_hours > 0),
|
|
|
|
ChangeHours = (First_signal != start_hour),
|
|
|
|
ControlHours = (Day_Span <= norm_span)) %>%
|
|
|
|
mutate(FlexibilityIndex = select(., TakeBreaks, ChangeHours, ControlHours) %>%
|
|
|
|
apply(1, mean))
|
|
|
|
|
|
|
|
## Plot return
|
|
|
|
sig_label_ <- paste0(sig_label, "_")
|
|
|
|
|
|
|
|
## Summary Table for Return
|
|
|
|
## Applies groups
|
|
|
|
returnTable <-
|
|
|
|
calculated_data %>%
|
|
|
|
group_by(!!sym(hrvar)) %>%
|
2021-02-09 21:26:05 +03:00
|
|
|
summarise_at(vars(TakeBreaks, ChangeHours, ControlHours),
|
|
|
|
~mean(.), .groups = "drop_last") %>%
|
|
|
|
mutate(FlexibilityIndex =
|
|
|
|
select(., TakeBreaks, ChangeHours, ControlHours) %>%
|
|
|
|
apply(1, mean))
|
2021-01-27 19:16:08 +03:00
|
|
|
|
|
|
|
## Main plot
|
|
|
|
|
|
|
|
if(return == "plot"){
|
|
|
|
|
|
|
|
plot_flex_index(data = calculated_data,
|
|
|
|
sig_label = sig_label_,
|
|
|
|
start_hour = start_hour,
|
|
|
|
end_hour = end_hour,
|
2022-06-22 19:10:21 +03:00
|
|
|
method = plot_method,
|
|
|
|
mode = mode)
|
2021-01-27 19:16:08 +03:00
|
|
|
|
|
|
|
} else if(return == "data"){
|
2021-02-09 21:26:05 +03:00
|
|
|
|
2021-01-27 19:16:08 +03:00
|
|
|
calculated_data
|
2021-02-09 21:26:05 +03:00
|
|
|
|
2021-01-27 19:16:08 +03:00
|
|
|
} else if(return == "table"){
|
|
|
|
|
|
|
|
returnTable
|
|
|
|
|
|
|
|
} else {
|
2021-02-09 21:26:05 +03:00
|
|
|
|
2021-01-27 19:16:08 +03:00
|
|
|
stop("Check input for `return`.")
|
2021-02-09 21:26:05 +03:00
|
|
|
|
2021-01-27 19:16:08 +03:00
|
|
|
}
|
|
|
|
}
|