This commit is contained in:
Martin Chan 2021-02-03 16:53:54 +00:00
Родитель 4c8ca2d581
Коммит a50d956640
6 изменённых файлов: 75 добавлений и 153 удалений

Просмотреть файл

@ -2,6 +2,7 @@
#' using the binary week-based (bw) method.
#'
#' @description
#' `r lifecycle::badge('experimental')`
#' Apply a rule-based algorithm to emails sent by hour of day,
#' using the binary week-based (bw) method.
#'
@ -97,60 +98,6 @@ workpatterns_classify_bw <- function(data,
## Signal label
sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)
# ## Select input variable names
# if("email" %in% signals & "IM" %in% signals){
#
# ## Create 24 summed `Signals_sent` columns
# signal_cols <-
# purrr::map(0:23, ~combine_signals(data2, hr = .)) %>%
# dplyr::bind_cols()
#
# ## Use names for matching
# input_var <- names(signal_cols)
#
# ## Signals sent by Person and date
# signals_df <-
# data2 %>%
# .[, c("PersonId", "Date")] %>%
# cbind(signal_cols)
#
# ## Signal label
# sig_label <- "Signals_sent"
#
# } else if(signals == "IM"){
#
# match_index <- grepl(pattern = "^IMs_sent", x = names(data2))
# input_var <- names(data2)[match_index]
# input_var2 <- c("PersonId", "Date", input_var)
#
# ## signals sent by Person and date
# signals_df <-
# data2 %>%
# .[, ..input_var2]
#
# sig_label <- "IMs_sent"
#
#
# } else if(signals == "email"){
#
# match_index <- grepl(pattern = "^Emails_sent", x = names(data2))
# input_var <- names(data2)[match_index]
# input_var2 <- c("PersonId", "Date", input_var)
#
# ## signals sent by Person and date
# signals_df <-
# data2 %>%
# .[, ..input_var2]
#
# sig_label <- "Emails_sent"
#
# } else {
#
# stop("Invalid input for `signals`.")
#
# }
## Create binary variable 0 or 1
num_cols <- names(which(sapply(signals_df, is.numeric))) # Get numeric columns

Просмотреть файл

@ -2,22 +2,29 @@
#' the person-average volume-based (pav) method.
#'
#' @description
#' `r lifecycle::badge('experimental')`
#' Apply a rule-based algorithm to emails or instant messages sent by hour of day.
#' This uses a person-average volume-based (pav) method.
#'
#' @param data A data frame containing data from the Hourly Collaboration query.
#'
#' @param return Character vector to specify what to return.
#' Valid options include "plot" (default), "data", "table" and "plot-area".
#' "plot" returns a bar plot, whilst "plot-area" returns an overlapping area plot.
#' @param return Character vector to specify what to return. Valid options include:
#' - "plot": returns a bar plot of signal distribution by hour and archetypes (default)
#' - "data": returns the raw data with the classified archetypes
#' - "table": returns a summary table of the archetypes
#' - "plot-area": returns an overlapping area plot
#'
#' @param values Character vector to specify whether to return percentages
#' or absolute values in "data" and "plot". Valid values are "percent" (default)
#' and "abs".
#' or absolute values in "data" and "plot". Valid values are:
#' - "percent": percentage of signals divided by total signals (default)
#' - "abs": absolute count of signals
#'
#' @param signals Character vector to specify which collaboration metrics to use:
#' You may use "email" (default) for emails only, "IM" for Teams messages only,
#' or a combination of the two `c("email", "IM")`.
#' - "email" (default) for emails only
#' - "IM" for Teams messages only
#' - "unscheduled_calls" for Unscheduled Calls only
#' - "meetings" for Meetings only
#' - or a combination of signals, such as `c("email", "IM")`
#'
#' @param start_hour A character vector specifying starting hours,
#' e.g. "0900"
@ -29,13 +36,14 @@
#' @import ggplot2
#'
#' @examples
#' \dontrun{
#' # Returns a plot by default
#' workpatterns_classify(em_data)
#' ## Returns a plot by default
#' workpatterns_classify_pav(em_data)
#'
#' # Returning a table
#' workpatterns_classify(em_data, return = "table")
#' }
#' ## Return a table
#' workpatterns_classify_pav(em_data, return = "table")
#'
#' ## Return an area plot
#' workpatterns_classify_pav(em_data, return = "plot-area")
#'
#' @family Work Patterns
#'
@ -82,65 +90,6 @@ workpatterns_classify_pav <- function(data,
## Signal label
sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)
# ## Select input variable names
# if("email" %in% signals & "IM" %in% signals){
#
# ## Create 24 summed `Signals_sent` columns
# signal_cols <-
# purrr::map(0:23,
# ~combine_signals(data,
# hr = .)) %>%
# bind_cols()
#
# ## Use names for matching
# input_var <- names(signal_cols)
#
# ## Average signals sent by Person
# signals_df <-
# data %>%
# select(PersonId) %>%
# cbind(signal_cols) %>%
# group_by(PersonId) %>%
# summarise_all(~mean(.))
#
# ## Signal label
# sig_label <- "Signals_sent"
#
# } else if(signals == "IM"){
#
# match_index <- grepl(pattern = "^IMs_sent", x = names(data))
# input_var <-names(data)[match_index]
#
# ## Average signals sent by Person
# signals_df <-
# data %>%
# select(PersonId, all_of(input_var)) %>%
# group_by(PersonId) %>%
# summarise_all(~mean(.))
#
# sig_label <- "IMs_sent"
#
# } else if(signals == "email"){
#
# match_index <- grepl(pattern = "^Emails_sent", x = names(data))
# input_var <-names(data)[match_index]
#
# ## Average signals sent by Person
# signals_df <-
# data %>%
# select(PersonId, all_of(input_var)) %>%
# group_by(PersonId) %>%
# summarise_all(~mean(.))
#
# sig_label <- "Emails_sent"
#
# } else {
#
# stop("Invalid input for `signals`.")
#
# }
## Normalised pattern data
ptn_data_norm <-
signals_df %>%

Просмотреть файл

@ -18,15 +18,19 @@
#'
#' @param data A data frame containing data from the Hourly Collaboration query.
#' @param k Numeric vector to specify the `k` number of clusters to cut by.
#' @param return Character vector to specify what to return.
#' Valid options include "plot" (default), "data", "table", "plot-area", "hclust", and "dist".
#' "plot" returns a bar plot, whilst "plot-area" returns an overlapping area plot.
#' "hclust" returns the hierarchical model generated by the function.
#' "dist" returns the distance matrix used to build the clustering model.
#' @param return Character vector to specify what to return. Valid options include:
#' - "plot": returns a bar plot (default)
#' - "data": returns raw data with the clusters
#' - "table": returns a summary table
#' - "plot-area": returns an overlapping area plot
#' - "hclust": returns the hierarchical model generated by the function
#' - "dist": returns the distance matrix used to build the clustering model
#'
#' @param values Character vector to specify whether to return percentages
#' or absolute values in "data" and "plot". Valid values are "percent" (default)
#' and "abs".
#' or absolute values in "data" and "plot". Valid values are:
#' - "percent": percentage of signals divided by total signals (default)
#' - "abs": absolute count of signals
#'
#' @param signals Character vector to specify which collaboration metrics to use:
#' - "email" (default) for emails only
#' - "IM" for Teams messages only

Просмотреть файл

@ -39,6 +39,7 @@ Valid options include "plot" (default), "data", "table" and "plot-area".
"plot" returns a bar plot, whilst "plot-area" returns an overlapping area plot.}
}
\description{
\ifelse{html}{\out{<a href='https://www.tidyverse.org/lifecycle/#experimental'><img src='figures/lifecycle-experimental.svg' alt='Experimental lifecycle'></a>}}{\strong{Experimental}}
Apply a rule-based algorithm to emails sent by hour of day,
using the binary week-based (bw) method.
}

Просмотреть файл

@ -18,12 +18,20 @@ workpatterns_classify_pav(
\item{data}{A data frame containing data from the Hourly Collaboration query.}
\item{values}{Character vector to specify whether to return percentages
or absolute values in "data" and "plot". Valid values are "percent" (default)
and "abs".}
or absolute values in "data" and "plot". Valid values are:
\itemize{
\item "percent": percentage of signals divided by total signals (default)
\item "abs": absolute count of signals
}}
\item{signals}{Character vector to specify which collaboration metrics to use:
You may use "email" (default) for emails only, "IM" for Teams messages only,
or a combination of the two \code{c("email", "IM")}.}
\itemize{
\item "email" (default) for emails only
\item "IM" for Teams messages only
\item "unscheduled_calls" for Unscheduled Calls only
\item "meetings" for Meetings only
\item or a combination of signals, such as \code{c("email", "IM")}
}}
\item{start_hour}{A character vector specifying starting hours,
e.g. "0900"}
@ -31,22 +39,28 @@ e.g. "0900"}
\item{end_hour}{A character vector specifying ending hours,
e.g. "1700"}
\item{return}{Character vector to specify what to return.
Valid options include "plot" (default), "data", "table" and "plot-area".
"plot" returns a bar plot, whilst "plot-area" returns an overlapping area plot.}
\item{return}{Character vector to specify what to return. Valid options include:
\itemize{
\item "plot": returns a bar plot of signal distribution by hour and archetypes (default)
\item "data": returns the raw data with the classified archetypes
\item "table": returns a summary table of the archetypes
\item "plot-area": returns an overlapping area plot
}}
}
\description{
\ifelse{html}{\out{<a href='https://www.tidyverse.org/lifecycle/#experimental'><img src='figures/lifecycle-experimental.svg' alt='Experimental lifecycle'></a>}}{\strong{Experimental}}
Apply a rule-based algorithm to emails or instant messages sent by hour of day.
This uses a person-average volume-based (pav) method.
}
\examples{
\dontrun{
# Returns a plot by default
workpatterns_classify(em_data)
## Returns a plot by default
workpatterns_classify_pav(em_data)
# Returning a table
workpatterns_classify(em_data, return = "table")
}
## Return a table
workpatterns_classify_pav(em_data, return = "table")
## Return an area plot
workpatterns_classify_pav(em_data, return = "plot-area")
}
\seealso{

Просмотреть файл

@ -19,15 +19,22 @@ workpatterns_hclust(
\item{k}{Numeric vector to specify the \code{k} number of clusters to cut by.}
\item{return}{Character vector to specify what to return.
Valid options include "plot" (default), "data", "table", "plot-area", "hclust", and "dist".
"plot" returns a bar plot, whilst "plot-area" returns an overlapping area plot.
"hclust" returns the hierarchical model generated by the function.
"dist" returns the distance matrix used to build the clustering model.}
\item{return}{Character vector to specify what to return. Valid options include:
\itemize{
\item "plot": returns a bar plot (default)
\item "data": returns raw data with the clusters
\item "table": returns a summary table
\item "plot-area": returns an overlapping area plot
\item "hclust": returns the hierarchical model generated by the function
\item "dist": returns the distance matrix used to build the clustering model
}}
\item{values}{Character vector to specify whether to return percentages
or absolute values in "data" and "plot". Valid values are "percent" (default)
and "abs".}
or absolute values in "data" and "plot". Valid values are:
\itemize{
\item "percent": percentage of signals divided by total signals (default)
\item "abs": absolute count of signals
}}
\item{signals}{Character vector to specify which collaboration metrics to use:
\itemize{