Merge branch 'main' into Feature/Sales-Insights

2022-06-30 15:12:59 +01:00 · 2022-06-30 15:12:59 +01:00 · c0f2e09cd2
--- a/6
+++ b/6
@ -1,3 +1,3 @@
-Version: 1.6.4
-Date: 2022-01-19 15:26:54 UTC
-SHA: 3edf7394a94cd59be8b6d83362ab0cfad81c4afa
+Version: 1.7.0
+Date: 2022-06-06 13:37:15 UTC
+SHA: 7aefaa599891ef7c20db669a8f52b59e80ef48ef
--- a/6
+++ b/6
@ -1,7 +1,7 @@
 Package: wpa
 Type: Package
 Title: Tools for Analysing and Visualising Viva Insights Data
-Version: 1.7.0
+Version: 1.7.0.9000
 Authors@R: c(
    person(given = "Martin", family = "Chan", role = c("aut", "cre"), email = "martin.chan@microsoft.com"),
    person(given = "Carlos", family = "Morales", role = "aut", email = "carlos.morales@microsoft.com"),
@ -67,5 +67,7 @@ Suggests:
    glue,
    flexdashboard,
    lmtest,
-    sandwich
+    sandwich,
+    testthat (>= 3.0.0)
 Language: en-US
+Config/testthat/edition: 3
--- a/1
+++ b/1
@ -152,6 +152,7 @@ export(period_change)
 export(personas_hclust)
 export(plot_WOE)
 export(plot_flex_index)
+export(plot_hourly_pat)
 export(read_preamble)
 export(remove_outliers)
 export(rgb2hex)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,3 +1,8 @@
+# wpa (development version)
+
+- Updated and improved output and algorithm for `workpatterns_classify()`
+- Additional visual options for `workpatterns_classify()` and `flex_index()`
+
 # wpa 1.7.0

 - Renamed 'Workplace Analytics' to 'Viva Insights'
--- a/R/create_dt.R
+++ b/R/create_dt.R
@ -14,6 +14,8 @@
 #' @param rounding Numeric vector to specify the number of decimal points to display
 #' @param freeze Number of columns from the left to 'freeze'. Defaults to 2,
 #' which includes the row number column.
+#' @param percent Logical value specifying whether to display numeric columns
+#' as percentages.
 #'
 #' @import DT
 #' @importFrom dplyr mutate_if
@ -24,14 +26,15 @@
 #' Returns an HTML widget displaying rectangular data.
 #'
 #' @export
-create_dt <- function(x, rounding = 1, freeze = 2){
+create_dt <- function(x, rounding = 1, freeze = 2, percent = FALSE){

  # Round all numeric to "rounding" number of dp
  num_cols <- dplyr::select_if(x, is.numeric) %>% names()

  if(length(num_cols) == 0){ # No numeric columns

-    DT::datatable(x,
+    out <-
+      DT::datatable(x,
                  extensions = c('Buttons',
                                 'FixedColumns'),
                  options = list(dom = 'Blfrtip',
@ -43,7 +46,8 @@ create_dt <- function(x, rounding = 1, freeze = 2){

  } else {

-    DT::datatable(x,
+    out <-
+      DT::datatable(x,
                  extensions = c('Buttons',
                                 'FixedColumns'),
                  options = list(dom = 'Blfrtip',
@ -54,6 +58,15 @@ create_dt <- function(x, rounding = 1, freeze = 2){
                                                   c(10,25,50,"All")))) %>%
      DT::formatRound(columns = num_cols, rounding)

+    if(percent == TRUE){

+      out <-
+        out %>%
+        DT::formatPercentage(columns = num_cols, rounding)
+
+    }
  }
+
+
+  out
 }
--- a/R/flex_index.R
+++ b/R/flex_index.R
@ -94,6 +94,14 @@
 #'   - `"common"` plots the ten most common working patterns
 #'   - `"time"` plots the Flexibility Index for the group over time
 #'
+#' @param mode String specifying aggregation method for plot. Only applicable
+#'   when `return = "plot"`. Valid options include:
+#'   - `"binary"`: convert hourly activity into binary blocks. In the plot, each
+#'   block would display as solid.
+#'   - `"prop"`: calculate proportion of signals in each hour over total signals
+#'   across 24 hours, then average across all work weeks. In the plot, each
+#'   block would display as a heatmap.
+#'
 #' @return
 #' A different output is returned depending on the value passed to the `return`
 #' argument:
@ -104,6 +112,7 @@
 #'   `plot_flex_index()` to recreate visuals found in `flex_index()`.
 #'   - `"table"`: data frame. A summary table for the metric.
 #'
+#'
 #' @import dplyr
 #' @importFrom data.table ":=" "%like%" "%between%"
 #'
@ -147,7 +156,8 @@ flex_index <- function(data,
                       start_hour = "0900",
                       end_hour = "1700",
                       return = "plot",
-                       plot_method = "common"){
+                       plot_method = "common",
+                       mode = "binary"){

  ## Bindings for variables
  TakeBreaks <- NULL
@ -202,6 +212,15 @@ flex_index <- function(data,
    .[, c("PersonId", "Date")] %>%
    cbind(signal_cols)

+  ## Save original `signals_df` before manipulating ------------------------
+  ## Rename `Signals_sent` columns to prevent conflict
+  signals_df_o <- signals_df %>%
+    purrr::set_names(
+      nm = gsub(x = names(.),
+                replacement = "_ori_",
+                pattern = "_sent_")
+    )
+
  ## Signal label
  sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)

@ -275,6 +294,7 @@ flex_index <- function(data,
    WpA_classify %>%
    left_join(signals_df, by = c("PersonId","Date")) %>%
    left_join(hr_dt, by = c("PersonId","Date")) %>%
+    left_join(signals_df_o, by = c("PersonId","Date")) %>%
    filter(Signals_Total >= 3) %>% # At least 3 signals required

    ## Additional calculations for Flexibility Index
@ -306,7 +326,8 @@ flex_index <- function(data,
                    sig_label = sig_label_,
                    start_hour = start_hour,
                    end_hour = end_hour,
-                    method = plot_method)
+                    method = plot_method,
+                    mode = mode)

  } else if(return == "data"){

--- a/R/network_p2p.R
+++ b/R/network_p2p.R
@ -60,8 +60,10 @@
 #'   of the edges (only for 'ggraph' mode). Defaults to 1.
 #' @param res Resolution parameter to be passed to `leiden::leiden()`. Defaults
 #'   to 0.5.
-#' @param seed Seed for the random number generator passed to `leiden::leiden()`
-#'   to ensure consistency. Only applicable when `display` is set to `"leiden"`.
+#' @param seed Seed for the random number generator passed to either
+#'   `set.seed()` when the Louvain algorithm is used, or `leiden::leiden()` when
+#'   the Leiden algorithm is used, to ensure consistency. Only applicable when
+#'   `display` is set to `"louvain"` or `"leiden"`.
 #' @param algorithm String to specify the node placement algorithm to be used.
 #'   Defaults to `"mds"` for the deterministic multi-dimensional scaling of
 #'   nodes. See
@ -241,24 +243,26 @@ network_p2p <- function(data,

    } else if(display == "louvain"){

-    ## Convert to undirected
-    g_ud <- igraph::as.undirected(g_raw)
+      set.seed(seed = seed)

-    ## Return a numeric vector of partitions / clusters / modules
-    ## Set a low resolution parameter to have fewer groups
-    ## weights = NULL means that if the graph as a `weight` edge attribute, this
-    ## will be used by default.
-    lc <- igraph::cluster_louvain(g_ud, weights = NULL)
+      ## Convert to undirected
+      g_ud <- igraph::as.undirected(g_raw)

-    ## Add cluster
-    g <-
-      g_ud %>%
-      # Add louvain partitions to graph object
-      igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
-      igraph::simplify()
+      ## Return a numeric vector of partitions / clusters / modules
+      ## Set a low resolution parameter to have fewer groups
+      ## weights = NULL means that if the graph as a `weight` edge attribute, this
+      ## will be used by default.
+      lc <- igraph::cluster_louvain(g_ud, weights = NULL)

-    ## Name of vertex attribute
-    v_attr <- "cluster"
+      ## Add cluster
+      g <-
+        g_ud %>%
+        # Add louvain partitions to graph object
+        igraph::set_vertex_attr("cluster", value = as.character(igraph::membership(lc))) %>% # Return membership - diff from Leiden
+        igraph::simplify()
+
+      ## Name of vertex attribute
+      v_attr <- "cluster"

  } else if(display == "leiden"){

--- a/R/plot_flex_index.R
+++ b/R/plot_flex_index.R
@ -12,7 +12,7 @@
 #' working patterns; "time" plots the Flexibility Index for the group over time.
 #' @param start_hour See `flex_index()`.
 #' @param end_hour See `flex_index()`.
-#'
+#' @param mode See `flex_index()`.
 #' @import dplyr
 #' @import ggplot2
 #' @importFrom data.table ":=" "%like%" "%between%"
@ -41,7 +41,8 @@ plot_flex_index <- function(data,
                            sig_label = "Signals_sent_",
                            method = "sample",
                            start_hour = 9,
-                            end_hour = 17){
+                            end_hour = 17,
+                            mode = "binary"){

  ## Bindings for variables
  TakeBreaks <- NULL
@ -60,12 +61,19 @@ plot_flex_index <- function(data,
    dplyr::mutate(FlexibilityIndex = select(., TakeBreaks, ChangeHours, ControlHours) %>% apply(1, mean),
           patternRank = 5) # 5 so that it shows right in the middle

+
  ## Used for captions
  score_tb <-
    myTable_legends %>%
    dplyr::mutate_at(vars(FlexibilityIndex), ~round(.*100)) %>%
    dplyr::mutate_at(vars(TakeBreaks, ChangeHours, ControlHours), ~scales::percent(.))

+  ## Make for pretty printing
+  myTable_legends <-
+    myTable_legends %>%
+    dplyr::mutate(FlexibilityIndex = scales::percent(FlexibilityIndex))
+
+
  ## Main plot
  ## Different plots if different `method` is specified

@ -77,6 +85,11 @@ plot_flex_index <- function(data,
      data %>%
      .[sample(nrow(.), size = 10), ]

+    ## Make sure data.table knows we know we're using it
+    .datatable.aware = TRUE
+
+    data_tb <- data.table::as.data.table(data)
+
    plot_title <- "Random sample of 10 Working patterns"

  } else if(method == "common"){
@ -88,18 +101,6 @@ plot_flex_index <- function(data,

    data_tb <- data.table::as.data.table(data)

-    input_var <- names(data)[grepl(sig_label_, names(data))]
-
-    data_tb <- data_tb[, list(WeekCount = .N,
-                              PersonCount = dplyr::n_distinct(PersonId)),
-                       by = input_var]
-
-    plot_data <-
-      data_tb %>%
-      as.data.frame() %>%
-      dplyr::arrange(desc(WeekCount)) %>%
-      slice(1:10)
-
    plot_title <- "Top 10 Most Common Working patterns"

  } else if(method == "time"){
@ -117,51 +118,152 @@ plot_flex_index <- function(data,

  if(method %in% c("sample", "common")){

-    plot_data %>%
-      mutate(patternRank = 1:nrow(.)) %>%
-      dplyr::select(patternRank, dplyr::starts_with(sig_label_))  %>%
-      purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
-      purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.))) %>%
-      tidyr::gather(Hours, Freq, -patternRank) %>%
-      ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
-      ggplot2::geom_tile(height=.5) +
-      ggplot2::ylab("Work Patterns") +
-      ggplot2::scale_fill_gradient2(low = "white", high = "#1d627e") +
-      ggplot2::scale_y_reverse(breaks=seq(1,10)) +
-      wpa::theme_wpa_basic() +
-      ggplot2::theme(legend.position = "none") +
-      ggplot2::annotate("text",
-                        y = myTable_legends$patternRank,
-                        x = 26.5,
-                        label = scales::percent(myTable_legends$FlexibilityIndex), size = 3) +
-      ggplot2::annotate("rect",
-                        xmin = 25,
-                        xmax = 28,
-                        ymin = 0.5,
-                        ymax = 10 + 0.5,
-                        alpha = .2) +
-      ggplot2::annotate("rect",
-                        xmin = 0.5,
-                        xmax = start_hour + 0.5,
-                        ymin = 0.5,
-                        ymax = 10 + 0.5,
-                        alpha = .1,
-                        fill = "gray50") +
-      ggplot2::annotate("rect",
-                        xmin = end_hour + 0.5,
-                        xmax = 24.5,
-                        ymin = 0.5,
-                        ymax = 10 + 0.5,
-                        alpha = .1,
-                        fill = "gray50") +
-      ggplot2::labs(title = "Work Patterns and Flexibility Index",
-                    subtitle = paste0(plot_title,
-                                      "\n",
-                                      "Group Flexibility Index: ", score_tb$FlexibilityIndex),
-                    caption = paste0("% Taking Breaks: ", score_tb$TakeBreaks, "\n",
-                                     "% Change Hours: ", score_tb$ChangeHours, "\n",
-                                     "% Keep Hours Under Control: ", score_tb$ControlHours, "\n",
-                                     extract_date_range(data, return = "text")))
+
+    if(mode == "binary"){
+
+      input_var <- names(data)[grepl(sig_label_, names(data))]
+
+      data_tb <- data_tb[, list(WeekCount = .N,
+                                PersonCount = dplyr::n_distinct(PersonId)),
+                         by = input_var]
+
+      plot_data <-
+        data_tb %>%
+        as.data.frame() %>%
+        dplyr::arrange(desc(WeekCount)) %>%
+        slice(1:10)
+
+      plot_data_long <-
+        plot_data %>%
+        mutate(patternRank = 1:nrow(.)) %>%
+        dplyr::select(patternRank, dplyr::starts_with(sig_label_))  %>%
+        purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
+        purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.)))
+
+    } else if(mode == "prop"){
+
+      input_var <- names(data)[grepl(sig_label_, names(data))]
+
+      sig_label_ <- gsub(
+        pattern = "_sent_",
+        replacement = "_ori_",
+        x = sig_label_
+      )
+
+      ## 00, 01, 02, etc.
+      hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
+
+      # Use `mutate()` method
+      # Will get 10 IDs, not 10 rows
+      # NOTE: `input_var` is used to identify a distinct work pattern
+      plot_data <-
+        data_tb %>%
+        data.table::as.data.table() %>%
+        .[, `:=`(WeekCount = .N,
+                 PersonCount = dplyr::n_distinct(PersonId),
+                 Id = .GRP), # group id assignment
+          by = input_var] %>%
+        dplyr::arrange(desc(WeekCount))
+
+      plot_data <-
+        plot_data %>%
+        dplyr::select(Id, dplyr::contains("_ori_"), WeekCount)  %>%
+        purrr::set_names(nm = gsub(
+          pattern = ".+_ori_",
+          replacement = "",
+          x = names(.)
+        )) %>%
+        purrr::set_names(nm = gsub(
+          pattern = "_.+",
+          replacement = "",
+          x = names(.)
+        )) %>%
+        # Need aggregation
+        .[, Signals_Total := rowSums(.SD), .SDcols = hours_col] %>%
+        .[, c(hours_col) := .SD / Signals_Total, .SDcols = hours_col] %>%
+        .[, Signals_Total := NULL] %>% # Remove unneeded column
+        .[, lapply(.SD, mean, na.rm = TRUE), .SDcols = hours_col, by = list(Id, WeekCount)]
+
+      plot_data_long <-
+        plot_data %>%
+        dplyr::arrange(desc(WeekCount)) %>%
+        dplyr::mutate(patternRank = 1:nrow(.)) %>%
+        slice(1:10)
+
+    } else {
+
+      stop("Invalid value to `mode`")
+    }
+
+
+    plot_data_long %>%
+      plot_hourly_pat(
+        start_hour = start_hour,
+        end_hour = end_hour,
+        legend = myTable_legends,
+        legend_label = "FlexibilityIndex",
+        legend_text = paste("Observed activity"),
+        rows = 10, # static
+        title = "Work Patterns and Flexibility Index",
+        subtitle = paste0(plot_title,
+                          "\n",
+                          "Group Flexibility Index: ",
+                          score_tb$FlexibilityIndex),
+        caption = paste0("% Taking Breaks: ", score_tb$TakeBreaks, "\n",
+                         "% Change Hours: ", score_tb$ChangeHours, "\n",
+                         "% Keep Hours Under Control: ", score_tb$ControlHours, "\n",
+                         extract_date_range(data, return = "text")),
+        ylab = "Work patterns"
+      )
+
+
+
+
+    # plot_data %>%
+    #   mutate(patternRank = 1:nrow(.)) %>%
+    #   dplyr::select(patternRank, dplyr::starts_with(sig_label_))  %>%
+    #   purrr::set_names(nm = gsub(pattern = sig_label_, replacement = "", x = names(.))) %>%
+    #   purrr::set_names(nm = gsub(pattern = "_.+", replacement = "", x = names(.))) %>%
+    #   tidyr::gather(Hours, Freq, -patternRank) %>%
+    #   ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
+    #   ggplot2::geom_tile(height=.5) +
+    #   ggplot2::ylab("Work Patterns") +
+    #   ggplot2::scale_fill_gradient2(low = "white", high = "#1d627e") +
+    #   ggplot2::scale_y_reverse(breaks=seq(1,10)) +
+    #   wpa::theme_wpa_basic() +
+    #   ggplot2::theme(legend.position = "none") +
+    #   ggplot2::annotate("text",
+    #                     y = myTable_legends$patternRank,
+    #                     x = 26.5,
+    #                     label = scales::percent(myTable_legends$FlexibilityIndex), size = 3) +
+    #   ggplot2::annotate("rect",
+    #                     xmin = 25,
+    #                     xmax = 28,
+    #                     ymin = 0.5,
+    #                     ymax = 10 + 0.5,
+    #                     alpha = .2) +
+    #   ggplot2::annotate("rect",
+    #                     xmin = 0.5,
+    #                     xmax = start_hour + 0.5,
+    #                     ymin = 0.5,
+    #                     ymax = 10 + 0.5,
+    #                     alpha = .1,
+    #                     fill = "gray50") +
+    #   ggplot2::annotate("rect",
+    #                     xmin = end_hour + 0.5,
+    #                     xmax = 24.5,
+    #                     ymin = 0.5,
+    #                     ymax = 10 + 0.5,
+    #                     alpha = .1,
+    #                     fill = "gray50") +
+    #   ggplot2::labs(title = "Work Patterns and Flexibility Index",
+    #                 subtitle = paste0(plot_title,
+    #                                   "\n",
+    #                                   "Group Flexibility Index: ", score_tb$FlexibilityIndex),
+    #                 caption = paste0("% Taking Breaks: ", score_tb$TakeBreaks, "\n",
+    #                                  "% Change Hours: ", score_tb$ChangeHours, "\n",
+    #                                  "% Keep Hours Under Control: ", score_tb$ControlHours, "\n",
+    #                                  extract_date_range(data, return = "text")))

  } else if(method == "time"){

--- a/R/plot_hourly_pat.R
+++ b/R/plot_hourly_pat.R
@ -0,0 +1,105 @@
+#' @title
+#' Internal function for plotting the hourly activity patterns.
+#'
+#' @description
+#' This is used within `plot_flex_index()` and `workpatterns_rank()`.
+#'
+#' @param data Data frame containing three columns:
+#'   - `patternRank`
+#'   - `Hours`
+#'   - `Freq`
+#'
+#' @param start_hour Numeric value to specify expected start hour.
+#' @param end_hour Numeric value to specify expected end hour.
+#'
+#' @param legend Data frame containing the columns:
+#'   - `patternRank`
+#'   - Any column to be used in the grey label box, supplied to `legend_label`
+#'
+#' @param legend_label String specifying column to display in the grey label
+#' box
+#'
+#' @param legend_text String to be used in the bottom legend label.
+#'
+#' @param rows Number of rows to show in plot.
+#' @noRd
+#'
+#' @export
+
+plot_hourly_pat <- function(
+    data,
+    start_hour,
+    end_hour,
+    legend,
+    legend_label,
+    legend_text = "Observed activity",
+    rows,
+    title,
+    subtitle,
+    caption,
+    ylab = paste("Top", rows, "activity patterns")
+){
+
+  ## 00, 01, 02, etc.
+  hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
+
+  data %>%
+    utils::head(rows)  %>%
+    tidyr::pivot_longer(
+      cols = hours_col,
+      names_to = "Hours",
+      values_to = "Freq"
+    ) %>%
+    ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
+    ggplot2::geom_tile(height = .5) +
+    ggplot2::ylab(ylab) +
+    ggplot2::scale_y_reverse(expand = c(0, 0), breaks = seq(1, rows)) +
+    wpa::theme_wpa_basic() +
+    ggplot2::scale_x_discrete(position = "top")+
+    ggplot2::theme(
+      axis.title.x = element_blank(),
+      axis.line = element_blank(),
+      axis.ticks = element_blank()
+    ) +
+    # Not operational if not binary
+    scale_fill_continuous(
+      guide = "legend",
+      low = "white",
+      high = "#1d627e",
+      breaks = 0:1,
+      name = "",
+      labels = c("", legend_text)
+    ) +
+    ggplot2::annotate(
+      "text",
+      y = legend$patternRank,
+      x = 26.5,
+      label = legend[[legend_label]],
+      size = 3
+    )+
+    ggplot2::annotate("rect",
+                      xmin = 25,
+                      xmax = 28,
+                      ymin = 0.5,
+                      ymax = rows + 0.5,
+                      alpha = .2) +
+    ggplot2::annotate("rect",
+                      xmin = 0.5,
+                      xmax = start_hour + 0.5,
+                      ymin = 0.5,
+                      ymax = rows + 0.5,
+                      alpha = .1,
+                      fill = "gray50") +
+    ggplot2::annotate("rect",
+                      xmin = end_hour + 0.5,
+                      xmax = 24.5,
+                      ymin = 0.5,
+                      ymax = rows + 0.5,
+                      alpha = .1,
+                      fill = "gray50") +
+    labs(
+      title = title,
+      subtitle = subtitle,
+      caption = caption
+    )
+}
--- a/R/workpatterns_classify.R
+++ b/R/workpatterns_classify.R
@ -35,23 +35,35 @@
 #'
 #' @section Binary Week method:
 #'
-#'   This method classifies each **person-week** into one of the seven
+#'   This method classifies each **person-week** into one of the eight
 #'   archetypes:
-#'   - **0 < 3 hours on**: fewer than 3 hours of active hours
-#'   - **1 Standard with breaks workday**: active for fewer than _expected
-#'   hours_, with no activity outside working hours
-#'   - **2 Standard continuous workday**: number of active hours equal _expected
-#'   hours_, with no activity outside working hours
-#'   - **3 Standard flexible workday**: number of active hours are less than or
-#'   equal to _expected hours_, with some activity outside working hours
-#'   - **4 Long flexible workday**: number of active hours exceed _expected
+#'   - **0 Low Activity (< 3 hours on)**: fewer than 3 hours of active hours
+#'   - **1.1 Standard continuous (expected schedule)**: active hours equal to
+#'   _expected hours_, with all activity confined within the expected start and
+#'   end time
+#'   - **1.2 Standard continuous (shifted schedule)**: active hours equal to
+#'   _expected hours_, with activity occurring beyond either the expected start
+#'   or end time.
+#'   - **2.1 Standard flexible (expected schedule)**: active hours less than or
+#'   equal to _expected hours_, with all activity confined within the expected
+#'   start and end time
+#'   - **2.2 Standard flexible (shifted schedule)**: active hours less than or
+#'   equal to _expected hours_, with activity occurring beyond either the
+#'   expected start or end time.
+#'   - **3 Long flexible workday**: number of active hours exceed _expected
 #'   hours_, with breaks occurring throughout
-#'   - **5 Long continuous workday**: number of active hours exceed _expected
+#'   - **4 Long continuous workday**: number of active hours exceed _expected
 #'   hours_, with activity happening in a continuous block (no breaks)
-#'   - **6 Always on (13h+)**: number of active hours greater than or equal to
+#'   - **5 Always on (13h+)**: number of active hours greater than or equal to
 #'   13
 #'
-#'   This is the recommended method over `pav` for several reasons:
+#'  _Standard_ here denotes the behaviour of not exhibiting total number of
+#'  active hours which exceed the expected total number of hours, as supplied by
+#'  `exp_hours`. _Continuous_ refers to the behaviour of _not_ taking breaks,
+#'  i.e. no inactive hours between the first and last active hours of the day,
+#'  where _flexible_ refers to the contrary.
+#'
+#'  This is the recommended method over `pav` for several reasons:
 #'   1. `bw` ignores _volume effects_, where activity volume can still bias the
 #'   results towards the 'standard working hours'.
 #'   2. It captures the intuition that each individual can have 'light' and
@ -67,11 +79,11 @@
 #' In the standard plot output, the archetypes have been abbreviated to show the
 #' following:
 #'   - **Low Activity** - archetype 0
-#'   - **Standard** - archetype 2
-#'   - **Flexible** - archetypes 1 and 3
-#'   - **Long continuous** - archetype 5
-#'   - **Long flexible** - archetype 4
-#'   - **Always On** - archetype 6
+#'   - **Standard** - archetypes 1.1 and 1.2
+#'   - **Flexible** - archetypes 2.1 and 2.2
+#'   - **Long continuous** - archetype 4
+#'   - **Long flexible** - archetype 3
+#'   - **Always On** - archetype 5
 #'
 #' @section Person Average method:
 #'
@ -148,6 +160,11 @@
 #'   official hours specifying checking in and 9 AM and checking out at 5 PM,
 #'   then `"1700"` should be supplied here.
 #'
+#' @param exp_hours Numeric value representing the number of hours the
+#'   population is expected to be active for throughout the workday. By default,
+#'   this uses the difference between `end_hour` and `start_hour`. Only
+#'   applicable with the 'bw' method.
+#'
 #' @param mingroup Numeric value setting the privacy threshold / minimum group
 #'   size. Defaults to 5.
 #'
@ -184,7 +201,13 @@
 #' em_data %>% workpatterns_classify(method = "bw")
 #'
 #' # Return an area plot
-#' em_data %>% workpatterns_classify(method = "bw", return = "plot-area")
+#' # With custom expected hours
+#' em_data %>%
+#'   workpatterns_classify(
+#'     method = "bw",
+#'     return = "plot-area",
+#'     exp_hours = 7
+#'       )
 #'
 #' \donttest{
 #'
@ -206,6 +229,7 @@ workpatterns_classify <- function(data,
                                  signals = c("email", "IM"),
                                  start_hour = "0900",
                                  end_hour = "1700",
+                                  exp_hours = NULL,
                                  mingroup = 5,
                                  active_threshold = 0,
                                  method = "bw",
@ -232,14 +256,17 @@ workpatterns_classify <- function(data,
  # Method flow -------------------------------------------------------------

  if(method == "bw"){
+
    workpatterns_classify_bw(data = data,
                             hrvar = hrvar,
                             signals = signals,
                             start_hour = start_hour,
                             end_hour = end_hour,
+                             exp_hours = exp_hours,
                             mingroup = mingroup,
                             active_threshold = active_threshold,
                             return = return)
+
  } else if(method == "pav"){

    workpatterns_classify_pav(data = data,
--- a/R/workpatterns_classify_bw.R
+++ b/R/workpatterns_classify_bw.R
@ -53,6 +53,10 @@
 #'   official hours specifying checking in and 9 AM and checking out at 5 PM,
 #'   then `"1700"` should be supplied here.
 #'
+#' @param exp_hours Numeric value representing the number of hours the population
+#'   is expected to be active for throughout the workday. By default, this uses
+#'   the difference between `end_hour` and `start_hour`.
+#'
 #' @param mingroup Numeric value setting the privacy threshold / minimum group
 #'   size. Defaults to 5.
 #'
@ -82,10 +86,14 @@ workpatterns_classify_bw <- function(data,
                                     start_hour = "0900",
                                     end_hour = "1700",
                                     mingroup = 5,
+                                     exp_hours = NULL,
                                     active_threshold = 0,
                                     return = "plot"){

-  ## Handling NULL values passed to hrvar
+  ## set up variable -------------------------------------------------------
+  Active_Hours <- NULL
+
+  ## Handling NULL values passed to hrvar ----------------------------------
  if(is.null(hrvar)){
    data <- totals_col(data)

@ -100,7 +108,7 @@ workpatterns_classify_bw <- function(data,

  }

-  ## convert to data.table
+  ## convert to data.table -------------------------------------------------
  data2 <-
    data %>%
    dplyr::mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%
@ -110,49 +118,53 @@ workpatterns_classify_bw <- function(data,
  # Make sure data.table knows we know we're using it
  .datatable.aware = TRUE

-  ## Save original
-  start_hour_o <- start_hour
-  end_hour_o <- end_hour
-
  ## Coerce to numeric, remove trailing zeros
  start_hour <- as.numeric(gsub(pattern = "00$", replacement = "", x = start_hour))
  end_hour <- as.numeric(gsub(pattern = "00$", replacement = "", x = end_hour))

-  ## Calculate hours within working hours
-  ## e.g. if `end_hour` value is 17, then the reference slot should be 16
-  d <- (end_hour - 1) - start_hour
+  ## Total expected hours --------------------------------------------------
+  ## If `NULL`, use the difference between `end_hour` and `start_hour`

-  ## Warning message
-  if(d >= 23){
+  if(is.null(exp_hours)){
+
+    exp_hours <- end_hour - start_hour
+
+  }
+
+
+  ## Warning message for extreme values of `exp_hours` ---------------------
+
+  if(exp_hours >= 23){

    stop(
      glue::glue(
-        "the total working hours is {d + 1}.
+        "the total working hours is {exp_hours}.
        Please provide a valid range."
        )
    )

-  } else if(d >= 11){
+  } else if(exp_hours >= 12){

    message(
      glue::glue(
-        "Note: the total working hours is {d + 1}.
+        "Note: the total working hours is {exp_hours}.
        Output archetypes will be reduced as the total number of hours is greater than or equal to 12."
      )
    )

-  } else if(d <= 3){
+  } else if(exp_hours <= 3){

    message(
      glue::glue(
-        "Note: the total working hours is {d + 1}.
+        "Note: the total working hours is {exp_hours}.
        Output archetypes will be reduced as the total number of hours is fewer than or equal to 3."
      )
    )

  }

-  ## Text replacement only for allowed values
+  ## Text replacement only for allowed values ------------------------------
+
  if(any(signals %in% c("email", "IM", "unscheduled_calls", "meetings"))){

    signal_set <- gsub(pattern = "email", replacement = "Emails_sent", x = signals) # case-sensitive
@ -166,7 +178,8 @@ workpatterns_classify_bw <- function(data,

  }

-  ## Create 24 summed `Signals_sent` columns
+  ## Create 24 summed `Signals_sent` columns -------------------------------
+
  signal_cols <- purrr::map(0:23, ~combine_signals(data, hr = ., signals = signal_set))
  signal_cols <- bind_cols(signal_cols)

@ -174,15 +187,20 @@ workpatterns_classify_bw <- function(data,
  input_var <- names(signal_cols)

  ## Signals sent by Person and Date
+  ## Data frame with `PersonId`, `Date`, and the 24 signal columns
+
  signals_df <-
    data2 %>%
    .[, c("PersonId", "Date")] %>%
    cbind(signal_cols)

  ## Signal label
+  ## Only show as `Signals_sent` if more than one signal, i.e. if there is
+  ## aggregation of multiple signals
  sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)

-  ## Create binary variable 0 or 1
+  ## Create binary variable 0 or 1  ----------------------------------------
+
  num_cols <- names(which(sapply(signals_df, is.numeric))) # Get numeric columns

  signals_df <-
@ -190,13 +208,15 @@ workpatterns_classify_bw <- function(data,
    data.table::as.data.table() %>%
    # active_threshold: minimum signals to qualify as active
    .[, (num_cols) := lapply(.SD, function(x) ifelse(x > active_threshold, 1, 0)), .SDcols = num_cols] %>%
-    .[, ("Signals_Total") := apply(.SD, 1, sum), .SDcols = input_var]
+    .[, ("Active_Hours") := apply(.SD, 1, sum), .SDcols = input_var]

-  ## Classify PersonId-Signal data by time of day
+  ## Classify PersonId-Signal data by time of day --------------------------
+  ## Long format table that classifies each hour of the day on whether it is
+  ## before, within, or after standard hours

  WpA_classify <-
    signals_df %>%
-    tidyr::gather(!!sym(sig_label), sent, -PersonId,-Date,-Signals_Total) %>%
+    tidyr::gather(!!sym(sig_label), sent, -PersonId,-Date,-Active_Hours) %>%
    data.table::as.data.table()

  WpA_classify[, StartEnd := gsub(pattern = "[^[:digit:]]", replacement = "", x = get(sig_label))]
@ -212,54 +232,112 @@ workpatterns_classify_bw <- function(data,


  WpA_classify <-
-    WpA_classify[, c("PersonId", "Date", "Signals_Total", "HourType", "sent")] %>%
-    .[, .(sent = sum(sent)), by = c("PersonId", "Date", "Signals_Total", "HourType")] %>%
+    WpA_classify[, c("PersonId", "Date", "Active_Hours", "HourType", "sent")] %>%
+    .[, .(sent = sum(sent)), by = c("PersonId", "Date", "Active_Hours", "HourType")] %>%
    tidyr::spread(HourType, sent)%>%
-    left_join(WpA_classify%>%   ## Calculate first and last activity for day_span
-                filter(sent>0)%>%
-                group_by(PersonId,Date)%>%
-                summarise(First_signal=min(Start),
-                          Last_signal=max(End)),
-              by=c("PersonId","Date"))%>%
+    left_join(WpA_classify %>%   ## Calculate first and last activity for day_span
+                filter(sent > 0)%>%
+                group_by(PersonId, Date)%>%
+                summarise(First_signal = min(Start),
+                          Last_signal = max(End)),
+              by = c("PersonId","Date"))%>%
    mutate(Day_Span = Last_signal - First_signal,
-           Signals_Break_hours = Day_Span - Signals_Total)
+           Signals_Break_hours = Day_Span - Active_Hours)


+  ## Working patterns classification ---------------------------------------
+
+  # # Level 1 with 7 personas
+  # personas_levels <-
+  #   c("0 < 3 hours on",
+  #     "1 Standard with breaks workday",
+  #     "2 Standard continuous workday",
+  #     "3 Standard flexible workday",
+  #     "4 Long flexible workday",
+  #     "5 Long continuous workday",
+  #     "6 Always on (13h+)")
+  #
+  # ptn_data_personas <- data.table::copy(WpA_classify)
+  # ptn_data_personas[, Personas := "Unclassified"]
+  # ptn_data_personas[Active_Hours > exp_hours & Active_Hours==Day_Span , Personas := "5 Long continuous workday"]
+  # ptn_data_personas[Active_Hours > exp_hours & Active_Hours<Day_Span, Personas := "4 Long flexible workday"]
+  # ptn_data_personas[Active_Hours <= exp_hours & (Before_start>0|After_end>0), Personas := "3 Standard flexible workday"] #do we want to split betwen block and non block?
+  # ptn_data_personas[Active_Hours == exp_hours & Within_hours == exp_hours , Personas := "2 Standard continuous workday"]
+  # ptn_data_personas[Active_Hours < exp_hours & Before_start==0 & After_end == 0, Personas := "1 Standard with breaks workday"]
+  # ptn_data_personas[Active_Hours >= 13, Personas := "6 Always on (13h+)"]
+  # ptn_data_personas[Active_Hours < 3, Personas := "0 < 3 hours on"]
+  # ptn_data_personas[, Personas := factor(Personas, levels = personas_levels)]
+
+  # Level 2 with 8 personas
  personas_levels <-
-    c("0 < 3 hours on",
-      "1 Standard with breaks workday",
-      "2 Standard continuous workday",
-      "3 Standard flexible workday",
-      "4 Long flexible workday",
-      "5 Long continuous workday",
-      "6 Always on (13h+)")
+    c(
+      "0 Low Activity (< 3 hours on)",
+      "1.1 Standard continuous (expected schedule)",
+      "1.2 Standard continuous (shifted schedule)",
+      "2.1 Standard flexible (expected schedule)",
+      "2.2 Standard flexible (shifted schedule)",
+      "3 Long flexible workday",
+      "4 Long continuous workday",
+      "5 Always on (13h+)"
+    )

  ptn_data_personas <- data.table::copy(WpA_classify)
  ptn_data_personas[, Personas := "Unclassified"]
-  ptn_data_personas[Signals_Total > d & Signals_Total==Day_Span , Personas := "5 Long continuous workday"]
-  ptn_data_personas[Signals_Total > d & Signals_Total<Day_Span, Personas := "4 Long flexible workday"]
-  ptn_data_personas[Signals_Total <= d & (Before_start>0|After_end>0), Personas := "3 Standard flexible workday"] #do we want to split betwen block and non block?
-  ptn_data_personas[Signals_Total == d+1 & Within_hours ==d+1, Personas := "2 Standard continuous workday"]
-  ptn_data_personas[Signals_Total<= d & Before_start==0 & After_end == 0, Personas := "1 Standard with breaks workday"]
-  ptn_data_personas[Signals_Total >= 13, Personas := "6 Always on (13h+)"]
-  ptn_data_personas[Signals_Total < 3, Personas := "0 < 3 hours on"]
+  ptn_data_personas[Active_Hours > exp_hours & Active_Hours==Day_Span , Personas := "4 Long continuous workday"]
+  ptn_data_personas[Active_Hours > exp_hours & Active_Hours<Day_Span, Personas := "3 Long flexible workday"]
+  ptn_data_personas[Active_Hours <= exp_hours & (Before_start>0|After_end>0), Personas := "2.2 Standard flexible (shifted schedule)"]
+  ptn_data_personas[Active_Hours <= exp_hours & Before_start == 0 & After_end == 0, Personas := "2.1 Standard flexible (expected schedule)"]
+  ptn_data_personas[Active_Hours == exp_hours & (Before_start > 0 | After_end > 0) & Active_Hours == Day_Span, Personas := "1.2 Standard continuous (shifted schedule)"]
+  ptn_data_personas[Active_Hours == exp_hours & Before_start == 0 & After_end == 0 & Active_Hours == Day_Span, Personas := "1.1 Standard continuous (expected schedule)"]
+  ptn_data_personas[Active_Hours >= 13, Personas := "5 Always on (13h+)"]
+  ptn_data_personas[Active_Hours < 3, Personas := "0 Low Activity (< 3 hours on)"]
  ptn_data_personas[, Personas := factor(Personas, levels = personas_levels)]

+
+
  # bind cut tree to data frame
  ptn_data_final <-
    ptn_data_personas %>%
    left_join(
      signals_df %>%
-        select(-Signals_Total), # Avoid duplication
+        select(-Active_Hours), # Avoid duplication
      by = c("PersonId","Date")) %>%
    left_join(
      data2 %>%
        select(PersonId, Date, hrvar_str), # Avoid duplication
      by = c("PersonId","Date"))

-  ## Return-chunks
+  ## Long caption -----------------------------------------------------------
+  ## Parameters used in creating visualization
+
+  ## Change first character to upper case
+  firstup <- function(x){
+    substr(x, start = 1, stop = 1) <- toupper(substr(x, start = 1, stop = 1))
+    x
+  }
+
+
+
+  signals_str <- firstup(paste(signals, collapse = ", "))
+
+  cap_long <-
+    glue::glue(
+      "Signals used: {signals_str}.
+      The official hours are {start_hour}:00 and {end_hour}:00, with a total of {exp_hours} expected hours.
+      \n"
+    ) %>%
+    paste(extract_date_range(data2, return = "text"))
+
+
+  ## Return-chunks ----------------------------------------------------------
+
  return_data <- function(){
-    dplyr::as_tibble(ptn_data_final)
+    dplyr::as_tibble(ptn_data_final) %>%
+      dplyr::mutate(
+        Start_hour = start_hour,
+        End_hour = end_hour,
+        Exp_hours = exp_hours
+      )
  }

  # NOW DEFUNCT - NOT USED ---------------------------------------------------
@ -337,7 +415,7 @@ workpatterns_classify_bw <- function(data,
      theme_wpa_basic() +
      labs(title = "Distribution of Working Patterns over time",
           y = "Percentage",
-           caption = extract_date_range(data2, return = "text")) +
+           caption = cap_long) +
      theme(legend.position = "right") +
      scale_y_continuous(labels = scales::percent)
  }
@ -361,9 +439,11 @@ workpatterns_classify_bw <- function(data,
        ptn_data_final %>%
        hrvar_count(hrvar = hrvar, return = "table") %>%
        dplyr::filter(n >= mingroup) %>%
-        dplyr::pull(hrvar)
+        dplyr::pull(hrvar) %>%
+        c("Total") # Ensure included in filter

      ptn_data_final %>%
+        totals_bind(target_col = hrvar, target_value = "Total") %>%
        data.table::as.data.table() %>%
        .[, .(n = .N), by = c("Personas", hrvar)] %>%
        dplyr::as_tibble() %>%
@ -386,7 +466,11 @@ workpatterns_classify_bw <- function(data,

  } else if(return == "plot"){

-    plot_workpatterns_classify_bw(ptn_data_final, range = d)
+    plot_workpatterns_classify_bw(
+      ptn_data_final,
+      range = exp_hours,
+      caption = cap_long
+      )

  } else if(return == "plot-dist"){

@ -399,7 +483,8 @@ workpatterns_classify_bw <- function(data,
  } else if(return == "plot-hrvar"){

    plot_wp_bw_hrvar(
-      x = return_table()
+      x = return_table(),
+      caption = cap_long
    )

  } else if (return == "table"){
@ -409,8 +494,15 @@ workpatterns_classify_bw <- function(data,
  } else if (return == "list"){

    list(data = return_data(),
-         plot = plot_workpatterns_classify_bw(ptn_data_final, range = d),
-         plot_hrvar = plot_wp_bw_hrvar(x = return_table()),
+         plot = plot_workpatterns_classify_bw(
+           ptn_data_final,
+           range = exp_hours,
+           caption = cap_long
+           ),
+         plot_hrvar = plot_wp_bw_hrvar(
+           x = return_table(),
+           caption = cap_long
+           ),
         plot_area = return_plot_area(),
         table = return_table())

@ -425,25 +517,27 @@ workpatterns_classify_bw <- function(data,
 #'
 #' @description Internal use only.
 #'
-#' @param range Numeric. Accepts `d` from the main `workpatterns_classify_bw()`
+#' @param range Numeric. Accepts `exp_hours` from the main `workpatterns_classify_bw()`
 #'   function. Used to update labels on main plot.
+#' @param caption String to override plot captions.
 #'
 #' @noRd

-plot_workpatterns_classify_bw <- function(data, range){
+plot_workpatterns_classify_bw <- function(data, range, caption){

  plot_table <-
    data %>%
    dplyr::mutate(
      PersonasNet =
        case_when(
-          Personas == "0 < 3 hours on" ~ "Low activity",
-          Personas == "1 Standard with breaks workday" ~ "Flexible",
-          Personas == "2 Standard continuous workday" ~ "Standard",
-          Personas == "3 Standard flexible workday" ~ "Flexible",
-          Personas == "4 Long flexible workday" ~ "Long flexible",
-          Personas == "5 Long continuous workday" ~ "Long continuous",
-          Personas == "6 Always on (13h+)" ~ "Always On",
+          Personas == "0 Low Activity (< 3 hours on)" ~ "Low activity",
+          Personas == "2.1 Standard flexible (expected schedule)" ~ "Flexible",
+          Personas == "2.2 Standard flexible (shifted schedule)" ~ "Flexible",
+          Personas == "1.1 Standard continuous (expected schedule)" ~ "Standard",
+          Personas == "1.2 Standard continuous (shifted schedule)" ~ "Standard",
+          Personas == "3 Long flexible workday" ~ "Long flexible",
+          Personas == "4 Long continuous workday" ~ "Long continuous",
+          Personas == "5 Always on (13h+)" ~ "Always On",
          TRUE ~ NA_character_
          )
    ) %>%
@ -591,7 +685,7 @@ plot_workpatterns_classify_bw <- function(data, range){
         subtitle = "Classification of employee-weeks",
         x = "Flexibility level (breaks)",
         y = "Average activity level",
-         caption = extract_date_range(data, return = "text")) +
+         caption = caption) +
    theme_wpa_basic() +
    theme(
      legend.position = "none",
@ -611,7 +705,7 @@ plot_workpatterns_classify_bw <- function(data, range){
 #' @import ggplot2
 #'
 #' @noRd
-plot_wp_bw_hrvar <- function(x){
+plot_wp_bw_hrvar <- function(x, caption){

  x %>%
    tidyr::pivot_longer(cols = -Personas) %>%
@ -625,7 +719,8 @@ plot_wp_bw_hrvar <- function(x){
    coord_flip() +
    scale_y_continuous(labels = scales::percent,
                       limits = c(NA, 1)) +
-    theme_wpa_basic()
+    theme_wpa_basic() +
+    labs(caption = caption)

 }

--- a/R/workpatterns_rank.R
+++ b/R/workpatterns_rank.R
@ -22,8 +22,17 @@
 #' e.g. "`0900"`
 #' @param end_hour A character vector specifying starting hours,
 #' e.g. `"1700"`
-#' @param top number specifying how many top working patterns to display in plot,
+#' @param top numeric value specifying how many top working patterns to display in plot,
 #' e.g. `"10"`
+#'
+#' @param mode string specifying aggregation method for plot. Valid
+#' options include:
+#'   - `"binary"`: convert hourly activity into binary blocks. In the plot, each
+#'   block would display as solid.
+#'   - `"prop"`: calculate proportion of signals in each hour over total signals
+#'   across 24 hours, then average across all work weeks. In the plot, each
+#'   block would display as a heatmap.
+#'
 #' @param return String specifying what to return. This must be one of the
 #'   following strings:
 #'   - `"plot"`
@ -41,6 +50,7 @@
 #' @importFrom data.table ":=" "%like%" "%between%"
 #'
 #' @examples
+#' # Plot by default
 #' workpatterns_rank(
 #'   data = em_data,
 #'   signals = c(
@ -51,6 +61,13 @@
 #'   )
 #'   )
 #'
+#' # Plot with prop / heatmap mode
+#' workpatterns_rank(
+#'   data = em_data,
+#'   mode = "prop"
+#' )
+#'
+#'
 #' @family Visualization
 #' @family Working Patterns
 #'
@ -60,6 +77,7 @@ workpatterns_rank <- function(data,
                              start_hour = "0900",
                              end_hour = "1700",
                              top = 10,
+                              mode = "binary",
                              return = "plot"){

  # Make sure data.table knows we know we're using it
@ -114,28 +132,86 @@ workpatterns_rank <- function(data,
  ## Signal label
  sig_label <- ifelse(length(signal_set) > 1, "Signals_sent", signal_set)

-  ## Create binary variable 0 or 1
+
+  ## This should only pick up `Signals_sent_` prefixed columns
+  ## This is run on `signals_df`
  num_cols <- names(which(sapply(signals_df, is.numeric))) # Get numeric columns

-  signals_df <-
-    signals_df %>%
-    data.table::as.data.table() %>%
-    .[, (num_cols) := lapply(.SD, function(x) ifelse(x > 0, 1, 0)), .SDcols = num_cols]
+  if(mode == "binary"){

-  signals_df <- signals_df[, list(WeekCount = .N,
-                                  PersonCount = dplyr::n_distinct(PersonId)), by = input_var]
+    ## Summarized table performed on `signals_df` ----------------------------
+    ## Section ignoring `signals_df_o`

-  myTable_return <- data.table::setorder(signals_df, -PersonCount)
+    signals_df <-
+      signals_df %>%
+      data.table::as.data.table() %>%
+      .[, (num_cols) := lapply(.SD, function(x) ifelse(x > 0, 1, 0)), .SDcols = num_cols] %>%
+      .[, list(WeekCount = .N, PersonCount = dplyr::n_distinct(PersonId)), by = input_var]

-  if(return == "plot"){
+    myTable_return <- data.table::setorder(signals_df, -PersonCount)
+
+  } else if(mode == "prop"){
+
+    ## Save original `signals_df` before manipulating ------------------------
+    ## Rename `Signals_sent` columns to prevent conflict
+    signals_df_o <- signals_df %>%
+      purrr::set_names(
+        nm = gsub(x = names(.),
+                  replacement = "_ori_",
+                  pattern = "_sent_")
+      ) %>%
+      cbind(select(signals_df, num_cols)) %>% # duplicate signals
+      # Convert `Signals_sent_` prefixed to binary. `Signals_ori_` are intact
+      # Create binary variable 0 or 1
+      .[, (num_cols) := lapply(.SD, function(x) ifelse(x > 0, 1, 0)), .SDcols = num_cols] %>%
+      # Use `mutate()` method
+      .[, `:=`(WeekCount = .N,
+               PersonCount = dplyr::n_distinct(PersonId),
+               Id = .GRP), # group id assignment
+        by = num_cols]
+
+    ## 00, 01, 02, etc.
+    hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
+
+    # Wide table showing proportion of signals by hour
+    # Ranked descending by `WeekCount`
+    wp_prop_tb <-
+      signals_df_o %>%
+      arrange(desc(WeekCount)) %>%
+      dplyr::select(Id, dplyr::contains("_ori_"), WeekCount)  %>%
+      purrr::set_names(nm = gsub(
+        pattern = ".+_ori_",
+        replacement = "",
+        x = names(.)
+      )) %>%
+      purrr::set_names(nm = gsub(
+        pattern = "_.+",
+        replacement = "",
+        x = names(.)
+      )) %>%
+      # Need aggregation
+      .[, Signals_Total := rowSums(.SD), .SDcols = hours_col] %>%
+      .[, c(hours_col) := .SD / Signals_Total, .SDcols = hours_col] %>%
+      .[, Signals_Total := NULL] %>% # Remove unneeded column
+      .[, lapply(.SD, mean, na.rm = TRUE), .SDcols = hours_col, by = list(Id, WeekCount)]
+
+
+  } else {
+
+    stop("invalid value to `mode`.")
+
+  }
+
+
+  if(return == "plot" & mode == "binary"){

    ## Plot return
    sig_label_ <- paste0(sig_label, "_")

-	myTable_return <-
-	  myTable_return %>%
-	  arrange(desc(WeekCount)) %>%
-	  mutate(patternRank= 1:nrow(.))
+  	myTable_return <-
+  	  myTable_return %>%
+  	  arrange(desc(WeekCount)) %>%
+  	  mutate(patternRank= 1:nrow(.))

    ## Table for annotation
    myTable_legends <-
@ -163,66 +239,85 @@ workpatterns_rank <- function(data,
 	    replacement = "",
 	    x = names(.)
 	  )) %>%
-	  utils::head(top)  %>%
-	  tidyr::gather(Hours, Freq, -patternRank)  %>%
-	  ggplot2::ggplot(ggplot2::aes(x = Hours, y = patternRank, fill = Freq)) +
-	  ggplot2::geom_tile(height = .5) +
-	  ggplot2::ylab(paste("Top", top, "activity patterns")) +
-	  #ggplot2::scale_fill_gradient2(low = "white", high = "#1d627e") +
-	  ggplot2::scale_y_reverse(expand = c(0, 0), breaks = seq(1, top)) +
-	  theme_wpa_basic() +
-	  ggplot2::scale_x_discrete(position = "top")+
-      ggplot2::theme(
-        axis.title.x = element_blank(),
-        axis.line = element_blank(),
-        axis.ticks = element_blank()
-      ) +
-      scale_fill_continuous(
-        guide = "legend",
-        low = "white",
-        high = "#1d627e",
-        breaks = 0:1,
-        name = "",
-        labels = c("", paste("Observed", subtitle_signal, "activity"))
-      ) +
-      ggplot2::annotate(
-        "text",
-        y = myTable_legends$patternRank,
-        x = 26.5,
-        label = myTable_legends$WeekCount,
-        size = 3
-      )+
-      ggplot2::annotate("rect",
-               xmin = 25,
-               xmax = 28,
-               ymin = 0.5,
-               ymax = length(myTable_legends$patternRank) + 0.5,
-               alpha = .2) +
-      ggplot2::annotate("rect",
-               xmin = 0.5,
-               xmax = start_hour + 0.5,
-               ymin = 0.5,
-               ymax = length(myTable_legends$patternRank) + 0.5,
-               alpha = .1,
-               fill = "gray50") +
-      ggplot2::annotate("rect",
-               xmin = end_hour + 0.5,
-               xmax = 24.5,
-               ymin = 0.5,
-               ymax = length(myTable_legends$patternRank) + 0.5,
-               alpha = .1,
-               fill = "gray50") +
-  labs(
-    title = "Patterns of digital activity",
-    subtitle = paste("Hourly activity based on", subtitle_signal ,"sent over a week"),
-    caption = paste(
-      "Top", top, "patterns represent", coverage, "of workweeks.", extract_date_range(data, return = "text"))
+	  plot_hourly_pat(
+	    start_hour = start_hour,
+	    end_hour = end_hour,
+	    legend = myTable_legends,
+	    legend_label = "WeekCount",
+	    legend_text = paste("Observed", subtitle_signal, "activity"),
+	    rows = top,
+	    title = "Patterns of digital activity",
+	    subtitle = paste(
+	      "Hourly activity based on",
+	      subtitle_signal,
+	      "sent over a week"),
+	    caption = paste(
+	      "Top",
+	      top,
+	      "patterns represent",
+	      coverage,
+	      "of workweeks.\n",
+	      extract_date_range(data, return = "text")
+	    ),
+	    ylab = paste("Top", top, "activity patterns")
+	  )
+
+  } else if(return == "plot" & mode == "prop"){
+
+
+    ## Table for annotation
+    myTable_legends <-
+      wp_prop_tb %>%
+      arrange(desc(WeekCount)) %>%
+      mutate(patternRank= 1:nrow(.)) %>%
+      dplyr::select(patternRank, WeekCount) %>%
+      dplyr::mutate(WeekPercentage = WeekCount / sum(WeekCount, na.rm = TRUE),
+                    WeekCount = paste0(scales::percent(WeekPercentage, accuracy = 0.1))) %>%
+      utils::head(top)
+
+    ## Coverage
+    coverage <-
+      myTable_legends %>%
+      summarize(total = sum(WeekPercentage)) %>%
+      pull(1) %>%
+      scales::percent(accuracy = 0.1)
+
+
+    ## Run plot
+
+    wp_prop_tb %>%
+      dplyr::mutate(patternRank = 1:nrow(.)) %>%
+      plot_hourly_pat(
+        start_hour = start_hour,
+        end_hour = end_hour,
+        legend = myTable_legends,
+        legend_label = "WeekCount",
+        legend_text = paste("Observed", subtitle_signal, "activity"),
+        rows = top,
+        title = "Patterns of digital activity",
+        subtitle = paste(
+          "Hourly activity based on",
+          subtitle_signal,
+          "sent over a week"),
+        caption = paste(
+          "Top",
+          top,
+          "patterns represent",
+          coverage,
+          "of workweeks.\n",
+          extract_date_range(data, return = "text")
+        ),
+        ylab = paste("Top", top, "activity patterns")
    )

  } else if(return == "table"){

    dplyr::as_tibble(myTable_return)

+  } else if(return == "test"){
+
+    signals_df_o
+
  } else {

    stop("Invalid `return`")
--- a/R/workpatterns_report.R
+++ b/R/workpatterns_report.R
@ -13,6 +13,8 @@
 #' @param signals See `workpatterns_classify()`.
 #' @param start_hour See `workpatterns_classify()`.
 #' @param end_hour See `workpatterns_classify()`.
+#' @param exp_hours See `workpatterns_classify()`.
+#'
 #' @param path Pass the file path and the desired file name, _excluding the file
 #'   extension_. For example, `"scope report"`.
 #' @param timestamp Logical vector specifying whether to include a timestamp in
@ -32,6 +34,7 @@ workpatterns_report <- function(data,
                                signals = c("email", "IM"),
                                start_hour = "0900",
                                end_hour = "1700",
+                                exp_hours = NULL,
                                path = "workpatterns report",
                                timestamp = TRUE){

@ -52,6 +55,7 @@ workpatterns_report <- function(data,
                             signals = signals,
                             start_hour = start_hour,
                             end_hour = end_hour,
+                             exp_hours = exp_hours,
                             return = "list")

  ## plot for `workpatterns_rank`
@ -132,8 +136,10 @@ workpatterns_report <- function(data,
      plot_rank_list[[4]],
      plot_rank_list[[5]],
      plot_rank_list[[6]],
-      plot_rank_list[[7]]) %>% # Expand objects to this list
-    purrr::map_if(is.data.frame, wpa::create_dt, rounding = 2) %>%
+      plot_rank_list[[7]],
+      plot_rank_list[[8]]
+      ) %>% # Expand objects to this list
+    purrr::map_if(is.data.frame, wpa::create_dt, rounding = 1, percent = TRUE) %>%
    purrr::map_if(is.character, md2html)

  ## Set header titles
@ -150,7 +156,9 @@ workpatterns_report <- function(data,
      paste(names(plot_table_list)[[4]]),
      paste(names(plot_table_list)[[5]]),
      paste(names(plot_table_list)[[6]]),
-      paste(names(plot_table_list)[[7]]))
+      paste(names(plot_table_list)[[7]]),
+      paste(names(plot_table_list)[[8]])
+      )

  ## Set header levels
  ## Makes use of level/header system for Markdown syntax
--- a/cran-comments.md
+++ b/cran-comments.md
@ -7,6 +7,10 @@

 0 errors | 0 warnings | 0 note

+## Submission 1.7.0
+
+Bug fixes, new features, and removal of archived dependency 'portes'
+
 ## Submission 1.6.4

 Minor bug fixes and refactoring.
--- a/man/create_dt.Rd
+++ b/man/create_dt.Rd
@ -4,7 +4,7 @@
 \alias{create_dt}
 \title{Create interactive tables in HTML with 'download' buttons.}
 \usage{
-create_dt(x, rounding = 1, freeze = 2)
+create_dt(x, rounding = 1, freeze = 2, percent = FALSE)
 }
 \arguments{
 \item{x}{Data frame to be passed through.}
@ -13,6 +13,9 @@ create_dt(x, rounding = 1, freeze = 2)

 \item{freeze}{Number of columns from the left to 'freeze'. Defaults to 2,
 which includes the row number column.}
+
+\item{percent}{Logical value specifying whether to display numeric columns
+as percentages.}
 }
 \value{
 Returns an HTML widget displaying rectangular data.
--- a/man/flex_index.Rd
+++ b/man/flex_index.Rd
@ -12,7 +12,8 @@ flex_index(
  start_hour = "0900",
  end_hour = "1700",
  return = "plot",
-  plot_method = "common"
+  plot_method = "common",
+  mode = "binary"
 )
 }
 \arguments{
@ -54,6 +55,16 @@ See \code{Value} for more information.}
 \item \code{"common"} plots the ten most common working patterns
 \item \code{"time"} plots the Flexibility Index for the group over time
 }}
+
+\item{mode}{String specifying aggregation method for plot. Only applicable
+when \code{return = "plot"}. Valid options include:
+\itemize{
+\item \code{"binary"}: convert hourly activity into binary blocks. In the plot, each
+block would display as solid.
+\item \code{"prop"}: calculate proportion of signals in each hour over total signals
+across 24 hours, then average across all work weeks. In the plot, each
+block would display as a heatmap.
+}}
 }
 \value{
 A different output is returned depending on the value passed to the \code{return}
--- a/man/network_leiden.Rd
+++ b/man/network_leiden.Rd
@ -44,8 +44,10 @@ of the nodes. Defaults to 0.7.}
 \item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
 to 0.5.}

-\item{seed}{Seed for the random number generator passed to \code{leiden::leiden()}
-to ensure consistency. Only applicable when \code{display} is set to \code{"leiden"}.}
+\item{seed}{Seed for the random number generator passed to either
+\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
+the Leiden algorithm is used, to ensure consistency. Only applicable when
+\code{display} is set to \code{"louvain"} or \code{"leiden"}.}

 \item{desc_hrvar}{Character vector of length 3 containing the HR attributes
 to use when returning the \code{"describe"} output. See \code{network_describe()}.}
--- a/man/network_p2p.Rd
+++ b/man/network_p2p.Rd
@ -86,8 +86,10 @@ of the edges (only for 'ggraph' mode). Defaults to 1.}
 \item{res}{Resolution parameter to be passed to \code{leiden::leiden()}. Defaults
 to 0.5.}

-\item{seed}{Seed for the random number generator passed to \code{leiden::leiden()}
-to ensure consistency. Only applicable when \code{display} is set to \code{"leiden"}.}
+\item{seed}{Seed for the random number generator passed to either
+\code{set.seed()} when the Louvain algorithm is used, or \code{leiden::leiden()} when
+the Leiden algorithm is used, to ensure consistency. Only applicable when
+\code{display} is set to \code{"louvain"} or \code{"leiden"}.}

 \item{algorithm}{String to specify the node placement algorithm to be used.
 Defaults to \code{"mds"} for the deterministic multi-dimensional scaling of
--- a/man/plot_flex_index.Rd
+++ b/man/plot_flex_index.Rd
@ -9,7 +9,8 @@ plot_flex_index(
  sig_label = "Signals_sent_",
  method = "sample",
  start_hour = 9,
-  end_hour = 17
+  end_hour = 17,
+  mode = "binary"
 )
 }
 \arguments{
@ -25,6 +26,8 @@ working patterns; "time" plots the Flexibility Index for the group over time.}
 \item{start_hour}{See \code{flex_index()}.}

 \item{end_hour}{See \code{flex_index()}.}
+
+\item{mode}{See \code{flex_index()}.}
 }
 \value{
 ggplot object. See \code{method}.
--- a/man/workpatterns_classify.Rd
+++ b/man/workpatterns_classify.Rd
@ -11,6 +11,7 @@ workpatterns_classify(
  signals = c("email", "IM"),
  start_hour = "0900",
  end_hour = "1700",
+  exp_hours = NULL,
  mingroup = 5,
  active_threshold = 0,
  method = "bw",
@ -48,6 +49,11 @@ Note that this currently only supports \strong{hourly} increments. If the
 official hours specifying checking in and 9 AM and checking out at 5 PM,
 then \code{"1700"} should be supplied here.}

+\item{exp_hours}{Numeric value representing the number of hours the
+population is expected to be active for throughout the workday. By default,
+this uses the difference between \code{end_hour} and \code{start_hour}. Only
+applicable with the 'bw' method.}
+
 \item{mingroup}{Numeric value setting the privacy threshold / minimum group
 size. Defaults to 5.}

@ -122,24 +128,36 @@ implementations.
 \section{Binary Week method}{


-This method classifies each \strong{person-week} into one of the seven
+This method classifies each \strong{person-week} into one of the eight
 archetypes:
 \itemize{
-\item \strong{0 < 3 hours on}: fewer than 3 hours of active hours
-\item \strong{1 Standard with breaks workday}: active for fewer than \emph{expected
-hours}, with no activity outside working hours
-\item \strong{2 Standard continuous workday}: number of active hours equal \emph{expected
-hours}, with no activity outside working hours
-\item \strong{3 Standard flexible workday}: number of active hours are less than or
-equal to \emph{expected hours}, with some activity outside working hours
-\item \strong{4 Long flexible workday}: number of active hours exceed \emph{expected
+\item \strong{0 Low Activity (< 3 hours on)}: fewer than 3 hours of active hours
+\item \strong{1.1 Standard continuous (expected schedule)}: active hours equal to
+\emph{expected hours}, with all activity confined within the expected start and
+end time
+\item \strong{1.2 Standard continuous (shifted schedule)}: active hours equal to
+\emph{expected hours}, with activity occurring beyond either the expected start
+or end time.
+\item \strong{2.1 Standard flexible (expected schedule)}: active hours less than or
+equal to \emph{expected hours}, with all activity confined within the expected
+start and end time
+\item \strong{2.2 Standard flexible (shifted schedule)}: active hours less than or
+equal to \emph{expected hours}, with activity occurring beyond either the
+expected start or end time.
+\item \strong{3 Long flexible workday}: number of active hours exceed \emph{expected
 hours}, with breaks occurring throughout
-\item \strong{5 Long continuous workday}: number of active hours exceed \emph{expected
+\item \strong{4 Long continuous workday}: number of active hours exceed \emph{expected
 hours}, with activity happening in a continuous block (no breaks)
-\item \strong{6 Always on (13h+)}: number of active hours greater than or equal to
+\item \strong{5 Always on (13h+)}: number of active hours greater than or equal to
 13
 }

+\emph{Standard} here denotes the behaviour of not exhibiting total number of
+active hours which exceed the expected total number of hours, as supplied by
+\code{exp_hours}. \emph{Continuous} refers to the behaviour of \emph{not} taking breaks,
+i.e. no inactive hours between the first and last active hours of the day,
+where \emph{flexible} refers to the contrary.
+
 This is the recommended method over \code{pav} for several reasons:
 \enumerate{
 \item \code{bw} ignores \emph{volume effects}, where activity volume can still bias the
@ -159,11 +177,11 @@ In the standard plot output, the archetypes have been abbreviated to show the
 following:
 \itemize{
 \item \strong{Low Activity} - archetype 0
-\item \strong{Standard} - archetype 2
-\item \strong{Flexible} - archetypes 1 and 3
-\item \strong{Long continuous} - archetype 5
-\item \strong{Long flexible} - archetype 4
-\item \strong{Always On} - archetype 6
+\item \strong{Standard} - archetypes 1.1 and 1.2
+\item \strong{Flexible} - archetypes 2.1 and 2.2
+\item \strong{Long continuous} - archetype 4
+\item \strong{Long flexible} - archetype 3
+\item \strong{Always On} - archetype 5
 }
 }

@ -206,7 +224,13 @@ Hourly Collaboration Flexible Query.
 em_data \%>\% workpatterns_classify(method = "bw")

 # Return an area plot
-em_data \%>\% workpatterns_classify(method = "bw", return = "plot-area")
+# With custom expected hours
+em_data \%>\%
+  workpatterns_classify(
+    method = "bw",
+    return = "plot-area",
+    exp_hours = 7
+      )

 \donttest{

--- a/man/workpatterns_classify_bw.Rd
+++ b/man/workpatterns_classify_bw.Rd
@ -12,6 +12,7 @@ workpatterns_classify_bw(
  start_hour = "0900",
  end_hour = "1700",
  mingroup = 5,
+  exp_hours = NULL,
  active_threshold = 0,
  return = "plot"
 )
@ -45,6 +46,10 @@ then \code{"1700"} should be supplied here.}
 \item{mingroup}{Numeric value setting the privacy threshold / minimum group
 size. Defaults to 5.}

+\item{exp_hours}{Numeric value representing the number of hours the population
+is expected to be active for throughout the workday. By default, this uses
+the difference between \code{end_hour} and \code{start_hour}.}
+
 \item{active_threshold}{A numeric value specifying the minimum number of
 signals to be greater than in order to qualify as \emph{active}. Defaults to 0.}

--- a/man/workpatterns_rank.Rd
+++ b/man/workpatterns_rank.Rd
@ -10,6 +10,7 @@ workpatterns_rank(
  start_hour = "0900",
  end_hour = "1700",
  top = 10,
+  mode = "binary",
  return = "plot"
 )
 }
@ -32,9 +33,19 @@ e.g. "\verb{0900"}}
 \item{end_hour}{A character vector specifying starting hours,
 e.g. \code{"1700"}}

-\item{top}{number specifying how many top working patterns to display in plot,
+\item{top}{numeric value specifying how many top working patterns to display in plot,
 e.g. \code{"10"}}

+\item{mode}{string specifying aggregation method for plot. Valid
+options include:
+\itemize{
+\item \code{"binary"}: convert hourly activity into binary blocks. In the plot, each
+block would display as solid.
+\item \code{"prop"}: calculate proportion of signals in each hour over total signals
+across 24 hours, then average across all work weeks. In the plot, each
+block would display as a heatmap.
+}}
+
 \item{return}{String specifying what to return. This must be one of the
 following strings:
 \itemize{
@ -59,6 +70,7 @@ table of working patterns, ranked from the most common to the
 least.
 }
 \examples{
+# Plot by default
 workpatterns_rank(
  data = em_data,
  signals = c(
@ -69,6 +81,13 @@ workpatterns_rank(
  )
  )

+# Plot with prop / heatmap mode
+workpatterns_rank(
+  data = em_data,
+  mode = "prop"
+)
+
+
 }
 \seealso{
 Other Visualization: 
--- a/man/workpatterns_report.Rd
+++ b/man/workpatterns_report.Rd
@ -10,6 +10,7 @@ workpatterns_report(
  signals = c("email", "IM"),
  start_hour = "0900",
  end_hour = "1700",
+  exp_hours = NULL,
  path = "workpatterns report",
  timestamp = TRUE
 )
@ -26,6 +27,8 @@ workpatterns_report(

 \item{end_hour}{See \code{workpatterns_classify()}.}

+\item{exp_hours}{See \code{workpatterns_classify()}.}
+
 \item{path}{Pass the file path and the desired file name, \emph{excluding the file
 extension}. For example, \code{"scope report"}.}

--- a/tests/testthat.R
+++ b/tests/testthat.R
@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/tests.html
+# * https://testthat.r-lib.org/reference/test_package.html#special-files
+
+library(testthat)
+library(wpa)
+
+test_check("wpa")
--- a/tests/testthat/test-flex_index.R
+++ b/tests/testthat/test-flex_index.R
@ -0,0 +1,27 @@
+test_that(
+  desc = "flex_index data output returns data frame",
+  code = {
+    out <- flex_index(em_data, signals = "IM", return = "data")
+
+    expect_s3_class(out, "data.frame")
+
+    out <- flex_index(em_data, signals = "unscheduled_calls", return = "data")
+
+    expect_s3_class(out, "data.frame")
+
+    out <- flex_index(em_data, signals = "meetings", return = "data")
+
+    expect_s3_class(out, "data.frame")
+  }
+)
+
+test_that("flex_index plots returns ggplot object",{
+  p <- flex_index(em_data, signals = "meetings", return = "plot")
+  expect_s3_class(p, "ggplot")
+
+  p <- flex_index(em_data, signals = "IM", return = "plot")
+  expect_s3_class(p, "ggplot")
+
+  p <- flex_index(em_data, signals = "unscheduled_calls", return = "plot")
+  expect_s3_class(p, "ggplot")
+})
--- a/tests/testthat/test-workpatterns_classify.R
+++ b/tests/testthat/test-workpatterns_classify.R
@ -0,0 +1,33 @@
+test_that(
+  desc = "workpatterns data output row matches - signals",
+  code = {
+    out <- workpatterns_classify(em_data, signals = "IM", return = "data")
+    expect_equal(
+      object = nrow(out),
+      expected = nrow(em_data)
+    )
+
+    out <- workpatterns_classify(em_data, signals = "unscheduled_calls", return = "data")
+    expect_equal(
+      object = nrow(out),
+      expected = nrow(em_data)
+    )
+
+    out <- workpatterns_classify(em_data, signals = "meetings", return = "data")
+    expect_equal(
+      object = nrow(out),
+      expected = nrow(em_data)
+    )
+  }
+)
+
+test_that("workpatterns plots returns ggplot object",{
+  p <- workpatterns_classify(em_data, signals = "meetings", return = "plot")
+  expect_s3_class(p, "ggplot")
+
+  p <- workpatterns_classify(em_data, signals = "IM", return = "plot")
+  expect_s3_class(p, "ggplot")
+
+  p <- workpatterns_classify(em_data, signals = "unscheduled_calls", return = "plot")
+  expect_s3_class(p, "ggplot")
+})
--- a/workpatterns-report-20220613_151528.html
+++ b/workpatterns-report-20220613_151528.html