зеркало из https://github.com/microsoft/wpa.git
251 строка
9.6 KiB
R
251 строка
9.6 KiB
R
# --------------------------------------------------------------------------------------------
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
|
|
# --------------------------------------------------------------------------------------------
|
|
|
|
#' @title Generate a Data Validation report in HTML
|
|
#'
|
|
#' @description
|
|
#' The function generates an interactive HTML report using
|
|
#' Standard Query data as an input. The report contains a checks on
|
|
#' Workplace Analytics query outputs, to provide diagnostic information
|
|
#' for the Analyst pre-analysis.
|
|
#'
|
|
#' For your input data or meeting_data, please use the function `wpa::import_wpa()`
|
|
#' to import your csv query files into R. This function will standardize format
|
|
#' and prepare the data as input for this report.
|
|
#'
|
|
#' @param data A Standard Person Query dataset in the form of a data frame.
|
|
#' @param meeting_data An optional Meeting Query dataset in the form of a data frame.
|
|
#' @param hrvar HR Variable by which to split metrics, defaults to "Organization"
|
|
#' but accepts any character vector, e.g. "Organization"
|
|
#' @param path Pass the file path and the desired file name, _excluding the file extension_.
|
|
#' @param timestamp Logical vector specifying whether to include a timestamp in the file name.
|
|
#' Defaults to TRUE.
|
|
#'
|
|
#' @examples
|
|
#' \dontrun{
|
|
#' validation_report(dv_data,
|
|
#' meeting_data = mt_data,
|
|
#' hrvar = "Organization")
|
|
#'
|
|
#' }
|
|
#'
|
|
#' @importFrom purrr map_if
|
|
#' @importFrom dplyr `%>%`
|
|
#'
|
|
#' @family Reports
|
|
#'
|
|
#' @export
|
|
validation_report <- function(data,
|
|
meeting_data = NULL,
|
|
hrvar = "Organization",
|
|
path = "validation report",
|
|
timestamp = TRUE){
|
|
|
|
## Create timestamped path (if applicable)
|
|
if(timestamp == TRUE){
|
|
newpath <- paste(path, wpa::tstamp())
|
|
} else {
|
|
newpath <- path
|
|
}
|
|
|
|
## Dynamic: if meeting data is not available
|
|
if(is.null(meeting_data)){
|
|
subline_obj <- "[Note] Subject line analysis is unavailable as no meeting query is supplied."
|
|
subline_obj2 <- ""
|
|
} else {
|
|
subline_obj <- meeting_data %>% subject_validate(return = "text")
|
|
subline_obj2 <- meeting_data %>% subject_validate(return = "table")
|
|
}
|
|
|
|
## Dynamic: if `HireDate` is not available
|
|
if("HireDate" %in% names(data)){
|
|
tenure_obj <- data %>% identify_tenure(return = "text") %>% suppressWarnings()
|
|
tenure_obj2 <- data %>% identify_tenure(return = "plot") %>% suppressWarnings()
|
|
} else {
|
|
tenure_obj <- "[Note] Tenure analysis is unavailable as the data has no `HireDate` variable."
|
|
tenure_obj2 <- ""
|
|
}
|
|
|
|
## Dynamic: if `WorkingStartTimeSetInOutlook` and `WorkingEndTimeSetInOutlook` is not available
|
|
wktimes_var <- c("WorkingStartTimeSetInOutlook", "WorkingEndTimeSetInOutlook")
|
|
wktimes_msg <- "[Note] Outlook hours analysis is unavailable as the data does not have the following variables:"
|
|
|
|
if(all(wktimes_var %in% names(data))){
|
|
wktimes_obj <- data %>% flag_outlooktime(return = "text")
|
|
shift_obj <- data %>% identify_shifts(return = "plot")
|
|
} else {
|
|
wktimes_obj <- paste(wktimes_msg, paste(wrap(wktimes_var, "`"), collapse = ", "), collapse = "\n")
|
|
shift_obj <- paste(wktimes_msg, paste(wrap(wktimes_var, "`"), collapse = ", "), collapse = "\n")
|
|
}
|
|
|
|
## Dynamic: Track HR changes
|
|
mtry <- try(track_HR_change(data, hrvar = hrvar), silent = TRUE)
|
|
|
|
if (!inherits(mtry, "try-error")) {
|
|
trackhr_obj <- data %>% track_HR_change(hrvar = hrvar, return = "plot")
|
|
trackhr_obj2 <- data %>% track_HR_change(hrvar = hrvar, return = "table")
|
|
} else {
|
|
trackhr_obj <- "[Error!] unable to parse HR changes."
|
|
trackhr_obj2 <- ""
|
|
}
|
|
|
|
## Outputs as accessed here
|
|
## Can be data frames, plot objects, or text
|
|
output_list <-
|
|
list(read_preamble("blank.md"), # Header - Data Available
|
|
data %>% check_query(return = "text", validation = TRUE),
|
|
|
|
read_preamble("blank.md"), # Header - 1.1 Workplace Analytics Settings
|
|
read_preamble("outlook_settings_1.md"),
|
|
|
|
shift_obj, # See `identify_shifts()` dynamic treatment above
|
|
|
|
read_preamble("outlook_settings_2.md"),
|
|
paste(">", wktimes_obj),
|
|
paste(">", data %>% flag_ch_ratio(return = "text")),
|
|
read_preamble("outlook_settings_3.md"),
|
|
|
|
read_preamble("meeting_exclusions_1.md"), #item 9, Header - 1.2 Meeting Exclusions
|
|
paste(">", subline_obj),
|
|
subline_obj2,
|
|
read_preamble("meeting_exclusions_2.md"),
|
|
|
|
read_preamble("organizational_data_quality.md"), #13, Header - 2. Organizational Data Quality
|
|
read_preamble("attributes_available.md"),#14
|
|
data %>% hrvar_count_all(return = "table"),
|
|
|
|
read_preamble("groups_under_privacy_threshold_1.md"), #16, Header - 2.2 Groups under Privacy Threshold
|
|
paste(">", data %>% identify_privacythreshold(return="text")),
|
|
read_preamble("groups_under_privacy_threshold_2.md"),
|
|
data %>% identify_privacythreshold(return="table"),
|
|
|
|
read_preamble("distribution_employees_key_attributes.md"), #20, Header - 2.3 Distribution employees key attributes
|
|
data %>% hrvar_count(hrvar = hrvar, return = "plot"),
|
|
data %>% hrvar_count(hrvar = hrvar, return = "table"),
|
|
|
|
read_preamble("updates_organizational_data.md"), #23, Header - 2.4 Updates to Organizational Data
|
|
read_preamble("blank.md"), #placeholder for track_HR_change message obj,
|
|
trackhr_obj,
|
|
trackhr_obj2,
|
|
|
|
read_preamble("quality_tenure_data.md"), #27, Header - 2.5 Quality Tenure Data
|
|
paste(">", tenure_obj), # Text
|
|
tenure_obj2, # Plot
|
|
|
|
read_preamble("m365_data_quality.md"), #30, Header - 3. M365 Data Quality
|
|
read_preamble("population_over_time.md"), #Header - 3.1
|
|
data %>% hr_trend(return = "plot"),
|
|
|
|
read_preamble("nonknowledge_workers.md"), #33, Header - 3.2 Non-knowledge workers
|
|
paste(">", data %>% identify_nkw(return = "text")),
|
|
data %>% identify_nkw(return = "data_summary"),
|
|
|
|
read_preamble("holiday_weeks_1.md"), #36, Header - 3.3 Company Holiday weeks
|
|
paste(">", data %>% identify_holidayweeks(return = "text")),
|
|
read_preamble("holiday_weeks_2.md"),
|
|
data %>% identify_holidayweeks(return = "plot"),
|
|
|
|
read_preamble("inactive_weeks_1.md"), #40, Header - 3.4 Inactive weeks
|
|
paste(">", data %>% identify_inactiveweeks(return = "text")),
|
|
read_preamble("inactive_weeks_2.md"),
|
|
|
|
read_preamble("extreme_values.md"), #43, Header - 3.5 Extreme values
|
|
paste(">",data %>% flag_extreme(metric = "Email_hours", threshold = 80, person = TRUE, return = "text")),
|
|
paste(">",data %>% flag_extreme(metric = "Email_hours", threshold = 80, person = FALSE, return = "text")),
|
|
|
|
paste(">",data %>% flag_extreme(metric = "Meeting_hours", threshold = 80, person = TRUE, return = "text")),
|
|
paste(">",data %>% flag_extreme(metric = "Meeting_hours", threshold = 80, person = FALSE, return = "text")),
|
|
|
|
paste(">",data %>% flag_extreme(metric = "Call_hours", threshold = 40, person = TRUE, return = "text")),
|
|
paste(">",data %>% flag_extreme(metric = "Call_hours", threshold = 40, person = FALSE, return = "text")),
|
|
|
|
paste(">",data %>% flag_extreme(metric = "Instant_Message_hours", threshold = 40, person = TRUE, return = "text")),
|
|
paste(">",data %>% flag_extreme(metric = "Instant_Message_hours", threshold = 40, person = FALSE, return = "text")),
|
|
|
|
paste(">",data %>% flag_extreme(metric = "Conflicting_meeting_hours", threshold = 70, person = TRUE, return = "text")),
|
|
paste(">",data %>% flag_extreme(metric = "Conflicting_meeting_hours", threshold = 70, person = FALSE, return = "text"))) %>%
|
|
|
|
purrr::map_if(is.data.frame, create_dt, rounding = 0) %>%
|
|
purrr::map_if(is.character, md2html)
|
|
|
|
## Title of the outputs
|
|
title_list <-
|
|
c("Data Available",
|
|
"Query Check",
|
|
|
|
"1. Workplace Analytics Settings",
|
|
"1.1 Outlook Settings",
|
|
"",
|
|
"",
|
|
"",
|
|
"",
|
|
"",
|
|
"1.2 Meeting Exclusion Rules",
|
|
"",
|
|
"",
|
|
"",
|
|
|
|
"2. Organizational Data Quality",
|
|
"2.1 Attributes Available",
|
|
"",
|
|
"2.2 Groups Under Privacy Threshold",
|
|
"",
|
|
"",
|
|
"",
|
|
|
|
"2.3 Distribution of Employees in Key Attributes",
|
|
"",
|
|
"",
|
|
|
|
"2.4 Updates to Organizational Data",
|
|
"",
|
|
"",
|
|
"",
|
|
|
|
"2.5 Quality of Tenure Data",
|
|
"",
|
|
"",
|
|
|
|
"3. M365 Data Quality",
|
|
"3.1 Population Over Time",
|
|
"",
|
|
"3.2 Non-knowledge Workers",
|
|
"",
|
|
"",
|
|
"3.3 Company Holiday Weeks",
|
|
"",
|
|
"",
|
|
"",
|
|
"3.4 Inactive Weeks",
|
|
"",
|
|
"",
|
|
"3.5 Extreme Values",
|
|
"3.5.1 Extreme values: Email",
|
|
"",
|
|
"3.5.2 Extreme values: Meeting",
|
|
"",
|
|
"3.5.3 Extreme values: Calls",
|
|
"",
|
|
"3.5.4 Extreme values: IM",
|
|
"",
|
|
"3.5.5 Extreme values: Conflicting Meetings",
|
|
"")
|
|
|
|
# Set header levels
|
|
n_title <- length(title_list)
|
|
levels_list <- rep(4, n_title)
|
|
levels_list[c(1, 3, 14, 31)] <- 2 # Section header
|
|
|
|
generate_report(title = "Data Validation Report",
|
|
filename = newpath,
|
|
outputs = output_list,
|
|
titles = title_list,
|
|
subheaders = rep("", n_title),
|
|
echos = rep(FALSE, n_title),
|
|
levels = levels_list,
|
|
theme = "cosmo",
|
|
preamble = read_preamble("validation_report.md")) # See inst/preamble/validation_report.md
|
|
}
|