wpa/R/subject_validate_report.R

80 строки
2.9 KiB
R

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Generate Meeting Text Mining report in HTML for Common Exclusion Terms
#'
#' @description
#' This function creates a text mining report in HTML based on Meeting Subject
#' Lines for data validation. It scans a meeting query and highlights meetings
#' with subjects that include common exclusion terms. It is intended to be used
#' by an analyst to validate raw data before conducting additional analysis.
#' Returns a HTML report by default.
#'
#' @family Data Validation
#'
#' @param data A Meeting Query dataset in the form of a data frame.
#' @param path Pass the file path and the desired file name, _excluding the
#'   file extension_. For example, "meeting text mining report".
#' @param timestamp A single, non-missing logical value specifying whether to
#'   include a timestamp in the file name. Defaults to TRUE. When TRUE, the
#'   output of `wpa::tstamp()` is appended to `path`, separated by a space.
#' @param keep A numeric vector specifying maximum number of words to keep.
#'   NOTE(review): this argument is accepted but not referenced in the function
#'   body; presumably it was meant to be forwarded to the text mining helpers -
#'   confirm against their signatures before wiring it in.
#' @param seed A numeric vector to set seed for random generation.
#'   NOTE(review): this argument is accepted but not referenced in the function
#'   body; presumably it was meant to be forwarded to the text mining helpers -
#'   confirm against their signatures before wiring it in.
#'
#' @return The value returned by `generate_report()`. The function is called
#'   for the report that `generate_report()` produces under the file name built
#'   from `path` (and, optionally, the timestamp).
#'
#' @export
subject_validate_report <- function(data,
                                    path = "Subject Lines Validation Report",
                                    timestamp = TRUE,
                                    keep = 100,
                                    seed = 100) {

  ## Fail early with a readable message instead of the cryptic error that
  ## `if ()` raises for NA, zero-length or multi-element conditions.
  if (length(timestamp) != 1L || is.na(timestamp)) {
    stop("`timestamp` must be a single TRUE or FALSE value.", call. = FALSE)
  }

  ## Create timestamped path (if applicable).
  ## The comparison against TRUE is kept (rather than `isTRUE()`) so that
  ## truthy non-logical values such as 1 keep behaving as before.
  if (timestamp == TRUE) {
    newpath <- paste(path, wpa::tstamp())
  } else {
    newpath <- path
  }

  # Get results: the flagged meetings feed every text mining output, and the
  # summary table is computed once and reused as the first report element.
  test_data <- data %>%
    subject_validate(return = "data") %>%
    filter(subjectFlag == 1)
  results <- data %>% subject_validate(return = "table")

  # Set outputs. Any element that is a data frame is converted with
  # `create_dt()`; all other elements are passed through unchanged.
  output_list <-
    list(
      results,
      test_data %>% tm_wordcloud(),
      test_data %>% tm_freq(token = "words"),
      test_data %>% tm_freq(token = "words", return = "table"),
      test_data %>% tm_freq(token = "ngrams"),
      test_data %>% tm_freq(token = "ngrams", return = "table"),
      test_data %>% tm_cooc(),
      test_data %>% tm_cooc(return = "table")
    ) %>%
    purrr::map_if(is.data.frame, create_dt)

  # Set header titles. Titles line up positionally with `output_list`; an
  # empty string is supplied for outputs that continue the preceding section.
  title_list <-
    c(
      "Exclusion Terms Identified",
      "Related Words",
      "",
      "",
      "Common Phrases",
      "",
      "Word Co-occurrence",
      ""
    )

  # Set header levels: every output uses the same level.
  n_title <- length(title_list)
  levels_list <- rep(3, n_title)

  # Generate report
  generate_report(
    title = "Subject Lines Validation Report",
    filename = newpath,
    outputs = output_list,
    titles = title_list,
    subheaders = rep("", n_title),
    echos = rep(FALSE, n_title),
    levels = levels_list,
    theme = "cosmo",
    preamble = ""
  )
}