зеркало из https://github.com/microsoft/wpa.git
111 строки
3.0 KiB
R
111 строки
3.0 KiB
R
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/subject_scan.R
|
|
\name{subject_scan}
|
|
\alias{subject_scan}
|
|
\alias{tm_scan}
|
|
\title{Count top words in subject lines grouped by a custom attribute}
|
|
\usage{
|
|
subject_scan(
|
|
data,
|
|
hrvar,
|
|
mode = NULL,
|
|
top_n = 10,
|
|
token = "words",
|
|
return = "plot",
|
|
weight = NULL,
|
|
stopwords = NULL,
|
|
...
|
|
)
|
|
|
|
tm_scan(
|
|
data,
|
|
hrvar,
|
|
mode = NULL,
|
|
top_n = 10,
|
|
token = "words",
|
|
return = "plot",
|
|
weight = NULL,
|
|
stopwords = NULL,
|
|
...
|
|
)
|
|
}
|
|
\arguments{
|
|
\item{data}{A Meeting Query dataset in the form of a data frame.}
|
|
|
|
\item{hrvar}{String containing the name of the HR Variable by which to split
|
|
metrics. Note that the prefix \code{'Organizer_'} or equivalent will be
|
|
required.}
|
|
|
|
\item{mode}{String specifying what variable to use for grouping subject
|
|
words. Valid values include:
|
|
\itemize{
|
|
\item \code{"hours"}
|
|
\item \code{"days"}
|
|
\item \code{NULL} (defaults to \code{hrvar})
|
|
}
|
|
When the value passed to \code{mode} is not \code{NULL}, the value passed to \code{hrvar}
|
|
will be discarded and overwritten by the setting specified in \code{mode}.}
|
|
|
|
\item{top_n}{Numeric value specifying the top number of words to show.}
|
|
|
|
\item{token}{A character vector accepting either \code{"words"} or \code{"ngrams"},
|
|
determining type of tokenisation to return.}
|
|
|
|
\item{return}{String specifying what to return. This must be one of the
|
|
following strings:
|
|
\itemize{
|
|
\item \code{"plot"}
|
|
\item \code{"table"}
|
|
\item \code{"data"}
|
|
}
|
|
|
|
See \code{Value} for more information.}
|
|
|
|
\item{weight}{String specifying the column name of a numeric variable for
|
|
weighting data, such as \code{"Invitees"}. The column must contain positive
|
|
integers. Defaults to \code{NULL}, where no weighting is applied.}
|
|
|
|
\item{stopwords}{A character vector OR a single-column data frame labelled
|
|
\code{'word'} containing custom stopwords to remove.}
|
|
|
|
\item{...}{Additional parameters to pass to \code{tm_clean()}.}
|
|
}
|
|
\value{
|
|
A different output is returned depending on the value passed to the \code{return}
|
|
argument:
|
|
\itemize{
|
|
\item \code{"plot"}: 'ggplot' object. A heatmapped grid.
|
|
\item \code{"table"}: data frame. A summary table for the metric.
|
|
\item \code{"data"}: data frame.
|
|
}
|
|
}
|
|
\description{
|
|
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
|
|
|
|
This function generates a matrix of the top occurring words in meetings,
|
|
grouped by a specified attribute such as an organisational attribute, day of the
|
|
week, or hour of the day.
|
|
}
|
|
\examples{
|
|
\donttest{
|
|
# return a heatmap table for words
|
|
mt_data \%>\% subject_scan(hrvar = "Organizer_Organization")
|
|
|
|
# return a heatmap table for ngrams
|
|
mt_data \%>\%
|
|
subject_scan(
|
|
hrvar = "Organizer_Organization",
|
|
token = "ngrams",
|
|
n = 2)
|
|
|
|
# return raw table format
|
|
mt_data \%>\% subject_scan(hrvar = "Organizer_Organization", return = "table")
|
|
|
|
# grouped by hours
|
|
mt_data \%>\% subject_scan(mode = "hours")
|
|
|
|
# grouped by days
|
|
mt_data \%>\% subject_scan(mode = "days")
|
|
}
|
|
}
|