% wpa/man/subject_scan.Rd
%
% 110 lines
% 3.0 KiB
% R

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/subject_scan.R
\name{subject_scan}
\alias{subject_scan}
\alias{tm_scan}
\title{Count top words in subject lines grouped by a custom attribute}
\usage{
subject_scan(
data,
hrvar,
mode = NULL,
top_n = 10,
token = "words",
return = "plot",
weight = NULL,
stopwords = NULL,
...
)
tm_scan(
data,
hrvar,
mode = NULL,
top_n = 10,
token = "words",
return = "plot",
weight = NULL,
stopwords = NULL,
...
)
}
\arguments{
\item{data}{A Meeting Query dataset in the form of a data frame.}
\item{hrvar}{String containing the name of the HR Variable by which to split
metrics. Note that the prefix \code{'Organizer_'} or equivalent will be
required.}
\item{mode}{String specifying what variable to use for grouping subject
words. Valid values include:
\itemize{
\item \code{"hours"}
\item \code{"days"}
\item \code{NULL} (defaults to \code{hrvar})
}
When the value passed to \code{mode} is not \code{NULL}, the value passed to \code{hrvar}
will be discarded and overwritten by the setting specified in \code{mode}.}
\item{top_n}{Numeric value specifying the top number of words to show.}
\item{token}{A character vector accepting either \code{"words"} or \code{"ngrams"},
determining the type of tokenisation to return.}
\item{return}{String specifying what to return. This must be one of the
following strings:
\itemize{
\item \code{"plot"}
\item \code{"table"}
\item \code{"data"}
}
See \code{Value} for more information.}
\item{weight}{String specifying the column name of a numeric variable for
weighting data, such as \code{"Invitees"}. The column must contain positive
integers. Defaults to \code{NULL}, where no weighting is applied.}
\item{stopwords}{A character vector OR a single-column data frame labelled
\code{'word'} containing custom stopwords to remove.}
\item{...}{Additional parameters to pass to \code{tm_clean()}.}
}
\value{
A different output is returned depending on the value passed to the \code{return}
argument:
\itemize{
\item \code{"plot"}: 'ggplot' object. A heatmapped grid.
\item \code{"table"}: data frame. A summary table for the metric.
\item \code{"data"}: data frame.
}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
This function generates a matrix of the top occurring words in meeting
subject lines, grouped by a specified attribute such as an organisational
attribute, day of the week, or hour of the day.
}
\examples{
# return a heatmap table for words
mt_data \%>\% subject_scan(hrvar = "Organizer_Organization")
# return a heatmap table for ngrams
mt_data \%>\%
subject_scan(
hrvar = "Organizer_Organization",
token = "ngrams",
n = 2)
# return raw table format
mt_data \%>\% subject_scan(hrvar = "Organizer_Organization", return = "table")
# grouped by hours
mt_data \%>\% subject_scan(mode = "hours")
# grouped by days
mt_data \%>\% subject_scan(mode = "days")
}