wpa/man/subject_scan.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/subject_scan.R
\name{subject_scan}
\alias{subject_scan}
\alias{tm_scan}
\title{Count top words in subject lines grouped by a custom attribute}
\usage{
subject_scan(
  data,
  hrvar,
  mode = NULL,
  top_n = 10,
  token = "words",
  return = "plot",
  weight = NULL,
  stopwords = NULL,
  ...
)

tm_scan(
  data,
  hrvar,
  mode = NULL,
  top_n = 10,
  token = "words",
  return = "plot",
  weight = NULL,
  stopwords = NULL,
  ...
)
}
\arguments{
\item{data}{A Meeting Query dataset in the form of a data frame.}

\item{hrvar}{String containing the name of the HR Variable by which to split
metrics. Note that the prefix \code{'Organizer_'} or equivalent will be
required.}

\item{mode}{String specifying what variable to use for grouping subject
words. Valid values include:
\itemize{
\item \code{"hours"}
\item \code{"days"}
\item \code{NULL} (defaults to \code{hrvar})
}

When the value passed to \code{mode} is not \code{NULL}, the value passed to \code{hrvar}
will be discarded and overridden by the setting specified in \code{mode}.}

\item{top_n}{Numeric value specifying the top number of words to show.}

\item{token}{A character vector accepting either \code{"words"} or \code{"ngrams"},
determining the type of tokenisation to return.}

\item{return}{String specifying what to return. This must be one of the
following strings:
\itemize{
\item \code{"plot"}
\item \code{"table"}
\item \code{"data"}
}

See \code{Value} for more information.}

\item{weight}{String specifying the column name of a numeric variable for
weighting data, such as \code{"Invitees"}. The column must contain positive
integers. Defaults to \code{NULL}, where no weighting is applied.}

\item{stopwords}{A character vector OR a single-column data frame labelled
\code{'word'} containing custom stopwords to remove.}

\item{...}{Additional parameters to pass to \code{tm_clean()}.}
}
\value{
A different output is returned depending on the value passed to the \code{return}
argument:
\itemize{
\item \code{"plot"}: 'ggplot' object. A heatmapped grid.
\item \code{"table"}: data frame. A summary table for the metric.
\item \code{"data"}: data frame.
}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}

This function generates a matrix of the top occurring words in meetings,
grouped by a specified attribute such as an organisational attribute, the day
of the week, or the hour of the day.
}
\examples{
# return a heatmap table for words
mt_data \%>\% subject_scan(hrvar = "Organizer_Organization")

# return a heatmap table for ngrams
mt_data \%>\%
  subject_scan(
    hrvar = "Organizer_Organization",
    token = "ngrams",
    n = 2)

# return raw table format
mt_data \%>\% subject_scan(hrvar = "Organizer_Organization", return = "table")

# grouped by hours
mt_data \%>\% subject_scan(mode = "hours")

# grouped by days
mt_data \%>\% subject_scan(mode = "days")

}