wpa/man/create_IV.Rd

97 строки
2.7 KiB
R

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/create_IV.R
\name{create_IV}
\alias{create_IV}
\title{Calculate Information Value for a selected outcome variable}
\usage{
create_IV(
data,
predictors = NULL,
outcome,
bins = 5,
siglevel = 0.05,
exc_sig = FALSE,
return = "plot"
)
}
\arguments{
\item{data}{A Person Query dataset in the form of a data frame.}
\item{predictors}{A character vector specifying the columns to be used as
predictors. Defaults to NULL, where all numeric vectors in the data will be
used as predictors.}
\item{outcome}{A string specifying the name of a binary variable in
\code{data}, i.e. a column that can only contain the values 1 or 0.}
\item{bins}{Number of bins to use, defaults to 5.}
\item{siglevel}{Significance level to use in comparing populations for the
outcomes, defaults to 0.05.}
\item{exc_sig}{Logical value determining whether to exclude values where the
p-value lies below what is set at \code{siglevel}. Defaults to \code{FALSE}.}
\item{return}{String specifying what to return. This must be one of the
following strings:
\itemize{
\item \code{"plot"}
\item \code{"summary"}
\item \code{"list"}
\item \code{"plot-WOE"}
\item \code{"IV"}
}
See \code{Value} for more information.}
}
\value{
A different output is returned depending on the value passed to the \code{return}
argument:
\itemize{
\item \code{"plot"}: 'ggplot' object. A bar plot showing the Information Value
(IV) of the top (maximum 12) variables.
\item \code{"summary"}: data frame. A summary table for the metric.
\item \code{"list"}: list. A list of outputs for all the input variables.
\item \code{"plot-WOE"}: A list of 'ggplot' objects that show the WOE for each
predictor used in the model.
\item \code{"IV"}: list. A list object which mirrors the return value of
\code{Information::create_infotables()}.
}
}
\description{
Specify an outcome variable and return IV outputs.
By default, all numeric variables in the dataset are used as predictor
variables; use \code{predictors} to restrict the analysis to specific columns.
}
\examples{
# Return a plot of IV
sq_data \%>\%
dplyr::mutate(X = ifelse(Workweek_span > 40, 1, 0)) \%>\%
create_IV(outcome = "X",
predictors = c("Email_hours",
"Meeting_hours",
"Instant_Message_hours"),
return = "plot")
# Return summary
sq_data \%>\%
dplyr::mutate(X = ifelse(Collaboration_hours > 2, 1, 0)) \%>\%
create_IV(outcome = "X",
predictors = c("Email_hours", "Meeting_hours"),
return = "summary")
}
\seealso{
Other Variable Association:
\code{\link{IV_by_period}()},
\code{\link{IV_report}()},
\code{\link{plot_WOE}()}
Other Information Value:
\code{\link{IV_by_period}()},
\code{\link{IV_report}()},
\code{\link{plot_WOE}()}
}
\concept{Information Value}
\concept{Variable Association}