[R-package] added support for first_metric_only (fixes #2368) (#2912)

* [R-package] started implementing first_metric_only

* trying stuff

* more changes

* fixed handling of multiple metrics

* fixed tests

* remove duplicate tests

* get training tests

* fixes for lgb.cv()

* fixes for lgb.cv()

* fix linting
James Lamb authored 2020-09-06 02:41:37 +01:00, committed by GitHub
Parent 636e4eee0f
Commit d4325c5a44
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 870 additions and 39 deletions

View File

@@ -108,3 +108,17 @@
)
return(c(learning_params, .DATASET_PARAMETERS()))
}
# [description]
# Per https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst#metric,
# a few different strings can be used to indicate "no metrics".
# [returns]
# A character vector
.NO_METRIC_STRINGS <- function() {
return(c(
"na"
, "None"
, "null"
, "custom"
))
}
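
As an illustration (not part of this diff), the snippet below shows how a metric list could be screened against these strings with base R alone; the variable names are hypothetical.
no_metric_strings <- c("na", "None", "null", "custom")
requested_metrics <- list("None", "rmse", "l1")
kept_metrics <- Filter(
  f = function(metric) !(metric %in% no_metric_strings)
  , x = requested_metrics
)
print(kept_metrics)  # only "rmse" and "l1" remain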

View File

@@ -268,7 +268,7 @@ cb.record.evaluation <- function() {
}
cb.early.stop <- function(stopping_rounds, verbose = TRUE) {
cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = TRUE) {
# Initialize variables
factor_to_bigger_better <- NULL
@@ -325,8 +325,16 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) {
# Store iteration
cur_iter <- env$iteration
# By default, any metric can trigger early stopping. This can be disabled
# with 'first_metric_only = TRUE'
if (isTRUE(first_metric_only)) {
evals_to_check <- 1L
} else {
evals_to_check <- seq_len(eval_len)
}
# Loop through evaluation
for (i in seq_len(eval_len)) {
for (i in evals_to_check) {
# Store score
score <- env$eval_list[[i]]$value * factor_to_bigger_better[i]
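
Standalone sketch of the new behavior (illustrative only, not LightGBM internals): with three metrics recorded per iteration, first_metric_only = TRUE narrows the indices the callback loops over to just the first metric.
eval_len <- 3L
for (first_metric_only in c(FALSE, TRUE)) {
  if (isTRUE(first_metric_only)) {
    evals_to_check <- 1L
  } else {
    evals_to_check <- seq_len(eval_len)
  }
  # FALSE -> 1 2 3 (any metric can trigger early stopping)
  # TRUE  -> 1     (only the first metric is checked)
  cat("first_metric_only =", first_metric_only, "->", evals_to_check, "\n")
}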

View File

@@ -24,10 +24,6 @@ CVBooster <- R6::R6Class(
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
#' @param weight vector of response values. If not NULL, will set to dataset
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param eval evaluation function, can be (list of) character or custom eval function
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
@@ -52,7 +48,7 @@ CVBooster <- R6::R6Class(
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
#'
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained model \code{lgb.CVBooster}.
#'
#' @examples
@@ -114,7 +110,7 @@ lgb.cv <- function(params = list()
params <- lgb.check.obj(params, obj)
params <- lgb.check.eval(params, eval)
fobj <- NULL
feval <- NULL
eval_functions <- list(NULL)
# Check for objective (function or not)
if (is.function(params$objective)) {
@@ -122,9 +118,17 @@ lgb.cv <- function(params = list()
params$objective <- "NONE"
}
# Check for loss (function or not)
# If loss is a single function, store it as a 1-element list
# (for backwards compatibility). If it is a list of functions, store
# all of them
if (is.function(eval)) {
feval <- eval
eval_functions <- list(eval)
}
if (methods::is(eval, "list")) {
eval_functions <- Filter(
f = is.function
, x = eval
)
}
# Init predictor to empty
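
A hedged sketch of the normalization above, with my_metric as a hypothetical custom metric (not defined in this PR): a bare function and a mixed list both reduce to a plain list of functions, while character entries are handled separately through params$metric.
my_metric <- function(preds, dtrain) {
  list(name = "my_metric", value = 0.0, higher_better = FALSE)
}
normalize_eval <- function(eval) {
  if (is.function(eval)) {
    return(list(eval))
  }
  if (methods::is(eval, "list")) {
    return(Filter(f = is.function, x = eval))
  }
  return(list(NULL))
}
length(normalize_eval(my_metric))                # 1
length(normalize_eval(list("rmse", my_metric)))  # 1 (the string goes through lgb.check.eval instead)
length(normalize_eval("rmse"))                   # 1, a list holding NULL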
@@ -266,6 +270,7 @@ lgb.cv <- function(params = list()
callbacks
, cb.early.stop(
stopping_rounds = early_stopping_rounds
, first_metric_only = isTRUE(params[["first_metric_only"]])
, verbose = verbose
)
)
@@ -357,7 +362,11 @@ lgb.cv <- function(params = list()
# Update one boosting iteration
msg <- lapply(cv_booster$boosters, function(fd) {
fd$booster$update(fobj = fobj)
fd$booster$eval_valid(feval = feval)
out <- list()
for (eval_function in eval_functions) {
out <- append(out, fd$booster$eval_valid(feval = eval_function))
}
return(out)
})
# Prepare collection of evaluation results
@@ -384,7 +393,13 @@ lgb.cv <- function(params = list()
# When early stopping is not activated, we compute the best iteration / score ourselves
# based on the first metric
if (record && is.na(env$best_score)) {
first_metric <- cv_booster$boosters[[1L]][[1L]]$.__enclos_env__$private$eval_names[1L]
# when using a custom eval function, the metric name is returned from the
# function, so figure it out from record_evals
if (!is.null(eval_functions[1L])) {
first_metric <- names(cv_booster$record_evals[["valid"]])[1L]
} else {
first_metric <- cv_booster$.__enclos_env__$private$eval_names[1L]
}
.find_best <- which.min
if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) {
.find_best <- which.max
@@ -576,7 +591,8 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) {
msg[[i]][[j]]$value }))
})
# Get evaluation
# Get evaluation. Just taking the first element here to
# get structure (name, higher_better, data_name)
ret_eval <- msg[[1L]]
# Go through evaluation length items
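
For orientation (an editor's mock, not the code from this PR): merging cross-validation results boils down to averaging each metric's value across the per-fold messages, which is also why the expected values in the new lgb.cv() tests below are fold-averaged.
fold_msgs <- list(
  list(list(name = "l2", value = 1.0), list(name = "l1", value = 0.5))
  , list(list(name = "l2", value = 2.0), list(name = "l1", value = 0.7))
)
merged <- lapply(seq_along(fold_msgs[[1L]]), function(j) {
  vals <- vapply(fold_msgs, function(msg) msg[[j]]$value, numeric(1L))
  list(name = fold_msgs[[1L]][[j]]$name, value = mean(vals))
})
merged[[1L]]$value  # 1.5
merged[[2L]]$value  # 0.6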

View File

@@ -3,10 +3,6 @@
#' @description Logic to train with LightGBM
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param eval evaluation function, can be (a list of) character or custom eval function
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int
@@ -26,6 +22,7 @@
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained booster model \code{lgb.Booster}.
#'
#' @examples
@@ -90,7 +87,7 @@ lgb.train <- function(params = list(),
params <- lgb.check.obj(params, obj)
params <- lgb.check.eval(params, eval)
fobj <- NULL
feval <- NULL
eval_functions <- list(NULL)
# Check for objective (function or not)
if (is.function(params$objective)) {
@@ -98,9 +95,17 @@ lgb.train <- function(params = list(),
params$objective <- "NONE"
}
# Check for loss (function or not)
# If loss is a single function, store it as a 1-element list
# (for backwards compatibility). If it is a list of functions, store
# all of them
if (is.function(eval)) {
feval <- eval
eval_functions <- list(eval)
}
if (methods::is(eval, "list")) {
eval_functions <- Filter(
f = is.function
, x = eval
)
}
# Init predictor to empty
@@ -235,6 +240,7 @@ lgb.train <- function(params = list(),
callbacks
, cb.early.stop(
stopping_rounds = early_stopping_rounds
, first_metric_only = isTRUE(params[["first_metric_only"]])
, verbose = verbose
)
)
@@ -280,13 +286,28 @@ lgb.train <- function(params = list(),
# Collection: Has validation dataset?
if (length(valids) > 0L) {
# Validation has training dataset?
if (valid_contain_train) {
eval_list <- append(eval_list, booster$eval_train(feval = feval))
# Get evaluation results with passed-in functions
for (eval_function in eval_functions) {
# Validation has training dataset?
if (valid_contain_train) {
eval_list <- append(eval_list, booster$eval_train(feval = eval_function))
}
eval_list <- append(eval_list, booster$eval_valid(feval = eval_function))
}
# Calling booster$eval_valid() will get
# evaluation results with the metrics in params$metric by calling LGBM_BoosterGetEval_R,
# so we need to be sure that gets called, which it wouldn't be above if no functions
# were passed in
if (length(eval_functions) == 0L) {
if (valid_contain_train) {
eval_list <- append(eval_list, booster$eval_train(feval = eval_function))
}
eval_list <- append(eval_list, booster$eval_valid(feval = eval_function))
}
# Has no validation dataset
eval_list <- append(eval_list, booster$eval_valid(feval = feval))
}
# Write evaluation result in environment
@@ -312,7 +333,7 @@ lgb.train <- function(params = list(),
# when using a custom eval function, the metric name is returned from the
# function, so figure it out from record_evals
if (!is.null(feval)) {
if (!is.null(eval_functions[1L])) {
first_metric <- names(booster$record_evals[[first_valid_name]])[1L]
} else {
first_metric <- booster$.__enclos_env__$private$eval_names[1L]
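
For illustration (an editor's mock, not code from this PR), this is the record_evals shape that the lookup above relies on when a custom eval function supplies the metric name:
record_evals <- list(
  valid1 = list(
    constant_metric = list(eval = list(0.2, 0.2, 0.2))
    , increasing_metric = list(eval = list(0.1, 0.2, 0.3))
  )
)
first_valid_name <- names(record_evals)[1L]
first_metric <- names(record_evals[[first_valid_name]])[1L]
print(first_metric)  # "constant_metric"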

View File

@@ -10,11 +10,61 @@
#' and one metric. If there's more than one, will check all of them
#' except the training data. Returns the model with (best_iter + early_stopping_rounds).
#' If early stopping occurs, the model will have 'best_iter' field.
#' @param eval evaluation function(s). This can be a character vector, function, or list with a mixture of
#' strings and functions.
#'
#' \itemize{
#' \item{\bold{a. character vector}:
#' If you provide a character vector to this argument, it should contain strings with valid
#' evaluation metrics.
#' See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
#' The "metric" section of the documentation}
#' for a list of valid metrics.
#' }
#' \item{\bold{b. function}:
#' You can provide a custom evaluation function. This
#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
#' list with three elements:
#' \itemize{
#' \item{\code{name}: A string with the name of the metric, used for printing
#' and storing results.
#' }
#' \item{\code{value}: A single number indicating the value of the metric for the
#' given predictions and true values
#' }
#' \item{
#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit.
#' For example, this would be \code{FALSE} for metrics like MAE or RMSE.
#' }
#' }
#' }
#' \item{\bold{c. list}:
#' If a list is given, it should only contain character vectors and functions.
#' These should follow the requirements from the descriptions above.
#' }
#' }
#' @param eval_freq evaluation output frequency, only effect when verbose > 0
#' @param init_model path of model file of \code{lgb.Booster} object, will continue training from this model
#' @param nrounds number of training rounds
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param params List of parameters
#' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training
#' @section Early Stopping:
#'
#' "early stopping" refers to stopping the training process if the model's performance on a given
#' validation set does not improve for several consecutive iterations.
#'
#' If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
#' early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
#' metrics will be considered for early stopping.
#'
#' If you want to only consider the first metric for early stopping, pass
#' \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
#' in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
#' a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
#' or \code{objective} (passed into \code{params}).
#' @keywords internal
NULL
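
A usage sketch of the behavior documented in the Early Stopping section above (hedged: the dataset setup mirrors the package examples, and no particular metric values are implied). Because "binary_logloss" comes from params$metric, it is treated as the first metric, so with first_metric_only = TRUE only it can trigger early stopping, while "auc" passed through eval is still recorded.
library(lightgbm)
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dvalid <- lgb.Dataset.create.valid(dtrain, agaricus.test$data, label = agaricus.test$label)
bst <- lgb.train(
  params = list(
    objective = "binary"
    , metric = "binary_logloss"
    , early_stopping_rounds = 3L
    , first_metric_only = TRUE
  )
  , data = dtrain
  , nrounds = 10L
  , valids = list(valid1 = dvalid)
  , eval = "auc"
)
print(names(bst$record_evals[["valid1"]]))  # both metrics are recorded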
@@ -47,6 +97,7 @@ NULL
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
#' @inheritSection lgb_shared_params Early Stopping
#' @export
lightgbm <- function(data,
label = NULL,

View File

@@ -318,10 +318,10 @@ lgb.check.obj <- function(params, obj) {
}
# [description]
# make sure that "metric" is populated on params,
# and add any eval values to it
# [return]
# params, where "metric" is a list
# Take any character values from eval and store them in params$metric.
# This has to account for the fact that `eval` could be a character vector,
# a function, a list of functions, or a list with a mix of strings and
# functions
lgb.check.eval <- function(params, eval) {
if (is.null(params$metric)) {
@@ -330,13 +330,30 @@ lgb.check.eval <- function(params, eval) {
params$metric <- as.list(params$metric)
}
if (is.character(eval)) {
params$metric <- append(params$metric, eval)
# if 'eval' is a character vector or list, find the character
# elements and add them to 'metric'
if (!is.function(eval)) {
for (i in seq_along(eval)) {
element <- eval[[i]]
if (is.character(element)) {
params$metric <- append(params$metric, element)
}
}
}
if (identical(class(eval), "list")) {
params$metric <- append(params$metric, unlist(eval))
# If more than one character metric was given, then "None" should
# not be included
if (length(params$metric) > 1L) {
params$metric <- Filter(
f = function(metric) {
!(metric %in% .NO_METRIC_STRINGS())
}
, x = params$metric
)
}
# duplicate metrics should be filtered out
params$metric <- as.list(unique(unlist(params$metric)))
return(params)
}
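
A hedged sketch of the resulting bookkeeping; lgb.check.eval is an internal helper, so it is reached here via ::: purely for illustration, and my_metric is a hypothetical custom metric.
my_metric <- function(preds, dtrain) {
  list(name = "my_metric", value = 0.0, higher_better = FALSE)
}
params <- lightgbm:::lgb.check.eval(
  params = list(metric = "None")
  , eval = list("rmse", my_metric, "l1", "rmse")
)
# character metrics are kept and de-duplicated, "None" is dropped because real
# metrics were supplied, and the function is left for the 'eval' handling above
print(params[["metric"]])  # list("rmse", "l1")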

View File

@@ -45,9 +45,41 @@ may allow you to pass other types of data like \code{matrix} and then separately
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{eval}{evaluation function, can be (list of) character or custom eval function}
\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
strings and functions.
\itemize{
\item{\bold{a. character vector}:
If you provide a character vector to this argument, it should contain strings with valid
evaluation metrics.
See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
The "metric" section of the documentation}
for a list of valid metrics.
}
\item{\bold{b. function}:
You can provide a custom evaluation function. This
should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
list with three elements:
\itemize{
\item{\code{name}: A string with the name of the metric, used for printing
and storing results.
}
\item{\code{value}: A single number indicating the value of the metric for the
given predictions and true values
}
\item{
\code{higher_better}: A boolean indicating whether higher values indicate a better fit.
For example, this would be \code{FALSE} for metrics like MAE or RMSE.
}
}
}
\item{\bold{c. list}:
If a list is given, it should only contain character vectors and functions.
These should follow the requirements from the descriptions above.
}
}}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
@@ -99,6 +131,23 @@ a trained model \code{lgb.CVBooster}.
\description{
Cross validation logic used by LightGBM
}
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}
\examples{
\dontrun{
data(agaricus.train, package = "lightgbm")

View File

@@ -38,7 +38,39 @@ may allow you to pass other types of data like \code{matrix} and then separately
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{eval}{evaluation function, can be (a list of) character or custom eval function}
\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
strings and functions.
\itemize{
\item{\bold{a. character vector}:
If you provide a character vector to this argument, it should contain strings with valid
evaluation metrics.
See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
The "metric" section of the documentation}
for a list of valid metrics.
}
\item{\bold{b. function}:
You can provide a custom evaluation function. This
should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
list with three elements:
\itemize{
\item{\code{name}: A string with the name of the metric, used for printing
and storing results.
}
\item{\code{value}: A single number indicating the value of the metric for the
given predictions and true values
}
\item{
\code{higher_better}: A boolean indicating whether higher values indicate a better fit.
For example, this would be \code{FALSE} for metrics like MAE or RMSE.
}
}
}
\item{\bold{c. list}:
If a list is given, it should only contain character vectors and functions.
These should follow the requirements from the descriptions above.
}
}}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
@@ -82,6 +114,23 @@ a trained booster model \code{lgb.Booster}.
\description{
Logic to train with LightGBM
}
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}
\examples{
\dontrun{
data(agaricus.train, package = "lightgbm")

View File

@@ -16,12 +16,50 @@ and one metric. If there's more than one, will check all of them
except the training data. Returns the model with (best_iter + early_stopping_rounds).
If early stopping occurs, the model will have 'best_iter' field.}
\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
strings and functions.
\itemize{
\item{\bold{a. character vector}:
If you provide a character vector to this argument, it should contain strings with valid
evaluation metrics.
See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
The "metric" section of the documentation}
for a list of valid metrics.
}
\item{\bold{b. function}:
You can provide a custom evaluation function. This
should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
list with three elements:
\itemize{
\item{\code{name}: A string with the name of the metric, used for printing
and storing results.
}
\item{\code{value}: A single number indicating the value of the metric for the
given predictions and true values
}
\item{
\code{higher_better}: A boolean indicating whether higher values indicate a better fit.
For example, this would be \code{FALSE} for metrics like MAE or RMSE.
}
}
}
\item{\bold{c. list}:
If a list is given, it should only contain character vectors and functions.
These should follow the requirements from the descriptions above.
}
}}
\item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
\item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model}
\item{nrounds}{number of training rounds}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{params}{List of parameters}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
@@ -29,4 +67,21 @@ If early stopping occurs, the model will have 'best_iter' field.}
\description{
Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm}
}
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}
\keyword{internal}

View File

@@ -74,3 +74,20 @@ List of callback functions that are applied at each iteration.}
\description{
Simple interface for training a LightGBM model.
}
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}

View File

@@ -6,6 +6,59 @@ train <- agaricus.train
test <- agaricus.test
TOLERANCE <- 1e-6
set.seed(708L)
# [description] Every time this function is called, it adds 0.1
# to an accumulator then returns the current value.
# This is used to mock the situation where an evaluation
# metric increases every iteration
ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR"
assign(x = "INCREASING_METRIC_ACUMULATOR", value = 0.0, envir = .GlobalEnv)
.increasing_metric <- function(preds, dtrain) {
if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)) {
assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv)
}
assign(
x = ACCUMULATOR_NAME
, value = get(ACCUMULATOR_NAME, envir = .GlobalEnv) + 0.1
, envir = .GlobalEnv
)
return(list(
name = "increasing_metric"
, value = get(ACCUMULATOR_NAME, envir = .GlobalEnv)
, higher_better = TRUE
))
}
# [description] Evaluation function that always returns the
# same value
CONSTANT_METRIC_VALUE <- 0.2
.constant_metric <- function(preds, dtrain) {
return(list(
name = "constant_metric"
, value = CONSTANT_METRIC_VALUE
, higher_better = FALSE
))
}
# sample datasets to test early stopping
DTRAIN_RANDOM_REGRESSION <- lgb.Dataset(
data = as.matrix(rnorm(100L), ncol = 1L, drop = FALSE)
, label = rnorm(100L)
)
DVALID_RANDOM_REGRESSION <- lgb.Dataset(
data = as.matrix(rnorm(50L), ncol = 1L, drop = FALSE)
, label = rnorm(50L)
)
DTRAIN_RANDOM_CLASSIFICATION <- lgb.Dataset(
data = as.matrix(rnorm(120L), ncol = 1L, drop = FALSE)
, label = sample(c(0L, 1L), size = 120L, replace = TRUE)
)
DVALID_RANDOM_CLASSIFICATION <- lgb.Dataset(
data = as.matrix(rnorm(37L), ncol = 1L, drop = FALSE)
, label = sample(c(0L, 1L), size = 37L, replace = TRUE)
)
test_that("train and predict binary classification", {
nrounds <- 10L
@@ -707,7 +760,6 @@ test_that("lgb.train() works with early stopping for regression", {
params = list(
objective = "regression"
, metric = "rmse"
, min_data_in_bin = 5L
)
, data = dtrain
, nrounds = nrounds
@@ -730,7 +782,6 @@ test_that("lgb.train() works with early stopping for regression", {
params = list(
objective = "regression"
, metric = "rmse"
, min_data_in_bin = 5L
, early_stopping_rounds = early_stopping_rounds
)
, data = dtrain
@@ -750,6 +801,314 @@ test_that("lgb.train() works with early stopping for regression", {
)
})
test_that("lgb.train() does not stop early if early_stopping_rounds is not given", {
set.seed(708L)
increasing_metric_starting_value <- get(
ACCUMULATOR_NAME
, envir = .GlobalEnv
)
nrounds <- 10L
metrics <- list(
.constant_metric
, .increasing_metric
)
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "None"
)
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list("valid1" = DVALID_RANDOM_REGRESSION)
, eval = metrics
)
# Only the two functions provided to "eval" should have been evaluated
expect_equal(length(bst$record_evals[["valid1"]]), 2L)
# all 10 iterations should have happened, and the best_iter should be
# the first one (based on constant_metric)
best_iter <- 1L
expect_equal(bst$best_iter, best_iter)
# best_score should be taken from the first metric
expect_equal(
bst$best_score
, bst$record_evals[["valid1"]][["constant_metric"]][["eval"]][[best_iter]]
)
# early stopping should not have happened, because early_stopping_rounds
# was not given. Even though constant_metric had 9 consecutive iterations
# with no improvement, all nrounds iterations should have run
expect_equal(
length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
, nrounds
)
expect_equal(
length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
, nrounds
)
})
test_that("If first_metric_only is not given or is FALSE, lgb.train() decides to stop early based on all metrics", {
set.seed(708L)
early_stopping_rounds <- 3L
param_variations <- list(
list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
)
, list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = FALSE
)
)
for (params in param_variations) {
nrounds <- 10L
bst <- lgb.train(
params = params
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, .constant_metric
)
)
# Only the two functions provided to "eval" should have been evaluated
expect_equal(length(bst$record_evals[["valid1"]]), 2L)
# early stopping should have happened, and should have stopped early_stopping_rounds + 1 rounds in
# because constant_metric never improves
#
# the best iteration should be the last one, because increasing_metric was first
# and gets better every iteration
best_iter <- early_stopping_rounds + 1L
expect_equal(bst$best_iter, best_iter)
# best_score should be taken from "increasing_metric" because it was first
expect_equal(
bst$best_score
, bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]][[best_iter]]
)
# early stopping should have happened: even though increasing_metric kept
# getting better, "constant_metric" did not improve, and by default any
# metric can trigger early stopping
expect_equal(
length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
expect_equal(
length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
}
})
test_that("If first_metric_only is TRUE, lgb.train() decides to stop early based on only the first metric", {
set.seed(708L)
nrounds <- 10L
early_stopping_rounds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = TRUE
)
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, .constant_metric
)
)
# Only the two functions provided to "eval" should have been evaluated
expect_equal(length(bst$record_evals[["valid1"]]), 2L)
# all 10 iterations should happen, and the best_iter should be the final one
expect_equal(bst$best_iter, nrounds)
# best_score should be taken from "increasing_metric"
expect_equal(
bst$best_score
, increasing_metric_starting_value + 0.1 * nrounds
)
# early stopping should not have happened. Even though constant_metric
# had 9 consecutive iterations with no improvement, it is ignored because of
# first_metric_only = TRUE
expect_equal(
length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
, nrounds
)
expect_equal(
length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
, nrounds
)
})
test_that("lgb.train() works when a mixture of functions and strings are passed to eval", {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "None"
)
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, "rmse"
, .constant_metric
, "l2"
)
)
# all 4 metrics should have been used
expect_named(
bst$record_evals[["valid1"]]
, expected = c("rmse", "l2", "increasing_metric", "constant_metric")
, ignore.order = TRUE
, ignore.case = FALSE
)
# the different metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 1.105012) < TOLERANCE)
expect_true(abs(results[["l2"]][["eval"]][[1L]] - 1.221051) < TOLERANCE)
expected_increasing_metric <- increasing_metric_starting_value + 0.1
expect_true(
abs(
results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric
) < TOLERANCE
)
expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
})
test_that("lgb.train() works when a list of strings or a character vector is passed to eval", {
# testing list and character vector, as well as length-1 and length-2
eval_variations <- list(
c("binary_error", "binary_logloss")
, "binary_logloss"
, list("binary_error", "binary_logloss")
, list("binary_logloss")
)
for (eval_variation in eval_variations) {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "binary"
, metric = "None"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_CLASSIFICATION
)
, eval = eval_variation
)
# both metrics should have been used
expect_named(
bst$record_evals[["valid1"]]
, expected = unlist(eval_variation)
, ignore.order = TRUE
, ignore.case = FALSE
)
# the different metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
if ("binary_error" %in% unlist(eval_variation)) {
expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE)
}
if ("binary_logloss" %in% unlist(eval_variation)) {
expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE)
}
}
})
test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "binary"
, metric = "binary_error"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_CLASSIFICATION
)
, eval = "binary_logloss"
)
# both metrics should have been used
expect_named(
bst$record_evals[["valid1"]]
, expected = c("binary_error", "binary_logloss")
, ignore.order = TRUE
, ignore.case = FALSE
)
# the different metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE)
expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE)
})
test_that("lgb.train() works when you give a function for eval", {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "binary"
, metric = "None"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_CLASSIFICATION
)
, eval = .constant_metric
)
# the different metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
})
test_that("lgb.train() works with early stopping for regression with a metric that should be minimized", {
set.seed(708L)
trainDF <- data.frame(
@@ -1099,6 +1458,172 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com
expect_identical(bst$best_score, auc_scores[which.max(auc_scores)])
})
test_that("lgb.cv() works when you specify both 'metric' and 'eval' with strings", {
set.seed(708L)
nrounds <- 10L
nfolds <- 4L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "binary"
, metric = "binary_error"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, nfold = nfolds
, eval = "binary_logloss"
)
# both metrics should have been used
expect_named(
bst$record_evals[["valid"]]
, expected = c("binary_error", "binary_logloss")
, ignore.order = TRUE
, ignore.case = FALSE
)
# the different metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid"]]
expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5005654) < TOLERANCE)
expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.7011232) < TOLERANCE)
# all boosters should have been created
expect_length(bst$boosters, nfolds)
})
test_that("lgb.cv() works when you give a function for eval", {
set.seed(708L)
nrounds <- 10L
nfolds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "binary"
, metric = "None"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nfold = nfolds
, nrounds = nrounds
, eval = .constant_metric
)
# the different metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid"]]
expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
expect_named(results, "constant_metric")
})
test_that("If first_metric_only is TRUE, lgb.cv() decides to stop early based on only the first metric", {
set.seed(708L)
nrounds <- 10L
nfolds <- 5L
early_stopping_rounds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = TRUE
)
, data = DTRAIN_RANDOM_REGRESSION
, nfold = nfolds
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, .constant_metric
)
)
# Only the two functions provided to "eval" should have been evaluated
expect_named(bst$record_evals[["valid"]], c("increasing_metric", "constant_metric"))
# all 10 iterations should happen, and the best_iter should be the final one
expect_equal(bst$best_iter, nrounds)
# best_score should be taken from "increasing_metric"
#
# this expected value looks magical and confusing, but it's because
# evaluation metrics are averaged over all folds.
#
# consider 5-fold CV with a metric that adds 0.1 to a global accumulator
# each time it's called
#
# * iter 1: [0.1, 0.2, 0.3, 0.4, 0.5] (mean = 0.3)
# * iter 2: [0.6, 0.7, 0.8, 0.9, 1.0] (mean = 0.8)
# * iter 3: [1.1, 1.2, 1.3, 1.4, 1.5] (mean = 1.3)
#
cv_value <- increasing_metric_starting_value + mean(seq_len(nfolds) / 10.0) + (nrounds - 1L) * 0.1 * nfolds
expect_equal(bst$best_score, cv_value)
# early stopping should not have happened. Even though constant_metric
# had 9 consecutive iterations with no improvement, it is ignored because of
# first_metric_only = TRUE
expect_equal(
length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
, nrounds
)
expect_equal(
length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
, nrounds
)
})
test_that("early stopping works with lgb.cv()", {
set.seed(708L)
nrounds <- 10L
nfolds <- 5L
early_stopping_rounds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = TRUE
)
, data = DTRAIN_RANDOM_REGRESSION
, nfold = nfolds
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.constant_metric
, .increasing_metric
)
)
# only the two functions provided to "eval" should have been evaluated
expect_named(bst$record_evals[["valid"]], c("constant_metric", "increasing_metric"))
# best_iter should be based on the first metric. Since constant_metric
# never changes, its first iteration was the best one
expect_equal(bst$best_iter, 1L)
# best_score should be taken from the first metric
expect_equal(bst$best_score, 0.2)
# early stopping should have happened, since constant_metric was the first
# one passed to eval and it will not improve over consecutive iterations
#
# note that this test is identical to the previous one, but with the
# order of the eval metrics switched
expect_equal(
length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
expect_equal(
length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
})
context("interaction constraints")
test_that("lgb.train() throws an informative error if interaction_constraints is not a list", {
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression", interaction_constraints = "[1,2],[3]")

View File

@@ -115,3 +115,12 @@ test_that("lgb.check.eval adds eval to metric in params if a list is provided",
expect_named(params, "metric")
expect_identical(params[["metric"]], list("auc", "binary_error", "binary_logloss"))
})
test_that("lgb.check.eval drops duplicate metrics and preserves order", {
params <- lgb.check.eval(
params = list(metric = "l1")
, eval = list("l2", "rmse", "l1", "rmse")
)
expect_named(params, "metric")
expect_identical(params[["metric"]], list("l1", "l2", "rmse"))
})