зеркало из https://github.com/microsoft/LightGBM.git
[R-package] fix warnings in demos (#4569)
* [R-package] fix warnings in demos * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * fix additional params issues in multiclass and categorical_feature examples * Update R-package/demo/multiclass.R Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
This commit is contained in:
Родитель
b4213e96cb
Коммит
846e8954ac
|
@ -326,7 +326,7 @@ coverage.xml
|
||||||
.hypothesis/
|
.hypothesis/
|
||||||
**/coverage.html
|
**/coverage.html
|
||||||
**/coverage.html.zip
|
**/coverage.html.zip
|
||||||
R-package/tests/testthat/Rplots.pdf
|
**/Rplots.pdf
|
||||||
|
|
||||||
# Translations
|
# Translations
|
||||||
*.mo
|
*.mo
|
||||||
|
@ -427,6 +427,7 @@ miktex*.zip
|
||||||
*.def
|
*.def
|
||||||
|
|
||||||
# Files created by examples and tests
|
# Files created by examples and tests
|
||||||
|
*.buffer
|
||||||
**/lgb-Dataset.data
|
**/lgb-Dataset.data
|
||||||
**/lgb.Dataset.data
|
**/lgb.Dataset.data
|
||||||
**/model.txt
|
**/model.txt
|
||||||
|
|
|
@ -12,6 +12,14 @@ test <- agaricus.test
|
||||||
class(train$label)
|
class(train$label)
|
||||||
class(train$data)
|
class(train$data)
|
||||||
|
|
||||||
|
# Set parameters for model training
|
||||||
|
train_params <- list(
|
||||||
|
num_leaves = 4L
|
||||||
|
, learning_rate = 1.0
|
||||||
|
, objective = "binary"
|
||||||
|
, nthread = 2L
|
||||||
|
)
|
||||||
|
|
||||||
#--------------------Basic Training using lightgbm----------------
|
#--------------------Basic Training using lightgbm----------------
|
||||||
# This is the basic usage of lightgbm: you can put a matrix in the data field
|
# This is the basic usage of lightgbm: you can put a matrix in the data field
|
||||||
# Note: we are putting in a sparse matrix here; lightgbm naturally handles sparse input
|
# Note: we are putting in a sparse matrix here; lightgbm naturally handles sparse input
|
||||||
|
@ -19,22 +27,18 @@ class(train$data)
|
||||||
print("Training lightgbm with sparseMatrix")
|
print("Training lightgbm with sparseMatrix")
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = train$data
|
data = train$data
|
||||||
|
, params = train_params
|
||||||
, label = train$label
|
, label = train$label
|
||||||
, num_leaves = 4L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, objective = "binary"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Alternatively, you can put in a dense matrix, i.e. a basic R matrix
|
# Alternatively, you can put in a dense matrix, i.e. a basic R matrix
|
||||||
print("Training lightgbm with Matrix")
|
print("Training lightgbm with Matrix")
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = as.matrix(train$data)
|
data = as.matrix(train$data)
|
||||||
|
, params = train_params
|
||||||
, label = train$label
|
, label = train$label
|
||||||
, num_leaves = 4L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, objective = "binary"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# You can also put in an lgb.Dataset object, which stores the label, data and other metadata needed for advanced features
|
# You can also put in an lgb.Dataset object, which stores the label, data and other metadata needed for advanced features
|
||||||
|
@ -45,42 +49,32 @@ dtrain <- lgb.Dataset(
|
||||||
)
|
)
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = dtrain
|
data = dtrain
|
||||||
, num_leaves = 4L
|
, params = train_params
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, objective = "binary"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verbose = 0,1,2
|
# Verbose = 0,1,2
|
||||||
print("Train lightgbm with verbose 0, no message")
|
print("Train lightgbm with verbose 0, no message")
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = dtrain
|
data = dtrain
|
||||||
, num_leaves = 4L
|
, params = train_params
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, objective = "binary"
|
|
||||||
, verbose = 0L
|
, verbose = 0L
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Train lightgbm with verbose 1, print evaluation metric")
|
print("Train lightgbm with verbose 1, print evaluation metric")
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = dtrain
|
data = dtrain
|
||||||
, num_leaves = 4L
|
, params = train_params
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, nthread = 2L
|
|
||||||
, objective = "binary"
|
|
||||||
, verbose = 1L
|
, verbose = 1L
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Train lightgbm with verbose 2, also print information about tree")
|
print("Train lightgbm with verbose 2, also print information about tree")
|
||||||
bst <- lightgbm(
|
bst <- lightgbm(
|
||||||
data = dtrain
|
data = dtrain
|
||||||
, num_leaves = 4L
|
, params = train_params
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, nthread = 2L
|
|
||||||
, objective = "binary"
|
|
||||||
, verbose = 2L
|
, verbose = 2L
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -126,25 +120,19 @@ valids <- list(train = dtrain, test = dtest)
|
||||||
print("Train lightgbm using lgb.train with valids")
|
print("Train lightgbm using lgb.train with valids")
|
||||||
bst <- lgb.train(
|
bst <- lgb.train(
|
||||||
data = dtrain
|
data = dtrain
|
||||||
, num_leaves = 4L
|
, params = train_params
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, valids = valids
|
, valids = valids
|
||||||
, nthread = 2L
|
|
||||||
, objective = "binary"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# We can change evaluation metrics, or use multiple evaluation metrics
|
# We can change evaluation metrics, or use multiple evaluation metrics
|
||||||
print("Train lightgbm using lgb.train with valids, watch logloss and error")
|
print("Train lightgbm using lgb.train with valids, watch logloss and error")
|
||||||
bst <- lgb.train(
|
bst <- lgb.train(
|
||||||
data = dtrain
|
data = dtrain
|
||||||
, num_leaves = 4L
|
, params = train_params
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, valids = valids
|
, valids = valids
|
||||||
, eval = c("binary_error", "binary_logloss")
|
, eval = c("binary_error", "binary_logloss")
|
||||||
, nthread = 2L
|
|
||||||
, objective = "binary"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# lgb.Dataset can also be saved using lgb.Dataset.save
|
# lgb.Dataset can also be saved using lgb.Dataset.save
|
||||||
|
@ -154,12 +142,9 @@ lgb.Dataset.save(dtrain, "dtrain.buffer")
|
||||||
dtrain2 <- lgb.Dataset("dtrain.buffer")
|
dtrain2 <- lgb.Dataset("dtrain.buffer")
|
||||||
bst <- lgb.train(
|
bst <- lgb.train(
|
||||||
data = dtrain2
|
data = dtrain2
|
||||||
, num_leaves = 4L
|
, params = train_params
|
||||||
, learning_rate = 1.0
|
|
||||||
, nrounds = 2L
|
, nrounds = 2L
|
||||||
, valids = valids
|
, valids = valids
|
||||||
, nthread = 2L
|
|
||||||
, objective = "binary"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# information can be extracted from lgb.Dataset using getinfo
|
# information can be extracted from lgb.Dataset using getinfo
|
||||||
|
|
|
@ -85,7 +85,6 @@ params <- list(
|
||||||
, metric = "l2"
|
, metric = "l2"
|
||||||
, min_data = 1L
|
, min_data = 1L
|
||||||
, learning_rate = 0.1
|
, learning_rate = 0.1
|
||||||
, min_data = 0L
|
|
||||||
, min_hessian = 1.0
|
, min_hessian = 1.0
|
||||||
, max_depth = 2L
|
, max_depth = 2L
|
||||||
)
|
)
|
||||||
|
|
|
@ -46,7 +46,7 @@ bst <- lgb.train(
|
||||||
, dtrain
|
, dtrain
|
||||||
, num_round
|
, num_round
|
||||||
, valids
|
, valids
|
||||||
, objective = logregobj
|
, obj = logregobj
|
||||||
, eval = evalerror
|
, eval = evalerror
|
||||||
, early_stopping_round = 3L
|
, early_stopping_round = 3L
|
||||||
)
|
)
|
||||||
|
|
|
@ -85,18 +85,21 @@ test <- agaricus.test
|
||||||
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
|
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
|
||||||
|
|
||||||
# setup parameters and we train a model
|
# setup parameters and we train a model
|
||||||
params <- list(objective = "regression", metric = "l2")
|
params <- list(
|
||||||
|
objective = "regression"
|
||||||
|
, metric = "l2"
|
||||||
|
, min_data = 1L
|
||||||
|
, learning_rate = 0.1
|
||||||
|
, bagging_fraction = 0.1
|
||||||
|
, bagging_freq = 1L
|
||||||
|
, bagging_seed = 1L
|
||||||
|
)
|
||||||
valids <- list(test = dtest)
|
valids <- list(test = dtest)
|
||||||
model <- lgb.train(
|
model <- lgb.train(
|
||||||
params
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 50L
|
, 50L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 0.1
|
|
||||||
, bagging_fraction = 0.1
|
|
||||||
, bagging_freq = 1L
|
|
||||||
, bagging_seed = 1L
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# We create a data.frame with the following structure:
|
# We create a data.frame with the following structure:
|
||||||
|
@ -141,13 +144,17 @@ table(new_data$binned)
|
||||||
.depth_density_plot(df = new_data)
|
.depth_density_plot(df = new_data)
|
||||||
|
|
||||||
# Now, let's show with other parameters
|
# Now, let's show with other parameters
|
||||||
|
params <- list(
|
||||||
|
objective = "regression"
|
||||||
|
, metric = "l2"
|
||||||
|
, min_data = 1L
|
||||||
|
, learning_rate = 1.0
|
||||||
|
)
|
||||||
model2 <- lgb.train(
|
model2 <- lgb.train(
|
||||||
params
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 100L
|
, 100L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# We create the data structure, but for model2
|
# We create the data structure, but for model2
|
||||||
|
@ -193,13 +200,17 @@ table(new_data2$binned)
|
||||||
.depth_density_plot(df = new_data2)
|
.depth_density_plot(df = new_data2)
|
||||||
|
|
||||||
# Now, try with very severe overfitting
|
# Now, try with very severe overfitting
|
||||||
|
params <- list(
|
||||||
|
objective = "regression"
|
||||||
|
, metric = "l2"
|
||||||
|
, min_data = 1L
|
||||||
|
, learning_rate = 1.0
|
||||||
|
)
|
||||||
model3 <- lgb.train(
|
model3 <- lgb.train(
|
||||||
params
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 1000L
|
, 1000L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# We create the data structure, but for model3
|
# We create the data structure, but for model3
|
||||||
|
|
|
@ -18,14 +18,18 @@ dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5
|
||||||
valids <- list(test = dtest)
|
valids <- list(test = dtest)
|
||||||
|
|
||||||
# Method 1 of training
|
# Method 1 of training
|
||||||
params <- list(objective = "multiclass", metric = "multi_error", num_class = 3L)
|
params <- list(
|
||||||
|
objective = "multiclass"
|
||||||
|
, metric = "multi_error"
|
||||||
|
, num_class = 3L
|
||||||
|
, min_data = 1L
|
||||||
|
, learning_rate = 1.0
|
||||||
|
)
|
||||||
model <- lgb.train(
|
model <- lgb.train(
|
||||||
params
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 100L
|
, 100L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, early_stopping_rounds = 10L
|
, early_stopping_rounds = 10L
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -34,18 +38,20 @@ model <- lgb.train(
|
||||||
my_preds <- predict(model, test[, 1L:4L])
|
my_preds <- predict(model, test[, 1L:4L])
|
||||||
|
|
||||||
# Method 2 of training, identical
|
# Method 2 of training, identical
|
||||||
model <- lgb.train(
|
params <- list(
|
||||||
list()
|
min_data = 1L
|
||||||
, dtrain
|
|
||||||
, 100L
|
|
||||||
, valids
|
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
, learning_rate = 1.0
|
||||||
, early_stopping_rounds = 10L
|
|
||||||
, objective = "multiclass"
|
, objective = "multiclass"
|
||||||
, metric = "multi_error"
|
, metric = "multi_error"
|
||||||
, num_class = 3L
|
, num_class = 3L
|
||||||
)
|
)
|
||||||
|
model <- lgb.train(
|
||||||
|
params
|
||||||
|
, dtrain
|
||||||
|
, 100L
|
||||||
|
, valids
|
||||||
|
, early_stopping_rounds = 10L
|
||||||
|
)
|
||||||
|
|
||||||
# We can predict on test data, identical
|
# We can predict on test data, identical
|
||||||
my_preds <- predict(model, test[, 1L:4L])
|
my_preds <- predict(model, test[, 1L:4L])
|
||||||
|
|
|
@ -20,18 +20,20 @@ valids <- list(train = dtrain, test = dtest)
|
||||||
# Method 1 of training with built-in multiclass objective
|
# Method 1 of training with built-in multiclass objective
|
||||||
# Note: need to turn off boost from average to match custom objective
|
# Note: need to turn off boost from average to match custom objective
|
||||||
# (https://github.com/microsoft/LightGBM/issues/1846)
|
# (https://github.com/microsoft/LightGBM/issues/1846)
|
||||||
model_builtin <- lgb.train(
|
params <- list(
|
||||||
list()
|
min_data = 1L
|
||||||
, dtrain
|
, learning_rate = 1.0
|
||||||
|
, num_class = 3L
|
||||||
, boost_from_average = FALSE
|
, boost_from_average = FALSE
|
||||||
|
, metric = "multi_logloss"
|
||||||
|
)
|
||||||
|
model_builtin <- lgb.train(
|
||||||
|
params
|
||||||
|
, dtrain
|
||||||
, 100L
|
, 100L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, early_stopping_rounds = 10L
|
, early_stopping_rounds = 10L
|
||||||
, objective = "multiclass"
|
, obj = "multiclass"
|
||||||
, metric = "multi_logloss"
|
|
||||||
, num_class = 3L
|
|
||||||
)
|
)
|
||||||
|
|
||||||
preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
|
preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
|
||||||
|
@ -92,17 +94,19 @@ custom_multiclass_metric <- function(preds, dtrain) {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
params <- list(
|
||||||
|
min_data = 1L
|
||||||
|
, learning_rate = 1.0
|
||||||
|
, num_class = 3L
|
||||||
|
)
|
||||||
model_custom <- lgb.train(
|
model_custom <- lgb.train(
|
||||||
list()
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 100L
|
, 100L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, early_stopping_rounds = 10L
|
, early_stopping_rounds = 10L
|
||||||
, objective = custom_multiclass_obj
|
, obj = custom_multiclass_obj
|
||||||
, eval = custom_multiclass_metric
|
, eval = custom_multiclass_metric
|
||||||
, num_class = 3L
|
|
||||||
)
|
)
|
||||||
|
|
||||||
preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
|
preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
|
||||||
|
|
|
@ -34,14 +34,14 @@ params <- list(
|
||||||
, num_leaves = 7L
|
, num_leaves = 7L
|
||||||
, max_depth = 3L
|
, max_depth = 3L
|
||||||
, nthread = 1L
|
, nthread = 1L
|
||||||
|
, min_data = 1L
|
||||||
|
, learning_rate = 1.0
|
||||||
)
|
)
|
||||||
model <- lgb.train(
|
model <- lgb.train(
|
||||||
params
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 50L
|
, 50L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, early_stopping_rounds = 10L
|
, early_stopping_rounds = 10L
|
||||||
)
|
)
|
||||||
weight_loss <- as.numeric(model$record_evals$test$l2$eval)
|
weight_loss <- as.numeric(model$record_evals$test$l2$eval)
|
||||||
|
@ -58,14 +58,14 @@ params <- list(
|
||||||
, num_leaves = 7L
|
, num_leaves = 7L
|
||||||
, max_depth = 3L
|
, max_depth = 3L
|
||||||
, nthread = 1L
|
, nthread = 1L
|
||||||
|
, min_data = 1L
|
||||||
|
, learning_rate = 1.0
|
||||||
)
|
)
|
||||||
model <- lgb.train(
|
model <- lgb.train(
|
||||||
params
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 50L
|
, 50L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, early_stopping_rounds = 10L
|
, early_stopping_rounds = 10L
|
||||||
)
|
)
|
||||||
small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
|
small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
|
||||||
|
@ -94,14 +94,14 @@ params <- list(
|
||||||
, num_leaves = 7L
|
, num_leaves = 7L
|
||||||
, max_depth = 3L
|
, max_depth = 3L
|
||||||
, nthread = 1L
|
, nthread = 1L
|
||||||
|
, min_data = 1L
|
||||||
|
, learning_rate = 1.0
|
||||||
)
|
)
|
||||||
model <- lgb.train(
|
model <- lgb.train(
|
||||||
params
|
params
|
||||||
, dtrain
|
, dtrain
|
||||||
, 50L
|
, 50L
|
||||||
, valids
|
, valids
|
||||||
, min_data = 1L
|
|
||||||
, learning_rate = 1.0
|
|
||||||
, early_stopping_rounds = 10L
|
, early_stopping_rounds = 10L
|
||||||
)
|
)
|
||||||
large_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
|
large_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче