[R-package] Add remainder of prediction functions (#5312)

This commit is contained in:
david-cortes 2022-08-23 21:15:18 +03:00 committed by GitHub
Parent 01774bb921
Commit 702db13b01
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
15 changed files: 1129 additions and 85 deletions

View file

@ -16,6 +16,7 @@ export(lgb.Dataset.create.valid)
export(lgb.Dataset.save)
export(lgb.Dataset.set.categorical)
export(lgb.Dataset.set.reference)
export(lgb.configure_fast_predict)
export(lgb.convert_with_rules)
export(lgb.cv)
export(lgb.drop_serialized)
@ -37,6 +38,8 @@ export(saveRDS.lgb.Booster)
export(set_field)
export(slice)
import(methods)
importClassesFrom(Matrix,CsparseMatrix)
importClassesFrom(Matrix,RsparseMatrix)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgRMatrix)
importClassesFrom(Matrix,dsparseMatrix)

View file

@ -530,6 +530,7 @@ Booster <- R6::R6Class(
predictor <- Predictor$new(
modelfile = private$handle
, params = params
, fast_predict_config = private$fast_predict_config
)
return(
predictor$predict(
@ -550,6 +551,57 @@ Booster <- R6::R6Class(
return(Predictor$new(modelfile = private$handle))
},
configure_fast_predict = function(csr = FALSE,
start_iteration = NULL,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
params = list()) {
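# Builds a fast single-row prediction handle through the C API and records the
# requested settings, so that later predictions can check whether the request
# matches this configuration and take the fast code path.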
self$restore_handle()
ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
if (is.null(num_iteration)) {
num_iteration <- -1L
}
if (is.null(start_iteration)) {
start_iteration <- 0L
}
if (!csr) {
fun <- LGBM_BoosterPredictForMatSingleRowFastInit_R
} else {
fun <- LGBM_BoosterPredictForCSRSingleRowFastInit_R
}
fast_handle <- .Call(
fun
, private$handle
, ncols
, rawscore
, predleaf
, predcontrib
, start_iteration
, num_iteration
, lgb.params2str(params = params)
)
private$fast_predict_config <- list(
handle = fast_handle
, csr = as.logical(csr)
, ncols = ncols
, start_iteration = start_iteration
, num_iteration = num_iteration
, rawscore = as.logical(rawscore)
, predleaf = as.logical(predleaf)
, predcontrib = as.logical(predcontrib)
, params = params
)
return(invisible(NULL))
},
# Used for serialization
raw = NULL,
@ -601,6 +653,7 @@ Booster <- R6::R6Class(
higher_better_inner_eval = NULL,
set_objective_to_none = FALSE,
train_set_version = 0L,
fast_predict_config = list(),
# Predict data
inner_predict = function(idx) {
@ -748,18 +801,15 @@ Booster <- R6::R6Class(
)
)
#' @name predict.lgb.Booster
#' @title Predict method for LightGBM model
#' @description Predicted values based on class \code{lgb.Booster}
#' @param object Object of class \code{lgb.Booster}
#' @param newdata a \code{matrix} object, a \code{dgCMatrix} object or
#' a character representing a path to a text file (CSV, TSV, or LibSVM)
#' @name lgb_predict_shared_params
#' @param type Type of prediction to output. Allowed types are:\itemize{
#' \item \code{"response"}: will output the predicted score according to the objective function being
#' optimized (depending on the link function that the objective uses), after applying any necessary
#' transformations - for example, for \code{objective="binary"}, it will output class probabilities.
#' \item \code{"class"}: for classification objectives, will output the class with the highest predicted
#' probability. For other objectives, will output the same as "response".
#' probability. For other objectives, will output the same as "response". Note that \code{"class"} is
#' not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function
#' for more details).
#' \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
#' results) from which the "response" number is produced for a given objective function - for example,
#' for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as
@ -780,12 +830,34 @@ Booster <- R6::R6Class(
#' If NULL, if the best iteration exists and start_iteration is NULL or <= 0, the
#' best iteration is used; otherwise, all iterations from start_iteration are used.
#' If <= 0, all iterations from start_iteration are used (no limits).
#' @param header only used for prediction from a text file. \code{TRUE} if the text file has a header
#' @param params a list of additional named parameters. See
#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
#' the "Predict Parameters" section of the documentation} for a list of parameters and
#' valid values. Where these conflict with the values of keyword arguments to this function,
#' the values in \code{params} take precedence.
NULL
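
As a quick illustration of how the different "type" values map to outputs, here is a minimal
sketch that mirrors the small mtcars regression model used in the examples elsewhere in this
file (object names are illustrative only):

library(lightgbm)
data(mtcars)
X <- as.matrix(mtcars[, -1L])
y <- mtcars[, 1L]
dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
model <- lgb.train(
    params = list(min_data_in_leaf = 2L)
    , data = dtrain
    , obj = "regression"
    , nrounds = 5L
    , verbose = -1L
)

pred_response <- predict(model, X, type = "response")  # scores on the objective's scale
pred_raw      <- predict(model, X, type = "raw")       # untransformed sums over boosting iterations
pred_leaf     <- predict(model, X, type = "leaf")      # one leaf index per tree and observation
pred_contrib  <- predict(model, X, type = "contrib")   # per-feature contributions plus an intercept column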
#' @name predict.lgb.Booster
#' @title Predict method for LightGBM model
#' @description Predicted values based on class \code{lgb.Booster}
#' @details If the model object has been configured for fast single-row predictions through
#' \link{lgb.configure_fast_predict}, this function will use the prediction parameters
#' that were configured for it - as such, extra prediction parameters should not be passed
#' here, otherwise the configuration will be ignored and the slow route will be taken.
#' @inheritParams lgb_predict_shared_params
#' @param object Object of class \code{lgb.Booster}
#' @param newdata a \code{matrix} object, a \code{dgCMatrix}, a \code{dgRMatrix} object, a \code{dsparseVector} object,
#' or a character representing a path to a text file (CSV, TSV, or LibSVM).
#'
#' For sparse inputs, if predictions are only going to be made for a single row, it will be faster to
#' use CSR format, in which case the data may be passed as either a single-row CSR matrix (class
#' \code{dgRMatrix} from package \code{Matrix}) or as a sparse numeric vector (class
#' \code{dsparseVector} from package \code{Matrix}).
#'
#' If single-row predictions are going to be performed frequently, it is recommended to
#' pre-configure the model object for fast single-row sparse predictions through function
#' \link{lgb.configure_fast_predict}.
#' @param header only used for prediction from a text file. \code{TRUE} if the text file has a header
#' @param ... ignored
#' @return For prediction types that are meant to always return one output per observation (e.g. when predicting
#' \code{type="response"} or \code{type="raw"} on a binary classification or regression objective), will
@ -918,12 +990,124 @@ predict.lgb.Booster <- function(object,
return(pred)
}
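
To make the sparse single-row options above concrete, here is a small sketch, assuming the
regression booster "model" and numeric matrix "X" trained in the sketch after the shared
parameter documentation; all three calls below return the same predicted value:

library(Matrix)
x_dense <- X[11L, , drop = FALSE]
x_csr   <- as(x_dense, "RsparseMatrix")  # single-row CSR matrix (class dgRMatrix)
x_spv   <- as(x_dense, "sparseVector")   # sparse numeric vector (class dsparseVector)
predict(model, x_dense)
predict(model, x_csr)
predict(model, x_spv)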
#' @title Configure Fast Single-Row Predictions
#' @description Pre-configures a LightGBM model object to produce fast single-row predictions
#' for a given input data type, prediction type, and parameters.
#' @details Calling this function multiple times with different parameters might not override
#' the previous configuration and might trigger undefined behavior.
#'
#' Any saved configuration for fast predictions might be lost after making a single-row
#' prediction of a different type than what was configured (except for types "response" and
#' "class", which can be switched between each other at any time without losing the configuration).
#'
#' In some situations, setting a fast prediction configuration for one type of prediction
#' might cause the prediction function to keep using that configuration for single-row
#' predictions even if the requested type of prediction is different from what was configured.
#'
#' Note that this function will not accept argument \code{type="class"} - for such cases, one
#' can pass \code{type="response"} to this function and then \code{type="class"} to the
#' \code{predict} function - the fast configuration will not be lost or altered if the switch
#' is between "response" and "class".
#'
#' The configuration does not survive de-serializations, so it has to be generated
#' anew in every R process that is going to use it (e.g. if loading a model object
#' through \code{readRDS}, whatever configuration was there previously will be lost).
#'
#' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster}
#' will cause it to ignore the fast-predict configuration and take the slow route instead
#' (but be aware that an existing configuration might not always be overridden by supplying
#' different parameters or prediction type, so make sure to check that the output is what
#' was expected when a prediction is to be made on a single row for something different than
#' what is configured).
#'
#' Note that, if configuring a non-default prediction type (such as leaf indices),
#' then that type must also be passed in the call to \link{predict.lgb.Booster} in
#' order for it to use the configuration. This also applies for \code{start_iteration}
#' and \code{num_iteration}, but \bold{the \code{params} list must be empty} in the call to \code{predict}.
#'
#' Predictions about feature contributions do not allow a fast route for CSR inputs,
#' and as such, this function will produce an error if passing \code{csr=TRUE} and
#' \code{type = "contrib"} together.
#' @inheritParams lgb_predict_shared_params
#' @param model LightGBM model object (class \code{lgb.Booster}).
#'
#' \bold{The object will be modified in-place}.
#' @param csr Whether the prediction function is going to be called on sparse CSR inputs.
#' If \code{FALSE}, it will be assumed that predictions are going to be made on single-row
#' regular R matrices.
#' @return The same \code{model} that was passed as input, invisibly, with the desired
#' configuration stored inside it and available to be used in future calls to
#' \link{predict.lgb.Booster}.
#' @examples
#' \donttest{
#' library(lightgbm)
#' data(mtcars)
#' X <- as.matrix(mtcars[, -1L])
#' y <- mtcars[, 1L]
#' dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
#' params <- list(min_data_in_leaf = 2L)
#' model <- lgb.train(
#' params = params
#' , data = dtrain
#' , obj = "regression"
#' , nrounds = 5L
#' , verbose = -1L
#' )
#' lgb.configure_fast_predict(model)
#'
#' x_single <- X[11L, , drop = FALSE]
#' predict(model, x_single)
#'
#' # Will not use it if the prediction to be made
#' # is different from what was configured
#' predict(model, x_single, type = "leaf")
#' }
#' @export
lgb.configure_fast_predict <- function(model,
csr = FALSE,
start_iteration = NULL,
num_iteration = NULL,
type = "response",
params = list()) {
if (!lgb.is.Booster(x = model)) {
stop("lgb.configure_fast_predict: model should be an ", sQuote("lgb.Booster"))
}
if (type == "class") {
stop("type='class' is not supported for 'lgb.configure_fast_predict'. Use 'response' instead.")
}
rawscore <- FALSE
predleaf <- FALSE
predcontrib <- FALSE
if (type == "raw") {
rawscore <- TRUE
} else if (type == "leaf") {
predleaf <- TRUE
} else if (type == "contrib") {
predcontrib <- TRUE
}
if (csr && predcontrib) {
stop("'lgb.configure_fast_predict' does not support feature contributions for CSR data.")
}
model$configure_fast_predict(
csr = csr
, start_iteration = start_iteration
, num_iteration = num_iteration
, rawscore = rawscore
, predleaf = predleaf
, predcontrib = predcontrib
, params = params
)
return(invisible(model))
}
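
For sparse inputs, a minimal sketch of the CSR configuration, assuming a freshly trained
booster "model" and the matrix "X" from the example above (i.e. one that has not already
been configured for dense fast predictions):

library(Matrix)
lgb.configure_fast_predict(model, csr = TRUE)

x_single <- as(X[11L, , drop = FALSE], "sparseVector")
predict(model, x_single)                # matches the CSR configuration, takes the fast route

predict(model, X[11L, , drop = FALSE])  # dense input, so the regular single-row route is used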
#' @name print.lgb.Booster
#' @title Print method for LightGBM model
#' @description Show summary information about a LightGBM model object (same as \code{summary}).
#' @param x Object of class \code{lgb.Booster}
#' @param ... Not used
#' @return The same input `x`, returned as invisible.
#' @return The same input \code{x}, returned as invisible.
#' @export
print.lgb.Booster <- function(x, ...) {
# nolint start
@ -972,7 +1156,7 @@ print.lgb.Booster <- function(x, ...) {
#' @description Show summary information about a LightGBM model object (same as \code{print}).
#' @param object Object of class \code{lgb.Booster}
#' @param ... Not used
#' @return The same input `object`, returned as invisible.
#' @return The same input \code{object}, returned as invisible.
#' @export
summary.lgb.Booster <- function(object, ...) {
print(object)
@ -983,7 +1167,7 @@ summary.lgb.Booster <- function(object, ...) {
#' @description Load LightGBM takes in either a file path or model string.
#' If both are provided, Load will default to loading from file
#' @param filename path of model file
#' @param model_str a str containing the model (as a `character` or `raw` vector)
#' @param model_str a string containing the model (as a \code{character} or \code{raw} vector)
#'
#' @return lgb.Booster
#'

View file

@ -1,7 +1,7 @@
#' @importFrom methods is new
#' @importClassesFrom Matrix dsparseMatrix dsparseVector dgCMatrix dgRMatrix
#' @importFrom R6 R6Class
#' @importFrom utils read.delim
#' @importClassesFrom Matrix dsparseMatrix dsparseVector dgCMatrix dgRMatrix CsparseMatrix RsparseMatrix
Predictor <- R6::R6Class(
classname = "lgb.Predictor",
@ -27,7 +27,7 @@ Predictor <- R6::R6Class(
},
# Initialize will create a starter model
initialize = function(modelfile, params = list()) {
initialize = function(modelfile, params = list(), fast_predict_config = list()) {
private$params <- lgb.params2str(params = params)
handle <- NULL
@ -57,6 +57,8 @@ Predictor <- R6::R6Class(
}
private$fast_predict_config <- fast_predict_config
# Override class and store it
class(handle) <- "lgb.Booster.handle"
private$handle <- handle
@ -236,6 +238,9 @@ Predictor <- R6::R6Class(
# Not a file, we need to predict from R object
num_row <- nrow(data)
if (is.null(num_row)) {
num_row <- 1L
}
npred <- 0L
@ -262,20 +267,175 @@ Predictor <- R6::R6Class(
if (storage.mode(data) != "double") {
storage.mode(data) <- "double"
}
.Call(
LGBM_BoosterPredictForMat_R
, private$handle
, data
, as.integer(nrow(data))
, as.integer(ncol(data))
, as.integer(rawscore)
, as.integer(predleaf)
, as.integer(predcontrib)
, as.integer(start_iteration)
, as.integer(num_iteration)
, private$params
, preds
)
if (nrow(data) == 1L) {
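# Single-row dense input: use the pre-configured fast handle when the request
# matches the stored configuration, otherwise fall back to the generic
# single-row prediction routine.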
use_fast_config <- private$check_can_use_fast_predict_config(
csr = FALSE
, rawscore = rawscore
, predleaf = predleaf
, predcontrib = predcontrib
, start_iteration = start_iteration
, num_iteration = num_iteration
)
if (use_fast_config) {
.Call(
LGBM_BoosterPredictForMatSingleRowFast_R
, private$fast_predict_config$handle
, data
, preds
)
} else {
.Call(
LGBM_BoosterPredictForMatSingleRow_R
, private$handle
, data
, rawscore
, predleaf
, predcontrib
, start_iteration
, num_iteration
, private$params
, preds
)
}
} else {
.Call(
LGBM_BoosterPredictForMat_R
, private$handle
, data
, as.integer(nrow(data))
, as.integer(ncol(data))
, as.integer(rawscore)
, as.integer(predleaf)
, as.integer(predcontrib)
, as.integer(start_iteration)
, as.integer(num_iteration)
, private$params
, preds
)
}
} else if (inherits(data, "dsparseVector")) {
if (length(private$fast_predict_config)) {
ncols <- private$fast_predict_config$ncols
use_fast_config <- private$check_can_use_fast_predict_config(
csr = TRUE
, rawscore = rawscore
, predleaf = predleaf
, predcontrib = predcontrib
, start_iteration = start_iteration
, num_iteration = num_iteration
)
} else {
ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
use_fast_config <- FALSE
}
if (length(data) > ncols) {
stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
, ncols
, length(data)))
}
if (use_fast_config) {
.Call(
LGBM_BoosterPredictForCSRSingleRowFast_R
, private$fast_predict_config$handle
, data@i - 1L
, data@x
, preds
)
} else {
.Call(
LGBM_BoosterPredictForCSRSingleRow_R
, private$handle
, data@i - 1L
, data@x
, ncols
, as.integer(rawscore)
, as.integer(predleaf)
, as.integer(predcontrib)
, start_iteration
, num_iteration
, private$params
, preds
)
}
} else if (inherits(data, "dgRMatrix")) {
ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
if (ncol(data) > ncols) {
stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns."
, ncols
, ncol(data)))
}
if (nrow(data) == 1L) {
if (length(private$fast_predict_config)) {
ncols <- private$fast_predict_config$ncols
use_fast_config <- private$check_can_use_fast_predict_config(
csr = TRUE
, rawscore = rawscore
, predleaf = predleaf
, predcontrib = predcontrib
, start_iteration = start_iteration
, num_iteration = num_iteration
)
} else {
ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle)
use_fast_config <- FALSE
}
if (use_fast_config) {
.Call(
LGBM_BoosterPredictForCSRSingleRowFast_R
, private$fast_predict_config$handle
, data@j
, data@x
, preds
)
} else {
.Call(
LGBM_BoosterPredictForCSRSingleRow_R
, private$handle
, data@j
, data@x
, ncols
, as.integer(rawscore)
, as.integer(predleaf)
, as.integer(predcontrib)
, start_iteration
, num_iteration
, private$params
, preds
)
}
} else {
.Call(
LGBM_BoosterPredictForCSR_R
, private$handle
, data@p
, data@j
, data@x
, ncols
, as.integer(rawscore)
, as.integer(predleaf)
, as.integer(predcontrib)
, start_iteration
, num_iteration
, private$params
, preds
)
}
} else if (methods::is(data, "dgCMatrix")) {
if (length(data@p) > 2147483647L) {
@ -342,5 +502,36 @@ Predictor <- R6::R6Class(
handle = NULL
, need_free_handle = FALSE
, params = ""
, fast_predict_config = list()
, check_can_use_fast_predict_config = function(csr,
rawscore,
predleaf,
predcontrib,
start_iteration,
num_iteration) {
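# The stored fast-predict configuration is only used when the request matches it
# exactly: same input format (CSR vs. dense), same prediction type and iteration
# range, and no additional prediction parameters.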
if (!NROW(private$fast_predict_config)) {
return(FALSE)
}
if (lgb.is.null.handle(private$fast_predict_config$handle)) {
warning(paste0("Model had fast CSR predict configuration, but it is inactive."
, " Try re-generating it through 'lgb.configure_fast_predict'."))
return(FALSE)
}
if (isTRUE(csr) != private$fast_predict_config$csr) {
return(FALSE)
}
return(
private$params == "" &&
private$fast_predict_config$rawscore == rawscore &&
private$fast_predict_config$predleaf == predleaf &&
private$fast_predict_config$predcontrib == predcontrib &&
lgb.equal.or.both.null(private$fast_predict_config$start_iteration, start_iteration) &&
lgb.equal.or.both.null(private$fast_predict_config$num_iteration, num_iteration)
)
}
)
)

View file

@ -4,6 +4,10 @@
\code{saveRDS}, its underlying C++ object will be blank and needs to be restored to be able to use it. Such an
#' object is restored automatically when calling functions such as \code{predict}, but this function can be
#' used to forcibly restore it beforehand. Note that the object will be modified in-place.
#'
#' @details Be aware that fast single-row prediction configurations are not restored through this
#' function. If you wish to make fast single-row predictions using a \code{lgb.Booster} loaded this way,
#' call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object.
#' @param model \code{lgb.Booster} object which was de-serialized and whose underlying C++ object and R handle
#' need to be restored.
#'
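
A short sketch of the workflow described above, assuming a booster that was previously saved
with saveRDS() (the file name "model.rds" is illustrative only):

model <- readRDS("model.rds")
lgb.restore_handle(model)            # restores the underlying C++ object in-place
lgb.configure_fast_predict(model)    # the fast-predict configuration must be re-created here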

View file

@ -246,3 +246,17 @@ lgb.get.default.num.threads <- function() {
return(cores)
}
}
lgb.equal.or.both.null <- function(a, b) {
if (is.null(a)) {
if (!is.null(b)) {
return(FALSE)
}
return(TRUE)
} else {
if (is.null(b)) {
return(FALSE)
}
return(a == b)
}
}

View file

@ -0,0 +1,132 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{lgb.configure_fast_predict}
\alias{lgb.configure_fast_predict}
\title{Configure Fast Single-Row Predictions}
\usage{
lgb.configure_fast_predict(
model,
csr = FALSE,
start_iteration = NULL,
num_iteration = NULL,
type = "response",
params = list()
)
}
\arguments{
\item{model}{LightGBM model object (class \code{lgb.Booster}).

\bold{The object will be modified in-place}.}

\item{csr}{Whether the prediction function is going to be called on sparse CSR inputs.
If \code{FALSE}, it will be assumed that predictions are going to be made on single-row
regular R matrices.}

\item{start_iteration}{int or NULL, optional (default=NULL)
Start index of the iteration to predict.
If NULL or <= 0, starts from the first iteration.}

\item{num_iteration}{int or NULL, optional (default=NULL)
Limit number of iterations in the prediction.
If NULL, if the best iteration exists and start_iteration is NULL or <= 0, the
best iteration is used; otherwise, all iterations from start_iteration are used.
If <= 0, all iterations from start_iteration are used (no limits).}

\item{type}{Type of prediction to output. Allowed types are:\itemize{
\item \code{"response"}: will output the predicted score according to the objective function being
optimized (depending on the link function that the objective uses), after applying any necessary
transformations - for example, for \code{objective="binary"}, it will output class probabilities.
\item \code{"class"}: for classification objectives, will output the class with the highest predicted
probability. For other objectives, will output the same as "response". Note that \code{"class"} is
not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function
for more details).
\item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
results) from which the "response" number is produced for a given objective function - for example,
for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as
"regression", since no transformation is applied, the output will be the same as for "response".
\item \code{"leaf"}: will output the index of the terminal node / leaf at which each observations falls
in each tree in the model, outputted as integers, with one column per tree.
\item \code{"contrib"}: will return the per-feature contributions for each prediction, including an
intercept (each feature will produce one column).
}
Note that, if using custom objectives, types "class" and "response" will not be available and will
default to using "raw" instead.}

\item{params}{a list of additional named parameters. See
\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
the "Predict Parameters" section of the documentation} for a list of parameters and
valid values. Where these conflict with the values of keyword arguments to this function,
the values in \code{params} take precedence.}
}
\value{
The same \code{model} that was passed as input, invisibly, with the desired
configuration stored inside it and available to be used in future calls to
\link{predict.lgb.Booster}.
}
\description{
Pre-configures a LightGBM model object to produce fast single-row predictions
for a given input data type, prediction type, and parameters.
}
\details{
Calling this function multiple times with different parameters might not override
the previous configuration and might trigger undefined behavior.

Any saved configuration for fast predictions might be lost after making a single-row
prediction of a different type than what was configured (except for types "response" and
"class", which can be switched between each other at any time without losing the configuration).

In some situations, setting a fast prediction configuration for one type of prediction
might cause the prediction function to keep using that configuration for single-row
predictions even if the requested type of prediction is different from what was configured.

Note that this function will not accept argument \code{type="class"} - for such cases, one
can pass \code{type="response"} to this function and then \code{type="class"} to the
\code{predict} function - the fast configuration will not be lost or altered if the switch
is between "response" and "class".

The configuration does not survive de-serializations, so it has to be generated
anew in every R process that is going to use it (e.g. if loading a model object
through \code{readRDS}, whatever configuration was there previously will be lost).

Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster}
will cause it to ignore the fast-predict configuration and take the slow route instead
(but be aware that an existing configuration might not always be overridden by supplying
different parameters or prediction type, so make sure to check that the output is what
was expected when a prediction is to be made on a single row for something different than
what is configured).

Note that, if configuring a non-default prediction type (such as leaf indices),
then that type must also be passed in the call to \link{predict.lgb.Booster} in
order for it to use the configuration. This also applies for \code{start_iteration}
and \code{num_iteration}, but \bold{the \code{params} list must be empty} in the call to \code{predict}.

Predictions about feature contributions do not allow a fast route for CSR inputs,
and as such, this function will produce an error if passing \code{csr=TRUE} and
\code{type = "contrib"} together.
}
\examples{
\donttest{
library(lightgbm)
data(mtcars)
X <- as.matrix(mtcars[, -1L])
y <- mtcars[, 1L]
dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
params <- list(min_data_in_leaf = 2L)
model <- lgb.train(
params = params
, data = dtrain
, obj = "regression"
, nrounds = 5L
, verbose = -1L
)
lgb.configure_fast_predict(model)
x_single <- X[11L, , drop = FALSE]
predict(model, x_single)
# Will not use it if the prediction to be made
# is different from what was configured
predict(model, x_single, type = "leaf")
}
}

View file

@ -9,7 +9,7 @@ lgb.load(filename = NULL, model_str = NULL)
\arguments{
\item{filename}{path of model file}
\item{model_str}{a str containing the model (as a `character` or `raw` vector)}
\item{model_str}{a string containing the model (as a \code{character} or \code{raw} vector)}
}
\value{
lgb.Booster

View file

@ -19,6 +19,11 @@ After a LightGBM model object is de-serialized through functions such as \code{s
object is restored automatically when calling functions such as \code{predict}, but this function can be
used to forcibly restore it beforehand. Note that the object will be modified in-place.
}
\details{
Be aware that fast single-row prediction configurations are not restored through this
function. If you wish to make fast single-row predictions using a \code{lgb.Booster} loaded this way,
call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object.
}
\examples{
library(lightgbm)
data("agaricus.train")

View file

@ -18,15 +18,26 @@
\arguments{
\item{object}{Object of class \code{lgb.Booster}}
\item{newdata}{a \code{matrix} object, a \code{dgCMatrix} object or
a character representing a path to a text file (CSV, TSV, or LibSVM)}
\item{newdata}{a \code{matrix} object, a \code{dgCMatrix}, a \code{dgRMatrix} object, a \code{dsparseVector} object,
or a character representing a path to a text file (CSV, TSV, or LibSVM).

For sparse inputs, if predictions are only going to be made for a single row, it will be faster to
use CSR format, in which case the data may be passed as either a single-row CSR matrix (class
\code{dgRMatrix} from package \code{Matrix}) or as a sparse numeric vector (class
\code{dsparseVector} from package \code{Matrix}).

If single-row predictions are going to be performed frequently, it is recommended to
pre-configure the model object for fast single-row sparse predictions through function
\link{lgb.configure_fast_predict}.}
\item{type}{Type of prediction to output. Allowed types are:\itemize{
\item \code{"response"}: will output the predicted score according to the objective function being
optimized (depending on the link function that the objective uses), after applying any necessary
transformations - for example, for \code{objective="binary"}, it will output class probabilities.
\item \code{"class"}: for classification objectives, will output the class with the highest predicted
probability. For other objectives, will output the same as "response".
probability. For other objectives, will output the same as "response". Note that \code{"class"} is
not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function
for more details).
\item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
results) from which the "response" number is produced for a given objective function - for example,
for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as
@ -85,6 +96,12 @@ For prediction types that are meant to always return one output per observation
\description{
Predicted values based on class \code{lgb.Booster}
}
\details{
If the model object has been configured for fast single-row predictions through
\link{lgb.configure_fast_predict}, this function will use the prediction parameters
that were configured for it - as such, extra prediction parameters should not be passed
here, otherwise the configuration will be ignored and the slow route will be taken.
}
\examples{
\donttest{
data(agaricus.train, package = "lightgbm")

View file

@ -12,7 +12,7 @@
\item{...}{Not used}
}
\value{
The same input `x`, returned as invisible.
The same input \code{x}, returned as invisible.
}
\description{
Show summary information about a LightGBM model object (same as \code{summary}).

View file

@ -12,7 +12,7 @@
\item{...}{Not used}
}
\value{
The same input `object`, returned as invisible.
The same input \code{object}, returned as invisible.
}
\description{
Show summary information about a LightGBM model object (same as \code{print}).

View file

@ -18,6 +18,7 @@
#include <string>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <utility>
@ -839,6 +840,109 @@ SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
R_API_END();
}
SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
SEXP indptr,
SEXP indices,
SEXP data,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter,
SEXP out_result) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForCSR(R_ExternalPtrAddr(handle),
INTEGER(indptr), C_API_DTYPE_INT32, INTEGER(indices),
REAL(data), C_API_DTYPE_FLOAT64,
Rf_xlength(indptr), Rf_xlength(data), Rf_asInteger(ncols),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, REAL(out_result)));
UNPROTECT(1);
return R_NilValue;
R_API_END();
}
SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
SEXP indices,
SEXP data,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter,
SEXP out_result) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
int nnz = static_cast<int>(Rf_xlength(data));
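// Index pointer of a one-row CSR matrix: the single row spans entries [0, nnz)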
const int indptr[] = {0, nnz};
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForCSRSingleRow(R_ExternalPtrAddr(handle),
indptr, C_API_DTYPE_INT32, INTEGER(indices),
REAL(data), C_API_DTYPE_FLOAT64,
2, nnz, Rf_asInteger(ncols),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, REAL(out_result)));
UNPROTECT(1);
return R_NilValue;
R_API_END();
}
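// Finalizer for the external pointers returned by the FastInit wrappers below:
// releases the fast-predict configuration when the R handle is garbage-collected.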
void LGBM_FastConfigFree_wrapped(SEXP handle) {
LGBM_FastConfigFree(static_cast<FastConfigHandle*>(R_ExternalPtrAddr(handle)));
}
SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
FastConfigHandle out_fastConfig;
CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFastInit(R_ExternalPtrAddr(handle),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
C_API_DTYPE_FLOAT64, Rf_asInteger(ncols),
parameter_ptr, &out_fastConfig));
R_SetExternalPtrAddr(ret, out_fastConfig);
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
UNPROTECT(2);
return ret;
R_API_END();
}
SEXP LGBM_BoosterPredictForCSRSingleRowFast_R(SEXP handle_fastConfig,
SEXP indices,
SEXP data,
SEXP out_result) {
R_API_BEGIN();
int nnz = static_cast<int>(Rf_xlength(data));
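// Same one-row CSR index pointer as in LGBM_BoosterPredictForCSRSingleRow_R above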
const int indptr[] = {0, nnz};
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFast(R_ExternalPtrAddr(handle_fastConfig),
indptr, C_API_DTYPE_INT32, INTEGER(indices),
REAL(data),
2, nnz,
&out_len, REAL(out_result)));
return R_NilValue;
R_API_END();
}
SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
SEXP data,
SEXP num_row,
@ -937,6 +1041,66 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
R_API_END();
}
SEXP LGBM_BoosterPredictForMatSingleRow_R(SEXP handle,
SEXP data,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter,
SEXP out_result) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
double* ptr_ret = REAL(out_result);
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForMatSingleRow(R_ExternalPtrAddr(handle),
REAL(data), C_API_DTYPE_FLOAT64, Rf_xlength(data), 1,
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, ptr_ret));
UNPROTECT(1);
return R_NilValue;
R_API_END();
}
SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
FastConfigHandle out_fastConfig;
CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFastInit(R_ExternalPtrAddr(handle),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
C_API_DTYPE_FLOAT64, Rf_asInteger(ncols),
parameter_ptr, &out_fastConfig));
R_SetExternalPtrAddr(ret, out_fastConfig);
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
UNPROTECT(2);
return ret;
R_API_END();
}
SEXP LGBM_BoosterPredictForMatSingleRowFast_R(SEXP handle_fastConfig,
SEXP data,
SEXP out_result) {
R_API_BEGIN();
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFast(R_ExternalPtrAddr(handle_fastConfig),
REAL(data), &out_len, REAL(out_result)));
return R_NilValue;
R_API_END();
}
SEXP LGBM_BoosterSaveModel_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type,
@ -1021,52 +1185,59 @@ SEXP LGBM_DumpParamAliases_R() {
// .Call() calls
static const R_CallMethodDef CallEntries[] = {
{"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1},
{"LGBM_DatasetCreateFromFile_R" , (DL_FUNC) &LGBM_DatasetCreateFromFile_R , 3},
{"LGBM_DatasetCreateFromCSC_R" , (DL_FUNC) &LGBM_DatasetCreateFromCSC_R , 8},
{"LGBM_DatasetCreateFromMat_R" , (DL_FUNC) &LGBM_DatasetCreateFromMat_R , 5},
{"LGBM_DatasetGetSubset_R" , (DL_FUNC) &LGBM_DatasetGetSubset_R , 4},
{"LGBM_DatasetSetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetSetFeatureNames_R , 2},
{"LGBM_DatasetGetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNames_R , 1},
{"LGBM_DatasetSaveBinary_R" , (DL_FUNC) &LGBM_DatasetSaveBinary_R , 2},
{"LGBM_DatasetFree_R" , (DL_FUNC) &LGBM_DatasetFree_R , 1},
{"LGBM_DatasetSetField_R" , (DL_FUNC) &LGBM_DatasetSetField_R , 4},
{"LGBM_DatasetGetFieldSize_R" , (DL_FUNC) &LGBM_DatasetGetFieldSize_R , 3},
{"LGBM_DatasetGetField_R" , (DL_FUNC) &LGBM_DatasetGetField_R , 3},
{"LGBM_DatasetUpdateParamChecking_R", (DL_FUNC) &LGBM_DatasetUpdateParamChecking_R, 2},
{"LGBM_DatasetGetNumData_R" , (DL_FUNC) &LGBM_DatasetGetNumData_R , 2},
{"LGBM_DatasetGetNumFeature_R" , (DL_FUNC) &LGBM_DatasetGetNumFeature_R , 2},
{"LGBM_DatasetGetFeatureNumBin_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNumBin_R , 3},
{"LGBM_BoosterCreate_R" , (DL_FUNC) &LGBM_BoosterCreate_R , 2},
{"LGBM_BoosterFree_R" , (DL_FUNC) &LGBM_BoosterFree_R , 1},
{"LGBM_BoosterCreateFromModelfile_R", (DL_FUNC) &LGBM_BoosterCreateFromModelfile_R, 1},
{"LGBM_BoosterLoadModelFromString_R", (DL_FUNC) &LGBM_BoosterLoadModelFromString_R, 1},
{"LGBM_BoosterMerge_R" , (DL_FUNC) &LGBM_BoosterMerge_R , 2},
{"LGBM_BoosterAddValidData_R" , (DL_FUNC) &LGBM_BoosterAddValidData_R , 2},
{"LGBM_BoosterResetTrainingData_R" , (DL_FUNC) &LGBM_BoosterResetTrainingData_R , 2},
{"LGBM_BoosterResetParameter_R" , (DL_FUNC) &LGBM_BoosterResetParameter_R , 2},
{"LGBM_BoosterGetNumClasses_R" , (DL_FUNC) &LGBM_BoosterGetNumClasses_R , 2},
{"LGBM_BoosterGetNumFeature_R" , (DL_FUNC) &LGBM_BoosterGetNumFeature_R , 1},
{"LGBM_BoosterUpdateOneIter_R" , (DL_FUNC) &LGBM_BoosterUpdateOneIter_R , 1},
{"LGBM_BoosterUpdateOneIterCustom_R", (DL_FUNC) &LGBM_BoosterUpdateOneIterCustom_R, 4},
{"LGBM_BoosterRollbackOneIter_R" , (DL_FUNC) &LGBM_BoosterRollbackOneIter_R , 1},
{"LGBM_BoosterGetCurrentIteration_R", (DL_FUNC) &LGBM_BoosterGetCurrentIteration_R, 2},
{"LGBM_BoosterGetUpperBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetUpperBoundValue_R , 2},
{"LGBM_BoosterGetLowerBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetLowerBoundValue_R , 2},
{"LGBM_BoosterGetEvalNames_R" , (DL_FUNC) &LGBM_BoosterGetEvalNames_R , 1},
{"LGBM_BoosterGetEval_R" , (DL_FUNC) &LGBM_BoosterGetEval_R , 3},
{"LGBM_BoosterGetNumPredict_R" , (DL_FUNC) &LGBM_BoosterGetNumPredict_R , 3},
{"LGBM_BoosterGetPredict_R" , (DL_FUNC) &LGBM_BoosterGetPredict_R , 3},
{"LGBM_BoosterPredictForFile_R" , (DL_FUNC) &LGBM_BoosterPredictForFile_R , 10},
{"LGBM_BoosterCalcNumPredict_R" , (DL_FUNC) &LGBM_BoosterCalcNumPredict_R , 8},
{"LGBM_BoosterPredictForCSC_R" , (DL_FUNC) &LGBM_BoosterPredictForCSC_R , 14},
{"LGBM_BoosterPredictForMat_R" , (DL_FUNC) &LGBM_BoosterPredictForMat_R , 11},
{"LGBM_BoosterPredictSparseOutput_R", (DL_FUNC) &LGBM_BoosterPredictSparseOutput_R, 10},
{"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4},
{"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3},
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3},
{"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0},
{"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0},
{"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1},
{"LGBM_DatasetCreateFromFile_R" , (DL_FUNC) &LGBM_DatasetCreateFromFile_R , 3},
{"LGBM_DatasetCreateFromCSC_R" , (DL_FUNC) &LGBM_DatasetCreateFromCSC_R , 8},
{"LGBM_DatasetCreateFromMat_R" , (DL_FUNC) &LGBM_DatasetCreateFromMat_R , 5},
{"LGBM_DatasetGetSubset_R" , (DL_FUNC) &LGBM_DatasetGetSubset_R , 4},
{"LGBM_DatasetSetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetSetFeatureNames_R , 2},
{"LGBM_DatasetGetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNames_R , 1},
{"LGBM_DatasetSaveBinary_R" , (DL_FUNC) &LGBM_DatasetSaveBinary_R , 2},
{"LGBM_DatasetFree_R" , (DL_FUNC) &LGBM_DatasetFree_R , 1},
{"LGBM_DatasetSetField_R" , (DL_FUNC) &LGBM_DatasetSetField_R , 4},
{"LGBM_DatasetGetFieldSize_R" , (DL_FUNC) &LGBM_DatasetGetFieldSize_R , 3},
{"LGBM_DatasetGetField_R" , (DL_FUNC) &LGBM_DatasetGetField_R , 3},
{"LGBM_DatasetUpdateParamChecking_R" , (DL_FUNC) &LGBM_DatasetUpdateParamChecking_R , 2},
{"LGBM_DatasetGetNumData_R" , (DL_FUNC) &LGBM_DatasetGetNumData_R , 2},
{"LGBM_DatasetGetNumFeature_R" , (DL_FUNC) &LGBM_DatasetGetNumFeature_R , 2},
{"LGBM_DatasetGetFeatureNumBin_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNumBin_R , 3},
{"LGBM_BoosterCreate_R" , (DL_FUNC) &LGBM_BoosterCreate_R , 2},
{"LGBM_BoosterFree_R" , (DL_FUNC) &LGBM_BoosterFree_R , 1},
{"LGBM_BoosterCreateFromModelfile_R" , (DL_FUNC) &LGBM_BoosterCreateFromModelfile_R , 1},
{"LGBM_BoosterLoadModelFromString_R" , (DL_FUNC) &LGBM_BoosterLoadModelFromString_R , 1},
{"LGBM_BoosterMerge_R" , (DL_FUNC) &LGBM_BoosterMerge_R , 2},
{"LGBM_BoosterAddValidData_R" , (DL_FUNC) &LGBM_BoosterAddValidData_R , 2},
{"LGBM_BoosterResetTrainingData_R" , (DL_FUNC) &LGBM_BoosterResetTrainingData_R , 2},
{"LGBM_BoosterResetParameter_R" , (DL_FUNC) &LGBM_BoosterResetParameter_R , 2},
{"LGBM_BoosterGetNumClasses_R" , (DL_FUNC) &LGBM_BoosterGetNumClasses_R , 2},
{"LGBM_BoosterGetNumFeature_R" , (DL_FUNC) &LGBM_BoosterGetNumFeature_R , 1},
{"LGBM_BoosterUpdateOneIter_R" , (DL_FUNC) &LGBM_BoosterUpdateOneIter_R , 1},
{"LGBM_BoosterUpdateOneIterCustom_R" , (DL_FUNC) &LGBM_BoosterUpdateOneIterCustom_R , 4},
{"LGBM_BoosterRollbackOneIter_R" , (DL_FUNC) &LGBM_BoosterRollbackOneIter_R , 1},
{"LGBM_BoosterGetCurrentIteration_R" , (DL_FUNC) &LGBM_BoosterGetCurrentIteration_R , 2},
{"LGBM_BoosterGetUpperBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetUpperBoundValue_R , 2},
{"LGBM_BoosterGetLowerBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetLowerBoundValue_R , 2},
{"LGBM_BoosterGetEvalNames_R" , (DL_FUNC) &LGBM_BoosterGetEvalNames_R , 1},
{"LGBM_BoosterGetEval_R" , (DL_FUNC) &LGBM_BoosterGetEval_R , 3},
{"LGBM_BoosterGetNumPredict_R" , (DL_FUNC) &LGBM_BoosterGetNumPredict_R , 3},
{"LGBM_BoosterGetPredict_R" , (DL_FUNC) &LGBM_BoosterGetPredict_R , 3},
{"LGBM_BoosterPredictForFile_R" , (DL_FUNC) &LGBM_BoosterPredictForFile_R , 10},
{"LGBM_BoosterCalcNumPredict_R" , (DL_FUNC) &LGBM_BoosterCalcNumPredict_R , 8},
{"LGBM_BoosterPredictForCSC_R" , (DL_FUNC) &LGBM_BoosterPredictForCSC_R , 14},
{"LGBM_BoosterPredictForCSR_R" , (DL_FUNC) &LGBM_BoosterPredictForCSR_R , 12},
{"LGBM_BoosterPredictForCSRSingleRow_R" , (DL_FUNC) &LGBM_BoosterPredictForCSRSingleRow_R , 11},
{"LGBM_BoosterPredictForCSRSingleRowFastInit_R", (DL_FUNC) &LGBM_BoosterPredictForCSRSingleRowFastInit_R, 8},
{"LGBM_BoosterPredictForCSRSingleRowFast_R" , (DL_FUNC) &LGBM_BoosterPredictForCSRSingleRowFast_R , 4},
{"LGBM_BoosterPredictSparseOutput_R" , (DL_FUNC) &LGBM_BoosterPredictSparseOutput_R , 10},
{"LGBM_BoosterPredictForMat_R" , (DL_FUNC) &LGBM_BoosterPredictForMat_R , 11},
{"LGBM_BoosterPredictForMatSingleRow_R" , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRow_R , 9},
{"LGBM_BoosterPredictForMatSingleRowFastInit_R", (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFastInit_R, 8},
{"LGBM_BoosterPredictForMatSingleRowFast_R" , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFast_R , 3},
{"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4},
{"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3},
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3},
{"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0},
{"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0},
{NULL, NULL, 0}
};

View file

@ -545,10 +545,12 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSC_R(
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* for feature contributions, its length is equal to num_data * num_class * (num_features + 1)
* \param handle Booster handle
* \param data pointer to the data space
* \param num_row number of rows
* \param num_col number of columns
* \param indptr array with the index pointer of the data in CSR format
* \param indices array with the non-zero indices of the data in CSR format
* \param data array with the non-zero values of the data in CSR format
* \param ncols number of columns in the data
* \param is_rawscore 1 to get raw predictions, before transformations like
* converting to probabilities, 0 otherwise
* \param is_leafidx 1 to get record of which leaf in each tree
@ -560,11 +562,12 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSC_R(
* \param out_result prediction result
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMat_R(
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSR_R(
SEXP handle,
SEXP indptr,
SEXP indices,
SEXP data,
SEXP num_row,
SEXP num_col,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
@ -574,6 +577,84 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMat_R(
SEXP out_result
);
/*!
* \brief make prediction for a single row of data
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class
* for leaf index, its length is equal to num_class * num_iteration
* for feature contributions, its length is equal to num_class * (num_features + 1)
* \param handle Booster handle
* \param indices array corresponding to the indices of the columns with non-zero values of the row to predict on
* \param data array corresponding to the non-zero values of the row to predict on
* \param is_rawscore 1 to get raw predictions, before transformations like
* converting to probabilities, 0 otherwise
* \param is_leafidx 1 to get record of which leaf in each tree
* observations fell into, 0 otherwise
* \param is_predcontrib 1 to get feature contributions, 0 otherwise
* \param start_iteration Start index of the iteration to predict
* \param num_iteration number of iterations for prediction, <= 0 means no limit
* \param parameter additional parameters
* \param out_result prediction result
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSRSingleRow_R(
SEXP handle,
SEXP indices,
SEXP data,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter,
SEXP out_result
);
/*!
* \brief Initialize and return a fast configuration handle to use with ``LGBM_BoosterPredictForCSRSingleRowFast_R``.
* \param handle Booster handle
* \param num_col number of columns in the data
* \param is_rawscore 1 to get raw predictions, before transformations like
* converting to probabilities, 0 otherwise
* \param is_leafidx 1 to get record of which leaf in each tree
* observations fell into, 0 otherwise
* \param is_predcontrib 1 to get feature contributions, 0 otherwise
* \param start_iteration Start index of the iteration to predict
* \param num_iteration number of iterations for prediction, <= 0 means no limit
* \param parameter additional parameters
* \return Fast configuration handle
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(
SEXP handle,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter
);
/*!
* \brief make prediction for a single row of data
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class
* for leaf index, its length is equal to num_class * num_iteration
* for feature contributions, its length is equal to num_class * (num_features + 1)
* \param handle_fastConfig Fast configuration handle
* \param indices array corresponding to the indices of the columns with non-zero values of the row to predict on
* \param data array corresponding to the non-zero values of the row to predict on
* \param out_result prediction result
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSRSingleRowFast_R(
SEXP handle_fastConfig,
SEXP indices,
SEXP data,
SEXP out_result
);
/*!
* \brief make feature contribution prediction for a new Dataset
* \param handle Booster handle
@ -603,6 +684,113 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictSparseOutput_R(
SEXP parameter
);
/*!
* \brief make prediction for a new Dataset
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle Booster handle
* \param data pointer to the data space
* \param num_row number of rows
* \param num_col number of columns
* \param is_rawscore 1 to get raw predictions, before transformations like
* converting to probabilities, 0 otherwise
* \param is_leafidx 1 to get record of which leaf in each tree
* observations fell into, 0 otherwise
* \param is_predcontrib 1 to get feature contributions, 0 otherwise
* \param start_iteration Start index of the iteration to predict
* \param num_iteration number of iterations for prediction, <= 0 means no limit
* \param parameter additional parameters
* \param out_result prediction result
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMat_R(
SEXP handle,
SEXP data,
SEXP num_row,
SEXP num_col,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter,
SEXP out_result
);
/*!
* \brief make prediction for a single row of data
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class
* for leaf index, its length is equal to num_class * num_iteration
* for feature contributions, its length is equal to num_class * (num_features + 1)
* \param handle Booster handle
* \param data array corresponding to the row to predict on
* \param is_rawscore 1 to get raw predictions, before transformations like
* converting to probabilities, 0 otherwise
* \param is_leafidx 1 to get record of which leaf in each tree
* observations fell into, 0 otherwise
* \param is_predcontrib 1 to get feature contributions, 0 otherwise
* \param start_iteration Start index of the iteration to predict
* \param num_iteration number of iterations for prediction, <= 0 means no limit
* \param parameter additional parameters
* \param out_result prediction result
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRow_R(
SEXP handle,
SEXP data,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter,
SEXP out_result
);
/*!
* \brief Initialize and return a fast configuration handle to use with ``LGBM_BoosterPredictForMatSingleRowFast_R``.
* \param handle Booster handle
* \param num_col number of columns in the data
* \param is_rawscore 1 to get raw predictions, before transformations like
* converting to probabilities, 0 otherwise
* \param is_leafidx 1 to get record of which leaf in each tree
* observations fell into, 0 otherwise
* \param is_predcontrib 1 to get feature contributions, 0 otherwise
* \param start_iteration Start index of the iteration to predict
* \param num_iteration number of iterations for prediction, <= 0 means no limit
* \param parameter additional parameters
* \return Fast configuration handle
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(
SEXP handle,
SEXP ncols,
SEXP is_rawscore,
SEXP is_leafidx,
SEXP is_predcontrib,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter
);
/*!
* \brief make prediction for a single row of data
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class
* for leaf index, its length is equal to num_class * num_iteration
* for feature contributions, its length is equal to num_class * (num_features + 1)
* \param handle_fastConfig Fast configuration handle
* \param data array corresponding to the row to predict on
* \param out_result prediction result
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRowFast_R(
SEXP handle_fastConfig,
SEXP data,
SEXP out_result
);
/*!
* \brief save model into file
* \param handle Booster handle

View file

@ -528,6 +528,129 @@ test_that("predictions for multiclass classification are returned as matrix", {
expect_equal(ncol(pred), 3L)
})
test_that("Single-row predictions are identical to multi-row ones", {
data(mtcars)
X <- as.matrix(mtcars[, -1L])
y <- mtcars[, 1L]
dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
params <- list(min_data_in_leaf = 2L)
model <- lgb.train(
params = params
, data = dtrain
, obj = "regression"
, nrounds = 5L
, verbose = -1L
)
x1 <- X[1L, , drop = FALSE]
x11 <- X[11L, , drop = FALSE]
x1_spv <- as(x1, "sparseVector")
x11_spv <- as(x11, "sparseVector")
x1_csr <- as(x1, "RsparseMatrix")
x11_csr <- as(x11, "RsparseMatrix")
pred_all <- predict(model, X)
pred1_wo_config <- predict(model, x1)
pred11_wo_config <- predict(model, x11)
pred1_spv_wo_config <- predict(model, x1_spv)
pred11_spv_wo_config <- predict(model, x11_spv)
pred1_csr_wo_config <- predict(model, x1_csr)
pred11_csr_wo_config <- predict(model, x11_csr)
lgb.configure_fast_predict(model)
pred1_w_config <- predict(model, x1)
pred11_w_config <- predict(model, x11)
model <- lgb.train(
params = params
, data = dtrain
, obj = "regression"
, nrounds = 5L
, verbose = -1L
)
lgb.configure_fast_predict(model, csr = TRUE)
pred1_spv_w_config <- predict(model, x1_spv)
pred11_spv_w_config <- predict(model, x11_spv)
pred1_csr_w_config <- predict(model, x1_csr)
pred11_csr_w_config <- predict(model, x11_csr)
expect_equal(pred1_wo_config, pred_all[1L])
expect_equal(pred11_wo_config, pred_all[11L])
expect_equal(pred1_spv_wo_config, unname(pred_all[1L]))
expect_equal(pred11_spv_wo_config, unname(pred_all[11L]))
expect_equal(pred1_csr_wo_config, pred_all[1L])
expect_equal(pred11_csr_wo_config, pred_all[11L])
expect_equal(pred1_w_config, pred_all[1L])
expect_equal(pred11_w_config, pred_all[11L])
expect_equal(pred1_spv_w_config, unname(pred_all[1L]))
expect_equal(pred11_spv_w_config, unname(pred_all[11L]))
expect_equal(pred1_csr_w_config, pred_all[1L])
expect_equal(pred11_csr_w_config, pred_all[11L])
})
test_that("Fast-predict configuration accepts non-default prediction types", {
data(mtcars)
X <- as.matrix(mtcars[, -1L])
y <- mtcars[, 1L]
dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
params <- list(min_data_in_leaf = 2L)
model <- lgb.train(
params = params
, data = dtrain
, obj = "regression"
, nrounds = 5L
, verbose = -1L
)
x1 <- X[1L, , drop = FALSE]
x11 <- X[11L, , drop = FALSE]
pred_all <- predict(model, X, type = "leaf")
pred1_wo_config <- predict(model, x1, type = "leaf")
pred11_wo_config <- predict(model, x11, type = "leaf")
expect_equal(pred1_wo_config, pred_all[1L, , drop = FALSE])
expect_equal(pred11_wo_config, pred_all[11L, , drop = FALSE])
lgb.configure_fast_predict(model, type = "leaf")
pred1_w_config <- predict(model, x1, type = "leaf")
pred11_w_config <- predict(model, x11, type = "leaf")
expect_equal(pred1_w_config, pred_all[1L, , drop = FALSE])
expect_equal(pred11_w_config, pred_all[11L, , drop = FALSE])
})
test_that("Fast-predict configuration does not block other prediction types", {
data(mtcars)
X <- as.matrix(mtcars[, -1L])
y <- mtcars[, 1L]
dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
params <- list(min_data_in_leaf = 2L)
model <- lgb.train(
params = params
, data = dtrain
, obj = "regression"
, nrounds = 5L
, verbose = -1L
)
x1 <- X[1L, , drop = FALSE]
x11 <- X[11L, , drop = FALSE]
pred_all <- predict(model, X)
pred_all_leaf <- predict(model, X, type = "leaf")
lgb.configure_fast_predict(model)
pred1_w_config <- predict(model, x1)
pred11_w_config <- predict(model, x11)
pred1_leaf_w_config <- predict(model, x1, type = "leaf")
pred11_leaf_w_config <- predict(model, x11, type = "leaf")
expect_equal(pred1_w_config, pred_all[1L])
expect_equal(pred11_w_config, pred_all[11L])
expect_equal(pred1_leaf_w_config, pred_all_leaf[1L, , drop = FALSE])
expect_equal(pred11_leaf_w_config, pred_all_leaf[11L, , drop = FALSE])
})
test_that("predict type='class' returns predicted class for classification objectives", {
data(agaricus.train, package = "lightgbm")
X <- as.matrix(agaricus.train$data)

View file

@ -135,3 +135,15 @@ test_that("lgb.check.wrapper_param() prefers alias to keyword arg", {
expect_equal(params2[["num_iterations"]], num_tree)
expect_identical(params2, list(num_iterations = num_tree))
})
test_that("lgb.equal.or.both.null produces expected results", {
expect_true(lgb.equal.or.both.null(NULL, NULL))
expect_false(lgb.equal.or.both.null(1.0, NULL))
expect_false(lgb.equal.or.both.null(NULL, 1.0))
expect_true(lgb.equal.or.both.null(1.0, 1.0))
expect_true(lgb.equal.or.both.null(1.0, 1L))
expect_false(lgb.equal.or.both.null(NA, NULL))
expect_false(lgb.equal.or.both.null(NULL, NA))
expect_false(lgb.equal.or.both.null(10.0, 1L))
expect_true(lgb.equal.or.both.null(0L, 0L))
})