[R-package] [c++] add tighter multithreading control, avoid global OpenMP side effects (fixes #4705, fixes #5102) (#6226)

This commit is contained in:
James Lamb 2023-12-07 17:03:16 -06:00 коммит произвёл GitHub
Родитель e797985227
Коммит 1548b42bac
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
58 изменённых файлов: 428 добавлений и 20 удалений

Просмотреть файл

@ -30,8 +30,7 @@ get_omp_pragmas_without_num_threads() {
--include='*.h' \
--include='*.hpp' \
'pragma omp parallel' \
| grep -v ' num_threads' \
| grep -v 'openmp_wrapper.h'
| grep -v ' num_threads'
}
PROBLEMATIC_LINES=$(
get_omp_pragmas_without_num_threads

Просмотреть файл

@ -432,6 +432,7 @@ file(
src/objective/*.cpp
src/network/*.cpp
src/treelearner/*.cpp
src/utils/*.cpp
if(USE_CUDA)
src/treelearner/*.cu
src/boosting/cuda/*.cpp

Просмотреть файл

@ -9,6 +9,7 @@ S3method(print,lgb.Booster)
S3method(set_field,lgb.Dataset)
S3method(slice,lgb.Dataset)
S3method(summary,lgb.Booster)
export(getLGBMthreads)
export(get_field)
export(lgb.Dataset)
export(lgb.Dataset.construct)
@ -35,6 +36,7 @@ export(lgb.train)
export(lightgbm)
export(readRDS.lgb.Booster)
export(saveRDS.lgb.Booster)
export(setLGBMthreads)
export(set_field)
export(slice)
import(methods)

Просмотреть файл

@ -917,6 +917,8 @@ NULL
#' the factor levels not being present in the output.
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1082,6 +1084,8 @@ predict.lgb.Booster <- function(object,
#' \link{predict.lgb.Booster}.
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' library(lightgbm)
#' data(mtcars)
#' X <- as.matrix(mtcars[, -1L])
@ -1224,6 +1228,8 @@ summary.lgb.Booster <- function(object, ...) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1289,6 +1295,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
@ -1346,6 +1354,8 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1396,6 +1406,8 @@ lgb.dump <- function(booster, num_iteration = NULL) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' # train a regression model
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train

Просмотреть файл

@ -780,6 +780,8 @@ Dataset <- R6::R6Class(
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -837,6 +839,8 @@ lgb.Dataset <- function(data,
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -913,6 +917,8 @@ lgb.Dataset.create.valid <- function(dataset,
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -942,6 +948,8 @@ lgb.Dataset.construct <- function(dataset) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -975,6 +983,8 @@ dim.lgb.Dataset <- function(x) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1045,6 +1055,8 @@ dimnames.lgb.Dataset <- function(x) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1089,6 +1101,8 @@ slice.lgb.Dataset <- function(dataset, idxset) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1138,6 +1152,8 @@ get_field.lgb.Dataset <- function(dataset, field_name) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1177,6 +1193,8 @@ set_field.lgb.Dataset <- function(dataset, field_name, data) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1207,6 +1225,8 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' # create training Dataset
#' data(agaricus.train, package ="lightgbm")
#' train <- agaricus.train
@ -1240,6 +1260,8 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -51,6 +51,8 @@ CVBooster <- R6::R6Class(
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -14,6 +14,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -17,6 +17,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' Logit <- function(x) log(x / (1.0 - x))
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train

Просмотреть файл

@ -29,6 +29,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -19,6 +19,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -16,6 +16,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' Logit <- function(x) {
#' log(x / (1.0 - x))
#' }

Просмотреть файл

@ -16,7 +16,10 @@
#' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly).
#' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}.
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data("agaricus.train")
#' model <- lightgbm(
#' agaricus.train$data
@ -33,6 +36,7 @@
#' model_new$check_null_handle()
#' lgb.restore_handle(model_new)
#' model_new$check_null_handle()
#' }
#' @export
lgb.restore_handle <- function(model) {
if (!.is_Booster(x = model)) {

Просмотреть файл

@ -19,6 +19,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -0,0 +1,51 @@
#' @name setLGBMThreads
#' @title Set maximum number of threads used by LightGBM
#' @description LightGBM attempts to speed up many operations by using multi-threading.
#' The number of threads used in those operations can be controlled via the
#' \code{num_threads} parameter passed through \code{params} to functions like
#' \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing
#' a model from a text file) are done via code paths that don't explicitly accept thread-control
#' configuration.
#'
#' Use this function to set the maximum number of threads LightGBM will use for such operations.
#'
#' This function affects all LightGBM operations in the same process.
#'
#' So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM
#' operation in the same process will use more than 4 threads.
#'
#' Call \code{setLGBMthreads(-1)} to remove this limitation.
#' @param num_threads maximum number of threads to be used by LightGBM in multi-threaded operations
#' @return NULL
#' @seealso \link{getLGBMthreads}
#' @export
setLGBMthreads <- function(num_threads) {
    # Forward the requested limit to the native library as-is; the C++ wrapper
    # converts it to an integer before storing it.
    .Call(LGBM_SetMaxThreads_R, num_threads)

    # This function is called purely for its side effect, so hand back NULL
    # without printing it at the console.
    return(invisible(NULL))
}
#' @name getLGBMThreads
#' @title Get default number of threads used by LightGBM
#' @description LightGBM attempts to speed up many operations by using multi-threading.
#' The number of threads used in those operations can be controlled via the
#' \code{num_threads} parameter passed through \code{params} to functions like
#' \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing
#' a model from a text file) are done via code paths that don't explicitly accept thread-control
#' configuration.
#'
#' Use this function to see the default number of threads LightGBM will use for such operations.
#' @return number of threads as an integer. \code{-1} means that in situations where parameter \code{num_threads} is
#' not explicitly supplied, LightGBM will choose a number of threads to use automatically.
#' @seealso \link{setLGBMthreads}
#' @export
getLGBMthreads <- function() {
    # The native routine does not return the value; it writes it directly into
    # the memory of the vector passed in. Allocate a brand-new vector with
    # integer(1L) rather than binding the literal `0L`: a literal is a constant
    # shared by every call of this function, so writing into it would corrupt
    # that constant and make values returned by earlier calls silently change.
    out <- integer(1L)
    .Call(
        LGBM_GetMaxThreads_R,
        out
    )
    return(out)
}

Просмотреть файл

@ -12,6 +12,8 @@
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -22,6 +22,8 @@
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -21,6 +21,8 @@ be directly used with an \code{lgb.Dataset} object.
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -28,6 +28,8 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -0,0 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/multithreading.R
\name{getLGBMThreads}
\alias{getLGBMThreads}
\alias{getLGBMthreads}
\title{Get default number of threads used by LightGBM}
\usage{
getLGBMthreads()
}
\value{
number of threads as an integer. \code{-1} means that in situations where parameter \code{num_threads} is
not explicitly supplied, LightGBM will choose a number of threads to use automatically.
}
\description{
LightGBM attempts to speed up many operations by using multi-threading.
The number of threads used in those operations can be controlled via the
\code{num_threads} parameter passed through \code{params} to functions like
\link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing
a model from a text file) are done via code paths that don't explicitly accept thread-control
configuration.
Use this function to see the default number of threads LightGBM will use for such operations.
}
\seealso{
\link{setLGBMthreads}
}

Просмотреть файл

@ -32,6 +32,8 @@ Get one attribute of a \code{lgb.Dataset}
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -65,6 +65,8 @@ Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -17,6 +17,8 @@ Construct Dataset explicitly
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -48,6 +48,8 @@ Construct validation data according to training data
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -20,6 +20,8 @@ Please note that \code{init_score} is not saved in binary file.
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -22,6 +22,8 @@ Set the categorical features of an \code{lgb.Dataset} object. Use this function
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -19,6 +19,8 @@ If you want to use validation data, you should set reference to training data
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
# create training Dataset
data(agaricus.train, package ="lightgbm")
train <- agaricus.train

Просмотреть файл

@ -114,6 +114,8 @@ Calling this function multiple times with different parameters might not overrid
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
library(lightgbm)
data(mtcars)
X <- as.matrix(mtcars[, -1L])

Просмотреть файл

@ -152,6 +152,8 @@ Cross validation logic used by LightGBM
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -20,6 +20,8 @@ Dump LightGBM model to json
\examples{
\donttest{
library(lightgbm)
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -33,6 +33,8 @@ Given a \code{lgb.Booster}, return evaluation results for a
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
# train a regression model
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -25,6 +25,8 @@ Creates a \code{data.table} of feature importances in a model.
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -30,6 +30,8 @@ Computes feature contribution components of rawscore prediction.
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
Logit <- function(x) log(x / (1.0 - x))
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -20,6 +20,8 @@ Load LightGBM takes in either a file path or model string.
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -40,6 +40,8 @@ Parse a LightGBM model json dump into a \code{data.table} structure.
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -38,6 +38,8 @@ Features are shown ranked in a decreasing importance order.
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -35,6 +35,8 @@ contribution of a feature. Features are shown ranked in a decreasing contributio
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
Logit <- function(x) {
log(x / (1.0 - x))
}

Просмотреть файл

@ -27,7 +27,10 @@ function. If you wish to make fast single-row predictions using a \code{lgb.Boos
call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object.
}
\examples{
\donttest{
library(lightgbm)
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data("agaricus.train")
model <- lightgbm(
agaricus.train$data
@ -45,6 +48,7 @@ model_new$check_null_handle()
lgb.restore_handle(model_new)
model_new$check_null_handle()
}
}
\seealso{
\link{lgb.make_serializable}, \link{lgb.drop_serialized}.
}

Просмотреть файл

@ -21,6 +21,8 @@ Save LightGBM model
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -130,6 +130,8 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}},
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -121,6 +121,8 @@ If the model object has been configured for fast single-row predictions through
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -23,6 +23,8 @@ Calls \code{readRDS} in what is expected to be a serialized \code{lgb.Booster} o
\examples{
\donttest{
library(lightgbm)
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -46,6 +46,8 @@ Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable bef
\examples{
\donttest{
library(lightgbm)
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/multithreading.R
\name{setLGBMThreads}
\alias{setLGBMThreads}
\alias{setLGBMthreads}
\title{Set maximum number of threads used by LightGBM}
\usage{
setLGBMthreads(num_threads)
}
\arguments{
\item{num_threads}{maximum number of threads to be used by LightGBM in multi-threaded operations}
}
\description{
LightGBM attempts to speed up many operations by using multi-threading.
The number of threads used in those operations can be controlled via the
\code{num_threads} parameter passed through \code{params} to functions like
\link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing
a model from a text file) are done via code paths that don't explicitly accept thread-control
configuration.
Use this function to set the maximum number of threads LightGBM will use for such operations.
This function affects all LightGBM operations in the same process.
So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM
operation in the same process will use more than 4 threads.
Call \code{setLGBMthreads(-1)} to remove this limitation.
}
\seealso{
\link{getLGBMthreads}
}

Просмотреть файл

@ -34,6 +34,8 @@ Set one attribute of a \code{lgb.Dataset}
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -23,6 +23,8 @@ Get a new \code{lgb.Dataset} containing the specified rows of
}
\examples{
\donttest{
\dontshow{setLGBMthreads(2L)}
\dontshow{data.table::setDTthreads(1L)}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -53,5 +53,6 @@ OBJECTS = \
treelearner/serial_tree_learner.o \
treelearner/tree_learner.o \
treelearner/voting_parallel_tree_learner.o \
utils/openmp_wrapper.o \
c_api.o \
lightgbm_R.o

Просмотреть файл

@ -54,5 +54,6 @@ OBJECTS = \
treelearner/serial_tree_learner.o \
treelearner/tree_learner.o \
treelearner/voting_parallel_tree_learner.o \
utils/openmp_wrapper.o \
c_api.o \
lightgbm_R.o

Просмотреть файл

@ -1212,6 +1212,23 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
R_API_END();
}
// R-facing wrapper around LGBM_GetMaxThreads().
// The result is written into the first element of the integer vector `out`
// (the caller's vector is modified in place); the return value is R's NULL.
SEXP LGBM_GetMaxThreads_R(SEXP out) {
  R_API_BEGIN();
  int max_threads;
  CHECK_CALL(LGBM_GetMaxThreads(&max_threads));
  int* out_values = INTEGER(out);
  out_values[0] = max_threads;
  return R_NilValue;
  R_API_END();
}
// R-facing wrapper around LGBM_SetMaxThreads().
// `num_threads` is converted to a C int with Rf_asInteger(), so both integer
// and double inputs from R are accepted. Returns R's NULL.
SEXP LGBM_SetMaxThreads_R(SEXP num_threads) {
  R_API_BEGIN();
  const int requested_limit = Rf_asInteger(num_threads);
  CHECK_CALL(LGBM_SetMaxThreads(requested_limit));
  return R_NilValue;
  R_API_END();
}
// .Call() calls
static const R_CallMethodDef CallEntries[] = {
{"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1},
@ -1268,6 +1285,8 @@ static const R_CallMethodDef CallEntries[] = {
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3},
{"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0},
{"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0},
{"LGBM_GetMaxThreads_R" , (DL_FUNC) &LGBM_GetMaxThreads_R , 1},
{"LGBM_SetMaxThreads_R" , (DL_FUNC) &LGBM_SetMaxThreads_R , 1},
{NULL, NULL, 0}
};

Просмотреть файл

@ -850,4 +850,23 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
*/
LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R();
/*!
* \brief Get current maximum number of threads used by LightGBM routines in this process.
* \param[out] out current maximum number of threads used by LightGBM. -1 means no limit has been set, so the thread count falls back to omp_get_max_threads() unless num_threads is configured.
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_GetMaxThreads_R(
SEXP out
);
/*!
* \brief Set maximum number of threads used by LightGBM routines in this process.
* \param num_threads maximum number of threads used by LightGBM. -1 (or any non-positive value) removes the limit, so the thread count falls back to omp_get_max_threads() unless num_threads is configured.
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_SetMaxThreads_R(
SEXP num_threads
);
#endif // LIGHTGBM_R_H_

Просмотреть файл

@ -11,6 +11,11 @@
# the check farm is a shared resource and will typically be running many checks simultaneously.
#
.LGB_MAX_THREADS <- 2L
setLGBMthreads(.LGB_MAX_THREADS)
# control data.table parallelism
# ref: https://github.com/Rdatatable/data.table/issues/5658
data.table::setDTthreads(1L)
# by default, how much should results in tests be allowed to differ from hard-coded expected numbers?
.LGB_NUMERIC_TOLERANCE <- 1e-6

Просмотреть файл

@ -0,0 +1,16 @@
test_that("getLGBMthreads() and setLGBMthreads() work as expected", {
    # works with integer input
    ret <- setLGBMthreads(2L)
    expect_null(ret)
    expect_equal(getLGBMthreads(), 2L)

    # works with float input
    ret <- setLGBMthreads(1.0)
    expect_null(ret)
    expect_equal(getLGBMthreads(), 1L)

    # setting to any negative number sets max threads to -1
    ret <- setLGBMthreads(-312L)
    expect_null(ret)
    expect_equal(getLGBMthreads(), -1L)

    # The limit is process-wide state, and the step above removed it entirely.
    # Re-apply the cap that the test helper sets up (.LGB_MAX_THREADS) so that
    # tests running after this one in the same process stay limited.
    setLGBMthreads(.LGB_MAX_THREADS)
})

Просмотреть файл

@ -27,6 +27,12 @@ Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/),
library(lightgbm)
```
```{r, include=FALSE}
# limit number of threads used, to be respectful of CRAN's resources when it checks this vignette
data.table::setDTthreads(1L)
setLGBMthreads(2L)
```
This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit.
## The dataset

Просмотреть файл

@ -227,6 +227,7 @@ if ${BUILD_VIGNETTES} ; then
rm -f ./lightgbm/src/network/*.o
rm -f ./lightgbm/src/objective/*.o
rm -f ./lightgbm/src/treelearner/*.o
rm -f ./lightgbm/src/utils/*.o
echo "re-tarring ${TARBALL_NAME}"
tar \

Просмотреть файл

@ -1561,6 +1561,20 @@ LIGHTGBM_C_EXPORT int LGBM_NetworkInitWithFunctions(int num_machines,
void* reduce_scatter_ext_fun,
void* allgather_ext_fun);
/*!
* \brief Set maximum number of threads used by LightGBM routines in this process.
* \param num_threads maximum number of threads used by LightGBM. -1 (or any non-positive value) removes the limit, so the thread count falls back to omp_get_max_threads() unless num_threads is configured.
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_SetMaxThreads(int num_threads);
/*!
* \brief Get current maximum number of threads used by LightGBM routines in this process.
* \param[out] out current maximum number of threads used by LightGBM. -1 means no limit has been set, so the thread count falls back to omp_get_max_threads() unless num_threads is configured.
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_GetMaxThreads(int* out);
#if !defined(__cplusplus) && (!defined(__STDC__) || (__STDC_VERSION__ < 199901L))
/*! \brief Inline specifier no-op in C using standards before C99. */
#define INLINE_FUNCTION

Просмотреть файл

@ -5,6 +5,15 @@
#ifndef LIGHTGBM_OPENMP_WRAPPER_H_
#define LIGHTGBM_OPENMP_WRAPPER_H_
#include <LightGBM/export.h>
// this can only be changed by LGBM_SetMaxThreads()
LIGHTGBM_EXTERN_C int LGBM_MAX_NUM_THREADS;
// this is modified by OMP_SET_NUM_THREADS(), for example
// by passing num_thread through params
LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS;
#ifdef _OPENMP
#include <LightGBM/utils/log.h>
@ -17,22 +26,25 @@
#include <stdexcept>
#include <vector>
inline int OMP_NUM_THREADS() {
int ret = 1;
#pragma omp parallel
#pragma omp master
{ ret = omp_get_num_threads(); }
return ret;
}
/*
Get number of threads to use in OpenMP parallel regions.
inline void OMP_SET_NUM_THREADS(int num_threads) {
static const int default_omp_num_threads = OMP_NUM_THREADS();
if (num_threads > 0) {
omp_set_num_threads(num_threads);
} else {
omp_set_num_threads(default_omp_num_threads);
}
}
By default, this will return the result of omp_get_max_threads(),
which is OpenMP-implementation dependent but generally can be controlled
by environment variable OMP_NUM_THREADS.
ref:
- https://www.openmp.org/spec-html/5.0/openmpsu112.html
- https://gcc.gnu.org/onlinedocs/libgomp/omp_005fget_005fmax_005fthreads.html
*/
LIGHTGBM_EXTERN_C int OMP_NUM_THREADS();
/*
Update the default number of threads that'll be used in OpenMP parallel
regions for LightGBM routines where the number of threads aren't directly
supplied.
*/
LIGHTGBM_EXTERN_C void OMP_SET_NUM_THREADS(int num_threads);
class ThreadExceptionHelper {
public:
@ -102,10 +114,7 @@ class ThreadExceptionHelper {
/** Fall here if no OPENMP support, so just
simulate a single thread running.
All #pragma omp should be ignored by the compiler **/
inline void omp_set_num_threads(int) __GOMP_NOTHROW {} // NOLINT (no cast done here)
inline void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {}
inline int omp_get_num_threads() __GOMP_NOTHROW {return 1;}
inline int omp_get_max_threads() __GOMP_NOTHROW {return 1;}
inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;}
inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; }
#ifdef __cplusplus

Просмотреть файл

@ -2699,6 +2699,23 @@ int LGBM_NetworkInitWithFunctions(int num_machines, int rank,
API_END();
}
// Store the process-wide cap on the number of threads LightGBM may use.
// Any non-positive request is normalized to -1, the "no cap" sentinel.
int LGBM_SetMaxThreads(int num_threads) {
  API_BEGIN();
  LGBM_MAX_NUM_THREADS = (num_threads > 0) ? num_threads : -1;
  API_END();
}
// Report the process-wide thread cap through `out`.
// The value is -1 when no cap is in effect.
int LGBM_GetMaxThreads(int* out) {
  API_BEGIN();
  const int current_limit = LGBM_MAX_NUM_THREADS;
  *out = current_limit;
  API_END();
}
// ---- start of some help functions

Просмотреть файл

@ -0,0 +1,44 @@
/*!
* Copyright (c) 2023 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#include <LightGBM/utils/openmp_wrapper.h>
int LGBM_MAX_NUM_THREADS = -1;
int LGBM_DEFAULT_NUM_THREADS = -1;
#ifdef _OPENMP
#include <omp.h>
// Number of threads LightGBM should use in an OpenMP parallel region when the
// caller did not specify one.
//
// Resolution order:
//   1. the LightGBM-specific default (LGBM_DEFAULT_NUM_THREADS), if positive;
//   2. otherwise OpenMP's own setting, via omp_get_max_threads().
// The result is then capped at LGBM_MAX_NUM_THREADS when that cap is positive.
int OMP_NUM_THREADS() {
  int default_num_threads = 1;

  if (LGBM_DEFAULT_NUM_THREADS > 0) {
    // if LightGBM-specific default has been set, ignore OpenMP-global config
    default_num_threads = LGBM_DEFAULT_NUM_THREADS;
  } else {
    // otherwise, default to OpenMP-global config.
    //
    // omp_get_max_threads() is called directly, without wrapping it in a
    // '#pragma omp single' construct. Such a construct is a no-op outside of
    // a parallel region, but inside one it would let only one thread assign
    // the value (the others would keep the placeholder 1) and its implicit
    // barrier could hang if not every thread of the team reached this call.
    default_num_threads = omp_get_max_threads();
  }

  // ensure that if LGBM_SetMaxThreads() was ever called, LightGBM doesn't
  // use more than that many threads
  if (LGBM_MAX_NUM_THREADS > 0 && default_num_threads > LGBM_MAX_NUM_THREADS) {
    return LGBM_MAX_NUM_THREADS;
  }

  return default_num_threads;
}
// Update LightGBM's own default thread count (the value OMP_NUM_THREADS()
// prefers over the OpenMP-global setting). This only records the value in
// LGBM_DEFAULT_NUM_THREADS; a non-positive request resets it to -1 ("unset").
void OMP_SET_NUM_THREADS(int num_threads) {
  LGBM_DEFAULT_NUM_THREADS = (num_threads > 0) ? num_threads : -1;
}
#endif // _OPENMP

Просмотреть файл

@ -247,3 +247,36 @@ def test_booster():
c_str(''),
c_str('preb.txt'))
LIB.LGBM_BoosterFree(booster2)
def test_max_thread_control():
    """Round-trip the process-wide thread cap through the C API.

    Checks the initial value (-1), that a positive value is stored as given,
    and that a negative value is normalized back to -1.
    """

    def _read_max_threads():
        # the C API reports the value through an out-parameter
        current = ctypes.c_int(0)
        status = LIB.LGBM_GetMaxThreads(
            ctypes.byref(current)
        )
        assert status == 0
        return current.value

    def _write_max_threads(requested):
        status = LIB.LGBM_SetMaxThreads(
            ctypes.c_int(requested)
        )
        assert status == 0

    # at initialization, should be -1
    assert _read_max_threads() == -1

    # updating that value through the C API should work
    _write_max_threads(6)
    assert _read_max_threads() == 6

    # resetting to any negative number should set it to -1
    _write_max_threads(-123)
    assert _read_max_threads() == -1