зеркало из https://github.com/microsoft/LightGBM.git
[R-package] Promote number of threads to top-level argument in `lightgbm()` and change default to number of cores (#4972)
This commit is contained in:
Родитель
4ae3d1387d
Коммит
33eb03767d
|
@ -105,13 +105,13 @@ if [[ $OS_NAME == "macos" ]]; then
|
|||
fi
|
||||
fi
|
||||
|
||||
# Manually install Depends and Imports libraries + 'knitr', 'rmarkdown', 'testthat'
|
||||
# Manually install Depends and Imports libraries + 'knitr', 'RhpcBLASctl', 'rmarkdown', 'testthat'
|
||||
# to avoid a CI-time dependency on devtools (for devtools::install_deps())
|
||||
# NOTE: testthat is not required when running rchk
|
||||
if [[ "${TASK}" == "r-rchk" ]]; then
|
||||
packages="c('data.table', 'jsonlite', 'knitr', 'Matrix', 'R6', 'rmarkdown')"
|
||||
packages="c('data.table', 'jsonlite', 'knitr', 'Matrix', 'R6', 'RhpcBLASctl', 'rmarkdown')"
|
||||
else
|
||||
packages="c('data.table', 'jsonlite', 'knitr', 'Matrix', 'R6', 'rmarkdown', 'testthat')"
|
||||
packages="c('data.table', 'jsonlite', 'knitr', 'Matrix', 'R6', 'RhpcBLASctl', 'rmarkdown', 'testthat')"
|
||||
fi
|
||||
compile_from_source="both"
|
||||
if [[ $OS_NAME == "macos" ]]; then
|
||||
|
|
|
@ -7,7 +7,7 @@ apt-get install --no-install-recommends -y \
|
|||
|
||||
# installation of dependencies needs to happen before building the package,
|
||||
# since `R CMD build` needs to install the package to build vignettes
|
||||
Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'rhub', 'testthat'), dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1
|
||||
Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'RhpcBLASctl', 'rmarkdown', 'rhub', 'testthat'), dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1
|
||||
|
||||
sh build-cran-package.sh || exit -1
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash
|
||||
|
||||
RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1
|
||||
RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'RhpcBLASctl', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1
|
||||
sh build-cran-package.sh \
|
||||
--r-executable=RDvalgrind \
|
||||
|| exit -1
|
||||
|
|
|
@ -122,7 +122,7 @@ Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT
|
|||
Write-Output "Done installing Rtools"
|
||||
|
||||
Write-Output "Installing dependencies"
|
||||
$packages = "c('data.table', 'jsonlite', 'knitr', 'Matrix', 'processx', 'R6', 'rmarkdown', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')"
|
||||
$packages = "c('data.table', 'jsonlite', 'knitr', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'rmarkdown', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')"
|
||||
Run-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Check-Output $?
|
||||
|
||||
# MiKTeX and pandoc can be skipped on non-MinGW builds, since we don't
|
||||
|
|
|
@ -188,7 +188,7 @@ jobs:
|
|||
- name: Install packages
|
||||
shell: bash
|
||||
run: |
|
||||
RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
|
||||
RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'RhpcBLASctl', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
|
||||
sh build-cran-package.sh --r-executable=RD${{ matrix.r_customization }}
|
||||
RD${{ matrix.r_customization }} CMD INSTALL lightgbm_*.tar.gz || exit -1
|
||||
- name: Run tests with sanitizers
|
||||
|
@ -219,7 +219,7 @@ jobs:
|
|||
shell: bash
|
||||
run: |
|
||||
export PATH=/opt/R-devel/bin/:${PATH}
|
||||
Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
|
||||
Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'RhpcBLASctl', 'rmarkdown', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
|
||||
sh build-cran-package.sh
|
||||
R CMD check --as-cran --run-donttest lightgbm_*.tar.gz || exit -1
|
||||
if grep -q -E "NOTE|WARNING|ERROR" lightgbm.Rcheck/00check.log; then
|
||||
|
|
|
@ -313,7 +313,7 @@ jobs:
|
|||
R_LIB_PATH=~/Rlib
|
||||
export R_LIBS=${R_LIB_PATH}
|
||||
mkdir -p ${R_LIB_PATH}
|
||||
RDscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'rmarkdown'), lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1
|
||||
RDscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'Matrix', 'RhpcBLASctl', 'rmarkdown'), lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" || exit -1
|
||||
sh build-cran-package.sh --r-executable=RD || exit -1
|
||||
mv lightgbm_${LGB_VER}.tar.gz $(Build.ArtifactStagingDirectory)/lightgbm-${LGB_VER}-r-cran.tar.gz
|
||||
displayName: 'Build CRAN R-package'
|
||||
|
|
|
@ -50,6 +50,7 @@ VignetteBuilder: knitr
|
|||
Suggests:
|
||||
knitr,
|
||||
processx,
|
||||
RhpcBLASctl,
|
||||
rmarkdown,
|
||||
testthat
|
||||
Depends:
|
||||
|
@ -61,6 +62,7 @@ Imports:
|
|||
jsonlite (>= 1.0),
|
||||
Matrix (>= 1.1-0),
|
||||
methods,
|
||||
parallel,
|
||||
utils
|
||||
SystemRequirements:
|
||||
C++11
|
||||
|
|
|
@ -52,6 +52,7 @@ importFrom(graphics,barplot)
|
|||
importFrom(graphics,par)
|
||||
importFrom(jsonlite,fromJSON)
|
||||
importFrom(methods,is)
|
||||
importFrom(parallel,detectCores)
|
||||
importFrom(stats,quantile)
|
||||
importFrom(utils,modifyList)
|
||||
importFrom(utils,read.delim)
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#' model <- lightgbm(
|
||||
#' agaricus.train$data
|
||||
#' , agaricus.train$label
|
||||
#' , params = list(objective = "binary", nthreads = 1L)
|
||||
#' , params = list(objective = "binary")
|
||||
#' , nrounds = 5L
|
||||
#' , verbose = 0)
|
||||
#' fname <- tempfile(fileext="rds")
|
||||
|
|
|
@ -98,6 +98,22 @@ NULL
|
|||
#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#objective}{
|
||||
#' the "objective" item of the "Parameters" section of the documentation}.
|
||||
#' @param init_score initial score is the base prediction lightgbm will boost from
|
||||
#' @param num_threads Number of parallel threads to use. For best speed, this should be set to the number of
|
||||
#' physical cores in the CPU - in a typical x86-64 machine, this corresponds to half the
|
||||
#' number of maximum threads.
|
||||
#'
|
||||
#' Be aware that using too many threads can result in speed degradation in smaller datasets
|
||||
#' (see the parameters documentation for more details).
|
||||
#'
|
||||
#' If passing zero, will use the default number of threads configured for OpenMP
|
||||
#' (typically controlled through an environment variable \code{OMP_NUM_THREADS}).
|
||||
#'
|
||||
#' If passing \code{NULL} (the default), will try to use the number of physical cores in the
|
||||
#' system, but be aware that getting the number of cores detected correctly requires package
|
||||
#' \code{RhpcBLASctl} to be installed.
|
||||
#'
|
||||
#' This parameter gets overridden by \code{num_threads} and its aliases under \code{params}
|
||||
#' if passed there.
|
||||
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
|
||||
#' \itemize{
|
||||
#' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
|
||||
|
@ -129,6 +145,7 @@ lightgbm <- function(data,
|
|||
serializable = TRUE,
|
||||
objective = "regression",
|
||||
init_score = NULL,
|
||||
num_threads = NULL,
|
||||
...) {
|
||||
|
||||
# validate inputs early to avoid unnecessary computation
|
||||
|
@ -136,6 +153,15 @@ lightgbm <- function(data,
|
|||
stop("nrounds should be greater than zero")
|
||||
}
|
||||
|
||||
if (is.null(num_threads)) {
|
||||
num_threads <- lgb.get.default.num.threads()
|
||||
}
|
||||
params <- lgb.check.wrapper_param(
|
||||
main_param_name = "num_threads"
|
||||
, params = params
|
||||
, alternative_kwarg_value = num_threads
|
||||
)
|
||||
|
||||
# Set data to a temporary variable
|
||||
dtrain <- data
|
||||
|
||||
|
|
|
@ -217,3 +217,26 @@ lgb.check.wrapper_param <- function(main_param_name, params, alternative_kwarg_v
|
|||
params[[main_param_name]] <- alternative_kwarg_value
|
||||
return(params)
|
||||
}
|
||||
|
||||
# Choose the thread count used when the caller did not pass 'num_threads'.
#
# Resolution order:
#   1. If the optional package 'RhpcBLASctl' is available, use the core count
#      it reports (falling back to 0 if that value is missing or non-positive).
#   2. Otherwise, on anything that is not identified as Linux, ask
#      parallel::detectCores(logical = FALSE); on Linux keep 0.
#      NOTE(review): Linux is presumably excluded because detectCores() does
#      not honour 'logical = FALSE' there - confirm against ?detectCores.
#
# Whenever 'RhpcBLASctl' is missing a warning is raised; its text says whether
# the result is 0 ("default number of OpenMP threads") or a best-effort count.
#
# Returns a single non-NA, non-negative number.
#' @importFrom parallel detectCores
lgb.get.default.num.threads <- function() {
    if (requireNamespace("RhpcBLASctl", quietly = TRUE)) { # nolint
        cores <- RhpcBLASctl::get_num_cores()
        # defensive: never forward a missing or non-positive count as 'num_threads'
        if (length(cores) != 1L || is.na(cores) || cores < 1L) {
            return(0L)
        }
        return(cores)
    }

    msg <- "Optional package 'RhpcBLASctl' not found."
    cores <- 0L

    # Sys.info() may return NULL on platforms where it is not implemented;
    # identical() on the extracted element copes with that, whereas a bare
    # `!=` comparison would yield logical(0) and make `if` fail.
    on_linux <- identical(Sys.info()[["sysname"]], "Linux")
    if (!on_linux) {
        cores <- parallel::detectCores(logical = FALSE)
        if (is.na(cores) || cores < 0L) {
            cores <- 0L
        }
    }

    if (cores == 0L) {
        msg <- paste(msg, "Will use default number of OpenMP threads.", sep = " ")
    } else {
        msg <- paste(msg, "Detection of CPU cores might not be accurate.", sep = " ")
    }
    warning(msg)
    return(cores)
}
|
||||
|
|
|
@ -25,7 +25,7 @@ data("agaricus.train")
|
|||
model <- lightgbm(
|
||||
agaricus.train$data
|
||||
, agaricus.train$label
|
||||
, params = list(objective = "binary", nthreads = 1L)
|
||||
, params = list(objective = "binary")
|
||||
, nrounds = 5L
|
||||
, verbose = 0)
|
||||
fname <- tempfile(fileext="rds")
|
||||
|
|
|
@ -18,6 +18,7 @@ lightgbm(
|
|||
serializable = TRUE,
|
||||
objective = "regression",
|
||||
init_score = NULL,
|
||||
num_threads = NULL,
|
||||
...
|
||||
)
|
||||
}
|
||||
|
@ -60,6 +61,23 @@ the "objective" item of the "Parameters" section of the documentation}.}
|
|||
|
||||
\item{init_score}{initial score is the base prediction lightgbm will boost from}
|
||||
|
||||
\item{num_threads}{Number of parallel threads to use. For best speed, this should be set to the number of
|
||||
physical cores in the CPU - in a typical x86-64 machine, this corresponds to half the
|
||||
number of maximum threads.
|
||||
|
||||
Be aware that using too many threads can result in speed degradation in smaller datasets
|
||||
(see the parameters documentation for more details).
|
||||
|
||||
If passing zero, will use the default number of threads configured for OpenMP
|
||||
(typically controlled through an environment variable \code{OMP_NUM_THREADS}).
|
||||
|
||||
If passing \code{NULL} (the default), will try to use the number of physical cores in the
|
||||
system, but be aware that getting the number of cores detected correctly requires package
|
||||
\code{RhpcBLASctl} to be installed.
|
||||
|
||||
This parameter gets overridden by \code{num_threads} and its aliases under \code{params}
|
||||
if passed there.}
|
||||
|
||||
\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
|
||||
\itemize{
|
||||
\item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
|
||||
|
|
|
@ -2928,6 +2928,51 @@ test_that("lightgbm() defaults to 'regression' objective if objective not otherw
|
|||
expect_false(any(model_txt_lines == "objective=regression_l1"))
|
||||
})
|
||||
|
||||
test_that("lightgbm() accepts 'num_threads' as either top-level argument or under params", {

    # Shared assertions: the booster must record num_threads = 1 both in its
    # params list and in the parameter dump of the serialized model string.
    .expect_single_thread <- function(booster) {
        expect_equal(booster$params$num_threads, 1L)
        dumped <- booster$save_model_to_string()
        dumped_lines <- strsplit(x = dumped, split = "\n")[[1L]]
        expect_true(any(grepl("\\[num_threads: 1\\]", dumped_lines)))
    }

    # 1) passed only as the top-level keyword argument
    bst_kwarg <- lightgbm(
        data = train$data
        , label = train$label
        , nrounds = 5L
        , verbose = VERBOSITY
        , num_threads = 1L
    )
    .expect_single_thread(bst_kwarg)

    # 2) passed only through 'params'
    bst_params <- lightgbm(
        data = train$data
        , label = train$label
        , nrounds = 5L
        , verbose = VERBOSITY
        , params = list(num_threads = 1L)
    )
    .expect_single_thread(bst_params)

    # 3) passed both ways: the value under 'params' is expected to win
    bst_both <- lightgbm(
        data = train$data
        , label = train$label
        , nrounds = 5L
        , verbose = VERBOSITY
        , num_threads = 10L
        , params = list(num_threads = 1L)
    )
    .expect_single_thread(bst_both)
})
|
||||
|
||||
test_that("lightgbm() accepts 'weight' and 'weights'", {
|
||||
data(mtcars)
|
||||
X <- as.matrix(mtcars[, -1L])
|
||||
|
|
Загрузка…
Ссылка в новой задаче