[R-package] miscellaneous changes to comply with CRAN requirements (#3338)

* [R-package] update DESCRIPTION per CRAN comments

* newlines

* Apply suggestions from code review

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

* more fixes

* update Rbuildignore

* more changes

* more changes per CRAN response

* add email

* run examples in CI

* add newest CRAN response

* add Solaris patch

* update patch

* another attempt at ifaddrs patch

* fix unnecessary comment

* update configure

* comments

* bump version

* tabs

* fix address alignment, required by cran (#3415)

* fix dataset binary file alignment

* many fixes

* fix warnings

* fix bug

* Update file_io.cpp

* Update file_io.cpp

* simplify code

* Apply suggestions from code review

* general

* remove unneeded alignment

* Update file_io.h

* int32 to byte8 alignment

* Apply suggestions from code review

* Apply suggestions from code review

* [R-package] add new copyright holder in DESCRIPTION (#3409)

* [R-package] add new copyright holder in DESCRIPTION

* fix role

* fixing conflicts

* [R-package] add new copyright holder in DESCRIPTION (#3409)

* [R-package] add new copyright holder in DESCRIPTION

* fix role

* trying to fix conflicts

* more fixes

* this will work

* update cran-comments

* simplify solaris, add more testing docs

* stuff

* remove rchck docs

* Apply suggestions from code review

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

* remove extra use of cat()

* change solaris check

* update docs

* remove testing code

* fix warning about cleanup not having execute permissions

* fix cmake builds

* remove blank line

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
This commit is contained in:
James Lamb 2020-10-08 03:51:21 +01:00 коммит произвёл GitHub
Родитель 7a51ae042a
Коммит 186711deae
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
61 изменённых файлов: 706 добавлений и 104 удалений

Просмотреть файл

@ -53,7 +53,8 @@ LINTERS_TO_USE <- list(
, "true_false" = lintr::T_and_F_symbol_linter
, "undesirable_function" = lintr::undesirable_function_linter(
fun = c(
"cbind" = paste0(
"cat" = "CRAN forbids the use of cat() in packages except in special cases. Use message() or warning()."
, "cbind" = paste0(
"cbind is an unsafe way to build up a data frame. merge() or direct "
, "column assignment is preferred."
)
@ -85,7 +86,7 @@ LINTERS_TO_USE <- list(
, "unneeded_concatenation" = lintr::unneeded_concatenation_linter
)
cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
print(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
results <- NULL

Просмотреть файл

@ -165,7 +165,7 @@ check_succeeded="yes"
(
R CMD check ${PKG_TARBALL} \
--as-cran \
--run-dontrun \
--run-donttest \
|| check_succeeded="no"
) &

Просмотреть файл

@ -147,9 +147,9 @@ if ($env:COMPILER -ne "MSVC") {
Write-Output "Running R CMD check"
if ($env:R_BUILD_TYPE -eq "cran") {
# CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit)
$check_args = "c('CMD', 'check', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')"
$check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')"
} else {
$check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')"
$check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')"
}
Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $?

Просмотреть файл

@ -3,7 +3,7 @@ AUTOCONF_UBUNTU_VERSION
^.*\.bin
^build_r.R$
^cran-comments\.md$
^docs/.*$
^docs$
^.*\.dll
\.gitkeep$
^.*\.history

Просмотреть файл

@ -6,11 +6,25 @@ Date: ~~DATE~~
Authors@R: c(
person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")),
person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("aut")),
person("Yachen", "Yan", role = c("ctb")),
person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")),
person("Qi", "Meng", role = c("aut")),
person("Thomas", "Finley", role = c("aut")),
person("Taifeng", "Wang", role = c("aut")),
person("Wei", "Chen", role = c("aut")),
person("Weidong", "Ma", role = c("aut")),
person("Qiwei", "Ye", role = c("aut")),
person("Tie-Yan", "Liu", role = c("aut")),
person("Yachen", "Yan", role = c("ctb")),
person("Microsoft Corporation", role = c("cph")),
person("Dropbox, Inc.", role = c("cph")),
person("Jay", "Loden", role = c("cph")),
person("Dave", "Daeschler", role = c("cph")),
person("Giampaolo", "Rodola", role = c("cph")),
person("IBM Corporation", role = c("ctb"))
)
Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, and this package offers an R interface to work with it.
Description: Tree based algorithms can be improved by introducing boosting frameworks.
'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) <https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision>.
This package offers an R interface to work with it.
It is designed to be distributed and efficient with the following advantages:
1. Faster training speed and higher efficiency.
2. Lower memory usage.

Просмотреть файл

@ -1,21 +1,2 @@
The MIT License (MIT)
Copyright (c) Microsoft Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
YEAR: 2016
COPYRIGHT HOLDER: Microsoft Corporation

Просмотреть файл

@ -150,7 +150,7 @@ merge.eval.string <- function(env) {
}
paste0(msg, collapse = "\t")
paste0(msg, collapse = " ")
}
@ -173,7 +173,7 @@ cb.print.evaluation <- function(period = 1L) {
# Check if message is existing
if (nchar(msg) > 0L) {
cat(merge.eval.string(env = env), "\n")
print(merge.eval.string(env = env))
}
}
@ -284,7 +284,12 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =
# Check if verbose or not
if (isTRUE(verbose)) {
cat("Will train until there is no improvement in ", stopping_rounds, " rounds.\n\n", sep = "")
msg <- paste0(
"Will train until there is no improvement in "
, stopping_rounds
, " rounds."
)
print(msg)
}
# Internally treat everything as a maximization task
@ -359,8 +364,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =
# Print message if verbose
if (isTRUE(verbose)) {
cat("Early stopping, best iteration is:", "\n")
cat(best_msg[[i]], "\n")
print(paste0("Early stopping, best iteration is: ", best_msg[[i]]))
}
@ -380,8 +384,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =
# Print message if verbose
if (isTRUE(verbose)) {
cat("Did not meet early stopping, best iteration is:", "\n")
cat(best_msg[[i]], "\n")
print(paste0("Did not meet early stopping, best iteration is: ", best_msg[[i]]))
}
# Store best iteration and stop

Просмотреть файл

@ -721,7 +721,7 @@ Booster <- R6::R6Class(
#' number of columns corresponding to the number of trees.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -780,7 +780,7 @@ predict.lgb.Booster <- function(object,
#' @return lgb.Booster
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -840,7 +840,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' @return lgb.Booster
#'
#' @examples
#' \dontrun{
#' \donttest{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
@ -886,7 +886,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' @return json format of model
#'
#' @examples
#' \dontrun{
#' \donttest{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
@ -930,10 +930,10 @@ lgb.dump <- function(booster, num_iteration = NULL) {
#' (the default), evaluation results for all iterations will be returned.
#' @param is_err TRUE will return evaluation error instead
#'
#' @return vector of evaluation result
#' @return numeric vector of evaluation result
#'
#' @examples
#' \dontrun{
#' \donttest{
#' # train a regression model
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train

Просмотреть файл

@ -720,7 +720,7 @@ Dataset <- R6::R6Class(
#' @return constructed dataset
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -766,7 +766,7 @@ lgb.Dataset <- function(data,
#' @return constructed dataset
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -793,12 +793,13 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
#' @param dataset Object of class \code{lgb.Dataset}
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#' }
#' @return constructed dataset
#' @export
lgb.Dataset.construct <- function(dataset) {
@ -824,7 +825,7 @@ lgb.Dataset.construct <- function(dataset) {
#' be directly used with an \code{lgb.Dataset} object.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -858,7 +859,7 @@ dim.lgb.Dataset <- function(x, ...) {
#' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -869,6 +870,7 @@ dim.lgb.Dataset <- function(x, ...) {
#' print(dtrain, verbose = TRUE)
#' }
#' @rdname dimnames.lgb.Dataset
#' @return A list with the dimension names of the dataset
#' @export
dimnames.lgb.Dataset <- function(x) {
@ -883,6 +885,7 @@ dimnames.lgb.Dataset <- function(x) {
}
#' @rdname dimnames.lgb.Dataset
#' @return A list with the dimension names of the dataset
#' @export
`dimnames<-.lgb.Dataset` <- function(x, value) {
@ -929,7 +932,7 @@ dimnames.lgb.Dataset <- function(x) {
#' @return constructed sub dataset
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -944,6 +947,7 @@ slice <- function(dataset, ...) {
}
#' @rdname slice
#' @return constructed sub dataset
#' @export
slice.lgb.Dataset <- function(dataset, idxset, ...) {
@ -976,7 +980,7 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
#' }
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -994,6 +998,7 @@ getinfo <- function(dataset, ...) {
}
#' @rdname getinfo
#' @return info data
#' @export
getinfo.lgb.Dataset <- function(dataset, name, ...) {
@ -1013,7 +1018,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' @param name the name of the field to get
#' @param info the specific field of information to set
#' @param ... other parameters
#' @return passed object
#' @return the dataset you passed in
#'
#' @details
#' The \code{name} field can be one of the following:
@ -1029,7 +1034,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' }
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1047,6 +1052,7 @@ setinfo <- function(dataset, ...) {
}
#' @rdname setinfo
#' @return the dataset you passed in
#' @export
setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
@ -1066,10 +1072,10 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @return passed dataset
#' @return the dataset you passed in
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1097,10 +1103,10 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
#' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset}
#'
#' @return passed dataset
#' @return the dataset you passed in
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package ="lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -1129,10 +1135,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#' @param dataset object of class \code{lgb.Dataset}
#' @param fname object filename of output file
#'
#' @return passed dataset
#' @return the dataset you passed in
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -68,7 +68,7 @@
#' \code{lgb.Dataset}.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(iris)
#'
#' str(iris)

Просмотреть файл

@ -52,7 +52,7 @@ CVBooster <- R6::R6Class(
#' @return a trained model \code{lgb.CVBooster}.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
@ -466,7 +466,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
# When doing group, stratified is not possible (only random selection)
if (nfold > length(group)) {
stop("\n\tYou requested too many folds for the number of available groups.\n")
stop("\nYou requested too many folds for the number of available groups.\n")
}
# Degroup the groups

Просмотреть файл

@ -13,7 +13,7 @@
#' }
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -16,7 +16,7 @@
#' Contribution columns to each class.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' Logit <- function(x) log(x / (1.0 - x))
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train

Просмотреть файл

@ -28,7 +28,7 @@
#' }
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -18,7 +18,7 @@
#' and silently returns a processed data.table with \code{top_n} features sorted by defined importance.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -15,7 +15,7 @@
#' The \code{lgb.plot.interpretation} function creates a \code{barplot}.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' Logit <- function(x) {
#' log(x / (1.0 - x))
#' }

Просмотреть файл

@ -26,7 +26,7 @@
#' @return a trained booster model \code{lgb.Booster}.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -14,7 +14,7 @@
#' @return NULL invisibly.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -97,6 +97,7 @@ NULL
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained \code{lgb.Booster}
#' @export
lightgbm <- function(data,
label = NULL,

Просмотреть файл

@ -4,10 +4,10 @@
#' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param refhook a hook function for handling reference objects.
#'
#' @return \code{lgb.Booster}.
#' @return \code{lgb.Booster}
#'
#' @examples
#' \dontrun{
#' \donttest{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train

Просмотреть файл

@ -2,6 +2,7 @@
#' @name lgb.prepare
#' @description removed functions
#' @param ... catch-all to match old calls
#' @return Nothing. This function always raises an exception
#' @export
lgb.prepare <- function(...) {
stop("lgb.prepare() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()")
@ -11,6 +12,7 @@ lgb.prepare <- function(...) {
#' @name lgb.prepare2
#' @description removed functions
#' @param ... catch-all to match old calls
#' @return Nothing. This function always raises an exception
#' @export
lgb.prepare2 <- function(...) {
stop("lgb.prepare2() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()")
@ -20,6 +22,7 @@ lgb.prepare2 <- function(...) {
#' @name lgb.prepare_rules
#' @description removed functions
#' @param ... catch-all to match old calls
#' @return Nothing. This function always raises an exception
#' @export
lgb.prepare_rules <- function(...) {
stop("lgb.prepare_rules() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()")
@ -29,6 +32,7 @@ lgb.prepare_rules <- function(...) {
#' @name lgb.prepare_rules2
#' @description removed functions
#' @param ... catch-all to match old calls
#' @return Nothing. This function always raises an exception
#' @export
lgb.prepare_rules2 <- function(...) {
stop("lgb.prepare_rules2() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()")

Просмотреть файл

@ -18,7 +18,7 @@
#' @return NULL invisibly.
#'
#' @examples
#' \dontrun{
#' \donttest{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train

Просмотреть файл

@ -338,6 +338,97 @@ mv \
lightgbm-${LGB_VERSION}-r40-windows.zip
```
### Testing the CRAN Package
`{lightgbm}` is tested automatically on every commit, across many combinations of operating system, R version, and compiler. This section describes how to test the package locally while you are developing.
#### Windows, Mac, and Linux
```shell
sh build-cran-package.sh
R CMD check --as-cran lightgbm_*.tar.gz
```
#### Solaris
All packages uploaded to CRAN must pass `R CMD check` on Solaris 10. To test LightGBM on this operating system, you can use [R Hub](https://builder.r-hub.io/), a free service generously provided by the R Consortium.
```shell
sh build-cran-package.sh
```
```r
package_tarball <- paste0("lightgbm_", readLines("VERSION.txt")[1], ".tar.gz")
rhub::check(
path = package_tarball
, email = "your_email_here"
, check_args = "--as-cran"
, platform = c(
"solaris-x86-patched"
, "solaris-x86-patched-ods"
)
, env_vars = c(
"R_COMPILE_AND_INSTALL_PACKAGES" = "always"
)
)
```
#### UBSAN
All packages uploaded to CRAN must pass a build using `gcc` instrumented with two sanitizers: the Address Sanitizer (ASAN) and the Undefined Behavior Sanitizer (UBSAN). For more background, see [this blog post](http://dirk.eddelbuettel.com/code/sanitizers.html).
You can replicate these checks locally using Docker.
```shell
docker run \
-v $(pwd):/opt/LightGBM \
-it rhub/rocker-gcc-san \
/bin/bash
cd /opt/LightGBM
Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'https://cran.rstudio.com')"
sh build-cran-package.sh
Rdevel CMD INSTALL lightgbm_*.tar.gz
cd R-package/tests
Rscriptdevel testthat.R
```
#### Valgrind
All packages uploaded to CRAN must be built and tested without raising any issues from `valgrind`. `valgrind` is a profiler that can catch serious issues like memory leaks and illegal writes. For more information, see [this blog post](https://reside-ic.github.io/blog/debugging-and-fixing-crans-additional-checks-errors/).
You can replicate these checks locally using Docker. Note that instrumented versions of R built to use `valgrind` run much slower, and these tests may take as long as 20 minutes to run.
```shell
docker run \
-v $(pwd):/opt/LightGBM \
-it \
wch1/r-debug
cd /opt/LightGBM
RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'https://cran.rstudio.com')"
sh build-cran-package.sh
RDvalgrind CMD INSTALL \
--preclean \
--install-tests \
lightgbm_*.tar.gz
cd R-package/tests
RDvalgrind \
--no-readline \
--vanilla \
-d valgrind \
-f testthat.R \
2>&1 \
| tee out.log \
| cat
```
External (Unofficial) Repositories
----------------------------------

Просмотреть файл

@ -1,5 +1,307 @@
# CRAN Submission History
## v3.0.0.2 - Submission 1 - (September 29, 2020)
### CRAN response
First response was a message talking about failing checks on 3.0.0.
```text
package lightgbm_3.0.0.2.tar.gz has been auto-processed.
The auto-check found additional issues for the last version released on CRAN:
gcc-UBSAN <link>
valgrind <link>
CRAN incoming checks do not test for these additional issues and you will need an appropriately instrumented build of R to reproduce these.
Hence please reply-all and explain: Have these been fixed?
Please correct before 2020-10-05 to safely retain your package on CRAN.
There is still a valgrind error. This did not happen when tested on
submission, but the tests did run until timeout at 4 hours. When you
write illegally, corruption is common.
Illegal writes are serious errors.
```
Then in later responses to email correspondence with CRAN, CRAN expressed frustration with the number of failed submissions and banned this package from new submissions for a month.
The content of that frustrated message was regrettable and it does not need to be preserved forever in this file.
### Maintainer Notes
The 3.0.0.x series is officially not making it to CRAN. We will wait until November, and try again.
Detailed plan about what will be tried before November 2020 to increase the likelihood of success for that package: https://github.com/microsoft/LightGBM/pull/3338#issuecomment-702756840.
## v3.0.0.1 - Submission 1 - (September 24, 2020)
### CRAN response
```text
Thanks, we see:
Still lots of alignment errors, such as
lightgbm.Rcheck/tests/testthat.Rout:io/dataset_loader.cpp:340:59:
runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const value_type', which requires 4 byte alignment
lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/stl_vector.h:1198:21:
runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const int', which requires 4 byte alignment lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/vector.tcc:449:28:runtime
error: reference binding to misaligned address 0x7f51fefad81e for type 'const type', which requires 4 byte alignment
lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/move.h:77:36:
runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const int', which requires 4 byte alignment
lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/alloc_traits.h:512:17:
runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const type', which requires 4 byte alignment
Please fix and resubmit.
```
### Maintainer Notes
Ok, these are the notes from the UBSAN tests. Was able to reproduce them with https://github.com/microsoft/LightGBM/pull/3338#issuecomment-700399862, and they were fixed in https://github.com/microsoft/LightGBM/pull/3415.
Struggling to replicate the valgrind result (running `R CMD check --use-valgrind` returns no issues), so trying submission again. Hoping that the fixes for mis-alignment fix the other errors too.
## v3.0.0 - Submission 6 - (September 24, 2020)
### CRAN response
Failing pre-checks.
### `R CMD check` results
```text
* checking CRAN incoming feasibility ... WARNING
Maintainer: Guolin Ke <guolin.ke@microsoft.com>
Insufficient package version (submitted: 3.0.0, existing: 3.0.0)
Days since last update: 4
```
### Maintainer Notes
Did not think the version needed to be incremented if submitting a package in response to CRAN saying "you are failing checks and will be kicked off if you don't fix it", but I guess you do!
This can be fixed by just re-submitting but with the version changed from `3.0.0` to `3.0.0.1`.
## v3.0.0 - Submission 5 - (September 11, 2020)
### CRAN Response
Accepted to CRAN!
Please correct the problems below before 2020-10-05 to safely retain your package on CRAN:
```text
checking installed package size ... NOTE
installed size is 49.7Mb
sub-directories of 1Mb or more:
libs 49.1Mb
"network/socket_wrapper.hpp", line 30: Error: Could not open include file<ifaddrs.h>.
"network/socket_wrapper.hpp", line 216: Error: The type "ifaddrs" is incomplete.
"network/socket_wrapper.hpp", line 217: Error: The type "ifaddrs" is incomplete.
"network/socket_wrapper.hpp", line 220: Error: The type "ifaddrs" is incomplete.
"network/socket_wrapper.hpp", line 222: Error: The type "ifaddrs" is incomplete.
"network/socket_wrapper.hpp", line 214: Error: The function "getifaddrs" must have a prototype.
"network/socket_wrapper.hpp", line 228: Error: The function "freeifaddrs" must have a prototype.
"network/linkers_socket.cpp", line 76: Warning: A non-POD object of type "std::chrono::duration<double, std::ratio<1, 1000>>" passed as a variable argument to function "static LightGBM::Log::Info(const char*, ...)".
7 Error(s) and 1 Warning(s) detected.
*** Error code 2
make: Fatal error: Command failed for target `network/linkers_socket.o'
Current working directory /tmp/RtmpNfaavG/R.INSTALL40a84f70130a/lightgbm/src
ERROR: compilation failed for package lightgbm
* removing /home/ripley/R/Lib32/lightgbm
```
### Maintainer Notes
Added a patch that `psutil` has used to fix missing `ifaddrs.h` on Solaris 10: https://github.com/microsoft/LightGBM/issues/629#issuecomment-665091451.
## v3.0.0 - Submission 4 - (September 4, 2020)
### CRAN Response
> Thanks, if the running time is the only reason to wrap the examples in
\dontrun, please replace \dontrun by \donttest (\donttest examples are
not executed in the CRAN checks).
> Please replace cat() by message() or warning() in your functions (except
for print() and summary() functions). Messages and warnings can be
suppressed if needed.
> Missing Rd-tags:
lightgbm/man/dimnames.lgb.Dataset.Rd: \value
lightgbm/man/lgb.Dataset.construct.Rd: \value
lightgbm/man/lgb.prepare.Rd: \value
...
> Please add the tag and explain in detail the returned objects.
### Maintainer Notes
Responded to CRAN with the following:
All examples have been wrapped with `\donttest` as requested. We have replied to Swetlana Herbrandt asking for clarification on the donttest news item in the R 4.0.2 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html).
All uses of `cat()` have been replaced with `print()`. We chose `print()` over `message()` because it's important that they be written to stdout alongside all the other logs coming from the library's C++ code. `message()` and `warning()` write to stderr.
All exported objects now have `\value{}` statements in their documentation files in `man/`.
**We also replied directly to CRAN's feedback email**
> Swetlana,
> Thank you for your comments. I've just created a new submission that I believe addresses them.
> Can you help us understand something? In your message you said "\donttest examples are
not executed in the CRAN checks)", but in https://cran.r-project.org/doc/manuals/r-devel/NEWS.html we see the following:
> > "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value."
> Could you help us understand how both of those statements can be true?
## v3.0.0 - Submission 3 - (August 29, 2020)
### CRAN response
* Please write references in the description of the DESCRIPTION file in
the form
- authors (year) doi:...
- authors (year) arXiv:...
- authors (year, ISBN:...)
* if those are not available: authors (year) https:... with no space after 'doi:', 'arXiv:', 'https:' and angle brackets for auto-linking.
* (If you want to add a title as well please put it in quotes: "Title")
* \dontrun{} should only be used if the example really cannot be executed (e.g. because of missing additional software, missing API keys, ...) by the user. That's why wrapping examples in \dontrun{} adds the comment ("# Not run:") as a warning for the user. Does not seem necessary. Please unwrap the examples if they are executable in < 5 sec, or replace
\dontrun{} with \donttest{}.
* Please do not modify the global environment (e.g. by using <<-) in your
functions. This is not allowed by the CRAN policies.
* Please always add all authors, contributors and copyright holders in the Authors@R field with the appropriate roles. From CRAN policies you agreed to: "The ownership of copyright and intellectual property rights of all components of the package must be clear and unambiguous (including from the authors specification in the DESCRIPTION file). Where code is copied (or derived) from the work of others (including from R itself), care must be taken that any copyright/license statements are preserved and authorship is not misrepresented." e.g.: Microsoft Corporation, Dropbox Inc. Please explain in the submission comments what you did about this issue.
Please fix and resubmit
### Maintainer Notes
Responded to CRAN with the following:
The paper citation has been adjusted as requested. We were using 'glmnet' as a guide on how to include the URL but maybe they are no longer in compliance with CRAN policies: https://github.com/cran/glmnet/blob/b1a4b50de01e0cd24343959d7cf86452bac17b26/DESCRIPTION
All authors from the original LightGBM paper have been added to Authors@R as `"aut"`. We have also added Microsoft and DropBox, Inc. as `"cph"` (copyright holders). These roles were chosen based on the guidance in https://journal.r-project.org/archive/2012-1/RJournal_2012-1_Hornik~et~al.pdf.
lightgbm's code does use `<<-`, but it does not modify the global environment. The uses of `<<-` in R/lgb.interprete.R and R/callback.R are in functions which are called in an environment created by the lightgbm functions that call them, and this operator is used to reach one level up into the calling function's environment.
We chose to wrap our examples in `\dontrun{}` because we found, through testing on https://builder.r-hub.io/ and in our own continuous integration environments, that their run time varies a lot between platforms, and we cannot guarantee that all examples will run in under 5 seconds. We intentionally chose `\dontrun{}` over `\donttest{}` because this item in the R 4.0.0 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) seems to indicate that \donttest will be ignored by CRAN's automated checks:
> "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value."
We run all examples with `R CMD check --as-cran --run-dontrun` in our continuous integration tests on every commit to the package, so we have high confidence that they are working correctly.
## v3.0.0 - Submission 2 - (August 28, 2020)
### CRAN response
Failing pre-checks.
### `R CMD check` results
* Debian: 2 NOTEs
```text
* checking CRAN incoming feasibility ... NOTE
Maintainer: 'Guolin Ke <guolin.ke@microsoft.com>'
New submission
Possibly mis-spelled words in DESCRIPTION:
Guolin (13:52)
Ke (13:48)
LightGBM (14:20)
al (13:62)
et (13:59)
* checking top-level files ... NOTE
Non-standard files/directories found at top level:
'docs' 'lightgbm-hex-logo.png' 'lightgbm-hex-logo.svg'
```
* Windows: 2 NOTEs
```text
* checking CRAN incoming feasibility ... NOTE
Maintainer: 'Guolin Ke <guolin.ke@microsoft.com>'
New submission
Possibly mis-spelled words in DESCRIPTION:
Guolin (13:52)
Ke (13:48)
LightGBM (14:20)
al (13:62)
et (13:59)
* checking top-level files ... NOTE
Non-standard files/directories found at top level:
'docs' 'lightgbm-hex-logo.png' 'lightgbm-hex-logo.svg'
```
### Maintainer Notes
We should tell them the misspellings note is a false positive.
For the note about included files, that is my fault. I had extra files lying around when I generated the package. I'm surprised to see `docs/` in that list, since it is ignored in `.Rbuildignore`. I even tested that with [the exact code Rbuildignore uses](https://github.com/wch/r-source/blob/9d13622f41cfa0f36db2595bd6a5bf93e2010e21/src/library/tools/R/build.R#L85). For now, I added `rm -r docs/` to `build-cran-package.sh`. We can figure out what is happening with `.Rbuildignore` in the future, but it shouldn't block a release.
## v3.0.0 - Submission 1 - (August 24, 2020)
NOTE: 3.0.0-1 was never released to CRAN. CRAN was on vacation August 14-24, 2020, and in that time version 3.0.0-1 (a release candidate) became 3.0.0.
### CRAN response
> Please only ship the CRAN template for the MIT license.
> Is there some reference about the method you can add in the Description field in the form Authors (year) doi:.....?
> Please fix and resubmit.
### `R CMD check` results
* Debian: 1 NOTE
```text
* checking CRAN incoming feasibility ... NOTE
Maintainer: Guolin Ke <guolin.ke@microsoft.com>
New submission
License components with restrictions and base license permitting such:
MIT + file LICENSE
```
* Windows: 1 NOTE
```text
* checking CRAN incoming feasibility ... NOTE
Maintainer: 'Guolin Ke <guolin.ke@microsoft.com>'
New submission
License components with restrictions and base license permitting such:
MIT + file LICENSE
```
### Maintainer Notes
Tried updating `LICENSE` file to this template:
```yaml
YEAR: 2016
COPYRIGHT HOLDER: Microsoft Corporation
```
Added a citation and link for [the main paper](https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision) in `DESCRIPTION`.
## v3.0.0-1 - Submission 3 - (August 12, 2020)
### CRAN response

Просмотреть файл

@ -22,7 +22,7 @@ Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
be directly used with an \code{lgb.Dataset} object.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -15,6 +15,11 @@
\item{value}{a list of two elements: the first one is ignored
and the second one is column names}
}
\value{
A list with the dimension names of the dataset
A list with the dimension names of the dataset
}
\description{
Only column names are supported for \code{lgb.Dataset}, thus setting of
row names would have no effect and returned row names would be NULL.
@ -24,7 +29,7 @@ Generic \code{dimnames} methods are used by \code{colnames}.
Since row names are irrelevant, it is recommended to use \code{colnames} directly.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -17,6 +17,8 @@ getinfo(dataset, ...)
\item{name}{the name of the information field to get (see details)}
}
\value{
info data
info data
}
\description{
@ -33,7 +35,7 @@ The \code{name} field can be one of the following:
}
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -40,7 +40,7 @@ Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
or local file (that was created previously by saving an \code{lgb.Dataset}).
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -9,11 +9,14 @@ lgb.Dataset.construct(dataset)
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}
}
\value{
constructed dataset
}
\description{
Construct Dataset explicitly
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -22,7 +22,7 @@ constructed dataset
Construct validation data according to training data
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -12,14 +12,14 @@ lgb.Dataset.save(dataset, fname)
\item{fname}{object filename of output file}
}
\value{
passed dataset
the dataset you passed in
}
\description{
Please note that \code{init_score} is not saved in binary file.
If you need it, please set it again after loading Dataset.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -14,14 +14,14 @@ names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
}
\value{
passed dataset
the dataset you passed in
}
\description{
Set the categorical features of an \code{lgb.Dataset} object. Use this function
to tell LightGBM which features should be treated as categorical.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -12,13 +12,13 @@ lgb.Dataset.set.reference(dataset, reference)
\item{reference}{object of class \code{lgb.Dataset}}
}
\value{
passed dataset
the dataset you passed in
}
\description{
If you want to use validation data, you should set reference to training data
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package ="lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -34,7 +34,7 @@ Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare_rules2}.
}
\examples{
\dontrun{
\donttest{
data(iris)
str(iris)

Просмотреть файл

@ -149,7 +149,7 @@ Cross validation logic used by LightGBM
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -18,7 +18,7 @@ json format of model
Dump LightGBM model to json
}
\examples{
\dontrun{
\donttest{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -25,14 +25,14 @@ lgb.get.eval.result(
\item{is_err}{TRUE will return evaluation error instead}
}
\value{
vector of evaluation result
numeric vector of evaluation result
}
\description{
Given a \code{lgb.Booster}, return evaluation results for a
particular metric on a particular dataset.
}
\examples{
\dontrun{
\donttest{
# train a regression model
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -24,7 +24,7 @@ For a tree model, a \code{data.table} with the following columns:
Creates a \code{data.table} of feature importances in a model.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -29,7 +29,7 @@ For regression, binary classification and lambdarank model, a \code{list} of \co
Computes feature contribution components of rawscore prediction.
}
\examples{
\dontrun{
\donttest{
Logit <- function(x) log(x / (1.0 - x))
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -19,7 +19,7 @@ Load LightGBM takes in either a file path or model string.
If both are provided, Load will default to loading from file
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -39,7 +39,7 @@ The columns of the \code{data.table} are:
Parse a LightGBM model json dump into a \code{data.table} structure.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -37,7 +37,7 @@ The graph represents each feature as a horizontal bar of length proportional to
Features are shown ranked in a decreasing importance order.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -34,7 +34,7 @@ The graph represents each feature as a horizontal bar of length proportional to
contribution of a feature. Features are shown ranked in a decreasing contribution order.
}
\examples{
\dontrun{
\donttest{
Logit <- function(x) {
log(x / (1.0 - x))
}

Просмотреть файл

@ -9,6 +9,9 @@ lgb.prepare(...)
\arguments{
\item{...}{catch-all to match old calls}
}
\value{
Nothing. This function always raises an exception
}
\description{
removed functions
}

Просмотреть файл

@ -9,6 +9,9 @@ lgb.prepare2(...)
\arguments{
\item{...}{catch-all to match old calls}
}
\value{
Nothing. This function always raises an exception
}
\description{
removed functions
}

Просмотреть файл

@ -9,6 +9,9 @@ lgb.prepare_rules(...)
\arguments{
\item{...}{catch-all to match old calls}
}
\value{
Nothing. This function always raises an exception
}
\description{
removed functions
}

Просмотреть файл

@ -9,6 +9,9 @@ lgb.prepare_rules2(...)
\arguments{
\item{...}{catch-all to match old calls}
}
\value{
Nothing. This function always raises an exception
}
\description{
removed functions
}

Просмотреть файл

@ -20,7 +20,7 @@ lgb.Booster
Save LightGBM model
}
\examples{
\dontrun{
\donttest{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -132,7 +132,7 @@ Logic to train with LightGBM
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -26,7 +26,7 @@ Attempts to unload LightGBM packages so you can remove objects cleanly without
apparent reason and you do not want to restart R to fix the lost object.
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -70,6 +70,9 @@ If early stopping occurs, the model will have 'best_iter' field.}
CPU using hyper-threading to generate 2 threads per CPU core).}
}}
}
\value{
a trained \code{lgb.Booster}
}
\description{
Simple interface for training a LightGBM model.
}

Просмотреть файл

@ -61,7 +61,7 @@ For regression or binary classification, it returns a vector of length \code{nro
Predicted values based on class \code{lgb.Booster}
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -12,13 +12,13 @@ readRDS.lgb.Booster(file = "", refhook = NULL)
\item{refhook}{a hook function for handling reference objects.}
}
\value{
\code{lgb.Booster}.
\code{lgb.Booster}
}
\description{
Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}}
}
\examples{
\dontrun{
\donttest{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -42,7 +42,7 @@ Attempts to save a model using RDS. Has an additional parameter (\code{raw})
which decides whether to save the raw model or not.
}
\examples{
\dontrun{
\donttest{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

Просмотреть файл

@ -19,7 +19,9 @@ setinfo(dataset, ...)
\item{info}{the specific field of information to set}
}
\value{
passed object
the dataset you passed in
the dataset you passed in
}
\description{
Set one attribute of a \code{lgb.Dataset}
@ -38,7 +40,7 @@ The \code{name} field can be one of the following:
}
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -17,6 +17,8 @@ slice(dataset, ...)
\item{idxset}{an integer vector of indices of rows needed}
}
\value{
constructed sub dataset
constructed sub dataset
}
\description{
@ -24,7 +26,7 @@ Get a new \code{lgb.Dataset} containing the specified rows of
original \code{lgb.Dataset} object
}
\examples{
\dontrun{
\donttest{
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

Просмотреть файл

@ -40,6 +40,7 @@ OBJECTS = \
metric/dcg_calculator.o \
metric/metric.o \
objective/objective_function.o \
network/ifaddrs_patch.o \
network/linker_topo.o \
network/linkers_mpi.o \
network/linkers_socket.o \

Просмотреть файл

@ -41,6 +41,7 @@ OBJECTS = \
metric/dcg_calculator.o \
metric/metric.o \
objective/objective_function.o \
network/ifaddrs_patch.o \
network/linker_topo.o \
network/linkers_mpi.o \
network/linkers_socket.o \

Просмотреть файл

@ -0,0 +1,128 @@
/*!
* Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola.
* Licensed under the BSD 3-Clause License.
* See https://github.com/giampaolo/psutil/blob/master/LICENSE
*/
#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include "ifaddrs_patch.h"
#define MAX(x, y) ((x) > (y)?(x):(y))
#define SIZE(p) MAX((p).ss_len, sizeof(p))
/*
 * Return a heap-allocated copy of the socket address stored in *sa1.
 *
 * The copy is always sizeof(struct sockaddr_storage) bytes, regardless of the
 * address family, so the whole storage area is duplicated.
 *
 * Returns NULL if the allocation fails. The caller owns the returned pointer
 * and must release it with free().
 */
static struct sockaddr *
sa_dup(struct sockaddr_storage *sa1) {
  const size_t sz = sizeof(struct sockaddr_storage);
  /* no need to zero the buffer first: memcpy overwrites every byte of it */
  struct sockaddr *sa2 = static_cast<struct sockaddr *>(malloc(sz));
  if (sa2 == NULL) {
    /* out of memory: report it instead of copying through a NULL pointer */
    return NULL;
  }
  memcpy(sa2, sa1, sz);
  return sa2;
}
/*
 * Release an interface list previously returned by getifaddrs().
 *
 * Every node's name and address buffers are freed, followed by the node
 * itself. Passing NULL is allowed and does nothing.
 */
void freeifaddrs(struct ifaddrs *ifp) {
  struct ifaddrs *node = ifp;
  while (node != NULL) {
    /* remember the successor before the current node is destroyed */
    struct ifaddrs *following = node->ifa_next;
    free(node->ifa_name);
    free(node->ifa_addr);
    free(node->ifa_netmask);
    free(node->ifa_dstaddr);
    free(node);
    node = following;
  }
}
/*
 * Replacement for getifaddrs() on platforms matching the Solaris guard above.
 *
 * Builds a singly linked list with one node per logical interface reported by
 * SIOCGLIFCONF and stores its head in *ifap. For each interface the name,
 * address and netmask are filled in; the flags and the destination/broadcast
 * address are filled in when the corresponding ioctl succeeds.
 *
 * Returns 0 on success. On any failure returns -1, frees whatever part of the
 * list was already built, and leaves *ifap set to NULL. The caller releases a
 * successful result with freeifaddrs().
 */
int getifaddrs(struct ifaddrs **ifap) {
  int sd = -1;
  char *ccp = NULL;
  char *ecp = NULL;
  struct lifconf ifc;
  struct lifreq *ifr = NULL;
  struct lifnum lifn;
  struct ifaddrs *cifa = NULL; /* current */
  struct ifaddrs *pifa = NULL; /* previous */
  const size_t IFREQSZ = sizeof(struct lifreq);

  if (ifap == NULL)
    return (-1);

  /* Everything the error path looks at must be in a known state before the
   * first "goto error"; zeroing the request structs also clears fields such as
   * lifc_flags that would otherwise be passed to the kernel uninitialised. */
  *ifap = NULL;
  memset(&ifc, 0, sizeof(ifc));
  memset(&lifn, 0, sizeof(lifn));

  sd = socket(AF_INET, SOCK_STREAM, 0);
  if (sd < 0)
    goto error;

  /* find how much memory to allocate for the SIOCGLIFCONF call */
  lifn.lifn_family = AF_UNSPEC;
  lifn.lifn_flags = 0;
  if (ioctl(sd, SIOCGLIFNUM, &lifn) < 0)
    goto error;

  /* Sun and Apple code likes to pad the interface count here in case interfaces
   * are coming up between calls */
  lifn.lifn_count += 4;

  ifc.lifc_family = AF_UNSPEC;
  ifc.lifc_flags = 0;
  ifc.lifc_len = lifn.lifn_count * sizeof(struct lifreq);
  ifc.lifc_buf = static_cast<char*>(calloc(1, ifc.lifc_len));
  if (ifc.lifc_buf == NULL)
    goto error;
  if (ioctl(sd, SIOCGLIFCONF, &ifc) < 0)
    goto error;

  /* walk the array of lifreq records the kernel wrote into the buffer */
  ccp = reinterpret_cast<char *>(ifc.lifc_req);
  ecp = ccp + ifc.lifc_len;

  while (ccp < ecp) {
    ifr = reinterpret_cast<struct lifreq *>(ccp);

    /* calloc leaves every pointer in the node NULL, so a partially filled
     * node can always be handed to freeifaddrs() */
    cifa = static_cast<struct ifaddrs *>(calloc(1, sizeof(struct ifaddrs)));
    if (cifa == NULL)
      goto error;

    /* link the node in immediately so the error path releases it too */
    if (pifa == NULL) {
      *ifap = cifa; /* first one */
    } else {
      pifa->ifa_next = cifa;
    }

    cifa->ifa_name = strdup(ifr->lifr_name);
    if (cifa->ifa_name == NULL)
      goto error;

    if (ioctl(sd, SIOCGLIFADDR, ifr, IFREQSZ) < 0)
      goto error;
    cifa->ifa_addr = sa_dup(&ifr->lifr_addr);
    if (cifa->ifa_addr == NULL)
      goto error;

    if (ioctl(sd, SIOCGLIFNETMASK, ifr, IFREQSZ) < 0)
      goto error;
    cifa->ifa_netmask = sa_dup(&ifr->lifr_addr);
    if (cifa->ifa_netmask == NULL)
      goto error;

    cifa->ifa_flags = 0;
    cifa->ifa_dstaddr = NULL;

    if (0 == ioctl(sd, SIOCGLIFFLAGS, ifr)) /* optional */
      cifa->ifa_flags = ifr->lifr_flags;

    /* the destination address is optional as well: try the point-to-point
     * peer first, then fall back to the broadcast address; NULL means the
     * interface has neither */
    if (ioctl(sd, SIOCGLIFDSTADDR, ifr, IFREQSZ) < 0) {
      if (0 == ioctl(sd, SIOCGLIFBRDADDR, ifr, IFREQSZ))
        cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr);
    } else {
      cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr);
    }

    pifa = cifa;
    ccp += IFREQSZ;
  }

  free(ifc.lifc_buf);
  close(sd);
  return 0;

error:
  free(ifc.lifc_buf); /* free(NULL) is a no-op */
  if (sd != -1)
    close(sd);
  freeifaddrs(*ifap);
  *ifap = NULL; /* do not hand a dangling list back to the caller */
  return (-1);
}
#endif // (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))

Просмотреть файл

@ -0,0 +1,34 @@
/*!
* Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola.
* Licensed under the BSD 3-Clause License.
* See https://github.com/giampaolo/psutil/blob/master/LICENSE
*/
/*
* - https://lists.samba.org/archive/samba-technical/2009-February/063079.html
* - https://github.com/giampaolo/psutil/blob/master/psutil/arch/solaris/v10/ifaddrs.h
*/
#ifndef LIGHTGBM_NETWORK_IFADDRS_PATCH_H_
#define LIGHTGBM_NETWORK_IFADDRS_PATCH_H_
#include <sys/socket.h>
#include <net/if.h>
/* Clear any existing macro definitions of these two names before the struct
 * below declares ifa_dstaddr as a real field.
 * NOTE(review): presumably one of the system headers above may define them;
 * confirm on the target platform. */
#undef ifa_dstaddr
#undef ifa_broadaddr
/* ifa_broadaddr is an alias for the ifa_dstaddr field. */
#define ifa_broadaddr ifa_dstaddr
/* One node of the singly linked interface list built by getifaddrs(). */
struct ifaddrs {
struct ifaddrs *ifa_next; /* next node in the list, NULL on the last one */
char *ifa_name; /* heap-allocated copy of the interface name */
unsigned int ifa_flags; /* interface flags, 0 when they could not be read */
struct sockaddr *ifa_addr; /* heap-allocated interface address */
struct sockaddr *ifa_netmask; /* heap-allocated netmask */
struct sockaddr *ifa_dstaddr; /* destination or broadcast address, may be NULL */
};
/* Stores the head of a newly allocated interface list in the pointed-to
 * pointer. Returns 0 on success and -1 on failure. */
extern int getifaddrs(struct ifaddrs **);
/* Frees a list returned by getifaddrs(); a NULL argument is accepted. */
extern void freeifaddrs(struct ifaddrs *);
#endif // LIGHTGBM_NETWORK_IFADDRS_PATCH_H_

Просмотреть файл

@ -27,7 +27,6 @@
#include <arpa/inet.h>
#include <fcntl.h>
#include <ifaddrs.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
@ -36,6 +35,13 @@
#include <sys/types.h>
#include <unistd.h>
// ifaddrs.h is not available on Solaris 10
#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
#include "ifaddrs_patch.h"
#else
#include <ifaddrs.h>
#endif
#endif // defined(_WIN32)
#ifdef _MSC_VER