[R-package][ci] added CI stage for R package (fixes #2335, fixes #2569) (#2530)

This commit is contained in:
James Lamb 2019-12-15 12:01:01 -06:00 коммит произвёл GitHub
Родитель dfb91576d0
Коммит 86ca484508
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
11 изменённых файлов: 188 добавлений и 5 удалений

Просмотреть файл

@ -78,6 +78,11 @@ if [[ $TASK == "if-else" ]]; then
exit 0
fi
# R-package task: hand off to the dedicated R CI script and stop here,
# so the conda install of Python test dependencies that follows is skipped.
# The script path is quoted so a build directory containing spaces or glob
# characters is passed to bash as a single argument.
if [[ $TASK == "r-package" ]]; then
    bash "${BUILD_DIRECTORY}/.ci/test_r_package.sh" || exit -1
    exit 0
fi
conda install -q -y -n $CONDA_ENV joblib matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy
if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then

90
.ci/test_r_package.sh Executable file
Просмотреть файл

@ -0,0 +1,90 @@
#!/bin/bash

# CI entry point for the R package: installs R and its system dependencies
# where needed, builds the package tarball with build_r.R and runs
# `R CMD check --as-cran` on it. Exits non-zero if any step fails or if the
# check log contains a WARNING.
#
# Environment read by this script: BUILD_DIRECTORY, OS_NAME, COMPILER, TRAVIS,
# R_TRAVIS_LINUX_VERSION (Travis Linux), R_MAC_VERSION (macOS), LGB_VER.

# set up R environment
R_LIB_PATH=~/Rlib
mkdir -p "$R_LIB_PATH"
echo "R_LIBS=$R_LIB_PATH" > "${HOME}/.Renviron"
echo 'options(repos = "https://cran.rstudio.com")' > "${HOME}/.Rprofile"
export PATH="$R_LIB_PATH/R/bin:$PATH"

# installing precompiled R for Ubuntu
# https://cran.r-project.org/bin/linux/ubuntu/#installation
# adding steps from https://stackoverflow.com/a/56378217/3986677 to get latest version
#
# This only needs to get run on Travis because R environment for Linux
# used by Azure pipelines is set up in https://github.com/guolinke/lightgbm-ci-docker
if [[ $TRAVIS == "true" ]] && [[ $OS_NAME == "linux" ]]; then
    sudo add-apt-repository \
        "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/"
    sudo apt-key adv \
        --keyserver keyserver.ubuntu.com \
        --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
    sudo apt-get update
    sudo apt-get install \
        --no-install-recommends \
        -y \
            "r-base-dev=${R_TRAVIS_LINUX_VERSION}" \
            texinfo \
            texlive-latex-recommended \
            texlive-fonts-recommended \
            texlive-fonts-extra \
            qpdf \
            || exit -1
fi

# Installing R precompiled for Mac OS 10.11 or higher
if [[ $OS_NAME == "macos" ]]; then
    brew install qpdf
    brew cask install basictex
    export PATH="/Library/TeX/texbin:$PATH"
    sudo tlmgr update --self
    sudo tlmgr install inconsolata helvetic

    wget -q "https://cran.r-project.org/bin/macosx/R-${R_MAC_VERSION}.pkg" -O R.pkg
    sudo installer \
        -pkg "$(pwd)/R.pkg" \
        -target /

    # Fix "duplicate libomp versions" issue on Mac
    # by replacing the R libomp.dylib with a symlink to the one installed with brew
    if [[ $COMPILER == "clang" ]]; then
        # Deliberately unquoted: the dots are replaced by spaces and word
        # splitting turns "3.6.1" into the array elements 3, 6 and 1.
        ver_arr=( ${R_MAC_VERSION//./ } )
        R_MAJOR_MINOR="${ver_arr[0]}.${ver_arr[1]}"
        # The glob (*) stands for the installed libomp version directory and
        # must stay outside the quotes so that it is expanded.
        sudo ln -sf \
            "$(brew --cellar libomp)"/*/lib/libomp.dylib \
            "/Library/Frameworks/R.framework/Versions/${R_MAJOR_MINOR}/Resources/lib/libomp.dylib"
    fi
fi

conda install \
    -y \
    -q \
    --no-deps \
        pandoc

# Manually install Depends and Imports libraries + 'testthat'
# to avoid a CI-time dependency on devtools (for devtools::install_deps())
Rscript -e "install.packages(c('data.table', 'jsonlite', 'Matrix', 'R6', 'testthat'))" || exit -1

# Stop if the build directory cannot be entered; otherwise build_r.R would be
# looked up in whatever directory the script happened to start in.
cd "${BUILD_DIRECTORY}" || exit -1
Rscript build_r.R || exit -1

# NOTE(review): LGB_VER is not assigned anywhere in this script; it is assumed
# to be exported by the calling environment - TODO confirm.
PKG_TARBALL="lightgbm_${LGB_VER}.tar.gz"
LOG_FILE_NAME="lightgbm.Rcheck/00check.log"

# suppress R CMD check warning from Suggests dependencies not being available
export _R_CHECK_FORCE_SUGGESTS_=0

# fails tests if either ERRORs or WARNINGs are thrown by
# R CMD CHECK
R CMD check "${PKG_TARBALL}" \
    --as-cran \
    || exit -1

# Without the log there is no way to tell whether warnings were raised, so a
# missing log is treated as a failure instead of silently passing.
if [[ ! -f "$LOG_FILE_NAME" ]]; then
    echo "R CMD check log not found at $LOG_FILE_NAME"
    exit -1
fi

if grep -q -R "WARNING" "$LOG_FILE_NAME"; then
    echo "WARNINGS have been found by R CMD check!"
    exit -1
fi

exit 0

Просмотреть файл

@ -24,6 +24,7 @@ env:
- TASK=mpi METHOD=pip
- TASK=gpu METHOD=source PYTHON_VERSION=3.5
- TASK=gpu METHOD=pip PYTHON_VERSION=3.6
- TASK=r-package
matrix:
exclude:
@ -44,9 +45,11 @@ before_install:
- if [[ $TRAVIS_OS_NAME == "osx" ]]; then
export OS_NAME="macos";
export COMPILER="gcc";
export R_MAC_VERSION=3.6.1;
else
export OS_NAME="linux";
export COMPILER="clang";
export R_TRAVIS_LINUX_VERSION=3.6.1-3bionic;
fi
- export CONDA="$HOME/miniconda"
- export PATH="$CONDA/bin:$PATH"

Просмотреть файл

@ -42,6 +42,8 @@ jobs:
TASK: gpu
METHOD: source
PYTHON_VERSION: 3.6
r_package:
TASK: r-package
steps:
- script: |
echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY"
@ -84,6 +86,8 @@ jobs:
PYTHON_VERSION: 3.5
bdist:
TASK: bdist
r_package:
TASK: r-package
steps:
- script: |
echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY"
@ -95,6 +99,7 @@ jobs:
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_8_X64"
echo "##vso[task.setvariable variable=R_MAC_VERSION]3.6.1"
displayName: 'Set variables'
- bash: $(Build.SourcesDirectory)/.ci/setup.sh
displayName: Setup

Просмотреть файл

@ -12,3 +12,14 @@
# Code copied in at build time
^src/CMakeLists.txt$
# unnecessary files from submodules, with literal dots escaped
^src/compute/\.appveyor\.yml$
^src/compute/\.coveralls\.yml$
^src/compute/\.travis\.yml$
^src/compute/test/$
^src/compute/index\.html$
^src/compute/\.git$
^src/compute/\.gitignore$
^src/compute/CONTRIBUTING\.md$
^src/compute/README\.md$

Просмотреть файл

@ -36,4 +36,4 @@ Imports:
jsonlite (>= 1.0),
Matrix (>= 1.1-0),
methods
RoxygenNote: 6.0.1
RoxygenNote: 7.0.2

Просмотреть файл

@ -53,6 +53,8 @@ lgb.cv(
\item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
by the values of outcome labels.}
@ -88,8 +90,6 @@ into a predictor model which frees up memory and the original datasets}
the number of real CPU cores, not the number of threads (most
CPU using hyper-threading to generate 2 threads per CPU core).}
}}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
}
\value{
a trained model \code{lgb.CVBooster}.

Просмотреть файл

@ -24,3 +24,42 @@ This is useful if you have a specific need for integer dataset instead of numeri
Note that there are programs which do not support integer-only input.
Consider this as a half memory technique which is dangerous, especially for LightGBM.
}
\examples{
library(lightgbm)
data(iris)
str(iris)
new_iris <- lgb.prepare_rules2(data = iris) # Convert automatically and keep the rules used
str(new_iris$data)
data(iris) # Reload the original iris dataset
iris$Species[1L] <- "NEW FACTOR" # Assign a level that does not exist, which yields NA
# Convert again, this time reusing the rules obtained above
# Unknown factors become 0, excellent for sparse datasets
newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
# Unknown factor is now zero, perfect for sparse datasets
newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value
# Is the newly created dataset equal? YES!
all.equal(new_iris$data, newer_iris$data)
# Can we supply our own rules?
data(iris) # Reload the original iris dataset
# Map the species to different integer codes than before
personal_rules <- list(
Species = c(
"setosa" = 3L
, "versicolor" = 2L
, "virginica" = 1L
)
)
newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules)
str(newest_iris$data) # SUCCESS!
}

Просмотреть файл

@ -99,6 +99,32 @@ if (!use_precompile) {
# Install
system(paste0(cmake_cmd, " .."))
# The Makefile produced by cmake contains a .NOTPARALLEL directive that
# R CMD check complains about. Targets are built serially here, so the
# directive is not needed: as a workaround, every line mentioning it is
# blanked out before the build command runs.
generated_makefile <- file.path(R_PACKAGE_SOURCE, "src", "build", "Makefile")
if (file.exists(generated_makefile)) {
  # Read, blank the offending lines, and write the file back in place
  makefile_txt <- gsub(".*NOTPARALLEL.*", "", readLines(generated_makefile))
  writeLines(makefile_txt, generated_makefile, sep = "\n")
}
system(build_cmd)
src <- file.path(lib_folder, paste0("lib_lightgbm", SHLIB_EXT), fsep = "/")

Просмотреть файл

@ -1,4 +1,8 @@
library(testthat)
library(lightgbm)
test_check("lightgbm")
test_check(
package = "lightgbm"
, stop_on_failure = TRUE
, stop_on_warning = FALSE
)

Просмотреть файл

@ -85,5 +85,5 @@ version <- gsub(
)
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))
cmd <- sprintf("R CMD INSTALL %s --no-multiarch", tarball)
cmd <- sprintf("R CMD INSTALL %s --no-multiarch --with-keep.source", tarball)
.run_shell_command(cmd)