Mirror of https://github.com/microsoft/LightGBM.git
Merge branch 'master' into nccl-dev
This commit is contained in:
Commit 1e6e4a1cca
@@ -1,16 +1,14 @@
version: 4.3.0.99.{build}
version: 4.4.0.99.{build}
image: Visual Studio 2015
platform: x64
configuration: # a trick to construct a build matrix with multiple Python versions
configuration:
- '3.8'
# only build pull requests and
# commits to 'master' or any branch starting with 'release'
# only build on 'master' and pull requests targeting it
branches:
only:
- master
- /^release/
environment:
matrix:

@@ -25,12 +23,13 @@ install:
- git submodule update --init --recursive # get `external_libs` folder
- set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH%
- set PYTHON_VERSION=%CONFIGURATION%
- set CONDA_ENV="test-env"
- ps: |
$env:ALLOW_SKIP_ARROW_TESTS = "1"
$env:APPVEYOR = "true"
$env:CMAKE_BUILD_PARALLEL_LEVEL = 4
$env:MINICONDA = "C:\Miniconda3-x64"
$env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH"
$env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER"
$env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim()
build: false

@@ -26,11 +26,12 @@ fi
PY_MINOR_VER=$(python -c "import sys; print(sys.version_info.minor)")
if [ $PY_MINOR_VER -gt 7 ]; then
echo "pydistcheck..."
pip install pydistcheck
pip install 'pydistcheck>=0.7.0'
if { test "${TASK}" = "cuda" || test "${METHOD}" = "wheel"; }; then
pydistcheck \
--inspect \
--ignore 'compiled-objects-have-debug-symbols,distro-too-large-compressed' \
--ignore 'compiled-objects-have-debug-symbols'\
--ignore 'distro-too-large-compressed' \
--max-allowed-size-uncompressed '500M' \
--max-allowed-files 800 \
${DIST_DIR}/* || exit 1
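
Aside: the same check can be reproduced locally against a built distribution; a minimal sketch (the dist/ path here is illustrative, CI passes ${DIST_DIR} instead):

    pip install 'pydistcheck>=0.7.0'
    pydistcheck --inspect dist/*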

@@ -0,0 +1,51 @@
# [description]
#
# Similar to ci-core.txt, but specific to Python 3.8.
#
# Unlike ci-core.txt, this includes a Python version and uses
# `=` and `<=` pins to make solves faster and prevent against
# issues like https://github.com/microsoft/LightGBM/pull/6370.
#
# [usage]
#
# conda create \
#   --name test-env \
#   --file ./.ci/conda-envs/ci-core-py38.txt
#

# python
python=3.8.*

# direct imports
cffi=1.15.*
dask=2023.5.*
distributed=2023.5.*
joblib=1.4.*
matplotlib-base=3.7.*
numpy=1.24.*
pandas=1.5.*
pyarrow-core=16.1.*
python-graphviz=0.20.*
scikit-learn=1.3.*
scipy=1.10.*

# testing-only dependencies
cloudpickle=3.0.*
pluggy=1.5.*
psutil=5.9.8
pytest=8.2.*

# other recursive dependencies, just
# pinned here to help speed up solves
bokeh=3.1.*
fsspec=2024.5.*
msgpack-python=1.0.*
pluggy=1.5.*
pytz=2024.1
setuptools=69.5.*
snappy=1.2.*
tomli=2.0.*
tornado=6.4.*
wheel=0.43.*
zict=3.0.*
zipp=3.17.*

@@ -6,15 +6,12 @@
TRIGGER_PHRASE: Code phrase that triggers workflow.
"""
import json
from os import environ
from sys import argv, exit
from time import sleep

try:
from urllib import request
except ImportError:
import urllib2 as request
from urllib import request

def get_runs(trigger_phrase):

@@ -52,6 +52,8 @@ LINTERS_TO_USE <- list(
, "inner_combine" = lintr::inner_combine_linter()
, "is_numeric" = lintr::is_numeric_linter()
, "lengths" = lintr::lengths_linter()
, "length_levels" = lintr::length_levels_linter()
, "length_test" = lintr::length_test_linter()
, "line_length" = lintr::line_length_linter(length = 120L)
, "literal_coercion" = lintr::literal_coercion_linter()
, "matrix" = lintr::matrix_apply_linter()

@@ -66,6 +68,7 @@ LINTERS_TO_USE <- list(
, "redundant_equals" = lintr::redundant_equals_linter()
, "regex_subset" = lintr::regex_subset_linter()
, "routine_registration" = lintr::routine_registration_linter()
, "scalar_in" = lintr::scalar_in_linter()
, "semicolon" = lintr::semicolon_linter()
, "seq" = lintr::seq_linter()
, "spaces_inside" = lintr::spaces_inside_linter()

.ci/setup.sh
@@ -14,15 +14,13 @@ if [[ $OS_NAME == "macos" ]]; then
if [[ $COMPILER == "clang" ]]; then
brew install libomp
if [[ $AZURE == "true" ]]; then
sudo xcode-select -s /Applications/Xcode_11.7.app/Contents/Developer || exit 1
sudo xcode-select -s /Applications/Xcode_13.1.0.app/Contents/Developer || exit 1
fi
else # gcc
# Check https://github.com/actions/runner-images/tree/main/images/macos for available
# versions of Xcode
sudo xcode-select -s /Applications/Xcode_14.3.1.app/Contents/Developer || exit 1
if [[ $TASK != "mpi" ]]; then
brew install gcc
fi
brew install gcc
fi
if [[ $TASK == "mpi" ]]; then
brew install open-mpi

@@ -30,10 +28,6 @@ if [[ $OS_NAME == "macos" ]]; then
if [[ $TASK == "swig" ]]; then
brew install swig
fi
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-${ARCH}.sh
else # Linux
if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then
# fixes error "unable to initialize frontend: Dialog"

@@ -45,35 +39,30 @@ else # Linux
software-properties-common
sudo apt-get install --no-install-recommends -y \
apt-utils \
build-essential \
ca-certificates \
cmake \
curl \
git \
iputils-ping \
jq \
libcurl4 \
libicu-dev \
libssl-dev \
libunwind8 \
locales \
locales-all \
netcat \
unzip \
zip || exit 1
locales-all || exit 1
if [[ $COMPILER == "clang" ]]; then
sudo apt-get install --no-install-recommends -y \
clang \
libomp-dev
elif [[ $COMPILER == "clang-17" ]]; then
sudo apt-get install wget
sudo apt-get install --no-install-recommends -y \
wget
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
sudo apt-add-repository deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main
sudo apt-add-repository deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main
sudo apt-get update
sudo apt-get install -y clang-17
sudo apt-get install --no-install-recommends -y libomp-17-dev
sudo apt-get install -y \
clang-17 \
libomp-17-dev
fi
export LANG="en_US.UTF-8"

@@ -144,16 +133,14 @@ else # Linux
apt-get install --no-install-recommends -y \
cmake
fi
if [[ $SETUP_CONDA != "false" ]]; then
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${ARCH}.sh
fi
fi

if [[ "${TASK}" != "r-package" ]] && [[ "${TASK}" != "r-rchk" ]]; then
if [[ $SETUP_CONDA != "false" ]]; then
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-${ARCH}.sh
sh miniforge.sh -b -p $CONDA
fi
conda config --set always_yes yes --set changeps1 no

@@ -0,0 +1,50 @@
#!/bin/bash

set -e -E -u -o pipefail

# latest versions of lightgbm's dependencies,
# including pre-releases and nightlies
#
# ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml
echo "installing testing dependencies"
python -m pip install \
cloudpickle \
psutil \
pytest
echo "done installing testing dependencies"

echo "installing lightgbm's dependencies"
python -m pip install \
--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
--prefer-binary \
--pre \
--upgrade \
'numpy>=2.0.0.dev0' \
'matplotlib>=3.10.0.dev0' \
'pandas>=3.0.0.dev0' \
'scikit-learn>=1.6.dev0' \
'scipy>=1.15.0.dev0'

python -m pip install \
--extra-index-url https://pypi.fury.io/arrow-nightlies/ \
--prefer-binary \
--pre \
--upgrade \
'pyarrow>=17.0.0.dev0'

python -m pip install \
'cffi>=1.15.1'

echo "done installing lightgbm's dependencies"

echo "installing lightgbm"
pip install --no-deps dist/*.whl
echo "done installing lightgbm"

echo "installed package versions:"
pip freeze

echo ""
echo "running tests"
pytest tests/c_api_test/
pytest tests/python_package_test/

@@ -3,19 +3,20 @@
set -e -E -u -o pipefail

# oldest versions of dependencies published after
# minimum supported Python version's first release
# minimum supported Python version's first release,
# for which there are wheels compatible with the
# python:{version} image
#
# see https://devguide.python.org/versions/
#
echo "installing lightgbm's dependencies"
pip install \
'cffi==1.15.1' \
'dataclasses' \
'numpy==1.16.6' \
'pandas==0.24.0' \
'numpy==1.19.0' \
'pandas==1.1.3' \
'pyarrow==6.0.1' \
'scikit-learn==0.18.2' \
'scipy==0.19.0' \
'scikit-learn==0.24.0' \
'scipy==1.6.0' \
|| exit 1
echo "done installing lightgbm's dependencies"

.ci/test.sh
@@ -3,6 +3,7 @@
set -e -E -o -u pipefail

# defaults
CONDA_ENV="test-env"
IN_UBUNTU_BASE_CONTAINER=${IN_UBUNTU_BASE_CONTAINER:-"false"}
METHOD=${METHOD:-""}
PRODUCES_ARTIFACTS=${PRODUCES_ARTIFACTS:-"false"}

@@ -10,6 +11,8 @@ SANITIZERS=${SANITIZERS:-""}
ARCH=$(uname -m)

LGB_VER=$(head -n 1 "${BUILD_DIRECTORY}/VERSION.txt")

if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "gcc" ]]; then
export CXX=g++-11
export CC=gcc-11

@@ -26,8 +29,21 @@ if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then
export LC_ALL="en_US.UTF-8"
fi

# Setting MACOSX_DEPLOYMENT_TARGET prevents CMake from building against too-new
# macOS features, and helps tools like Python build tools determine the appropriate
# wheel compatibility tags.
#
# ref:
# * https://cmake.org/cmake/help/latest/envvar/MACOSX_DEPLOYMENT_TARGET.html
# * https://github.com/scikit-build/scikit-build-core/blob/acb7d0346e4a05bcb47a4ea3939c705ab71e3145/src/scikit_build_core/builder/macos.py#L36
if [[ $ARCH == "x86_64" ]]; then
export MACOSX_DEPLOYMENT_TARGET=10.15
else
export MACOSX_DEPLOYMENT_TARGET=12.0
fi

if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then
bash ${BUILD_DIRECTORY}/.ci/test_r_package.sh || exit 1
bash "${BUILD_DIRECTORY}/.ci/test_r_package.sh" || exit 1
exit 0
fi

@@ -54,27 +70,31 @@ if [[ $TASK == "if-else" ]]; then
source activate $CONDA_ENV
cmake -B build -S . || exit 1
cmake --build build --target lightgbm -j4 || exit 1
cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp && ../../lightgbm config=predict.conf output_result=origin.pred || exit 1
cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=predict.conf output_result=ifelse.pred && python test.py || exit 1
cd "$BUILD_DIRECTORY/tests/cpp_tests"
../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp
../../lightgbm config=predict.conf output_result=origin.pred
../../lightgbm config=predict.conf output_result=ifelse.pred
python test.py
exit 0
fi

cd "${BUILD_DIRECTORY}"

if [[ $TASK == "swig" ]]; then
cmake -B build -S . -DUSE_SWIG=ON
cmake --build build -j4 || exit 1
if [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "gcc" ]]; then
objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T $BUILD_DIRECTORY/lib_lightgbm_swig.so >> $BUILD_DIRECTORY/objdump.log || exit 1
python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1
objdump -T ./lib_lightgbm_swig.so >> ./objdump.log || exit 1
python ./helpers/check_dynamic_dependencies.py ./objdump.log || exit 1
fi
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp $BUILD_DIRECTORY/build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar
cp ./build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar
fi
exit 0
fi

if [[ $TASK == "lint" ]]; then
cd ${BUILD_DIRECTORY}
mamba create -q -y -n $CONDA_ENV \
${CONDA_PYTHON_REQUIREMENT} \
'cmakelint>=1.4.2' \

@@ -83,19 +103,19 @@ if [[ $TASK == "lint" ]]; then
'mypy>=1.8.0' \
'pre-commit>=3.6.0' \
'pyarrow>=6.0' \
'r-lintr>=3.1'
'r-lintr>=3.1.2'
source activate $CONDA_ENV
echo "Linting Python code"
bash ${BUILD_DIRECTORY}/.ci/lint-python.sh || exit 1
bash ./.ci/lint-python.sh || exit 1
echo "Linting R code"
Rscript ${BUILD_DIRECTORY}/.ci/lint_r_code.R ${BUILD_DIRECTORY} || exit 1
Rscript ./.ci/lint_r_code.R "${BUILD_DIRECTORY}" || exit 1
echo "Linting C++ code"
bash ${BUILD_DIRECTORY}/.ci/lint-cpp.sh || exit 1
bash ./.ci/lint-cpp.sh || exit 1
exit 0
fi

if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
cd $BUILD_DIRECTORY/docs
cd "${BUILD_DIRECTORY}/docs"
mamba env create \
-n $CONDA_ENV \
--file ./env.yml || exit 1

@@ -107,29 +127,32 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
'rstcheck>=6.2.0' || exit 1
source activate $CONDA_ENV
# check reStructuredText formatting
cd $BUILD_DIRECTORY/python-package
cd "${BUILD_DIRECTORY}/python-package"
rstcheck --report-level warning $(find . -type f -name "*.rst") || exit 1
cd $BUILD_DIRECTORY/docs
cd "${BUILD_DIRECTORY}/docs"
rstcheck --report-level warning --ignore-directives=autoclass,autofunction,autosummary,doxygenfile $(find . -type f -name "*.rst") || exit 1
# build docs
make html || exit 1
if [[ $TASK == "check-links" ]]; then
# check docs for broken links
pip install --user linkchecker
pip install linkchecker
linkchecker --config=.linkcheckerrc ./_build/html/*.html || exit 1
exit 0
fi
# check the consistency of parameters' descriptions and other stuff
cp $BUILD_DIRECTORY/docs/Parameters.rst $BUILD_DIRECTORY/docs/Parameters-backup.rst
cp $BUILD_DIRECTORY/src/io/config_auto.cpp $BUILD_DIRECTORY/src/io/config_auto-backup.cpp
python $BUILD_DIRECTORY/helpers/parameter_generator.py || exit 1
diff $BUILD_DIRECTORY/docs/Parameters-backup.rst $BUILD_DIRECTORY/docs/Parameters.rst || exit 1
diff $BUILD_DIRECTORY/src/io/config_auto-backup.cpp $BUILD_DIRECTORY/src/io/config_auto.cpp || exit 1
cd "${BUILD_DIRECTORY}"
cp ./docs/Parameters.rst ./docs/Parameters-backup.rst
cp ./src/io/config_auto.cpp ./src/io/config_auto-backup.cpp
python ./helpers/parameter_generator.py || exit 1
diff ./docs/Parameters-backup.rst ./docs/Parameters.rst || exit 1
diff ./src/io/config_auto-backup.cpp ./src/io/config_auto.cpp || exit 1
exit 0
fi

if [[ $PYTHON_VERSION == "3.7" ]]; then
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py37.txt"
elif [[ $PYTHON_VERSION == "3.8" ]]; then
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py38.txt"
else
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core.txt"
fi

@@ -143,38 +166,21 @@ mamba create \
source activate $CONDA_ENV

cd $BUILD_DIRECTORY

if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then
# fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL)
for LIBOMP_ALIAS in libgomp.dylib libiomp5.dylib libomp.dylib; do sudo ln -sf "$(brew --cellar libomp)"/*/lib/libomp.dylib $CONDA_PREFIX/lib/$LIBOMP_ALIAS || exit 1; done
fi
cd "${BUILD_DIRECTORY}"

if [[ $TASK == "sdist" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz -v || exit 1
sh ./build-python.sh sdist || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER.tar.gz -v || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
cp ./dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $TASK == "bdist" ]]; then
if [[ $OS_NAME == "macos" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
mv \
./dist/*.whl \
./dist/tmp.whl || exit 1
if [[ $ARCH == "x86_64" ]]; then
PLATFORM="macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64"
else
echo "ERROR: macos wheels not supported yet on architecture '${ARCH}'"
exit 1
fi
mv \
./dist/tmp.whl \
dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl || exit 1
sh ./build-python.sh bdist_wheel || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi

@@ -184,91 +190,88 @@ elif [[ $TASK == "bdist" ]]; then
else
PLATFORM="manylinux2014_$ARCH"
fi
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1
sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1
mv \
./dist/*.whl \
./dist/tmp.whl || exit 1
mv \
./dist/tmp.whl \
./dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
# Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py
export LIGHTGBM_TEST_DUAL_CPU_GPU=1
fi
pip install --user $BUILD_DIRECTORY/dist/*.whl || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
pip install -v ./dist/*.whl || exit 1
pytest ./tests || exit 1
exit 0
fi

if [[ $TASK == "gpu" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "gpu"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ./include/LightGBM/config.h
grep -q 'std::string device_type = "gpu"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_GPU=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --gpu || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --gpu || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
cmake -B build -S . -DUSE_GPU=ON
fi
elif [[ $TASK == "cuda" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' ./include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
# by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' ./include/LightGBM/config.h
grep -q 'gpu_use_dp = true' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_CUDA=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --cuda || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --cuda || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
cmake -B build -S . -DUSE_CUDA=ON
fi
elif [[ $TASK == "mpi" ]]; then
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_MPI=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --mpi || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --mpi || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
cmake -B build -S . -DUSE_MPI=ON -DUSE_DEBUG=ON

@@ -279,22 +282,22 @@ fi
cmake --build build --target _lightgbm -j4 || exit 1

cd $BUILD_DIRECTORY && sh ./build-python.sh install --precompile --user || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh install --precompile || exit 1
pytest ./tests || exit 1

if [[ $TASK == "regular" ]]; then
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
if [[ $OS_NAME == "macos" ]]; then
cp $BUILD_DIRECTORY/lib_lightgbm.dylib $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib
cp ./lib_lightgbm.dylib $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib
else
if [[ $COMPILER == "gcc" ]]; then
objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1
python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1
python ./helpers/check_dynamic_dependencies.py ./objdump.log || exit 1
fi
cp $BUILD_DIRECTORY/lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so
cp ./lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so
fi
fi
cd $BUILD_DIRECTORY/examples/python-guide
cd "$BUILD_DIRECTORY/examples/python-guide"
sed -i'.bak' '/import lightgbm as lgb/a\
import matplotlib\
matplotlib.use\(\"Agg\"\)\

@@ -306,7 +309,7 @@ matplotlib.use\(\"Agg\"\)\
'ipywidgets>=8.1.2' \
'notebook>=7.1.2'
for f in *.py **/*.py; do python $f || exit 1; done # run all examples
cd $BUILD_DIRECTORY/examples/python-guide/notebooks
cd "$BUILD_DIRECTORY/examples/python-guide/notebooks"
sed -i'.bak' 's/INTERACTIVE = False/assert False, \\"Interactive mode disabled\\"/' interactive_plot_example.ipynb
jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb || exit 1 # run all notebooks

@@ -106,10 +106,10 @@ if [[ $OS_NAME == "macos" ]]; then
-target / || exit 1
fi

# fix for issue where CRAN was not returning {lattice} when using R 3.6
# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6
# "Warning: dependency ‘lattice’ is not available"
if [[ "${R_MAJOR_VERSION}" == "3" ]]; then
Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', repos = NULL, lib = '${R_LIB_PATH}')"
Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')"
else
# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}.
# This should be unnecessary on R >=4.4.0

@@ -136,7 +136,7 @@ if [[ $OS_NAME == "macos" ]]; then
fi
Rscript --vanilla -e "options(install.packages.compile.from.source = '${compile_from_source}'); install.packages(${packages}, repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), Ncpus = parallel::detectCores())" || exit 1

cd ${BUILD_DIRECTORY}
cd "${BUILD_DIRECTORY}"

PKG_TARBALL="lightgbm_*.tar.gz"
LOG_FILE_NAME="lightgbm.Rcheck/00check.log"

@@ -147,7 +147,7 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then
# on Linux, we recreate configure in CI to test if
# a change in a PR has changed configure.ac
if [[ $OS_NAME == "linux" ]]; then
${BUILD_DIRECTORY}/R-package/recreate-configure.sh
./R-package/recreate-configure.sh

num_files_changed=$(
git diff --name-only | wc -l

@@ -72,10 +72,14 @@ bytes_possibly_lost=$(
| tr -d ","
)
echo "valgrind found ${bytes_possibly_lost} bytes possibly lost"
if [[ ${bytes_possibly_lost} -gt 1056 ]]; then
if [[ ${bytes_possibly_lost} -gt 1104 ]]; then
exit 1
fi

# ensure 'grep --count' doesn't cause failures
set +e

echo "checking for invalid reads"
invalid_reads=$(
cat ${VALGRIND_LOGS_FILE} \
| grep --count -i "Invalid read"

@@ -85,6 +89,7 @@ if [[ ${invalid_reads} -gt 0 ]]; then
exit 1
fi

echo "checking for invalid writes"
invalid_writes=$(
cat ${VALGRIND_LOGS_FILE} \
| grep --count -i "Invalid write"

@@ -6,14 +6,11 @@ function Check-Output {
}
}

# unify environment variable for Azure DevOps and AppVeyor
if (Test-Path env:APPVEYOR) {
$env:APPVEYOR = "true"
$env:ALLOW_SKIP_ARROW_TESTS = "1"
}
$env:CONDA_ENV = "test-env"
$env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim()

if ($env:TASK -eq "r-package") {
& $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 ; Check-Output $?
& .\.ci\test_r_package_windows.ps1 ; Check-Output $?
Exit 0
}

@@ -34,7 +31,7 @@ if ($env:TASK -eq "swig") {
cmake -B build -S . -A x64 -DUSE_SWIG=ON ; Check-Output $?
cmake --build build --target ALL_BUILD --config Release ; Check-Output $?
if ($env:AZURE -eq "true") {
cp $env:BUILD_SOURCESDIRECTORY/build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $?
cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $?
}
Exit 0
}

@@ -43,16 +40,12 @@ if ($env:TASK -eq "swig") {
conda init powershell
conda activate
conda config --set always_yes yes --set changeps1 no

# ref:
# * https://stackoverflow.com/a/62897729/3986677
# * https://github.com/microsoft/LightGBM/issues/5899
conda install "brotlipy>=0.7"

conda update -q -y conda
conda update -q -y conda "python=$env:PYTHON_VERSION[build=*cpython]"

if ($env:PYTHON_VERSION -eq "3.7") {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt"
} elseif ($env:PYTHON_VERSION -eq "3.8") {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt"
} else {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt"
}

@@ -67,18 +60,17 @@ if ($env:TASK -ne "bdist") {
conda activate $env:CONDA_ENV
}

cd $env:BUILD_SOURCESDIRECTORY
if ($env:TASK -eq "regular") {
cmake -B build -S . -A x64 ; Check-Output $?
cmake --build build --target ALL_BUILD --config Release ; Check-Output $?
cd $env:BUILD_SOURCESDIRECTORY
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --precompile ; Check-Output $?
cp $env:BUILD_SOURCESDIRECTORY/Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY
cp $env:BUILD_SOURCESDIRECTORY/Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY
sh ./build-python.sh install --precompile ; Check-Output $?
cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY
cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY
}
elseif ($env:TASK -eq "sdist") {
cd $env:BUILD_SOURCESDIRECTORY
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh sdist ; Check-Output $?
sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $?
sh ./build-python.sh sdist ; Check-Output $?
sh ./.ci/check_python_dists.sh ./dist ; Check-Output $?
cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $?
}
elseif ($env:TASK -eq "bdist") {

@@ -92,17 +84,15 @@ elseif ($env:TASK -eq "bdist") {
Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors

conda activate $env:CONDA_ENV
cd $env:BUILD_SOURCESDIRECTORY
sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $?
sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $?
cd dist; pip install --user @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $?
sh ./.ci/check_python_dists.sh ./dist ; Check-Output $?
cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $?
cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY
} elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) {
cd $env:BUILD_SOURCESDIRECTORY
if ($env:COMPILER -eq "MINGW") {
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user --mingw ; Check-Output $?
sh ./build-python.sh install --mingw ; Check-Output $?
} else {
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user; Check-Output $?
sh ./build-python.sh install; Check-Output $?
}
}

@@ -7,54 +7,41 @@ on:
pull_request:
branches:
- master
- release/*
# Run manually by clicking a button in the UI
workflow_dispatch:
inputs:
restart_docker:
description: 'Restart nvidia-docker on the runner before building?'
required: true
type: boolean
default: false

# automatically cancel in-progress builds if another commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

env:
github_actions: 'true'
os_name: linux
conda_env: test-env

jobs:
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
# Optionally reinstall + restart docker on the runner before building.
# This is safe as long as only 1 of these jobs runs at a time.
restart-docker:
name: set up docker
runs-on: [self-hosted, linux]
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.11"
cuda_version: "11.8.0"
task: cuda
- method: source
compiler: gcc
python_version: "3.9"
cuda_version: "12.2.0"
task: cuda
- method: pip
compiler: clang
python_version: "3.10"
cuda_version: "11.8.0"
task: cuda
timeout-minutes: 30
steps:
- name: Setup or update software on host machine
if: ${{ inputs.restart_docker }}
run: |
# install core packages
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
apt-transport-https \
ca-certificates \
curl \
git \
gnupg-agent \
lsb-release \
software-properties-common
# set up nvidia-docker
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -

@@ -67,43 +54,76 @@ jobs:
nvidia-docker2
sudo chmod a+rw /var/run/docker.sock
sudo systemctl restart docker
- name: Remove old folder with repository
run: sudo rm -rf $GITHUB_WORKSPACE
- name: mark job successful
run: |
exit 0
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (${{ matrix.linux_version }}, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
runs-on: [self-hosted, linux]
needs: [restart-docker]
container:
image: nvcr.io/nvidia/cuda:${{ matrix.cuda_version }}-devel-${{ matrix.linux_version }}
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
COMPILER: ${{ matrix.compiler }}
CONDA: /tmp/miniforge
DEBIAN_FRONTEND: noninteractive
METHOD: ${{ matrix.method }}
OS_NAME: linux
PYTHON_VERSION: ${{ matrix.python_version }}
TASK: ${{ matrix.task }}
SKBUILD_STRICT_CONFIG: true
options: --gpus all
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.10"
cuda_version: "11.8.0"
linux_version: "ubuntu20.04"
task: cuda
- method: source
compiler: gcc
python_version: "3.12"
cuda_version: "12.2.0"
linux_version: "ubuntu22.04"
task: cuda
- method: pip
compiler: clang
python_version: "3.11"
cuda_version: "11.8.0"
linux_version: "ubuntu20.04"
task: cuda
steps:
- name: Install latest git
run: |
apt-get update
apt-get install --no-install-recommends -y \
ca-certificates \
software-properties-common
add-apt-repository ppa:git-core/ppa -y
apt-get update
apt-get install --no-install-recommends -y \
git
- name: Checkout repository
uses: actions/checkout@v1
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Setup and run tests
run: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
GITHUB_ACTIONS=${{ env.github_actions }}
OS_NAME=${{ env.os_name }}
COMPILER=${{ matrix.compiler }}
TASK=${{ matrix.task }}
METHOD=${{ matrix.method }}
CONDA_ENV=${{ env.conda_env }}
PYTHON_VERSION=${{ matrix.python_version }}
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:\$PATH
nvidia-smi
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
EOF
cuda_version="${{ matrix.cuda_version }}"
cuda_major=${cuda_version%%.*}
docker_img="nvcr.io/nvidia/cuda:${cuda_version}-devel"
if [[ ${cuda_major} -eq 11 ]]; then
docker_img="${docker_img}-ubuntu18.04"
elif [[ ${cuda_major} -ge 12 ]]; then
docker_img="${docker_img}-ubuntu20.04"
fi
docker run --env-file docker.env -v "$GITHUB_WORKSPACE":"$ROOT_DOCKER_FOLDER" --rm --gpus all "$docker_img" /bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export PATH=$CONDA/bin:$PATH

# check GPU usage
nvidia-smi

# build and test
$GITHUB_WORKSPACE/.ci/setup.sh
$GITHUB_WORKSPACE/.ci/test.sh
all-cuda-jobs-successful:
if: always()
runs-on: ubuntu-latest

@@ -8,10 +8,9 @@ on:
- cron: '0 8 * * *'

env:
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
COMPILER: gcc
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'
TASK: 'check-links'

jobs:

@@ -20,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false

@@ -4,7 +4,6 @@ on:
pull_request:
branches:
- master
- release/*

jobs:
all-optional-checks-successful:

@@ -12,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false

|
@ -7,7 +7,6 @@ on:
|
|||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- release/*
|
||||
|
||||
# automatically cancel in-progress builds if another commit is pushed
|
||||
concurrency:
|
||||
|
@ -15,8 +14,8 @@ concurrency:
|
|||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
CONDA_ENV: test-env
|
||||
GITHUB_ACTIONS: 'true'
|
||||
CMAKE_BUILD_PARALLEL_LEVEL: 4
|
||||
SKBUILD_STRICT_CONFIG: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
|
@ -29,33 +28,37 @@ jobs:
|
|||
include:
|
||||
- os: macos-13
|
||||
task: regular
|
||||
python_version: '3.9'
|
||||
- os: macos-13
|
||||
task: sdist
|
||||
python_version: '3.10'
|
||||
- os: macos-13
|
||||
task: sdist
|
||||
python_version: '3.11'
|
||||
- os: macos-13
|
||||
task: bdist
|
||||
python_version: '3.7'
|
||||
python_version: '3.8'
|
||||
- os: macos-13
|
||||
task: if-else
|
||||
python_version: '3.9'
|
||||
- os: macos-14
|
||||
task: bdist
|
||||
method: wheel
|
||||
python_version: '3.10'
|
||||
# We're currently skipping MPI jobs on macOS, see https://github.com/microsoft/LightGBM/pull/6425
|
||||
# for further details.
|
||||
# - os: macos-13
|
||||
# task: mpi
|
||||
# method: source
|
||||
# python_version: '3.10'
|
||||
# - os: macos-13
|
||||
# task: mpi
|
||||
# method: pip
|
||||
# python_version: '3.11'
|
||||
# - os: macos-13
|
||||
# task: mpi
|
||||
# method: pip
|
||||
# python_version: '3.12'
|
||||
# - os: macos-13
|
||||
# task: mpi
|
||||
# method: wheel
|
||||
# python_version: '3.8'
|
||||
# python_version: '3.9'
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 5
|
||||
submodules: true
|
||||
|
@ -65,7 +68,11 @@ jobs:
|
|||
export TASK="${{ matrix.task }}"
|
||||
export METHOD="${{ matrix.method }}"
|
||||
export PYTHON_VERSION="${{ matrix.python_version }}"
|
||||
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
|
||||
if [[ "${{ matrix.os }}" == "macos-14" ]]; then
|
||||
# use clang when creating macOS release artifacts
|
||||
export COMPILER="clang"
|
||||
export OS_NAME="macos"
|
||||
elif [[ "${{ matrix.os }}" == "macos-13" ]]; then
|
||||
export COMPILER="gcc"
|
||||
export OS_NAME="macos"
|
||||
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
|
||||
|
@ -73,18 +80,23 @@ jobs:
|
|||
export OS_NAME="linux"
|
||||
fi
|
||||
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
|
||||
export LGB_VER=$(head -n 1 VERSION.txt)
|
||||
export CONDA=${HOME}/miniforge
|
||||
export PATH=${CONDA}/bin:${PATH}
|
||||
$GITHUB_WORKSPACE/.ci/setup.sh || exit 1
|
||||
$GITHUB_WORKSPACE/.ci/test.sh || exit 1
|
||||
test-oldest-versions:
|
||||
name: Python - oldest supported versions (ubuntu-latest)
|
||||
- name: upload wheels
|
||||
if: ${{ matrix.method == 'wheel' && matrix.os == 'macos-14' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: macosx-arm64-wheel
|
||||
path: dist/*.whl
|
||||
test-latest-versions:
|
||||
name: Python - latest versions (ubuntu-latest)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 5
|
||||
submodules: true
|
||||
|
@ -92,6 +104,7 @@ jobs:
|
|||
run: |
|
||||
docker run \
|
||||
--rm \
|
||||
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
|
||||
-v $(pwd):/opt/lgb-build \
|
||||
-w /opt/lgb-build \
|
||||
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
|
||||
|
@ -102,12 +115,39 @@ jobs:
|
|||
--rm \
|
||||
-v $(pwd):/opt/lgb-build \
|
||||
-w /opt/lgb-build \
|
||||
python:3.6 \
|
||||
python:3.11 \
|
||||
/bin/bash ./.ci/test-python-latest.sh
|
||||
test-oldest-versions:
|
||||
name: Python - oldest supported versions (ubuntu-latest)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 5
|
||||
submodules: true
|
||||
- name: Create wheel
|
||||
run: |
|
||||
docker run \
|
||||
--rm \
|
||||
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
|
||||
-v $(pwd):/opt/lgb-build \
|
||||
-w /opt/lgb-build \
|
||||
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
|
||||
/bin/bash -c 'PATH=/opt/miniforge/bin:$PATH sh ./build-python.sh bdist_wheel --nomp'
|
||||
- name: Test compatibility
|
||||
run: |
|
||||
docker run \
|
||||
--rm \
|
||||
-v $(pwd):/opt/lgb-build \
|
||||
-w /opt/lgb-build \
|
||||
python:3.7 \
|
||||
/bin/bash ./.ci/test-python-oldest.sh
|
||||
all-python-package-jobs-successful:
|
||||
if: always()
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test, test-oldest-versions]
|
||||
needs: [test, test-latest-versions, test-oldest-versions]
|
||||
steps:
|
||||
- name: Note that all tests succeeded
|
||||
uses: re-actors/alls-green@v1.2.2
|
||||
|
|
|
@@ -21,7 +21,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

@@ -7,7 +7,6 @@ on:
pull_request:
branches:
- master
- release/*

# automatically cancel in-progress builds if another commit is pushed
concurrency:

@@ -15,6 +14,7 @@ concurrency:
cancel-in-progress: true

env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
# hack to get around this:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
_R_CHECK_SYSTEM_CLOCK_: 0

@@ -189,7 +189,6 @@ jobs:
run: |
export TASK="${{ matrix.task }}"
export COMPILER="${{ matrix.compiler }}"
export GITHUB_ACTIONS="true"
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then

@@ -216,7 +215,6 @@ jobs:
$env:R_VERSION = "${{ matrix.r_version }}"
$env:R_BUILD_TYPE = "${{ matrix.build_type }}"
$env:COMPILER = "${{ matrix.compiler }}"
$env:GITHUB_ACTIONS = "true"
$env:TASK = "${{ matrix.task }}"
& "$env:GITHUB_WORKSPACE/.ci/test_windows.ps1"
test-r-sanitizers:

@@ -237,7 +235,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

@@ -280,7 +278,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

@@ -24,7 +24,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

@@ -9,7 +9,6 @@ on:
pull_request:
branches:
- master
- release/*

# automatically cancel in-progress builds if another commit is pushed
concurrency:

@@ -18,10 +17,8 @@ concurrency:

env:
COMPILER: 'gcc'
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'

jobs:
test:

@@ -36,7 +33,7 @@ jobs:
- task: check-docs
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false

@@ -59,7 +56,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

@@ -12,7 +12,7 @@ jobs:
SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false

@@ -405,7 +405,7 @@ python-package/lightgbm/VERSION.txt

# R build artefacts
**/autom4te.cache/
conftest*
R-package/conftest*
R-package/config.status
!R-package/data/agaricus.test.rda
!R-package/data/agaricus.train.rda

@@ -1,5 +1,6 @@
# coding: utf-8
"""Script for generating files with NuGet package metadata."""

import datetime
import sys
from pathlib import Path

@@ -13,7 +13,7 @@ exclude: |

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace

@@ -25,7 +25,7 @@ repos:
args: ["--settings-path", "python-package/pyproject.toml"]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.4.7
hooks:
# Run the linter.
- id: ruff

.vsts-ci.yml
@@ -7,15 +7,15 @@ trigger:
- v*
pr:
- master
- release/*
variables:
AZURE: 'true'
PYTHON_VERSION: '3.11'
CONDA_ENV: test-env
CMAKE_BUILD_PARALLEL_LEVEL: 4
PYTHON_VERSION: '3.12'
runCodesignValidationInjection: false
skipComponentGovernanceDetection: true
DOTNET_CLI_TELEMETRY_OPTOUT: true
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
SKBUILD_STRICT_CONFIG: true
resources:
# The __work/ directory, where Azure DevOps writes the source files, needs to be read-write because
# LightGBM's CI jobs write files in the source directory.

@@ -61,19 +61,19 @@ jobs:
matrix:
regular:
TASK: regular
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
sdist:
TASK: sdist
PYTHON_VERSION: '3.7'
PYTHON_VERSION: '3.8'
bdist:
TASK: bdist
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
inference:
TASK: if-else
mpi_source:
TASK: mpi
METHOD: source
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
gpu_source:
TASK: gpu
METHOD: source

@@ -82,7 +82,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
echo "##vso[task.prependpath]/usr/lib64/openmpi/bin"
echo "##vso[task.prependpath]$CONDA/bin"
displayName: 'Set variables'

@@ -127,7 +126,7 @@ jobs:
TASK: sdist
bdist:
TASK: bdist
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
inference:
TASK: if-else
mpi_source:

@@ -136,30 +135,29 @@ jobs:
mpi_pip:
TASK: mpi
METHOD: pip
PYTHON_VERSION: '3.10'
PYTHON_VERSION: '3.11'
mpi_wheel:
TASK: mpi
METHOD: wheel
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
gpu_source:
TASK: gpu
METHOD: source
PYTHON_VERSION: '3.10'
PYTHON_VERSION: '3.11'
gpu_pip:
TASK: gpu
METHOD: pip
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
gpu_wheel:
TASK: gpu
METHOD: wheel
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
cpp_tests:
TASK: cpp-tests
METHOD: with-sanitizers
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$HOME/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"

@@ -188,8 +186,8 @@ jobs:
- job: QEMU_multiarch
###########################################
variables:
BUILD_DIRECTORY: /LightGBM
COMPILER: gcc
OS_NAME: 'linux'
PRODUCES_ARTIFACTS: 'true'
pool:
vmImage: ubuntu-22.04

@@ -215,26 +213,12 @@ jobs:
git clean -d -f -x
displayName: 'Clean source directory'
- script: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
AZURE=$AZURE
OS_NAME=$OS_NAME
COMPILER=$COMPILER
TASK=$TASK
METHOD=$METHOD
CONDA_ENV=$CONDA_ENV
PYTHON_VERSION=$PYTHON_VERSION
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS
BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:/opt/rh/llvm-toolset-7.0/root/usr/bin:\$PATH
export LD_LIBRARY_PATH=/opt/rh/llvm-toolset-7.0/root/usr/lib64:\$LD_LIBRARY_PATH
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
\$BUILD_DIRECTORY/.ci/setup.sh || exit 1
\$BUILD_DIRECTORY/.ci/test.sh || exit 1
EOF
IMAGE_URI="lightgbm/vsts-agent:manylinux2014_aarch64"
docker pull "${IMAGE_URI}" || exit 1

@@ -243,11 +227,19 @@ jobs:
docker run \
--platform "${PLATFORM}" \
--rm \
--env-file docker.env \
-v "$(Build.SourcesDirectory)":"$ROOT_DOCKER_FOLDER" \
--env AZURE=true \
--env BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY \
--env BUILD_DIRECTORY=$BUILD_DIRECTORY \
--env COMPILER=$COMPILER \
--env METHOD=$METHOD \
--env OS_NAME=linux \
--env PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS \
--env PYTHON_VERSION=$PYTHON_VERSION \
--env TASK=$TASK \
-v "$(Build.SourcesDirectory)":"$BUILD_DIRECTORY" \
-v "$(Build.ArtifactStagingDirectory)":"$(Build.ArtifactStagingDirectory)" \
"${IMAGE_URI}" \
/bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
/bin/bash $BUILD_DIRECTORY/docker-script.sh
displayName: 'Setup and run tests'
- task: PublishBuildArtifacts@1
condition: and(succeeded(), in(variables['TASK'], 'bdist'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))

@@ -263,7 +255,7 @@ jobs:
OS_NAME: 'macos'
PRODUCES_ARTIFACTS: 'true'
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-12'
strategy:
matrix:
regular:

@@ -283,7 +275,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$AGENT_HOMEDIRECTORY/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"

@ -25,6 +25,14 @@ option(__INTEGRATE_OPENCL "Set to ON if building LightGBM with the OpenCL ICD Lo
|
|||
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
# If using Visual Studio generators, always target v10.x of the Windows SDK.
|
||||
# Doing this avoids lookups that could fall back to very old versions, e.g. by finding
|
||||
# outdated registry entries.
|
||||
# ref: https://cmake.org/cmake/help/latest/variable/CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION.html
|
||||
if(CMAKE_GENERATOR MATCHES "Visual Studio")
|
||||
set(CMAKE_SYSTEM_VERSION 10.0 CACHE INTERNAL "target Windows SDK version" FORCE)
|
||||
endif()
|
||||
|
||||
project(lightgbm LANGUAGES C CXX)
|
||||
|
||||
if(BUILD_CPP_TEST)
|
||||
|
@ -704,6 +712,83 @@ if(__BUILD_FOR_PYTHON)
|
|||
set(CMAKE_INSTALL_PREFIX "lightgbm")
|
||||
endif()
|
||||
|
||||
# The macOS linker puts an absolute path to linked libraries in lib_lightgbm.dylib.
|
||||
# This block overrides that information for LightGBM's OpenMP dependency, to allow
|
||||
# finding that library in more places.
|
||||
#
|
||||
# This reduces the risk of runtime issues resulting from multiple libomp.dylib being loaded.
|
||||
#
|
||||
if(APPLE AND USE_OPENMP)
|
||||
# store path to libomp found at build time in a variable
|
||||
get_target_property(
|
||||
OpenMP_LIBRARY_LOCATION
|
||||
OpenMP::OpenMP_CXX
|
||||
INTERFACE_LINK_LIBRARIES
|
||||
)
|
||||
# get just the filename of that path
|
||||
# (to deal with the possibility that it might be 'libomp.dylib' or 'libgomp.dylib' or 'libiomp.dylib')
|
||||
get_filename_component(
|
||||
OpenMP_LIBRARY_NAME
|
||||
${OpenMP_LIBRARY_LOCATION}
|
||||
NAME
|
||||
)
|
||||
# get directory of that path
|
||||
get_filename_component(
|
||||
OpenMP_LIBRARY_DIR
|
||||
${OpenMP_LIBRARY_LOCATION}
|
||||
DIRECTORY
|
||||
)
|
||||
# get exact name of the library in a variable
|
||||
get_target_property(
|
||||
__LIB_LIGHTGBM_OUTPUT_NAME
|
||||
_lightgbm
|
||||
OUTPUT_NAME
|
||||
)
|
||||
if(NOT __LIB_LIGHTGBM_OUTPUT_NAME)
|
||||
set(__LIB_LIGHTGBM_OUTPUT_NAME "lib_lightgbm")
|
||||
endif()
|
||||
|
||||
if(CMAKE_SHARED_LIBRARY_SUFFIX_CXX)
|
||||
set(
|
||||
__LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX_CXX}"
|
||||
CACHE INTERNAL "lightgbm shared library filename"
|
||||
)
|
||||
else()
|
||||
set(
|
||||
__LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}.dylib"
|
||||
CACHE INTERNAL "lightgbm shared library filename"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Override the absolute path to OpenMP with a relative one using @rpath.
|
||||
#
|
||||
# This also ensures that if a libomp.dylib has already been loaded, it'll just use that.
|
||||
add_custom_command(
|
||||
TARGET _lightgbm
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
install_name_tool
|
||||
-change
|
||||
${OpenMP_LIBRARY_LOCATION}
|
||||
"@rpath/${OpenMP_LIBRARY_NAME}"
|
||||
"${__LIB_LIGHTGBM_FILENAME}"
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
COMMENT "Replacing hard-coded OpenMP install_name with '@rpath/${OpenMP_LIBRARY_NAME}'..."
|
||||
)
|
||||
# add RPATH entries to ensure the loader looks in the following places, in this order:
|
||||
#
|
||||
# - /opt/homebrew/opt/libomp/lib (where 'brew install' / 'brew link' puts libomp.dylib)
|
||||
# - ${OpenMP_LIBRARY_DIR} (wherever find_package(OpenMP) found OpenMP at build time)
|
||||
#
|
||||
set_target_properties(
|
||||
_lightgbm
|
||||
PROPERTIES
|
||||
BUILD_WITH_INSTALL_RPATH TRUE
|
||||
INSTALL_RPATH "/opt/homebrew/opt/libomp/lib;${OpenMP_LIBRARY_DIR}"
|
||||
INSTALL_RPATH_USE_LINK_PATH FALSE
|
||||
)
|
||||
endif()
|
||||
|
||||
install(
|
||||
TARGETS _lightgbm
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
|
||||
|
|
|
@ -1309,6 +1309,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
|
|||
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
#' means "save the fifth, sixth, and seventh tree"
|
||||
#'
|
||||
#' \emph{New in version 4.4.0}
|
||||
#'
|
||||
#' @return lgb.Booster
|
||||
#'
|
||||
#' @examples
|
||||
|
@ -1373,6 +1375,8 @@ lgb.save <- function(
|
|||
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
#' means "dump the fifth, sixth, and seventh tree"
|
||||
#'
|
||||
#' \emph{New in version 4.4.0}
|
||||
#'
|
||||
#' @return json format of model
|
||||
#'
|
||||
#' @examples
|
||||
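The two roxygen blocks above document the new start_iteration argument for lgb.save() and lgb.dump(). A minimal usage sketch, not taken from the package's examples (the training data and parameters are placeholders), assuming a regression Booster trained for at least seven rounds:

library(lightgbm)
dtrain <- lgb.Dataset(matrix(rnorm(500L), ncol = 5L), label = rnorm(100L))
bst <- lgb.train(params = list(objective = "regression"), data = dtrain, nrounds = 10L)

# keep only the fifth, sixth, and seventh trees in the saved model file
lgb.save(bst, filename = tempfile(fileext = ".txt"), num_iteration = 3L, start_iteration = 5L)

# the same slice of the model, dumped as JSON instead of written to a file
model_json <- lgb.dump(bst, num_iteration = 3L, start_iteration = 5L)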
|
|
|
@ -170,7 +170,12 @@ Dataset <- R6::R6Class(
|
|||
|
||||
# Check whether any categorical feature index is larger than the number of features in the data
|
||||
data_is_not_filename <- !is.character(private$raw_data)
|
||||
if (data_is_not_filename && max(private$categorical_feature) > ncol(private$raw_data)) {
|
||||
if (
|
||||
data_is_not_filename
|
||||
&& !is.null(private$raw_data)
|
||||
&& is.null(private$used_indices)
|
||||
&& max(private$categorical_feature) > ncol(private$raw_data)
|
||||
) {
|
||||
stop(
|
||||
"lgb.Dataset.construct: supplied a too large value in categorical_feature: "
|
||||
, max(private$categorical_feature)
|
||||
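The extra conditions added here mean the categorical_feature bounds check only runs when the raw data is still in memory and the Dataset is not a slice of another one. A minimal sketch of the failure it guards against, mirroring the tests added later in this change (the matrix is a made-up placeholder):

# a 1-column matrix cannot have a categorical feature at position 2
raw_mat <- matrix(rnorm(10L), ncol = 1L)
dtrain <- lgb.Dataset(raw_mat, categorical_feature = 2L)
# expected to fail with:
# "lgb.Dataset.construct: supplied a too large value in categorical_feature: 2 but only 1 features"
try(dtrain$construct())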
|
@ -1049,6 +1054,9 @@ dimnames.lgb.Dataset <- function(x) {
|
|||
#' @title Slice a dataset
|
||||
#' @description Get a new \code{lgb.Dataset} containing the specified rows of
|
||||
#' original \code{lgb.Dataset} object
|
||||
#'
|
||||
#' \emph{Renamed from} \code{slice()} \emph{in 4.4.0}
|
||||
#'
|
||||
#' @param dataset Object of class \code{lgb.Dataset}
|
||||
#' @param idxset an integer vector of indices of rows needed
|
||||
#' @return constructed sub dataset
|
||||
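Because slice() was renamed in 4.4.0, a short usage sketch of the new entry point (data and indices are placeholders):

dtrain <- lgb.Dataset(matrix(runif(200L), ncol = 2L), label = runif(100L))
dsub <- lgb.slice.Dataset(dtrain, idxset = 1L:20L)
dsub$construct()
dim(dsub)  # 20 rows, 2 columns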
|
|
|
@ -6,6 +6,9 @@
|
|||
#' @param start_iteration Index (1-based) of the first boosting round to include in the output.
|
||||
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
#' means "return information about the fifth, sixth, and seventh trees".
|
||||
#'
|
||||
#' \emph{New in version 4.4.0}
|
||||
#'
|
||||
#' @return
|
||||
#' A \code{data.table} with detailed information about model trees' nodes and leaves.
|
||||
#'
|
||||
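A quick, hypothetical check of the new start_iteration argument for lgb.model.dt.tree(), assuming model is an existing lgb.Booster with at least seven trees:

tree_dt <- lgb.model.dt.tree(model, num_iteration = 3L, start_iteration = 5L)
# only three distinct trees should appear in the returned table
length(unique(tree_dt$tree_index))  # expected: 3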
|
|
|
@ -59,68 +59,66 @@
|
|||
|
||||
}
|
||||
|
||||
# [description]
|
||||
#
|
||||
# Besides applying checks, this function
|
||||
#
|
||||
# 1. turns feature *names* into 1-based integer positions, then
|
||||
# 2. adds an extra list element with skipped features, then
|
||||
# 3. turns 1-based integer positions into 0-based positions, and finally
|
||||
# 4. collapses the values of each list element into a string like "[0,1]".
|
||||
#
|
||||
.check_interaction_constraints <- function(interaction_constraints, column_names) {
|
||||
|
||||
# Convert interaction constraints to feature numbers
|
||||
string_constraints <- list()
|
||||
|
||||
if (!is.null(interaction_constraints)) {
|
||||
|
||||
if (!methods::is(interaction_constraints, "list")) {
|
||||
stop("interaction_constraints must be a list")
|
||||
}
|
||||
constraint_is_character_or_numeric <- sapply(
|
||||
X = interaction_constraints
|
||||
, FUN = function(x) {
|
||||
return(is.character(x) || is.numeric(x))
|
||||
}
|
||||
)
|
||||
if (!all(constraint_is_character_or_numeric)) {
|
||||
stop("every element in interaction_constraints must be a character vector or numeric vector")
|
||||
}
|
||||
|
||||
for (constraint in interaction_constraints) {
|
||||
|
||||
# Check for character name
|
||||
if (is.character(constraint)) {
|
||||
|
||||
constraint_indices <- as.integer(match(constraint, column_names) - 1L)
|
||||
|
||||
# Provided indices, but some indices are not existing?
|
||||
if (sum(is.na(constraint_indices)) > 0L) {
|
||||
stop(
|
||||
"supplied an unknown feature in interaction_constraints "
|
||||
, sQuote(constraint[is.na(constraint_indices)])
|
||||
)
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
# Check that constraint indices are at most number of features
|
||||
if (max(constraint) > length(column_names)) {
|
||||
stop(
|
||||
"supplied a too large value in interaction_constraints: "
|
||||
, max(constraint)
|
||||
, " but only "
|
||||
, length(column_names)
|
||||
, " features"
|
||||
)
|
||||
}
|
||||
|
||||
# Store indices as [0, n-1] indexed instead of [1, n] indexed
|
||||
constraint_indices <- as.integer(constraint - 1L)
|
||||
|
||||
}
|
||||
|
||||
# Convert constraint to string
|
||||
constraint_string <- paste0("[", paste0(constraint_indices, collapse = ","), "]")
|
||||
string_constraints <- append(string_constraints, constraint_string)
|
||||
}
|
||||
|
||||
if (is.null(interaction_constraints)) {
|
||||
return(list())
|
||||
}
|
||||
if (!identical(class(interaction_constraints), "list")) {
|
||||
stop("interaction_constraints must be a list")
|
||||
}
|
||||
|
||||
return(string_constraints)
|
||||
column_indices <- seq_along(column_names)
|
||||
|
||||
# Convert feature names to 1-based integer positions and apply checks
|
||||
for (j in seq_along(interaction_constraints)) {
|
||||
constraint <- interaction_constraints[[j]]
|
||||
|
||||
if (is.character(constraint)) {
|
||||
constraint_indices <- match(constraint, column_names)
|
||||
} else if (is.numeric(constraint)) {
|
||||
constraint_indices <- as.integer(constraint)
|
||||
} else {
|
||||
stop("every element in interaction_constraints must be a character vector or numeric vector")
|
||||
}
|
||||
|
||||
# Features outside range?
|
||||
bad <- !(constraint_indices %in% column_indices)
|
||||
if (any(bad)) {
|
||||
stop(
|
||||
"unknown feature(s) in interaction_constraints: "
|
||||
, toString(sQuote(constraint[bad], q = "'"))
|
||||
)
|
||||
}
|
||||
|
||||
interaction_constraints[[j]] <- constraint_indices
|
||||
}
|
||||
|
||||
# Add missing features as new interaction set
|
||||
remaining_indices <- setdiff(
|
||||
column_indices, sort(unique(unlist(interaction_constraints)))
|
||||
)
|
||||
if (length(remaining_indices) > 0L) {
|
||||
interaction_constraints <- c(
|
||||
interaction_constraints, list(remaining_indices)
|
||||
)
|
||||
}
|
||||
|
||||
# Turn indices 0-based and convert to string
|
||||
for (j in seq_along(interaction_constraints)) {
|
||||
interaction_constraints[[j]] <- paste0(
|
||||
"[", paste0(interaction_constraints[[j]] - 1L, collapse = ","), "]"
|
||||
)
|
||||
}
|
||||
return(interaction_constraints)
|
||||
}
|
||||
|
||||
|
||||
|
|
|
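To make the rewritten .check_interaction_constraints() concrete, here is a small sketch that mirrors the unit test added later in this change (column names are arbitrary). The helper is internal to the package, so it is normally only reached through the interaction_constraints training parameter:

column_names <- c("a", "b", "c", "d", "e")

# positional (1-based) constraints
.check_interaction_constraints(
  interaction_constraints = list(1L, c(2L, 3L))
  , column_names = column_names
)
# expected: list("[0]", "[1,2]", "[3,4]") -- "d" and "e" are appended as a new group

# the same constraints expressed with feature names give the same result
.check_interaction_constraints(
  interaction_constraints = list("a", c("b", "c"))
  , column_names = column_names
)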
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.71 for lightgbm 4.3.0.99.
|
||||
# Generated by GNU Autoconf 2.71 for lightgbm 4.4.0.99.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
||||
|
@ -607,8 +607,8 @@ MAKEFLAGS=
|
|||
# Identity of this package.
|
||||
PACKAGE_NAME='lightgbm'
|
||||
PACKAGE_TARNAME='lightgbm'
|
||||
PACKAGE_VERSION='4.3.0.99'
|
||||
PACKAGE_STRING='lightgbm 4.3.0.99'
|
||||
PACKAGE_VERSION='4.4.0.99'
|
||||
PACKAGE_STRING='lightgbm 4.4.0.99'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
|
@ -1211,7 +1211,7 @@ if test "$ac_init_help" = "long"; then
|
|||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures lightgbm 4.3.0.99 to adapt to many kinds of systems.
|
||||
\`configure' configures lightgbm 4.4.0.99 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
|
@ -1273,7 +1273,7 @@ fi
|
|||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of lightgbm 4.3.0.99:";;
|
||||
short | recursive ) echo "Configuration of lightgbm 4.4.0.99:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
|
@ -1341,7 +1341,7 @@ fi
|
|||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
lightgbm configure 4.3.0.99
|
||||
lightgbm configure 4.4.0.99
|
||||
generated by GNU Autoconf 2.71
|
||||
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
|
@ -1378,7 +1378,7 @@ cat >config.log <<_ACEOF
|
|||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by lightgbm $as_me 4.3.0.99, which was
|
||||
It was created by lightgbm $as_me 4.4.0.99, which was
|
||||
generated by GNU Autoconf 2.71. Invocation command line was
|
||||
|
||||
$ $0$ac_configure_args_raw
|
||||
|
@ -2454,7 +2454,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by lightgbm $as_me 4.3.0.99, which was
|
||||
This file was extended by lightgbm $as_me 4.4.0.99, which was
|
||||
generated by GNU Autoconf 2.71. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
|
@ -2509,7 +2509,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config='$ac_cs_config_escaped'
|
||||
ac_cs_version="\\
|
||||
lightgbm config.status 4.3.0.99
|
||||
lightgbm config.status 4.4.0.99
|
||||
configured by $0, generated by GNU Autoconf 2.71,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
|
|
@ -1,5 +1,15 @@
|
|||
# CRAN Submission History
|
||||
|
||||
## v4.4.0 - Submission 1 - (June 14, 2024)
|
||||
|
||||
### CRAN response
|
||||
|
||||
Accepted to CRAN
|
||||
|
||||
### Maintainer Notes
|
||||
|
||||
This was a standard release of `{lightgbm}`, not intended to fix any particular R-specific issues.
|
||||
|
||||
## v4.3.0 - Submission 1 - (January 18, 2024)
|
||||
|
||||
### CRAN response
|
||||
|
|
|
@ -12,8 +12,10 @@ lgb.dump(booster, num_iteration = NULL, start_iteration = 1L)
|
|||
\item{num_iteration}{Number of iterations to be dumped. NULL or <= 0 means use best iteration}
|
||||
|
||||
\item{start_iteration}{Index (1-based) of the first boosting round to dump.
|
||||
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
means "dump the fifth, sixth, and seventh tree"}
|
||||
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
means "dump the fifth, sixth, and seventh tree"
|
||||
|
||||
\emph{New in version 4.4.0}}
|
||||
}
|
||||
\value{
|
||||
json format of model
|
||||
|
|
|
@ -12,8 +12,10 @@ lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L)
|
|||
\item{num_iteration}{Number of iterations to include. NULL or <= 0 means use best iteration.}
|
||||
|
||||
\item{start_iteration}{Index (1-based) of the first boosting round to include in the output.
|
||||
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
means "return information about the fifth, sixth, and seventh trees".}
|
||||
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
means "return information about the fifth, sixth, and seventh trees".
|
||||
|
||||
\emph{New in version 4.4.0}}
|
||||
}
|
||||
\value{
|
||||
A \code{data.table} with detailed information about model trees' nodes and leaves.
|
||||
|
|
|
@ -14,8 +14,10 @@ lgb.save(booster, filename, num_iteration = NULL, start_iteration = 1L)
|
|||
\item{num_iteration}{Number of iterations to save. NULL or <= 0 means use best iteration}
|
||||
|
||||
\item{start_iteration}{Index (1-based) of the first boosting round to save.
|
||||
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
means "save the fifth, sixth, and seventh tree"}
|
||||
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
|
||||
means "save the fifth, sixth, and seventh tree"
|
||||
|
||||
\emph{New in version 4.4.0}}
|
||||
}
|
||||
\value{
|
||||
lgb.Booster
|
||||
|
|
|
@ -17,6 +17,8 @@ constructed sub dataset
|
|||
\description{
|
||||
Get a new \code{lgb.Dataset} containing the specified rows of
|
||||
original \code{lgb.Dataset} object
|
||||
|
||||
\emph{Renamed from} \code{slice()} \emph{in 4.4.0}
|
||||
}
|
||||
\examples{
|
||||
\donttest{
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <LightGBM/utils/text_reader.h>
|
||||
|
||||
#include <R_ext/Rdynload.h>
|
||||
#include <R_ext/Altrep.h>
|
||||
|
||||
#define R_NO_REMAP
|
||||
#define R_USE_C99_IN_CXX
|
||||
|
@ -24,6 +25,150 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
|
||||
R_altrep_class_t lgb_altrepped_char_vec;
|
||||
R_altrep_class_t lgb_altrepped_int_arr;
|
||||
R_altrep_class_t lgb_altrepped_dbl_arr;
|
||||
|
||||
template <class T>
|
||||
void delete_cpp_array(SEXP R_ptr) {
|
||||
T *ptr_to_cpp_obj = static_cast<T*>(R_ExternalPtrAddr(R_ptr));
|
||||
delete[] ptr_to_cpp_obj;
|
||||
R_ClearExternalPtr(R_ptr);
|
||||
}
|
||||
|
||||
void delete_cpp_char_vec(SEXP R_ptr) {
|
||||
std::vector<char> *ptr_to_cpp_obj = static_cast<std::vector<char>*>(R_ExternalPtrAddr(R_ptr));
|
||||
delete ptr_to_cpp_obj;
|
||||
R_ClearExternalPtr(R_ptr);
|
||||
}
|
||||
|
||||
// Note: MSVC has issues with Altrep classes, so they are disabled for it.
|
||||
// See: https://github.com/microsoft/LightGBM/pull/6213#issuecomment-2111025768
|
||||
#ifdef _MSC_VER
|
||||
# define LGB_NO_ALTREP
|
||||
#endif
|
||||
|
||||
#ifndef LGB_NO_ALTREP
|
||||
SEXP make_altrepped_raw_vec(void *void_ptr) {
|
||||
std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
|
||||
SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP R_raw = Rf_protect(R_new_altrep(lgb_altrepped_char_vec, R_NilValue, R_NilValue));
|
||||
|
||||
R_SetExternalPtrAddr(R_ptr, ptr_to_cpp_vec->get());
|
||||
R_RegisterCFinalizerEx(R_ptr, delete_cpp_char_vec, TRUE);
|
||||
ptr_to_cpp_vec->release();
|
||||
|
||||
R_set_altrep_data1(R_raw, R_ptr);
|
||||
Rf_unprotect(2);
|
||||
return R_raw;
|
||||
}
|
||||
#else
|
||||
SEXP make_r_raw_vec(void *void_ptr) {
|
||||
std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
|
||||
R_xlen_t len = ptr_to_cpp_vec->get()->size();
|
||||
SEXP out = Rf_protect(Rf_allocVector(RAWSXP, len));
|
||||
std::copy(ptr_to_cpp_vec->get()->begin(), ptr_to_cpp_vec->get()->end(), reinterpret_cast<char*>(RAW(out)));
|
||||
Rf_unprotect(1);
|
||||
return out;
|
||||
}
|
||||
#define make_altrepped_raw_vec make_r_raw_vec
|
||||
#endif
|
||||
|
||||
std::vector<char>* get_ptr_from_altrepped_raw(SEXP R_raw) {
|
||||
return static_cast<std::vector<char>*>(R_ExternalPtrAddr(R_altrep_data1(R_raw)));
|
||||
}
|
||||
|
||||
R_xlen_t get_altrepped_raw_len(SEXP R_raw) {
|
||||
return get_ptr_from_altrepped_raw(R_raw)->size();
|
||||
}
|
||||
|
||||
const void* get_altrepped_raw_dataptr_or_null(SEXP R_raw) {
|
||||
return get_ptr_from_altrepped_raw(R_raw)->data();
|
||||
}
|
||||
|
||||
void* get_altrepped_raw_dataptr(SEXP R_raw, Rboolean writeable) {
|
||||
return get_ptr_from_altrepped_raw(R_raw)->data();
|
||||
}
|
||||
|
||||
#ifndef LGB_NO_ALTREP
|
||||
template <class T>
|
||||
R_altrep_class_t get_altrep_class_for_type() {
|
||||
if (std::is_same<T, double>::value) {
|
||||
return lgb_altrepped_dbl_arr;
|
||||
} else {
|
||||
return lgb_altrepped_int_arr;
|
||||
}
|
||||
}
|
||||
#else
|
||||
template <class T>
|
||||
SEXPTYPE get_sexptype_class_for_type() {
|
||||
if (std::is_same<T, double>::value) {
|
||||
return REALSXP;
|
||||
} else {
|
||||
return INTSXP;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T* get_r_vec_ptr(SEXP x) {
|
||||
if (std::is_same<T, double>::value) {
|
||||
return static_cast<T*>(static_cast<void*>(REAL(x)));
|
||||
} else {
|
||||
return static_cast<T*>(static_cast<void*>(INTEGER(x)));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class T>
|
||||
struct arr_and_len {
|
||||
T *arr;
|
||||
int64_t len;
|
||||
};
|
||||
|
||||
#ifndef LGB_NO_ALTREP
|
||||
template <class T>
|
||||
SEXP make_altrepped_vec_from_arr(void *void_ptr) {
|
||||
T *arr = static_cast<arr_and_len<T>*>(void_ptr)->arr;
|
||||
uint64_t len = static_cast<arr_and_len<T>*>(void_ptr)->len;
|
||||
SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP R_len = Rf_protect(Rf_allocVector(REALSXP, 1));
|
||||
SEXP R_vec = Rf_protect(R_new_altrep(get_altrep_class_for_type<T>(), R_NilValue, R_NilValue));
|
||||
|
||||
REAL(R_len)[0] = static_cast<double>(len);
|
||||
R_SetExternalPtrAddr(R_ptr, arr);
|
||||
R_RegisterCFinalizerEx(R_ptr, delete_cpp_array<T>, TRUE);
|
||||
|
||||
R_set_altrep_data1(R_vec, R_ptr);
|
||||
R_set_altrep_data2(R_vec, R_len);
|
||||
Rf_unprotect(3);
|
||||
return R_vec;
|
||||
}
|
||||
#else
|
||||
template <class T>
|
||||
SEXP make_R_vec_from_arr(void *void_ptr) {
|
||||
T *arr = static_cast<arr_and_len<T>*>(void_ptr)->arr;
|
||||
uint64_t len = static_cast<arr_and_len<T>*>(void_ptr)->len;
|
||||
SEXP out = Rf_protect(Rf_allocVector(get_sexptype_class_for_type<T>(), len));
|
||||
std::copy(arr, arr + len, get_r_vec_ptr<T>(out));
|
||||
Rf_unprotect(1);
|
||||
return out;
|
||||
}
|
||||
#define make_altrepped_vec_from_arr make_R_vec_from_arr
|
||||
#endif
|
||||
|
||||
R_xlen_t get_altrepped_vec_len(SEXP R_vec) {
|
||||
return static_cast<R_xlen_t>(Rf_asReal(R_altrep_data2(R_vec)));
|
||||
}
|
||||
|
||||
const void* get_altrepped_vec_dataptr_or_null(SEXP R_vec) {
|
||||
return R_ExternalPtrAddr(R_altrep_data1(R_vec));
|
||||
}
|
||||
|
||||
void* get_altrepped_vec_dataptr(SEXP R_vec, Rboolean writeable) {
|
||||
return R_ExternalPtrAddr(R_altrep_data1(R_vec));
|
||||
}
|
||||
|
||||
#define COL_MAJOR (0)
|
||||
|
||||
|
@ -143,18 +288,18 @@ SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
|
|||
SEXP parameters,
|
||||
SEXP reference) {
|
||||
R_API_BEGIN();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
DatasetHandle handle = nullptr;
|
||||
DatasetHandle ref = nullptr;
|
||||
if (!Rf_isNull(reference)) {
|
||||
ref = R_ExternalPtrAddr(reference);
|
||||
}
|
||||
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
|
||||
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
|
||||
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
|
||||
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
|
||||
CHECK_CALL(LGBM_DatasetCreateFromFile(filename_ptr, parameters_ptr, ref, &handle));
|
||||
R_SetExternalPtrAddr(ret, handle);
|
||||
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
|
||||
UNPROTECT(3);
|
||||
Rf_unprotect(3);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -168,14 +313,14 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
|
|||
SEXP parameters,
|
||||
SEXP reference) {
|
||||
R_API_BEGIN();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
const int* p_indptr = INTEGER(indptr);
|
||||
const int* p_indices = INTEGER(indices);
|
||||
const double* p_data = REAL(data);
|
||||
int64_t nindptr = static_cast<int64_t>(Rf_asInteger(num_indptr));
|
||||
int64_t ndata = static_cast<int64_t>(Rf_asInteger(nelem));
|
||||
int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
|
||||
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
|
||||
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
|
||||
DatasetHandle handle = nullptr;
|
||||
DatasetHandle ref = nullptr;
|
||||
if (!Rf_isNull(reference)) {
|
||||
|
@ -186,7 +331,7 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
|
|||
nrow, parameters_ptr, ref, &handle));
|
||||
R_SetExternalPtrAddr(ret, handle);
|
||||
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -197,11 +342,11 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
|
|||
SEXP parameters,
|
||||
SEXP reference) {
|
||||
R_API_BEGIN();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
int32_t nrow = static_cast<int32_t>(Rf_asInteger(num_row));
|
||||
int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
|
||||
double* p_mat = REAL(data);
|
||||
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
|
||||
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
|
||||
DatasetHandle handle = nullptr;
|
||||
DatasetHandle ref = nullptr;
|
||||
if (!Rf_isNull(reference)) {
|
||||
|
@ -211,7 +356,7 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
|
|||
parameters_ptr, ref, &handle));
|
||||
R_SetExternalPtrAddr(ret, handle);
|
||||
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -222,7 +367,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
|
|||
SEXP parameters) {
|
||||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(handle);
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
int32_t len = static_cast<int32_t>(Rf_asInteger(len_used_row_indices));
|
||||
std::unique_ptr<int32_t[]> idxvec(new int32_t[len]);
|
||||
// convert from one-based to zero-based index
|
||||
|
@ -233,14 +378,14 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
|
|||
for (int32_t i = 0; i < len; ++i) {
|
||||
idxvec[i] = static_cast<int32_t>(used_row_indices_[i] - 1);
|
||||
}
|
||||
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
|
||||
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
|
||||
DatasetHandle res = nullptr;
|
||||
CHECK_CALL(LGBM_DatasetGetSubset(R_ExternalPtrAddr(handle),
|
||||
idxvec.get(), len, parameters_ptr,
|
||||
&res));
|
||||
R_SetExternalPtrAddr(ret, res);
|
||||
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -249,7 +394,7 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
|
|||
SEXP feature_names) {
|
||||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(handle);
|
||||
auto vec_names = Split(CHAR(PROTECT(Rf_asChar(feature_names))), '\t');
|
||||
auto vec_names = Split(CHAR(Rf_protect(Rf_asChar(feature_names))), '\t');
|
||||
int len = static_cast<int>(vec_names.size());
|
||||
std::unique_ptr<const char*[]> vec_sptr(new const char*[len]);
|
||||
for (int i = 0; i < len; ++i) {
|
||||
|
@ -257,13 +402,13 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
|
|||
}
|
||||
CHECK_CALL(LGBM_DatasetSetFeatureNames(R_ExternalPtrAddr(handle),
|
||||
vec_sptr.get(), len));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
||||
SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
|
||||
SEXP cont_token = PROTECT(R_MakeUnwindCont());
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(handle);
|
||||
SEXP feature_names;
|
||||
|
@ -301,11 +446,11 @@ SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
|
|||
ptr_names.data()));
|
||||
}
|
||||
CHECK_EQ(len, out_len);
|
||||
feature_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
|
||||
feature_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
|
||||
for (int i = 0; i < len; ++i) {
|
||||
SET_STRING_ELT(feature_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
|
||||
}
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return feature_names;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -314,10 +459,10 @@ SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
|
|||
SEXP filename) {
|
||||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(handle);
|
||||
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
|
||||
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
|
||||
CHECK_CALL(LGBM_DatasetSaveBinary(R_ExternalPtrAddr(handle),
|
||||
filename_ptr));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -339,7 +484,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
|
|||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(handle);
|
||||
int len = Rf_asInteger(num_element);
|
||||
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
|
||||
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
|
||||
if (!strcmp("group", name) || !strcmp("query", name)) {
|
||||
CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, INTEGER(field_data), len, C_API_DTYPE_INT32));
|
||||
} else if (!strcmp("init_score", name)) {
|
||||
|
@ -349,7 +494,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
|
|||
std::copy(REAL(field_data), REAL(field_data) + len, vec.get());
|
||||
CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.get(), len, C_API_DTYPE_FLOAT32));
|
||||
}
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -359,7 +504,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
|
|||
SEXP field_data) {
|
||||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(handle);
|
||||
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
|
||||
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
|
||||
int out_len = 0;
|
||||
int out_type = 0;
|
||||
const void* res;
|
||||
|
@ -381,7 +526,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
|
|||
auto p_data = reinterpret_cast<const float*>(res);
|
||||
std::copy(p_data, p_data + out_len, REAL(field_data));
|
||||
}
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -391,7 +536,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
|
|||
SEXP out) {
|
||||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(handle);
|
||||
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
|
||||
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
|
||||
int out_len = 0;
|
||||
int out_type = 0;
|
||||
const void* res;
|
||||
|
@ -400,7 +545,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
|
|||
out_len -= 1;
|
||||
}
|
||||
INTEGER(out)[0] = out_len;
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -408,10 +553,10 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
|
|||
SEXP LGBM_DatasetUpdateParamChecking_R(SEXP old_params,
|
||||
SEXP new_params) {
|
||||
R_API_BEGIN();
|
||||
const char* old_params_ptr = CHAR(PROTECT(Rf_asChar(old_params)));
|
||||
const char* new_params_ptr = CHAR(PROTECT(Rf_asChar(new_params)));
|
||||
const char* old_params_ptr = CHAR(Rf_protect(Rf_asChar(old_params)));
|
||||
const char* new_params_ptr = CHAR(Rf_protect(Rf_asChar(new_params)));
|
||||
CHECK_CALL(LGBM_DatasetUpdateParamChecking(old_params_ptr, new_params_ptr));
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -468,34 +613,34 @@ SEXP LGBM_BoosterCreate_R(SEXP train_data,
|
|||
SEXP parameters) {
|
||||
R_API_BEGIN();
|
||||
_AssertDatasetHandleNotNull(train_data);
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
|
||||
BoosterHandle handle = nullptr;
|
||||
CHECK_CALL(LGBM_BoosterCreate(R_ExternalPtrAddr(train_data), parameters_ptr, &handle));
|
||||
R_SetExternalPtrAddr(ret, handle);
|
||||
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
||||
SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) {
|
||||
R_API_BEGIN();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
int out_num_iterations = 0;
|
||||
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
|
||||
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
|
||||
BoosterHandle handle = nullptr;
|
||||
CHECK_CALL(LGBM_BoosterCreateFromModelfile(filename_ptr, &out_num_iterations, &handle));
|
||||
R_SetExternalPtrAddr(ret, handle);
|
||||
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
||||
SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
|
||||
R_API_BEGIN();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
SEXP temp = NULL;
|
||||
int n_protected = 1;
|
||||
int out_num_iterations = 0;
|
||||
|
@ -510,7 +655,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
|
|||
break;
|
||||
}
|
||||
case STRSXP: {
|
||||
temp = PROTECT(STRING_ELT(model_str, 0));
|
||||
temp = Rf_protect(STRING_ELT(model_str, 0));
|
||||
n_protected++;
|
||||
model_str_ptr = reinterpret_cast<const char*>(CHAR(temp));
|
||||
}
|
||||
|
@ -519,7 +664,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
|
|||
CHECK_CALL(LGBM_BoosterLoadModelFromString(model_str_ptr, &out_num_iterations, &handle));
|
||||
R_SetExternalPtrAddr(ret, handle);
|
||||
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||
UNPROTECT(n_protected);
|
||||
Rf_unprotect(n_protected);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -558,9 +703,9 @@ SEXP LGBM_BoosterResetParameter_R(SEXP handle,
|
|||
SEXP parameters) {
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
|
||||
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
|
||||
CHECK_CALL(LGBM_BoosterResetParameter(R_ExternalPtrAddr(handle), parameters_ptr));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -650,7 +795,7 @@ SEXP LGBM_BoosterGetLowerBoundValue_R(SEXP handle,
|
|||
}
|
||||
|
||||
SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
|
||||
SEXP cont_token = PROTECT(R_MakeUnwindCont());
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
SEXP eval_names;
|
||||
|
@ -689,11 +834,11 @@ SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
|
|||
ptr_names.data()));
|
||||
}
|
||||
CHECK_EQ(out_len, len);
|
||||
eval_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
|
||||
eval_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
|
||||
for (int i = 0; i < len; ++i) {
|
||||
SET_STRING_ELT(eval_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
|
||||
}
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return eval_names;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -763,14 +908,14 @@ SEXP LGBM_BoosterPredictForFile_R(SEXP handle,
|
|||
SEXP result_filename) {
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
const char* data_filename_ptr = CHAR(PROTECT(Rf_asChar(data_filename)));
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
const char* result_filename_ptr = CHAR(PROTECT(Rf_asChar(result_filename)));
|
||||
const char* data_filename_ptr = CHAR(Rf_protect(Rf_asChar(data_filename)));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
const char* result_filename_ptr = CHAR(Rf_protect(Rf_asChar(result_filename)));
|
||||
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
|
||||
CHECK_CALL(LGBM_BoosterPredictForFile(R_ExternalPtrAddr(handle), data_filename_ptr,
|
||||
Rf_asInteger(data_has_header), pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr,
|
||||
result_filename_ptr));
|
||||
UNPROTECT(3);
|
||||
Rf_unprotect(3);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -819,12 +964,12 @@ SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
|
|||
int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
|
||||
double* ptr_ret = REAL(out_result);
|
||||
int64_t out_len;
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
CHECK_CALL(LGBM_BoosterPredictForCSC(R_ExternalPtrAddr(handle),
|
||||
p_indptr, C_API_DTYPE_INT32, p_indices,
|
||||
p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
|
||||
nrow, pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -844,7 +989,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
|
|||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
int64_t out_len;
|
||||
CHECK_CALL(LGBM_BoosterPredictForCSR(R_ExternalPtrAddr(handle),
|
||||
INTEGER(indptr), C_API_DTYPE_INT32, INTEGER(indices),
|
||||
|
@ -852,7 +997,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
|
|||
Rf_xlength(indptr), Rf_xlength(data), Rf_asInteger(ncols),
|
||||
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
|
||||
parameter_ptr, &out_len, REAL(out_result)));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -871,7 +1016,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
|
|||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
int nnz = static_cast<int>(Rf_xlength(data));
|
||||
const int indptr[] = {0, nnz};
|
||||
int64_t out_len;
|
||||
|
@ -881,7 +1026,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
|
|||
2, nnz, Rf_asInteger(ncols),
|
||||
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
|
||||
parameter_ptr, &out_len, REAL(out_result)));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -901,8 +1046,8 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
|
|||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
FastConfigHandle out_fastConfig;
|
||||
CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFastInit(R_ExternalPtrAddr(handle),
|
||||
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
|
||||
|
@ -910,7 +1055,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
|
|||
parameter_ptr, &out_fastConfig));
|
||||
R_SetExternalPtrAddr(ret, out_fastConfig);
|
||||
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -950,12 +1095,12 @@ SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
|
|||
int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
|
||||
const double* p_mat = REAL(data);
|
||||
double* ptr_ret = REAL(out_result);
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
int64_t out_len;
|
||||
CHECK_CALL(LGBM_BoosterPredictForMat(R_ExternalPtrAddr(handle),
|
||||
p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
|
||||
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -964,8 +1109,6 @@ struct SparseOutputPointers {
|
|||
void* indptr;
|
||||
int32_t* indices;
|
||||
void* data;
|
||||
int indptr_type;
|
||||
int data_type;
|
||||
SparseOutputPointers(void* indptr, int32_t* indices, void* data)
|
||||
: indptr(indptr), indices(indices), data(data) {}
|
||||
};
|
||||
|
@ -985,12 +1128,12 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
|
|||
SEXP start_iteration,
|
||||
SEXP num_iteration,
|
||||
SEXP parameter) {
|
||||
SEXP cont_token = PROTECT(R_MakeUnwindCont());
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
const char* out_names[] = {"indptr", "indices", "data", ""};
|
||||
SEXP out = PROTECT(Rf_mkNamed(VECSXP, out_names));
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
|
||||
int64_t out_len[2];
|
||||
void *out_indptr;
|
||||
|
@ -1015,17 +1158,28 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
|
|||
&delete_SparseOutputPointers
|
||||
};
|
||||
|
||||
SEXP out_indptr_R = safe_R_int(out_len[1], &cont_token);
|
||||
SET_VECTOR_ELT(out, 0, out_indptr_R);
|
||||
SEXP out_indices_R = safe_R_int(out_len[0], &cont_token);
|
||||
SET_VECTOR_ELT(out, 1, out_indices_R);
|
||||
SEXP out_data_R = safe_R_real(out_len[0], &cont_token);
|
||||
SET_VECTOR_ELT(out, 2, out_data_R);
|
||||
std::memcpy(INTEGER(out_indptr_R), out_indptr, out_len[1]*sizeof(int));
|
||||
std::memcpy(INTEGER(out_indices_R), out_indices, out_len[0]*sizeof(int));
|
||||
std::memcpy(REAL(out_data_R), out_data, out_len[0]*sizeof(double));
|
||||
arr_and_len<int> indptr_str{static_cast<int*>(out_indptr), out_len[1]};
|
||||
SET_VECTOR_ELT(
|
||||
out, 0,
|
||||
R_UnwindProtect(make_altrepped_vec_from_arr<int>,
|
||||
static_cast<void*>(&indptr_str), throw_R_memerr, &cont_token, cont_token));
|
||||
pointers_struct->indptr = nullptr;
|
||||
|
||||
UNPROTECT(3);
|
||||
arr_and_len<int> indices_str{static_cast<int*>(out_indices), out_len[0]};
|
||||
SET_VECTOR_ELT(
|
||||
out, 1,
|
||||
R_UnwindProtect(make_altrepped_vec_from_arr<int>,
|
||||
static_cast<void*>(&indices_str), throw_R_memerr, &cont_token, cont_token));
|
||||
pointers_struct->indices = nullptr;
|
||||
|
||||
arr_and_len<double> data_str{static_cast<double*>(out_data), out_len[0]};
|
||||
SET_VECTOR_ELT(
|
||||
out, 2,
|
||||
R_UnwindProtect(make_altrepped_vec_from_arr<double>,
|
||||
static_cast<void*>(&data_str), throw_R_memerr, &cont_token, cont_token));
|
||||
pointers_struct->data = nullptr;
|
||||
|
||||
Rf_unprotect(3);
|
||||
return out;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -1042,14 +1196,14 @@ SEXP LGBM_BoosterPredictForMatSingleRow_R(SEXP handle,
|
|||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
double* ptr_ret = REAL(out_result);
|
||||
int64_t out_len;
|
||||
CHECK_CALL(LGBM_BoosterPredictForMatSingleRow(R_ExternalPtrAddr(handle),
|
||||
REAL(data), C_API_DTYPE_FLOAT64, Rf_xlength(data), 1,
|
||||
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
|
||||
parameter_ptr, &out_len, ptr_ret));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -1065,8 +1219,8 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
|
|||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
|
||||
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
|
||||
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
|
||||
FastConfigHandle out_fastConfig;
|
||||
CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFastInit(R_ExternalPtrAddr(handle),
|
||||
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
|
||||
|
@ -1074,7 +1228,7 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
|
|||
parameter_ptr, &out_fastConfig));
|
||||
R_SetExternalPtrAddr(ret, out_fastConfig);
|
||||
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return ret;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -1097,18 +1251,46 @@ SEXP LGBM_BoosterSaveModel_R(SEXP handle,
|
|||
SEXP start_iteration) {
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
|
||||
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
|
||||
CHECK_CALL(LGBM_BoosterSaveModel(R_ExternalPtrAddr(handle), Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), Rf_asInteger(feature_importance_type), filename_ptr));
|
||||
UNPROTECT(1);
|
||||
Rf_unprotect(1);
|
||||
return R_NilValue;
|
||||
R_API_END();
|
||||
}
|
||||
|
||||
// Note: for some reason, MSVC crashes when an error is thrown here
|
||||
// if the buffer variable is defined as 'std::unique_ptr<std::vector<char>>',
|
||||
// but not if it is defined as 'std::vector<char>'.
|
||||
#ifndef _MSC_VER
|
||||
SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
|
||||
SEXP num_iteration,
|
||||
SEXP feature_importance_type,
|
||||
SEXP start_iteration) {
|
||||
SEXP cont_token = PROTECT(R_MakeUnwindCont());
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
int64_t out_len = 0;
|
||||
int64_t buf_len = 1024 * 1024;
|
||||
int num_iter = Rf_asInteger(num_iteration);
|
||||
int start_iter = Rf_asInteger(start_iteration);
|
||||
int importance_type = Rf_asInteger(feature_importance_type);
|
||||
std::unique_ptr<std::vector<char>> inner_char_buf(new std::vector<char>(buf_len));
|
||||
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf->data()));
|
||||
inner_char_buf->resize(out_len);
|
||||
if (out_len > buf_len) {
|
||||
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf->data()));
|
||||
}
|
||||
SEXP out = R_UnwindProtect(make_altrepped_raw_vec, &inner_char_buf, throw_R_memerr, &cont_token, cont_token);
|
||||
Rf_unprotect(1);
|
||||
return out;
|
||||
R_API_END();
|
||||
}
|
||||
#else
|
||||
SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
|
||||
SEXP num_iteration,
|
||||
SEXP feature_importance_type,
|
||||
SEXP start_iteration) {
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
int64_t out_len = 0;
|
||||
|
@ -1118,23 +1300,24 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
|
|||
int importance_type = Rf_asInteger(feature_importance_type);
|
||||
std::vector<char> inner_char_buf(buf_len);
|
||||
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
|
||||
SEXP model_str = PROTECT(safe_R_raw(out_len, &cont_token));
|
||||
SEXP model_str = Rf_protect(safe_R_raw(out_len, &cont_token));
|
||||
// if the model string was larger than the initial buffer, call the function again, writing directly to the R object
|
||||
if (out_len > buf_len) {
|
||||
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, reinterpret_cast<char*>(RAW(model_str))));
|
||||
} else {
|
||||
std::copy(inner_char_buf.begin(), inner_char_buf.begin() + out_len, reinterpret_cast<char*>(RAW(model_str)));
|
||||
}
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return model_str;
|
||||
R_API_END();
|
||||
}
|
||||
#endif
|
||||
|
||||
SEXP LGBM_BoosterDumpModel_R(SEXP handle,
|
||||
SEXP num_iteration,
|
||||
SEXP feature_importance_type,
|
||||
SEXP start_iteration) {
|
||||
SEXP cont_token = PROTECT(R_MakeUnwindCont());
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
SEXP model_str;
|
||||
|
@ -1150,15 +1333,15 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
|
|||
inner_char_buf.resize(out_len);
|
||||
CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf.data()));
|
||||
}
|
||||
model_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
|
||||
model_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
|
||||
SET_STRING_ELT(model_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return model_str;
|
||||
R_API_END();
|
||||
}
|
||||
|
||||
SEXP LGBM_DumpParamAliases_R() {
|
||||
SEXP cont_token = PROTECT(R_MakeUnwindCont());
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
SEXP aliases_str;
|
||||
int64_t out_len = 0;
|
||||
|
@ -1170,15 +1353,15 @@ SEXP LGBM_DumpParamAliases_R() {
|
|||
inner_char_buf.resize(out_len);
|
||||
CHECK_CALL(LGBM_DumpParamAliases(out_len, &out_len, inner_char_buf.data()));
|
||||
}
|
||||
aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
|
||||
aliases_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
|
||||
SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return aliases_str;
|
||||
R_API_END();
|
||||
}
|
||||
|
||||
SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
|
||||
SEXP cont_token = PROTECT(R_MakeUnwindCont());
|
||||
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
|
||||
R_API_BEGIN();
|
||||
_AssertBoosterHandleNotNull(handle);
|
||||
SEXP params_str;
|
||||
|
@ -1191,9 +1374,9 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
|
|||
inner_char_buf.resize(out_len);
|
||||
CHECK_CALL(LGBM_BoosterGetLoadedParam(R_ExternalPtrAddr(handle), out_len, &out_len, inner_char_buf.data()));
|
||||
}
|
||||
params_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
|
||||
params_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
|
||||
SET_STRING_ELT(params_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
|
||||
UNPROTECT(2);
|
||||
Rf_unprotect(2);
|
||||
return params_str;
|
||||
R_API_END();
|
||||
}
|
||||
|
@ -1281,4 +1464,21 @@ LIGHTGBM_C_EXPORT void R_init_lightgbm(DllInfo *dll);
|
|||
void R_init_lightgbm(DllInfo *dll) {
|
||||
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
||||
R_useDynamicSymbols(dll, FALSE);
|
||||
|
||||
#ifndef LGB_NO_ALTREP
|
||||
lgb_altrepped_char_vec = R_make_altraw_class("lgb_altrepped_char_vec", "lightgbm", dll);
|
||||
R_set_altrep_Length_method(lgb_altrepped_char_vec, get_altrepped_raw_len);
|
||||
R_set_altvec_Dataptr_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr);
|
||||
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr_or_null);
|
||||
|
||||
lgb_altrepped_int_arr = R_make_altinteger_class("lgb_altrepped_int_arr", "lightgbm", dll);
|
||||
R_set_altrep_Length_method(lgb_altrepped_int_arr, get_altrepped_vec_len);
|
||||
R_set_altvec_Dataptr_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr);
|
||||
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr_or_null);
|
||||
|
||||
lgb_altrepped_dbl_arr = R_make_altreal_class("lgb_altrepped_dbl_arr", "lightgbm", dll);
|
||||
R_set_altrep_Length_method(lgb_altrepped_dbl_arr, get_altrepped_vec_len);
|
||||
R_set_altvec_Dataptr_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr);
|
||||
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr_or_null);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -2776,14 +2776,12 @@ test_that(paste0("lgb.train() throws an informative error if the members of inte
|
|||
test_that("lgb.train() throws an informative error if interaction_constraints contains a too large index", {
|
||||
dtrain <- lgb.Dataset(train$data, label = train$label)
|
||||
params <- list(objective = "regression",
|
||||
interaction_constraints = list(c(1L, length(colnames(train$data)) + 1L), 3L))
|
||||
expect_error({
|
||||
bst <- lightgbm(
|
||||
data = dtrain
|
||||
, params = params
|
||||
, nrounds = 2L
|
||||
)
|
||||
}, "supplied a too large value in interaction_constraints")
|
||||
interaction_constraints = list(c(1L, ncol(train$data) + 1L:2L), 3L))
|
||||
expect_error(
|
||||
lightgbm(data = dtrain, params = params, nrounds = 2L)
|
||||
, "unknown feature(s) in interaction_constraints: '127', '128'"
|
||||
, fixed = TRUE
|
||||
)
|
||||
})
|
||||
|
||||
test_that(paste0("lgb.train() gives same result when interaction_constraints is specified as a list of ",
|
||||
|
@ -2876,6 +2874,37 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
|
|||
|
||||
})
|
||||
|
||||
test_that("Interaction constraints add missing features correctly as new group", {
|
||||
dtrain <- lgb.Dataset(
|
||||
train$data[, 1L:6L] # Pick only some columns
|
||||
, label = train$label
|
||||
, params = list(num_threads = .LGB_MAX_THREADS)
|
||||
)
|
||||
|
||||
list_of_constraints <- list(
|
||||
list(3L, 1L:2L)
|
||||
, list("cap-shape=convex", c("cap-shape=bell", "cap-shape=conical"))
|
||||
)
|
||||
|
||||
for (constraints in list_of_constraints) {
|
||||
params <- list(
|
||||
objective = "regression"
|
||||
, interaction_constraints = constraints
|
||||
, verbose = .LGB_VERBOSITY
|
||||
, num_threads = .LGB_MAX_THREADS
|
||||
)
|
||||
bst <- lightgbm(data = dtrain, params = params, nrounds = 10L)
|
||||
|
||||
expected_list <- list("[2]", "[0,1]", "[3,4,5]")
|
||||
expect_equal(bst$params$interaction_constraints, expected_list)
|
||||
|
||||
expected_string <- "[interaction_constraints: [2],[0,1],[3,4,5]]"
|
||||
expect_true(
|
||||
grepl(expected_string, bst$save_model_to_string(), fixed = TRUE)
|
||||
)
|
||||
}
|
||||
})
|
||||
|
||||
.generate_trainset_for_monotone_constraints_tests <- function(x3_to_categorical) {
|
||||
n_samples <- 3000L
|
||||
x1_positively_correlated_with_y <- runif(n = n_samples, min = 0.0, max = 1.0)
|
||||
|
|
|
@ -440,6 +440,35 @@ test_that("lgb.Dataset: should be able to run lgb.cv() immediately after using l
|
|||
expect_true(methods::is(bst, "lgb.CVBooster"))
|
||||
})
|
||||
|
||||
test_that("lgb.Dataset: should be able to be used in lgb.cv() when constructed with categorical feature indices", {
|
||||
data("mtcars")
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1L])
|
||||
categorical_feature <- which(names(mtcars) %in% c("cyl", "vs", "am", "gear", "carb")) - 1L
|
||||
dtrain <- lgb.Dataset(
|
||||
data = x
|
||||
, label = y
|
||||
, categorical_feature = categorical_feature
|
||||
, free_raw_data = TRUE
|
||||
, params = list(num_threads = .LGB_MAX_THREADS)
|
||||
)
|
||||
# constructing the Dataset frees the raw data
|
||||
dtrain$construct()
|
||||
params <- list(
|
||||
objective = "regression"
|
||||
, num_leaves = 2L
|
||||
, verbose = .LGB_VERBOSITY
|
||||
, num_threads = .LGB_MAX_THREADS
|
||||
)
|
||||
# cv should reuse the same categorical features without checking the indices
|
||||
bst <- lgb.cv(params = params, data = dtrain, stratified = FALSE, nrounds = 1L)
|
||||
expect_equal(
|
||||
unlist(bst$boosters[[1L]]$booster$params$categorical_feature)
|
||||
, categorical_feature - 1L # 0-based
|
||||
)
|
||||
})
|
||||
|
||||
|
||||
test_that("lgb.Dataset: should be able to use and retrieve long feature names", {
|
||||
# set one feature to a value longer than the default buffer size used
|
||||
# in LGBM_DatasetGetFeatureNames_R
|
||||
|
@ -621,3 +650,12 @@ test_that("lgb.Dataset can be constructed with categorical features and without
|
|||
lgb.Dataset(raw_mat, categorical_feature = 2L)$construct()
|
||||
}, regexp = "supplied a too large value in categorical_feature: 2 but only 1 features")
|
||||
})
|
||||
|
||||
test_that("lgb.Dataset.slice fails with a categorical feature index greater than the number of features", {
|
||||
data <- matrix(runif(100L), nrow = 50L, ncol = 2L)
|
||||
ds <- lgb.Dataset(data = data, categorical_feature = 3L)
|
||||
subset <- ds$slice(1L:20L)
|
||||
expect_error({
|
||||
subset$construct()
|
||||
}, regexp = "supplied a too large value in categorical_feature: 3 but only 2 features")
|
||||
})
|
||||
|
|
|
@ -174,7 +174,7 @@ test_that("Loading a Booster from a text file works", {
|
|||
, bagging_freq = 1L
|
||||
, boost_from_average = FALSE
|
||||
, categorical_feature = c(1L, 2L)
|
||||
, interaction_constraints = list(c(1L, 2L), 1L)
|
||||
, interaction_constraints = list(1L:2L, 3L, 4L:ncol(train$data))
|
||||
, feature_contri = rep(0.5, ncol(train$data))
|
||||
, metric = c("mape", "average_precision")
|
||||
, learning_rate = 1.0
|
||||
|
|
|
@ -147,3 +147,21 @@ test_that(".equal_or_both_null produces expected results", {
|
|||
expect_false(.equal_or_both_null(10.0, 1L))
|
||||
expect_true(.equal_or_both_null(0L, 0L))
|
||||
})
|
||||
|
||||
test_that(".check_interaction_constraints() adds skipped features", {
|
||||
ref <- letters[1L:5L]
|
||||
ic_num <- list(1L, c(2L, 3L))
|
||||
ic_char <- list("a", c("b", "c"))
|
||||
expected <- list("[0]", "[1,2]", "[3,4]")
|
||||
|
||||
ic_checked_num <- .check_interaction_constraints(
|
||||
interaction_constraints = ic_num, column_names = ref
|
||||
)
|
||||
|
||||
ic_checked_char <- .check_interaction_constraints(
|
||||
interaction_constraints = ic_char, column_names = ref
|
||||
)
|
||||
|
||||
expect_equal(ic_checked_num, expected)
|
||||
expect_equal(ic_checked_char, expected)
|
||||
})
|
||||
|
|
|
@@ -133,7 +133,7 @@ Support
-------

- Ask a question [on Stack Overflow with the `lightgbm` tag](https://stackoverflow.com/questions/ask?tags=lightgbm), we monitor this for new questions.
- Open **bug reports** and **feature requests** (not questions) on [GitHub issues](https://github.com/microsoft/LightGBM/issues).
- Open **bug reports** and **feature requests** on [GitHub issues](https://github.com/microsoft/LightGBM/issues).

How to Contribute
-----------------

@@ -156,8 +156,6 @@ Qi Meng, Guolin Ke, Taifeng Wang, Wei Chen, Qiwei Ye, Zhi-Ming Ma, Tie-Yan Liu.

Huan Zhang, Si Si and Cho-Jui Hsieh. "[GPU Acceleration for Large-scale Tree Boosting](https://arxiv.org/abs/1706.08359)". SysML Conference, 2018.

**Note**: If you use LightGBM in your GitHub projects, please add `lightgbm` in the `requirements.txt`.

License
-------

@@ -1 +1 @@
4.3.0.99
4.4.0.99

@@ -149,7 +149,7 @@ and copy memory as required by creating new processes instead of forking (or, us

Cloud platform container services may cause LightGBM to hang, if they use Linux fork to run multiple containers on a
single instance. For example, LightGBM hangs in AWS Batch array jobs, which `use the ECS agent
<https://aws.amazon.com/batch/faqs/#Features>`__ to manage multiple running jobs. Setting ``nthreads=1`` mitigates the issue.
<https://aws.amazon.com/batch/faqs>`__ to manage multiple running jobs. Setting ``nthreads=1`` mitigates the issue.

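A minimal sketch of the ``nthreads=1`` workaround (the data and parameter values here are illustrative, not part of the FAQ itself):

.. code:: python

    import numpy as np
    import lightgbm as lgb

    rng = np.random.default_rng()
    X = rng.uniform(size=(100, 5))
    y = rng.integers(low=0, high=2, size=(100,))

    # "nthreads" is an alias of "num_threads"; a single thread avoids the fork-related hang
    params = {"objective": "binary", "nthreads": 1, "verbose": -1}
    bst = lgb.train(params, lgb.Dataset(X, label=y), num_boost_round=5)
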
12. Why is early stopping not enabled by default in LightGBM?
-------------------------------------------------------------

@@ -321,7 +321,7 @@ We are doing our best to provide universal wheels which have high running speed
However, sometimes it's just impossible to guarantee the possibility of usage of LightGBM in any specific environment (see `Microsoft/LightGBM#1743 <https://github.com/microsoft/LightGBM/issues/1743>`__).

Therefore, the first thing you should try in case of segfaults is **compiling from the source** using ``pip install --no-binary lightgbm lightgbm``.
For the OS-specific prerequisites see `this guide <https://github.com/microsoft/LightGBM/blob/master/python-package/README.rst#user-content-build-from-sources>`__.
For the OS-specific prerequisites see https://github.com/microsoft/LightGBM/blob/master/python-package/README.rst.

Also, feel free to post a new issue in our GitHub repository. We always look at each case individually and try to find a root cause.

@ -602,9 +602,9 @@ And open an issue in GitHub `here`_ with that log.
|
|||
|
||||
.. _Boost: https://www.boost.org/users/history/
|
||||
|
||||
.. _Prebuilt Boost x86_64: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw64-boost-static-1.78.0-4.fc38.noarch.rpm
|
||||
.. _Prebuilt Boost x86_64: https://www.rpmfind.net/linux/fedora/linux/releases/40/Everything/x86_64/os/Packages/m/mingw64-boost-static-1.78.0-9.fc40.noarch.rpm
|
||||
|
||||
.. _Prebuilt Boost i686: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw32-boost-static-1.78.0-4.fc38.noarch.rpm
|
||||
.. _Prebuilt Boost i686: https://www.rpmfind.net/linux/fedora/linux/releases/40/Everything/x86_64/os/Packages/m/mingw32-boost-static-1.78.0-9.fc40.noarch.rpm
|
||||
|
||||
.. _7zip: https://www.7-zip.org/download.html
|
||||
|
||||
|
|
|
@@ -22,7 +22,7 @@ To get good results using a leaf-wise tree, these are some important parameters:

1. ``num_leaves``. This is the main parameter to control the complexity of the tree model.

Theoretically, we can set ``num_leaves = 2^(max_depth)`` to obtain the same number of leaves as depth-wise tree.
However, this simple conversion is not good in practice.
The reason is that a leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting.
A leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting.
Thus, when trying to tune the ``num_leaves``, we should let it be smaller than ``2^(max_depth)``.
For example, when the ``max_depth=7`` the depth-wise tree can get good accuracy,
but setting ``num_leaves`` to ``127`` may cause over-fitting, and setting it to ``70`` or ``80`` may get better accuracy than depth-wise.

@@ -33,6 +33,7 @@ To get good results using a leaf-wise tree, these are some important parameters:
In practice, setting it to hundreds or thousands is enough for a large dataset.

3. ``max_depth``. You also can use ``max_depth`` to limit the tree depth explicitly.
If you set ``max_depth``, also explicitly set ``num_leaves`` to some value ``<= 2^max_depth``.

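As a rough illustration of that advice (a sketch, not part of the tuning guide; ``train_data`` is assumed to be an existing ``lgb.Dataset``):

.. code:: python

    import lightgbm as lgb

    params = {
        "objective": "regression",
        "max_depth": 7,
        # 2^7 = 128 leaves would reproduce a full depth-wise tree; 70 keeps the model simpler
        "num_leaves": 70,
        "min_data_in_leaf": 100,
    }
    bst = lgb.train(params, train_data, num_boost_round=100)
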
For Faster Speed
----------------

@@ -414,6 +414,8 @@ Learning Control Parameters

- when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement

- *New in 4.4.0*

- ``first_metric_only`` :raw-html:`<a id="first_metric_only" title="Permalink to this parameter" href="#first_metric_only">🔗︎</a>`, default = ``false``, type = bool

- LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping

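A minimal sketch of how the new ``early_stopping_min_delta`` parameter might be passed (assuming training and validation ``lgb.Dataset`` objects named ``train_data`` and ``valid_data``):

.. code:: python

    import lightgbm as lgb

    params = {
        "objective": "regression",
        "metric": "l2",
        "early_stopping_round": 10,
        # the evaluation metric must improve by at least 1e-3 to count as an improvement
        "early_stopping_min_delta": 1e-3,
    }
    bst = lgb.train(params, train_data, num_boost_round=200, valid_sets=[valid_data])
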
@ -59,8 +59,9 @@ Many of the examples in this page use functionality from ``numpy``. To run the e
|
|||
|
||||
.. code:: python
|
||||
|
||||
data = np.random.rand(500, 10) # 500 entities, each contains 10 features
|
||||
label = np.random.randint(2, size=500) # binary target
|
||||
rng = np.random.default_rng()
|
||||
data = rng.uniform(size=(500, 10)) # 500 entities, each contains 10 features
|
||||
label = rng.integers(low=0, high=2, size=(500, )) # binary target
|
||||
train_data = lgb.Dataset(data, label=label)
|
||||
|
||||
**To load a scipy.sparse.csr\_matrix array into Dataset:**
|
||||
|
@ -139,7 +140,8 @@ It doesn't need to convert to one-hot encoding, and is much faster than one-hot
|
|||
|
||||
.. code:: python
|
||||
|
||||
w = np.random.rand(500, )
|
||||
rng = np.random.default_rng()
|
||||
w = rng.uniform(size=(500, ))
|
||||
train_data = lgb.Dataset(data, label=label, weight=w)
|
||||
|
||||
or
|
||||
|
@ -147,7 +149,8 @@ or
|
|||
.. code:: python
|
||||
|
||||
train_data = lgb.Dataset(data, label=label)
|
||||
w = np.random.rand(500, )
|
||||
rng = np.random.default_rng()
|
||||
w = rng.uniform(size=(500, ))
|
||||
train_data.set_weight(w)
|
||||
|
||||
And you can use ``Dataset.set_init_score()`` to set initial score, and ``Dataset.set_group()`` to set group/query data for ranking tasks.
|
||||
|
@ -249,7 +252,8 @@ A model that has been trained or loaded can perform predictions on datasets:
|
|||
.. code:: python
|
||||
|
||||
# 7 entities, each contains 10 features
|
||||
data = np.random.rand(7, 10)
|
||||
rng = np.random.default_rng()
|
||||
data = rng.uniform(size=(7, 10))
|
||||
ypred = bst.predict(data)
|
||||
|
||||
If early stopping is enabled during training, you can get predictions from the best iteration with ``bst.best_iteration``:
|
||||
|
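A minimal sketch of that call (assuming ``bst`` was trained with early stopping enabled and ``data`` is a numpy array of features):

.. code:: python

    ypred = bst.predict(data, num_iteration=bst.best_iteration)
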
|
|
@ -17,6 +17,7 @@
|
|||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute.
|
||||
"""Sphinx configuration file."""
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import sys
|
||||
|
|
|
@ -22,15 +22,15 @@ import lightgbm as lgb
|
|||
#################
|
||||
# Simulate some binary data with a single categorical and
|
||||
# single continuous predictor
|
||||
np.random.seed(0)
|
||||
rng = np.random.default_rng(seed=0)
|
||||
N = 1000
|
||||
X = pd.DataFrame({"continuous": range(N), "categorical": np.repeat([0, 1, 2, 3, 4], N / 5)})
|
||||
CATEGORICAL_EFFECTS = [-1, -1, -2, -2, 2]
|
||||
LINEAR_TERM = np.array(
|
||||
[-0.5 + 0.01 * X["continuous"][k] + CATEGORICAL_EFFECTS[X["categorical"][k]] for k in range(X.shape[0])]
|
||||
) + np.random.normal(0, 1, X.shape[0])
|
||||
) + rng.normal(loc=0, scale=1, size=X.shape[0])
|
||||
TRUE_PROB = expit(LINEAR_TERM)
|
||||
Y = np.random.binomial(1, TRUE_PROB, size=N)
|
||||
Y = rng.binomial(n=1, p=TRUE_PROB, size=N)
|
||||
DATA = {
|
||||
"X": X,
|
||||
"probability_labels": TRUE_PROB,
|
||||
|
@ -65,10 +65,9 @@ def experiment(objective, label_type, data):
|
|||
result : dict
|
||||
Experiment summary stats.
|
||||
"""
|
||||
np.random.seed(0)
|
||||
nrounds = 5
|
||||
lgb_data = data[f"lgb_with_{label_type}_labels"]
|
||||
params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1}
|
||||
params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1, "seed": 123}
|
||||
time_zero = time.time()
|
||||
gbm = lgb.train(params, lgb_data, num_boost_round=nrounds)
|
||||
y_fitted = gbm.predict(data["X"])
|
||||
|
|
|
@ -12,6 +12,7 @@ Version history for these symbols can be found at the following:
|
|||
* GLIBCXX: https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
|
||||
* OMP/GOMP: https://github.com/gcc-mirror/gcc/blob/master/libgomp/libgomp.map
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
|
|
@ -6,6 +6,7 @@ with list of all parameters, aliases table and other routines
|
|||
along with parameters description in LightGBM/docs/Parameters.rst file
|
||||
from the information in LightGBM/include/LightGBM/config.h file.
|
||||
"""
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
|
|
@ -396,6 +396,7 @@ struct Config {
|
|||
|
||||
// check = >=0.0
|
||||
// desc = when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement
|
||||
// desc = *New in 4.4.0*
|
||||
double early_stopping_min_delta = 0.0;
|
||||
|
||||
// desc = LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
|
||||
|
@ -1146,7 +1147,7 @@ struct Config {
|
|||
static const std::string DumpAliases();
|
||||
|
||||
private:
|
||||
void CheckParamConflict();
|
||||
void CheckParamConflict(const std::unordered_map<std::string, std::string>& params);
|
||||
void GetMembersFromString(const std::unordered_map<std::string, std::string>& params);
|
||||
std::string SaveMembersToString() const;
|
||||
void GetAucMuWeights();
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
PMML Generator
|
||||
==============
|
||||
|
||||
The old Python convert script is removed due to it cannot support the new format of categorical features.
|
||||
|
||||
Please refer to https://github.com/jpmml/jpmml-lightgbm.
|
|
@ -11,8 +11,6 @@ Preparation
|
|||
|
||||
32-bit Python is not supported. Please install 64-bit version. If you have a strong need to install with 32-bit Python, refer to `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__.
|
||||
|
||||
`setuptools <https://pypi.org/project/setuptools>`_ is needed.
|
||||
|
||||
Install from `PyPI <https://pypi.org/project/lightgbm>`_
|
||||
''''''''''''''''''''''''''''''''''''''''''''''''''''''''
|
||||
|
||||
|
@ -299,10 +297,6 @@ Refer to the walk through examples in `Python guide folder <https://github.com/m
|
|||
Development Guide
|
||||
-----------------
|
||||
|
||||
The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_.
|
||||
|
||||
The package's documentation strings (docstrings) are written in the `numpydoc style <https://numpydoc.readthedocs.io/en/latest/format.html>`_.
|
||||
|
||||
To check that a contribution to the package matches its style expectations, run the following from the root of the repo.
|
||||
|
||||
.. code:: sh
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .basic import Booster, Dataset, Sequence, register_logger
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# coding: utf-8
|
||||
"""Wrapper for C API of LightGBM."""
|
||||
|
||||
import abc
|
||||
import ctypes
|
||||
import inspect
|
||||
|
@ -355,10 +356,10 @@ def _list_to_1d_numpy(
|
|||
array = data.ravel()
|
||||
return _cast_numpy_array_to_dtype(array, dtype)
|
||||
elif _is_1d_list(data):
|
||||
return np.array(data, dtype=dtype, copy=False)
|
||||
return np.asarray(data, dtype=dtype)
|
||||
elif isinstance(data, pd_Series):
|
||||
_check_for_bad_pandas_dtypes(data.to_frame().dtypes)
|
||||
return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well
|
||||
return np.asarray(data, dtype=dtype) # SparseArray should be supported as well
|
||||
else:
|
||||
raise TypeError(
|
||||
f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
|
||||
|
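The ``np.array(..., copy=False)`` to ``np.asarray(...)`` replacements in this hunk track NumPy 2.0, where ``copy=False`` raises if a copy cannot be avoided, while ``np.asarray`` keeps the old copy-only-when-needed behaviour. A small sketch of the difference (plain NumPy, not library code):

import numpy as np

lst = [1, 2, 3]
a = np.asarray(lst, dtype=np.float64)   # has to copy (input is a list) and does so silently

arr = np.arange(3, dtype=np.float64)
b = np.asarray(arr, dtype=np.float64)   # already the right dtype: no copy, same object
assert b is arr

# under NumPy 2.0 the following raises ValueError, because a copy is unavoidable:
# np.array(lst, dtype=np.float64, copy=False)
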
@ -556,7 +557,8 @@ class LightGBMError(Exception):
|
|||
|
||||
|
||||
# DeprecationWarning is not shown by default, so let's create our own with higher level
|
||||
class LGBMDeprecationWarning(UserWarning):
|
||||
# ref: https://peps.python.org/pep-0565/#additional-use-case-for-futurewarning
|
||||
class LGBMDeprecationWarning(FutureWarning):
|
||||
"""Custom deprecation warning."""
|
||||
|
||||
pass
|
||||
|
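A small sketch of why basing ``LGBMDeprecationWarning`` on ``FutureWarning`` matters: per PEP 565, ``FutureWarning`` is meant for deprecations aimed at end users and is shown by default, so no warnings-filter configuration is needed. The class and message below are illustrative only, not library code:

import warnings

class _DemoDeprecationWarning(FutureWarning):
    """Stand-in for LGBMDeprecationWarning."""

# visible by default, unlike a bare DeprecationWarning
warnings.warn("this parameter is deprecated and will be removed in a future release", _DemoDeprecationWarning)
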
@ -726,7 +728,7 @@ def _convert_from_sliced_object(data: np.ndarray) -> np.ndarray:
|
|||
def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]:
|
||||
"""Get pointer of float numpy array / list."""
|
||||
if _is_1d_list(data):
|
||||
data = np.array(data, copy=False)
|
||||
data = np.asarray(data)
|
||||
if _is_numpy_1d_array(data):
|
||||
data = _convert_from_sliced_object(data)
|
||||
assert data.flags.c_contiguous
|
||||
|
@ -747,7 +749,7 @@ def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray
|
|||
def _c_int_array(data: np.ndarray) -> Tuple[_ctypes_int_ptr, int, np.ndarray]:
|
||||
"""Get pointer of int numpy array / list."""
|
||||
if _is_1d_list(data):
|
||||
data = np.array(data, copy=False)
|
||||
data = np.asarray(data)
|
||||
if _is_numpy_1d_array(data):
|
||||
data = _convert_from_sliced_object(data)
|
||||
assert data.flags.c_contiguous
|
||||
|
@ -1268,7 +1270,7 @@ class _InnerPredictor:
|
|||
preds: Optional[np.ndarray],
|
||||
) -> Tuple[np.ndarray, int]:
|
||||
if mat.dtype == np.float32 or mat.dtype == np.float64:
|
||||
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
|
||||
data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
|
||||
else: # change non-float data to float data, need to copy
|
||||
data = np.array(mat.reshape(mat.size), dtype=np.float32)
|
||||
ptr_data, type_ptr_data, _ = _c_float_array(data)
|
||||
|
@ -2283,9 +2285,9 @@ class Dataset:
|
|||
|
||||
self._handle = ctypes.c_void_p()
|
||||
if mat.dtype == np.float32 or mat.dtype == np.float64:
|
||||
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
|
||||
data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
|
||||
else: # change non-float data to float data, need to copy
|
||||
data = np.array(mat.reshape(mat.size), dtype=np.float32)
|
||||
data = np.asarray(mat.reshape(mat.size), dtype=np.float32)
|
||||
|
||||
ptr_data, type_ptr_data, _ = _c_float_array(data)
|
||||
_safe_call(
|
||||
|
@ -2330,7 +2332,7 @@ class Dataset:
|
|||
nrow[i] = mat.shape[0]
|
||||
|
||||
if mat.dtype == np.float32 or mat.dtype == np.float64:
|
||||
mats[i] = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
|
||||
mats[i] = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
|
||||
else: # change non-float data to float data, need to copy
|
||||
mats[i] = np.array(mat.reshape(mat.size), dtype=np.float32)
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# coding: utf-8
|
||||
"""Callbacks library."""
|
||||
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
from functools import partial
|
||||
|
|
|
@ -37,18 +37,6 @@ except ImportError:
|
|||
|
||||
concat = None
|
||||
|
||||
"""numpy"""
|
||||
try:
|
||||
from numpy.random import Generator as np_random_Generator
|
||||
except ImportError:
|
||||
|
||||
class np_random_Generator: # type: ignore
|
||||
"""Dummy class for np.random.Generator."""
|
||||
|
||||
def __init__(self, *args: Any, **kwargs: Any):
|
||||
pass
|
||||
|
||||
|
||||
"""matplotlib"""
|
||||
try:
|
||||
import matplotlib # noqa: F401
|
||||
|
|
|
@ -6,6 +6,7 @@ dask.Array and dask.DataFrame collections.
|
|||
|
||||
It is based on dask-lightgbm, which was based on dask-xgboost.
|
||||
"""
|
||||
|
||||
import operator
|
||||
import socket
|
||||
from collections import defaultdict
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# coding: utf-8
|
||||
"""Library with training routines of LightGBM."""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import warnings
|
||||
|
@ -511,7 +512,7 @@ def _make_n_folds(
|
|||
if hasattr(folds, "split"):
|
||||
group_info = full_data.get_group()
|
||||
if group_info is not None:
|
||||
group_info = np.array(group_info, dtype=np.int32, copy=False)
|
||||
group_info = np.asarray(group_info, dtype=np.int32)
|
||||
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
|
||||
else:
|
||||
flatted_group = np.zeros(num_data, dtype=np.int32)
|
||||
|
@ -525,7 +526,7 @@ def _make_n_folds(
|
|||
if not SKLEARN_INSTALLED:
|
||||
raise LightGBMError("scikit-learn is required for ranking cv")
|
||||
# ranking task, split according to groups
|
||||
group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
|
||||
group_info = np.asarray(full_data.get_group(), dtype=np.int32)
|
||||
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
|
||||
group_kfold = _LGBMGroupKFold(n_splits=nfold)
|
||||
folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)
|
||||
|
|
|
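What the ``np.repeat(range(len(group_info)), repeats=group_info)`` expression above does: it expands per-query group sizes into one group id per data row, so a group-aware CV splitter can split by query. The values below are illustrative only:

import numpy as np

group_info = np.asarray([3, 2, 4], dtype=np.int32)  # 3 queries containing 3, 2 and 4 rows
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
# flatted_group == array([0, 0, 0, 1, 1, 2, 2, 2, 2])
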
@ -1,5 +1,6 @@
|
|||
# coding: utf-8
|
||||
"""Find the path to LightGBM dynamic library files."""
|
||||
|
||||
from pathlib import Path
|
||||
from platform import system
|
||||
from typing import List
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# coding: utf-8
|
||||
"""Plotting library."""
|
||||
|
||||
import math
|
||||
from copy import deepcopy
|
||||
from io import BytesIO
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# coding: utf-8
|
||||
"""Scikit-learn wrapper interface for LightGBM."""
|
||||
|
||||
import copy
|
||||
from inspect import signature
|
||||
from pathlib import Path
|
||||
|
@ -40,7 +41,6 @@ from .compat import (
|
|||
_LGBMModelBase,
|
||||
_LGBMRegressorBase,
|
||||
dt_DataTable,
|
||||
np_random_Generator,
|
||||
pd_DataFrame,
|
||||
)
|
||||
from .engine import train
|
||||
|
@ -454,6 +454,30 @@ _lgbmmodel_doc_predict = """
|
|||
"""
|
||||
|
||||
|
||||
def _extract_evaluation_meta_data(
|
||||
*,
|
||||
collection: Optional[Union[Dict[Any, Any], List[Any]]],
|
||||
name: str,
|
||||
i: int,
|
||||
) -> Optional[Any]:
|
||||
"""Try to extract the ith element of one of the ``eval_*`` inputs."""
|
||||
if collection is None:
|
||||
return None
|
||||
elif isinstance(collection, list):
|
||||
# It's possible, for example, to pass 3 eval sets through `eval_set`,
|
||||
# but only 1 init_score through `eval_init_score`.
|
||||
#
|
||||
# This if-else accounts for that possibility.
|
||||
if len(collection) > i:
|
||||
return collection[i]
|
||||
else:
|
||||
return None
|
||||
elif isinstance(collection, dict):
|
||||
return collection.get(i, None)
|
||||
else:
|
||||
raise TypeError(f"{name} should be dict or list")
|
||||
|
||||
|
||||
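A rough sketch of how the new private helper behaves for list and dict inputs (the values are illustrative; this just mirrors the logic above and is not additional library API):

weights = [[1.0] * 10, [2.0] * 10]  # e.g. sample weights supplied for only the first two eval sets
assert _extract_evaluation_meta_data(collection=weights, name="eval_sample_weight", i=1) == [2.0] * 10
assert _extract_evaluation_meta_data(collection=weights, name="eval_sample_weight", i=2) is None  # missing entry -> None
assert _extract_evaluation_meta_data(collection={0: [5]}, name="eval_group", i=1) is None          # absent dict key -> None
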
class LGBMModel(_LGBMModelBase):
|
||||
"""Implementation of the scikit-learn API for LightGBM."""
|
||||
|
||||
|
@ -475,7 +499,7 @@ class LGBMModel(_LGBMModelBase):
|
|||
colsample_bytree: float = 1.0,
|
||||
reg_alpha: float = 0.0,
|
||||
reg_lambda: float = 0.0,
|
||||
random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
|
||||
random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
|
||||
n_jobs: Optional[int] = None,
|
||||
importance_type: str = "split",
|
||||
**kwargs: Any,
|
||||
|
@ -492,6 +516,7 @@ class LGBMModel(_LGBMModelBase):
|
|||
Maximum tree leaves for base learners.
|
||||
max_depth : int, optional (default=-1)
|
||||
Maximum tree depth for base learners, <=0 means no limit.
|
||||
If setting this to a positive value, consider also changing ``num_leaves`` to ``<= 2^max_depth``.
|
||||
learning_rate : float, optional (default=0.1)
|
||||
Boosting learning rate.
|
||||
You can use ``callbacks`` parameter of ``fit`` method to shrink/adapt learning rate
|
||||
|
@ -738,7 +763,7 @@ class LGBMModel(_LGBMModelBase):
|
|||
|
||||
if isinstance(params["random_state"], np.random.RandomState):
|
||||
params["random_state"] = params["random_state"].randint(np.iinfo(np.int32).max)
|
||||
elif isinstance(params["random_state"], np_random_Generator):
|
||||
elif isinstance(params["random_state"], np.random.Generator):
|
||||
params["random_state"] = int(params["random_state"].integers(np.iinfo(np.int32).max))
|
||||
if self._n_classes > 2:
|
||||
for alias in _ConfigAliases.get("num_class"):
|
||||
|
@ -868,17 +893,6 @@ class LGBMModel(_LGBMModelBase):
|
|||
|
||||
valid_sets: List[Dataset] = []
|
||||
if eval_set is not None:
|
||||
|
||||
def _get_meta_data(collection, name, i):
|
||||
if collection is None:
|
||||
return None
|
||||
elif isinstance(collection, list):
|
||||
return collection[i] if len(collection) > i else None
|
||||
elif isinstance(collection, dict):
|
||||
return collection.get(i, None)
|
||||
else:
|
||||
raise TypeError(f"{name} should be dict or list")
|
||||
|
||||
if isinstance(eval_set, tuple):
|
||||
eval_set = [eval_set]
|
||||
for i, valid_data in enumerate(eval_set):
|
||||
|
@ -886,8 +900,16 @@ class LGBMModel(_LGBMModelBase):
|
|||
if valid_data[0] is X and valid_data[1] is y:
|
||||
valid_set = train_set
|
||||
else:
|
||||
valid_weight = _get_meta_data(eval_sample_weight, "eval_sample_weight", i)
|
||||
valid_class_weight = _get_meta_data(eval_class_weight, "eval_class_weight", i)
|
||||
valid_weight = _extract_evaluation_meta_data(
|
||||
collection=eval_sample_weight,
|
||||
name="eval_sample_weight",
|
||||
i=i,
|
||||
)
|
||||
valid_class_weight = _extract_evaluation_meta_data(
|
||||
collection=eval_class_weight,
|
||||
name="eval_class_weight",
|
||||
i=i,
|
||||
)
|
||||
if valid_class_weight is not None:
|
||||
if isinstance(valid_class_weight, dict) and self._class_map is not None:
|
||||
valid_class_weight = {self._class_map[k]: v for k, v in valid_class_weight.items()}
|
||||
|
@ -896,8 +918,16 @@ class LGBMModel(_LGBMModelBase):
|
|||
valid_weight = valid_class_sample_weight
|
||||
else:
|
||||
valid_weight = np.multiply(valid_weight, valid_class_sample_weight)
|
||||
valid_init_score = _get_meta_data(eval_init_score, "eval_init_score", i)
|
||||
valid_group = _get_meta_data(eval_group, "eval_group", i)
|
||||
valid_init_score = _extract_evaluation_meta_data(
|
||||
collection=eval_init_score,
|
||||
name="eval_init_score",
|
||||
i=i,
|
||||
)
|
||||
valid_group = _extract_evaluation_meta_data(
|
||||
collection=eval_group,
|
||||
name="eval_group",
|
||||
i=i,
|
||||
)
|
||||
valid_set = Dataset(
|
||||
data=valid_data[0],
|
||||
label=valid_data[1],
|
||||
|
|
|
@ -15,11 +15,11 @@ classifiers = [
|
|||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence"
|
||||
]
|
||||
dependencies = [
|
||||
"dataclasses ; python_version < '3.7'",
|
||||
"numpy",
|
||||
"numpy>=1.17.0",
|
||||
"scipy"
|
||||
]
|
||||
description = "LightGBM Python Package"
|
||||
|
@ -29,8 +29,8 @@ maintainers = [
|
|||
]
|
||||
name = "lightgbm"
|
||||
readme = "README.rst"
|
||||
requires-python = ">=3.6"
|
||||
version = "4.3.0.99"
|
||||
requires-python = ">=3.7"
|
||||
version = "4.4.0.99"
|
||||
|
||||
[project.optional-dependencies]
|
||||
arrow = [
|
||||
|
@ -79,7 +79,7 @@ logging.level = "INFO"
|
|||
sdist.reproducible = true
|
||||
wheel.py-api = "py3"
|
||||
experimental = false
|
||||
strict-config = true
|
||||
strict-config = false
|
||||
minimum-version = "0.9.3"
|
||||
|
||||
# end:build-system
|
||||
|
@ -156,6 +156,8 @@ select = [
|
|||
"E",
|
||||
# pyflakes
|
||||
"F",
|
||||
# NumPy-specific rules
|
||||
"NPY",
|
||||
# pylint
|
||||
"PL",
|
||||
# flake8-return: unnecessary assignment before return
|
||||
|
|
|
@ -289,14 +289,14 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) {
|
|||
}
|
||||
|
||||
// check for conflicts
|
||||
CheckParamConflict();
|
||||
CheckParamConflict(params);
|
||||
}
|
||||
|
||||
bool CheckMultiClassObjective(const std::string& objective) {
|
||||
return (objective == std::string("multiclass") || objective == std::string("multiclassova"));
|
||||
}
|
||||
|
||||
void Config::CheckParamConflict() {
|
||||
void Config::CheckParamConflict(const std::unordered_map<std::string, std::string>& params) {
|
||||
// check if objective, metric, and num_class match
|
||||
int num_class_check = num_class;
|
||||
bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective == std::string("custom") && num_class_check > 1);
|
||||
|
@ -356,14 +356,24 @@ void Config::CheckParamConflict() {
|
|||
tree_learner.c_str());
|
||||
}
|
||||
}
|
||||
// Check max_depth and num_leaves
|
||||
if (max_depth > 0) {
|
||||
|
||||
// max_depth defaults to -1, so max_depth>0 implies "you explicitly overrode the default"
|
||||
//
|
||||
// Changing max_depth while leaving num_leaves at its default (31) can lead to 2 undesirable situations:
|
||||
//
|
||||
// * (0 <= max_depth <= 4) it's not possible to produce a tree with 31 leaves
|
||||
// - this block reduces num_leaves to 2^max_depth
|
||||
// * (max_depth > 4) 31 leaves is less than a full depth-wise tree, which might lead to underfitting
|
||||
// - this block warns about that
|
||||
// ref: https://github.com/microsoft/LightGBM/issues/2898#issuecomment-1002860601
|
||||
if (max_depth > 0 && (params.count("num_leaves") == 0 || params.at("num_leaves").empty())) {
|
||||
double full_num_leaves = std::pow(2, max_depth);
|
||||
if (full_num_leaves > num_leaves
|
||||
&& num_leaves == kDefaultNumLeaves) {
|
||||
Log::Warning("Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves."
|
||||
" (num_leaves=%d).",
|
||||
num_leaves);
|
||||
if (full_num_leaves > num_leaves) {
|
||||
Log::Warning("Provided parameters constrain tree depth (max_depth=%d) without explicitly setting 'num_leaves'. "
|
||||
"This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<=%.0f) in params. "
|
||||
"Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity.",
|
||||
max_depth,
|
||||
full_num_leaves);
|
||||
}
|
||||
|
||||
if (full_num_leaves < num_leaves) {
|
||||
|
|
|
@ -274,7 +274,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
|
|||
dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), rank, num_machines, &num_global_data, &used_data_indices));
|
||||
|
||||
// checks whether there's a initial score file when loaded from binary data files
|
||||
// the intial score file should with suffix ".bin.init"
|
||||
// the initial score file should with suffix ".bin.init"
|
||||
dataset->metadata_.LoadInitialScore(bin_filename);
|
||||
|
||||
dataset->device_type_ = config_.device_type;
|
||||
|
@ -344,7 +344,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
|
|||
// load data from binary file
|
||||
dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), 0, 1, &num_global_data, &used_data_indices));
|
||||
// checks whether there's a initial score file when loaded from binary data files
|
||||
// the intial score file should with suffix ".bin.init"
|
||||
// the initial score file should with suffix ".bin.init"
|
||||
dataset->metadata_.LoadInitialScore(bin_filename);
|
||||
}
|
||||
// not need to check validation data
|
||||
|
|
|
@ -125,7 +125,7 @@ def load_from_mat(filename, reference):
|
|||
mat = np.loadtxt(str(filename), dtype=np.float64)
|
||||
label = mat[:, 0].astype(np.float32)
|
||||
mat = mat[:, 1:]
|
||||
data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
|
||||
data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
|
||||
handle = ctypes.c_void_p()
|
||||
ref = None
|
||||
if reference is not None:
|
||||
|
@ -203,7 +203,7 @@ def test_booster():
|
|||
mat = data[:, 1:]
|
||||
preb = np.empty(mat.shape[0], dtype=np.float64)
|
||||
num_preb = ctypes.c_int64(0)
|
||||
data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
|
||||
data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
|
||||
LIB.LGBM_BoosterPredictForMat(
|
||||
booster2,
|
||||
data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def rng():
|
||||
return np.random.default_rng()
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def rng_fixed_seed():
|
||||
return np.random.default_rng(seed=42)
|
|
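A hypothetical test showing how pytest injects these new fixtures (the test names and shapes are illustrative only):

def test_uses_random_data(rng):
    # `rng` is a fresh, unseeded np.random.Generator for this test
    X = rng.uniform(size=(100, 5))
    assert X.shape == (100, 5)

def test_is_reproducible(rng_fixed_seed):
    # `rng_fixed_seed` is always seeded with 42, so the draw is identical across runs
    first_draw = rng_fixed_seed.standard_normal(size=3)
    assert first_draw.shape == (3,)
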
@ -20,6 +20,10 @@ if os.getenv("ALLOW_SKIP_ARROW_TESTS") == "1":
|
|||
else:
|
||||
import pyarrow as pa # type: ignore
|
||||
|
||||
assert (
|
||||
lgb.compat.PYARROW_INSTALLED is True
|
||||
), "'pyarrow' and its dependencies must be installed to run the arrow tests"
|
||||
|
||||
# ----------------------------------------------------------------------------------------------- #
|
||||
# UTILITIES #
|
||||
# ----------------------------------------------------------------------------------------------- #
|
||||
|
|
|
@ -9,7 +9,7 @@ from pathlib import Path
|
|||
import numpy as np
|
||||
import pytest
|
||||
from scipy import sparse
|
||||
from sklearn.datasets import dump_svmlight_file, load_svmlight_file
|
||||
from sklearn.datasets import dump_svmlight_file, load_svmlight_file, make_blobs
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
import lightgbm as lgb
|
||||
|
@ -136,7 +136,7 @@ def _create_sequence_from_ndarray(data, num_seq, batch_size):
|
|||
@pytest.mark.parametrize("batch_size", [3, None])
|
||||
@pytest.mark.parametrize("include_0_and_nan", [False, True])
|
||||
@pytest.mark.parametrize("num_seq", [1, 3])
|
||||
def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
|
||||
def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq, rng):
|
||||
params = {"bin_construct_sample_cnt": sample_count}
|
||||
|
||||
nrow = 50
|
||||
|
@ -175,7 +175,6 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
|
|||
|
||||
# Test for validation set.
|
||||
# Select some random rows as valid data.
|
||||
rng = np.random.default_rng() # Pass integer to set seed when needed.
|
||||
valid_idx = (rng.random(10) * nrow).astype(np.int32)
|
||||
valid_data = data[valid_idx, :]
|
||||
valid_X = valid_data[:, :-1]
|
||||
|
@ -201,7 +200,7 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
|
|||
|
||||
|
||||
@pytest.mark.parametrize("num_seq", [1, 2])
|
||||
def test_sequence_get_data(num_seq):
|
||||
def test_sequence_get_data(num_seq, rng):
|
||||
nrow = 20
|
||||
ncol = 11
|
||||
data = np.arange(nrow * ncol, dtype=np.float64).reshape((nrow, ncol))
|
||||
|
@ -212,7 +211,7 @@ def test_sequence_get_data(num_seq):
|
|||
seq_ds = lgb.Dataset(seqs, label=Y, params=None, free_raw_data=False).construct()
|
||||
assert seq_ds.get_data() == seqs
|
||||
|
||||
used_indices = np.random.choice(np.arange(nrow), nrow // 3, replace=False)
|
||||
used_indices = rng.choice(a=np.arange(nrow), size=nrow // 3, replace=False)
|
||||
subset_data = seq_ds.subset(used_indices).construct()
|
||||
np.testing.assert_array_equal(subset_data.get_data(), X[sorted(used_indices)])
|
||||
|
||||
|
@ -246,8 +245,8 @@ def test_chunked_dataset_linear():
|
|||
valid_data.construct()
|
||||
|
||||
|
||||
def test_save_dataset_subset_and_load_from_file(tmp_path):
|
||||
data = np.random.rand(100, 2)
|
||||
def test_save_dataset_subset_and_load_from_file(tmp_path, rng):
|
||||
data = rng.standard_normal(size=(100, 2))
|
||||
params = {"max_bin": 50, "min_data_in_bin": 10}
|
||||
ds = lgb.Dataset(data, params=params)
|
||||
ds.subset([1, 2, 3, 5, 8]).save_binary(tmp_path / "subset.bin")
|
||||
|
@ -267,18 +266,18 @@ def test_subset_group():
|
|||
assert subset_group[1] == 9
|
||||
|
||||
|
||||
def test_add_features_throws_if_num_data_unequal():
|
||||
X1 = np.random.random((100, 1))
|
||||
X2 = np.random.random((10, 1))
|
||||
def test_add_features_throws_if_num_data_unequal(rng):
|
||||
X1 = rng.uniform(size=(100, 1))
|
||||
X2 = rng.uniform(size=(10, 1))
|
||||
d1 = lgb.Dataset(X1).construct()
|
||||
d2 = lgb.Dataset(X2).construct()
|
||||
with pytest.raises(lgb.basic.LightGBMError):
|
||||
d1.add_features_from(d2)
|
||||
|
||||
|
||||
def test_add_features_throws_if_datasets_unconstructed():
|
||||
X1 = np.random.random((100, 1))
|
||||
X2 = np.random.random((100, 1))
|
||||
def test_add_features_throws_if_datasets_unconstructed(rng):
|
||||
X1 = rng.uniform(size=(100, 1))
|
||||
X2 = rng.uniform(size=(100, 1))
|
||||
with pytest.raises(ValueError):
|
||||
d1 = lgb.Dataset(X1)
|
||||
d2 = lgb.Dataset(X2)
|
||||
|
@ -293,8 +292,8 @@ def test_add_features_throws_if_datasets_unconstructed():
|
|||
d1.add_features_from(d2)
|
||||
|
||||
|
||||
def test_add_features_equal_data_on_alternating_used_unused(tmp_path):
|
||||
X = np.random.random((100, 5))
|
||||
def test_add_features_equal_data_on_alternating_used_unused(tmp_path, rng):
|
||||
X = rng.uniform(size=(100, 5))
|
||||
X[:, [1, 3]] = 0
|
||||
names = [f"col_{i}" for i in range(5)]
|
||||
for j in range(1, 5):
|
||||
|
@ -313,8 +312,8 @@ def test_add_features_equal_data_on_alternating_used_unused(tmp_path):
|
|||
assert dtxt == d1txt
|
||||
|
||||
|
||||
def test_add_features_same_booster_behaviour(tmp_path):
|
||||
X = np.random.random((100, 5))
|
||||
def test_add_features_same_booster_behaviour(tmp_path, rng):
|
||||
X = rng.uniform(size=(100, 5))
|
||||
X[:, [1, 3]] = 0
|
||||
names = [f"col_{i}" for i in range(5)]
|
||||
for j in range(1, 5):
|
||||
|
@ -322,7 +321,7 @@ def test_add_features_same_booster_behaviour(tmp_path):
|
|||
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
|
||||
d1.add_features_from(d2)
|
||||
d = lgb.Dataset(X, feature_name=names).construct()
|
||||
y = np.random.random(100)
|
||||
y = rng.uniform(size=(100,))
|
||||
d1.set_label(y)
|
||||
d.set_label(y)
|
||||
b1 = lgb.Booster(train_set=d1)
|
||||
|
@ -341,11 +340,11 @@ def test_add_features_same_booster_behaviour(tmp_path):
|
|||
assert dtxt == d1txt
|
||||
|
||||
|
||||
def test_add_features_from_different_sources():
|
||||
def test_add_features_from_different_sources(rng):
|
||||
pd = pytest.importorskip("pandas")
|
||||
n_row = 100
|
||||
n_col = 5
|
||||
X = np.random.random((n_row, n_col))
|
||||
X = rng.uniform(size=(n_row, n_col))
|
||||
xxs = [X, sparse.csr_matrix(X), pd.DataFrame(X)]
|
||||
names = [f"col_{i}" for i in range(n_col)]
|
||||
seq = _create_sequence_from_ndarray(X, 1, 30)
|
||||
|
@ -380,9 +379,9 @@ def test_add_features_from_different_sources():
|
|||
assert d1.feature_name == res_feature_names
|
||||
|
||||
|
||||
def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_features(capsys):
|
||||
def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_features(capsys, rng):
|
||||
arr_a = np.zeros((100, 1), dtype=np.float32)
|
||||
arr_b = np.random.normal(size=(100, 5))
|
||||
arr_b = rng.uniform(size=(100, 5))
|
||||
|
||||
dataset_a = lgb.Dataset(arr_a).construct()
|
||||
expected_msg = (
|
||||
|
@ -402,10 +401,10 @@ def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_feat
|
|||
assert dataset_a._handle.value == original_handle
|
||||
|
||||
|
||||
def test_cegb_affects_behavior(tmp_path):
|
||||
X = np.random.random((100, 5))
|
||||
def test_cegb_affects_behavior(tmp_path, rng):
|
||||
X = rng.uniform(size=(100, 5))
|
||||
X[:, [1, 3]] = 0
|
||||
y = np.random.random(100)
|
||||
y = rng.uniform(size=(100,))
|
||||
names = [f"col_{i}" for i in range(5)]
|
||||
ds = lgb.Dataset(X, feature_name=names).construct()
|
||||
ds.set_label(y)
|
||||
|
@ -433,10 +432,10 @@ def test_cegb_affects_behavior(tmp_path):
|
|||
assert basetxt != casetxt
|
||||
|
||||
|
||||
def test_cegb_scaling_equalities(tmp_path):
|
||||
X = np.random.random((100, 5))
|
||||
def test_cegb_scaling_equalities(tmp_path, rng):
|
||||
X = rng.uniform(size=(100, 5))
|
||||
X[:, [1, 3]] = 0
|
||||
y = np.random.random(100)
|
||||
y = rng.uniform(size=(100,))
|
||||
names = [f"col_{i}" for i in range(5)]
|
||||
ds = lgb.Dataset(X, feature_name=names).construct()
|
||||
ds.set_label(y)
|
||||
|
@ -573,10 +572,10 @@ def test_dataset_construction_overwrites_user_provided_metadata_fields():
|
|||
np_assert_array_equal(dtrain.get_field("weight"), expected_weight, strict=True)
|
||||
|
||||
|
||||
def test_dataset_construction_with_high_cardinality_categorical_succeeds():
|
||||
def test_dataset_construction_with_high_cardinality_categorical_succeeds(rng):
|
||||
pd = pytest.importorskip("pandas")
|
||||
X = pd.DataFrame({"x1": np.random.randint(0, 5_000, 10_000)})
|
||||
y = np.random.rand(10_000)
|
||||
X = pd.DataFrame({"x1": rng.integers(low=0, high=5_000, size=(10_000,))})
|
||||
y = rng.uniform(size=(10_000,))
|
||||
ds = lgb.Dataset(X, y, categorical_feature=["x1"])
|
||||
ds.construct()
|
||||
assert ds.num_data() == 10_000
|
||||
|
@ -663,11 +662,11 @@ def test_choose_param_value_objective(objective_alias):
|
|||
|
||||
@pytest.mark.parametrize("collection", ["1d_np", "2d_np", "pd_float", "pd_str", "1d_list", "2d_list"])
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
def test_list_to_1d_numpy(collection, dtype):
|
||||
def test_list_to_1d_numpy(collection, dtype, rng):
|
||||
collection2y = {
|
||||
"1d_np": np.random.rand(10),
|
||||
"2d_np": np.random.rand(10, 1),
|
||||
"pd_float": np.random.rand(10),
|
||||
"1d_np": rng.uniform(size=(10,)),
|
||||
"2d_np": rng.uniform(size=(10, 1)),
|
||||
"pd_float": rng.uniform(size=(10,)),
|
||||
"pd_str": ["a", "b"],
|
||||
"1d_list": [1] * 10,
|
||||
"2d_list": [[1], [2]],
|
||||
|
@ -696,7 +695,7 @@ def test_list_to_1d_numpy(collection, dtype):
|
|||
|
||||
|
||||
@pytest.mark.parametrize("init_score_type", ["array", "dataframe", "list"])
|
||||
def test_init_score_for_multiclass_classification(init_score_type):
|
||||
def test_init_score_for_multiclass_classification(init_score_type, rng):
|
||||
init_score = [[i * 10 + j for j in range(3)] for i in range(10)]
|
||||
if init_score_type == "array":
|
||||
init_score = np.array(init_score)
|
||||
|
@ -704,7 +703,7 @@ def test_init_score_for_multiclass_classification(init_score_type):
|
|||
if not PANDAS_INSTALLED:
|
||||
pytest.skip("Pandas is not installed.")
|
||||
init_score = pd_DataFrame(init_score)
|
||||
data = np.random.rand(10, 2)
|
||||
data = rng.uniform(size=(10, 2))
|
||||
ds = lgb.Dataset(data, init_score=init_score).construct()
|
||||
np.testing.assert_equal(ds.get_field("init_score"), init_score)
|
||||
np.testing.assert_equal(ds.init_score, init_score)
|
||||
|
@ -741,16 +740,20 @@ def test_param_aliases():
|
|||
|
||||
|
||||
def _bad_gradients(preds, _):
|
||||
return np.random.randn(len(preds) + 1), np.random.rand(len(preds) + 1)
|
||||
rng = np.random.default_rng()
|
||||
# "bad" = 1 element too many
|
||||
size = (len(preds) + 1,)
|
||||
return rng.standard_normal(size=size), rng.uniform(size=size)
|
||||
|
||||
|
||||
def _good_gradients(preds, _):
|
||||
return np.random.randn(*preds.shape), np.random.rand(*preds.shape)
|
||||
rng = np.random.default_rng()
|
||||
return rng.standard_normal(size=preds.shape), rng.uniform(size=preds.shape)
|
||||
|
||||
|
||||
def test_custom_objective_safety():
|
||||
def test_custom_objective_safety(rng):
|
||||
nrows = 100
|
||||
X = np.random.randn(nrows, 5)
|
||||
X = rng.standard_normal(size=(nrows, 5))
|
||||
y_binary = np.arange(nrows) % 2
|
||||
classes = [0, 1, 2]
|
||||
nclass = len(classes)
|
||||
|
@ -771,10 +774,13 @@ def test_custom_objective_safety():
|
|||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
||||
@pytest.mark.parametrize("feature_name", [["x1", "x2"], "auto"])
|
||||
def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name):
|
||||
def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name, rng):
|
||||
pd = pytest.importorskip("pandas")
|
||||
X = np.random.rand(10, 2).astype(dtype)
|
||||
df = pd.DataFrame(X)
|
||||
X = rng.uniform(size=(10, 2)).astype(dtype)
|
||||
# copy=False is necessary because starting with pandas 3.0, pd.DataFrame() creates
|
||||
# a copy of the input numpy array by default
|
||||
# ref: https://github.com/pandas-dev/pandas/issues/58913
|
||||
df = pd.DataFrame(X, copy=False)
|
||||
built_data = lgb.basic._data_from_pandas(
|
||||
data=df, feature_name=feature_name, categorical_feature="auto", pandas_categorical=None
|
||||
)[0]
|
||||
|
@ -784,9 +790,9 @@ def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name):
|
|||
|
||||
@pytest.mark.parametrize("feature_name", [["x1"], [42], "auto"])
|
||||
@pytest.mark.parametrize("categories", ["seen", "unseen"])
|
||||
def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, categories):
|
||||
def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, categories, rng):
|
||||
pd = pytest.importorskip("pandas")
|
||||
X = np.random.choice(["a", "b"], 100).reshape(-1, 1)
|
||||
X = rng.choice(a=["a", "b"], size=(100, 1))
|
||||
column_name = "a" if feature_name == "auto" else feature_name[0]
|
||||
df = pd.DataFrame(X.copy(), columns=[column_name], dtype="category")
|
||||
if categories == "seen":
|
||||
|
@ -814,15 +820,15 @@ def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, c
|
|||
|
||||
|
||||
@pytest.mark.parametrize("min_data_in_bin", [2, 10])
|
||||
def test_feature_num_bin(min_data_in_bin):
|
||||
def test_feature_num_bin(min_data_in_bin, rng):
|
||||
X = np.vstack(
|
||||
[
|
||||
np.random.rand(100),
|
||||
rng.uniform(size=(100,)),
|
||||
np.array([1, 2] * 50),
|
||||
np.array([0, 1, 2] * 33 + [0]),
|
||||
np.array([1, 2] * 49 + 2 * [np.nan]),
|
||||
np.zeros(100),
|
||||
np.random.choice([0, 1], 100),
|
||||
rng.choice(a=[0, 1], size=(100,)),
|
||||
]
|
||||
).T
|
||||
n_continuous = X.shape[1] - 1
|
||||
|
@ -862,9 +868,9 @@ def test_feature_num_bin(min_data_in_bin):
|
|||
ds.feature_num_bin(num_features)
|
||||
|
||||
|
||||
def test_feature_num_bin_with_max_bin_by_feature():
|
||||
X = np.random.rand(100, 3)
|
||||
max_bin_by_feature = np.random.randint(3, 30, size=X.shape[1])
|
||||
def test_feature_num_bin_with_max_bin_by_feature(rng):
|
||||
X = rng.uniform(size=(100, 3))
|
||||
max_bin_by_feature = rng.integers(low=3, high=30, size=X.shape[1])
|
||||
ds = lgb.Dataset(X, params={"max_bin_by_feature": max_bin_by_feature}).construct()
|
||||
actual_num_bins = [ds.feature_num_bin(i) for i in range(X.shape[1])]
|
||||
np.testing.assert_equal(actual_num_bins, max_bin_by_feature)
|
||||
|
@ -882,8 +888,62 @@ def test_set_leaf_output():
|
|||
np.testing.assert_allclose(bst.predict(X), y_pred + 1)
|
||||
|
||||
|
||||
def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Dataset():
|
||||
def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Dataset(rng):
|
||||
ds = lgb.Dataset(
|
||||
data=np.random.randn(100, 3),
|
||||
data=rng.standard_normal(size=(100, 3)),
|
||||
)
|
||||
assert ds.construct().feature_name == ["Column_0", "Column_1", "Column_2"]
|
||||
|
||||
|
||||
# NOTE: this intentionally contains values where num_leaves <, ==, and > (2^max_depth)
|
||||
@pytest.mark.parametrize(("max_depth", "num_leaves"), [(-1, 3), (-1, 50), (5, 3), (5, 31), (5, 32), (8, 3), (8, 31)])
|
||||
def test_max_depth_warning_is_not_raised_if_num_leaves_is_also_provided(capsys, num_leaves, max_depth):
|
||||
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
|
||||
lgb.Booster(
|
||||
params={
|
||||
"objective": "binary",
|
||||
"max_depth": max_depth,
|
||||
"num_leaves": num_leaves,
|
||||
"num_iterations": 1,
|
||||
"verbose": 0,
|
||||
},
|
||||
train_set=lgb.Dataset(X, label=y),
|
||||
)
|
||||
assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
|
||||
|
||||
|
||||
# NOTE: max_depth < 5 is significant here because the default for num_leaves=31. With max_depth=5,
|
||||
# a full depth-wise tree would have 2^5 = 32 leaves.
|
||||
@pytest.mark.parametrize("max_depth", [1, 2, 3, 4])
|
||||
def test_max_depth_warning_is_not_raised_if_max_depth_gt_1_and_lt_5_and_num_leaves_omitted(capsys, max_depth):
|
||||
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
|
||||
lgb.Booster(
|
||||
params={
|
||||
"objective": "binary",
|
||||
"max_depth": max_depth,
|
||||
"num_iterations": 1,
|
||||
"verbose": 0,
|
||||
},
|
||||
train_set=lgb.Dataset(X, label=y),
|
||||
)
|
||||
assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
|
||||
|
||||
|
||||
@pytest.mark.parametrize("max_depth", [5, 6, 7, 8, 9])
|
||||
def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(capsys, max_depth):
|
||||
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
|
||||
lgb.Booster(
|
||||
params={
|
||||
"objective": "binary",
|
||||
"max_depth": max_depth,
|
||||
"num_iterations": 1,
|
||||
"verbose": 0,
|
||||
},
|
||||
train_set=lgb.Dataset(X, label=y),
|
||||
)
|
||||
expected_warning = (
|
||||
f"[LightGBM] [Warning] Provided parameters constrain tree depth (max_depth={max_depth}) without explicitly "
|
||||
f"setting 'num_leaves'. This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<={2**max_depth}) "
|
||||
"in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity."
|
||||
)
|
||||
assert expected_warning in capsys.readouterr().out
|
||||
|
|
|
@ -550,7 +550,7 @@ def test_multi_class_error():
|
|||
@pytest.mark.skipif(
|
||||
getenv("TASK", "") == "cuda", reason="Skip due to differences in implementation details of CUDA version"
|
||||
)
|
||||
def test_auc_mu():
|
||||
def test_auc_mu(rng):
|
||||
# should give same result as binary auc for 2 classes
|
||||
X, y = load_digits(n_class=10, return_X_y=True)
|
||||
y_new = np.zeros((len(y)))
|
||||
|
@ -578,7 +578,7 @@ def test_auc_mu():
|
|||
assert results_auc_mu["training"]["auc_mu"][-1] == pytest.approx(0.5)
|
||||
# test that weighted data gives different auc_mu
|
||||
lgb_X = lgb.Dataset(X, label=y)
|
||||
lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.abs(np.random.normal(size=y.shape)))
|
||||
lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.abs(rng.standard_normal(size=y.shape)))
|
||||
results_unweighted = {}
|
||||
results_weighted = {}
|
||||
params = dict(params, num_classes=10, num_leaves=5)
|
||||
|
@ -1432,9 +1432,9 @@ def test_feature_name():
|
|||
assert feature_names == gbm.feature_name()
|
||||
|
||||
|
||||
def test_feature_name_with_non_ascii():
|
||||
X_train = np.random.normal(size=(100, 4))
|
||||
y_train = np.random.random(100)
|
||||
def test_feature_name_with_non_ascii(rng):
|
||||
X_train = rng.normal(size=(100, 4))
|
||||
y_train = rng.normal(size=(100,))
|
||||
# This has non-ascii strings.
|
||||
feature_names = ["F_零", "F_一", "F_二", "F_三"]
|
||||
params = {"verbose": -1}
|
||||
|
@ -1448,9 +1448,14 @@ def test_feature_name_with_non_ascii():
|
|||
assert feature_names == gbm2.feature_name()
|
||||
|
||||
|
||||
def test_parameters_are_loaded_from_model_file(tmp_path, capsys):
|
||||
X = np.hstack([np.random.rand(100, 1), np.random.randint(0, 5, (100, 2))])
|
||||
y = np.random.rand(100)
|
||||
def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng):
|
||||
X = np.hstack(
|
||||
[
|
||||
rng.uniform(size=(100, 1)),
|
||||
rng.integers(low=0, high=5, size=(100, 2)),
|
||||
]
|
||||
)
|
||||
y = rng.uniform(size=(100,))
|
||||
ds = lgb.Dataset(X, y)
|
||||
params = {
|
||||
"bagging_fraction": 0.8,
|
||||
|
@ -1702,29 +1707,29 @@ def test_all_expected_params_are_written_out_to_model_text(tmp_path):
|
|||
assert param_str in model_txt_from_memory
|
||||
|
||||
|
||||
def test_pandas_categorical():
|
||||
# why fixed seed?
|
||||
# sometimes there is no difference how cols are treated (cat or not cat)
|
||||
def test_pandas_categorical(rng_fixed_seed):
|
||||
pd = pytest.importorskip("pandas")
|
||||
np.random.seed(42) # sometimes there is no difference how cols are treated (cat or not cat)
|
||||
X = pd.DataFrame(
|
||||
{
|
||||
"A": np.random.permutation(["a", "b", "c", "d"] * 75), # str
|
||||
"B": np.random.permutation([1, 2, 3] * 100), # int
|
||||
"C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float
|
||||
"D": np.random.permutation([True, False] * 150), # bool
|
||||
"E": pd.Categorical(np.random.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
|
||||
"A": rng_fixed_seed.permutation(["a", "b", "c", "d"] * 75), # str
|
||||
"B": rng_fixed_seed.permutation([1, 2, 3] * 100), # int
|
||||
"C": rng_fixed_seed.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float
|
||||
"D": rng_fixed_seed.permutation([True, False] * 150), # bool
|
||||
"E": pd.Categorical(rng_fixed_seed.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
|
||||
}
|
||||
) # str and ordered categorical
|
||||
y = np.random.permutation([0, 1] * 150)
|
||||
y = rng_fixed_seed.permutation([0, 1] * 150)
|
||||
X_test = pd.DataFrame(
|
||||
{
|
||||
"A": np.random.permutation(["a", "b", "e"] * 20), # unseen category
|
||||
"B": np.random.permutation([1, 3] * 30),
|
||||
"C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15),
|
||||
"D": np.random.permutation([True, False] * 30),
|
||||
"E": pd.Categorical(np.random.permutation(["z", "y"] * 30), ordered=True),
|
||||
"A": rng_fixed_seed.permutation(["a", "b", "e"] * 20), # unseen category
|
||||
"B": rng_fixed_seed.permutation([1, 3] * 30),
|
||||
"C": rng_fixed_seed.permutation([0.1, -0.1, 0.2, 0.2] * 15),
|
||||
"D": rng_fixed_seed.permutation([True, False] * 30),
|
||||
"E": pd.Categorical(rng_fixed_seed.permutation(["z", "y"] * 30), ordered=True),
|
||||
}
|
||||
)
|
||||
np.random.seed() # reset seed
|
||||
cat_cols_actual = ["A", "B", "C", "D"]
|
||||
cat_cols_to_store = cat_cols_actual + ["E"]
|
||||
X[cat_cols_actual] = X[cat_cols_actual].astype("category")
|
||||
|
@ -1786,21 +1791,21 @@ def test_pandas_categorical():
|
|||
assert gbm7.pandas_categorical == cat_values
|
||||
|
||||
|
||||
def test_pandas_sparse():
|
||||
def test_pandas_sparse(rng):
|
||||
pd = pytest.importorskip("pandas")
|
||||
X = pd.DataFrame(
|
||||
{
|
||||
"A": pd.arrays.SparseArray(np.random.permutation([0, 1, 2] * 100)),
|
||||
"B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
|
||||
"C": pd.arrays.SparseArray(np.random.permutation([True, False] * 150)),
|
||||
"A": pd.arrays.SparseArray(rng.permutation([0, 1, 2] * 100)),
|
||||
"B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
|
||||
"C": pd.arrays.SparseArray(rng.permutation([True, False] * 150)),
|
||||
}
|
||||
)
|
||||
y = pd.Series(pd.arrays.SparseArray(np.random.permutation([0, 1] * 150)))
|
||||
y = pd.Series(pd.arrays.SparseArray(rng.permutation([0, 1] * 150)))
|
||||
X_test = pd.DataFrame(
|
||||
{
|
||||
"A": pd.arrays.SparseArray(np.random.permutation([0, 2] * 30)),
|
||||
"B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
|
||||
"C": pd.arrays.SparseArray(np.random.permutation([True, False] * 30)),
|
||||
"A": pd.arrays.SparseArray(rng.permutation([0, 2] * 30)),
|
||||
"B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
|
||||
"C": pd.arrays.SparseArray(rng.permutation([True, False] * 30)),
|
||||
}
|
||||
)
|
||||
for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]):
|
||||
|
@@ -1816,9 +1821,9 @@ def test_pandas_sparse():
     np.testing.assert_allclose(pred_sparse, pred_dense)
 
 
-def test_reference_chain():
-    X = np.random.normal(size=(100, 2))
-    y = np.random.normal(size=100)
+def test_reference_chain(rng):
+    X = rng.normal(size=(100, 2))
+    y = rng.normal(size=(100,))
     tmp_dat = lgb.Dataset(X, y)
     # take subsets and train
     tmp_dat_train = tmp_dat.subset(np.arange(80))
@@ -1940,28 +1945,28 @@ def test_contribs_sparse_multiclass():
     np.testing.assert_allclose(contribs_csc_array, contribs_dense)
 
 
-@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason="not enough RAM")
-def test_int32_max_sparse_contribs():
-    params = {"objective": "binary"}
-    train_features = np.random.rand(100, 1000)
-    train_targets = [0] * 50 + [1] * 50
-    lgb_train = lgb.Dataset(train_features, train_targets)
-    gbm = lgb.train(params, lgb_train, num_boost_round=2)
-    csr_input_shape = (3000000, 1000)
-    test_features = csr_matrix(csr_input_shape)
-    for i in range(0, csr_input_shape[0], csr_input_shape[0] // 6):
-        for j in range(0, 1000, 100):
-            test_features[i, j] = random.random()
-    y_pred_csr = gbm.predict(test_features, pred_contrib=True)
-    # Note there is an extra column added to the output for the expected value
-    csr_output_shape = (csr_input_shape[0], csr_input_shape[1] + 1)
-    assert y_pred_csr.shape == csr_output_shape
-    y_pred_csc = gbm.predict(test_features.tocsc(), pred_contrib=True)
-    # Note output CSC shape should be same as CSR output shape
-    assert y_pred_csc.shape == csr_output_shape
+# @pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason="not enough RAM")
+# def test_int32_max_sparse_contribs(rng):
+#     params = {"objective": "binary"}
+#     train_features = rng.uniform(size=(100, 1000))
+#     train_targets = [0] * 50 + [1] * 50
+#     lgb_train = lgb.Dataset(train_features, train_targets)
+#     gbm = lgb.train(params, lgb_train, num_boost_round=2)
+#     csr_input_shape = (3000000, 1000)
+#     test_features = csr_matrix(csr_input_shape)
+#     for i in range(0, csr_input_shape[0], csr_input_shape[0] // 6):
+#         for j in range(0, 1000, 100):
+#             test_features[i, j] = random.random()
+#     y_pred_csr = gbm.predict(test_features, pred_contrib=True)
+#     # Note there is an extra column added to the output for the expected value
+#     csr_output_shape = (csr_input_shape[0], csr_input_shape[1] + 1)
+#     assert y_pred_csr.shape == csr_output_shape
+#     y_pred_csc = gbm.predict(test_features.tocsc(), pred_contrib=True)
+#     # Note output CSC shape should be same as CSR output shape
+#     assert y_pred_csc.shape == csr_output_shape
 
 
-def test_sliced_data():
+def test_sliced_data(rng):
     def train_and_get_predictions(features, labels):
         dataset = lgb.Dataset(features, label=labels)
         lgb_params = {
@@ -1977,7 +1982,7 @@ def test_sliced_data():
         return gbm.predict(features)
 
     num_samples = 100
-    features = np.random.rand(num_samples, 5)
+    features = rng.uniform(size=(num_samples, 5))
     positive_samples = int(num_samples * 0.25)
     labels = np.append(
         np.ones(positive_samples, dtype=np.float32), np.zeros(num_samples - positive_samples, dtype=np.float32)
@@ -2011,13 +2016,13 @@ def test_sliced_data():
     np.testing.assert_allclose(origin_pred, sliced_pred)
 
 
-def test_init_with_subset():
-    data = np.random.random((50, 2))
+def test_init_with_subset(rng):
+    data = rng.uniform(size=(50, 2))
     y = [1] * 25 + [0] * 25
     lgb_train = lgb.Dataset(data, y, free_raw_data=False)
-    subset_index_1 = np.random.choice(np.arange(50), 30, replace=False)
+    subset_index_1 = rng.choice(a=np.arange(50), size=30, replace=False)
     subset_data_1 = lgb_train.subset(subset_index_1)
-    subset_index_2 = np.random.choice(np.arange(50), 20, replace=False)
+    subset_index_2 = rng.choice(a=np.arange(50), size=20, replace=False)
     subset_data_2 = lgb_train.subset(subset_index_2)
     params = {"objective": "binary", "verbose": -1}
     init_gbm = lgb.train(params=params, train_set=subset_data_1, num_boost_round=10, keep_training_booster=True)
@@ -2037,9 +2042,9 @@ def test_init_with_subset():
     assert subset_data_4.get_data() == "lgb_train_data.bin"
 
 
-def test_training_on_constructed_subset_without_params():
-    X = np.random.random((100, 10))
-    y = np.random.random(100)
+def test_training_on_constructed_subset_without_params(rng):
+    X = rng.uniform(size=(100, 10))
+    y = rng.uniform(size=(100,))
     lgb_data = lgb.Dataset(X, y)
     subset_indices = [1, 2, 3, 4]
     subset = lgb_data.subset(subset_indices).construct()
@@ -2051,9 +2056,10 @@ def test_training_on_constructed_subset_without_params():
 
 def generate_trainset_for_monotone_constraints_tests(x3_to_category=True):
     number_of_dpoints = 3000
-    x1_positively_correlated_with_y = np.random.random(size=number_of_dpoints)
-    x2_negatively_correlated_with_y = np.random.random(size=number_of_dpoints)
-    x3_negatively_correlated_with_y = np.random.random(size=number_of_dpoints)
+    rng = np.random.default_rng()
+    x1_positively_correlated_with_y = rng.uniform(size=number_of_dpoints)
+    x2_negatively_correlated_with_y = rng.uniform(size=number_of_dpoints)
+    x3_negatively_correlated_with_y = rng.uniform(size=number_of_dpoints)
     x = np.column_stack(
         (
             x1_positively_correlated_with_y,
@@ -2062,8 +2068,8 @@ def generate_trainset_for_monotone_constraints_tests(x3_to_category=True):
         )
     )
 
-    zs = np.random.normal(loc=0.0, scale=0.01, size=number_of_dpoints)
-    scales = 10.0 * (np.random.random(6) + 0.5)
+    zs = rng.normal(loc=0.0, scale=0.01, size=number_of_dpoints)
+    scales = 10.0 * (rng.uniform(size=6) + 0.5)
     y = (
         scales[0] * x1_positively_correlated_with_y
         + np.sin(scales[1] * np.pi * x1_positively_correlated_with_y)
@@ -2265,9 +2271,8 @@ def test_max_bin_by_feature():
     assert len(np.unique(est.predict(X))) == 3
 
 
-def test_small_max_bin():
-    np.random.seed(0)
-    y = np.random.choice([0, 1], 100)
+def test_small_max_bin(rng_fixed_seed):
+    y = rng_fixed_seed.choice([0, 1], 100)
     x = np.ones((100, 1))
     x[:30, 0] = -1
     x[60:, 0] = 2
@@ -2278,7 +2283,6 @@ def test_small_max_bin():
     params["max_bin"] = 3
     lgb_x = lgb.Dataset(x, label=y)
     lgb.train(params, lgb_x, num_boost_round=5)
-    np.random.seed()  # reset seed
 
 
 def test_refit():
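The two hunks above also drop the global `np.random.seed(0)` / `np.random.seed()  # reset seed` pair. The snippet below (an illustration only, not part of this diff) shows why a local Generator is the safer pattern: seeding the legacy global state leaks into every later `np.random` call, while a `Generator` keeps its state inside the object the test owns.

# illustration only -- global seeding vs. a local Generator
import numpy as np

np.random.seed(0)                # mutates process-wide state; other tests see it until it is reset
a = np.random.random(3)

rng = np.random.default_rng(0)   # state lives only in this object
b = rng.random(3)                # no effect on np.random or on other tests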
@@ -2293,14 +2297,14 @@ def test_refit():
     assert err_pred > new_err_pred
 
 
-def test_refit_dataset_params():
+def test_refit_dataset_params(rng):
     # check refit accepts dataset_params
     X, y = load_breast_cancer(return_X_y=True)
     lgb_train = lgb.Dataset(X, y, init_score=np.zeros(y.size))
     train_params = {"objective": "binary", "verbose": -1, "seed": 123}
     gbm = lgb.train(train_params, lgb_train, num_boost_round=10)
     non_weight_err_pred = log_loss(y, gbm.predict(X))
-    refit_weight = np.random.rand(y.shape[0])
+    refit_weight = rng.uniform(size=(y.shape[0],))
     dataset_params = {
         "max_bin": 260,
         "min_data_in_bin": 5,
@@ -3011,7 +3015,7 @@ def test_model_size():
 @pytest.mark.skipif(
     getenv("TASK", "") == "cuda", reason="Skip due to differences in implementation details of CUDA version"
 )
-def test_get_split_value_histogram():
+def test_get_split_value_histogram(rng_fixed_seed):
     X, y = make_synthetic_regression()
     X = np.repeat(X, 3, axis=0)
     y = np.repeat(y, 3, axis=0)
@@ -3351,7 +3355,7 @@ def test_binning_same_sign():
     assert predicted[1] == pytest.approx(predicted[2])
 
 
-def test_dataset_update_params():
+def test_dataset_update_params(rng):
     default_params = {
         "max_bin": 100,
         "max_bin_by_feature": [20, 10],
@@ -3400,8 +3404,8 @@ def test_dataset_update_params():
         "linear_tree": True,
         "precise_float_parser": False,
     }
-    X = np.random.random((100, 2))
-    y = np.random.random(100)
+    X = rng.uniform(size=(100, 2))
+    y = rng.uniform(size=(100,))
 
     # decreasing without freeing raw data is allowed
     lgb_data = lgb.Dataset(X, y, params=default_params, free_raw_data=False).construct()
@@ -3443,12 +3447,12 @@ def test_dataset_update_params():
         lgb.train(new_params, lgb_data, num_boost_round=3)
 
 
-def test_dataset_params_with_reference():
+def test_dataset_params_with_reference(rng):
     default_params = {"max_bin": 100}
-    X = np.random.random((100, 2))
-    y = np.random.random(100)
-    X_val = np.random.random((100, 2))
-    y_val = np.random.random(100)
+    X = rng.uniform(size=(100, 2))
+    y = rng.uniform(size=(100,))
+    X_val = rng.uniform(size=(100, 2))
+    y_val = rng.uniform(size=(100,))
     lgb_train = lgb.Dataset(X, y, params=default_params, free_raw_data=False).construct()
     lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train, free_raw_data=False).construct()
     assert lgb_train.get_params() == default_params
@@ -3486,7 +3490,7 @@ def test_path_smoothing():
     assert err < err_new
 
 
-def test_trees_to_dataframe():
+def test_trees_to_dataframe(rng):
     pytest.importorskip("pandas")
 
     def _imptcs_to_numpy(X, impcts_dict):
@@ -3516,7 +3520,7 @@ def test_trees_to_dataframe():
 
     # test edge case with one leaf
     X = np.ones((10, 2))
-    y = np.random.rand(10)
+    y = rng.uniform(size=(10,))
     data = lgb.Dataset(X, label=y)
     bst = lgb.train({"objective": "binary", "verbose": -1}, data, num_trees)
     tree_df = bst.trees_to_dataframe()
@@ -3574,11 +3578,10 @@ def test_interaction_constraints():
     )
 
 
-def test_linear_trees_num_threads():
+def test_linear_trees_num_threads(rng_fixed_seed):
     # check that number of threads does not affect result
-    np.random.seed(0)
     x = np.arange(0, 1000, 0.1)
-    y = 2 * x + np.random.normal(0, 0.1, len(x))
+    y = 2 * x + rng_fixed_seed.normal(loc=0, scale=0.1, size=(len(x),))
     x = x[:, np.newaxis]
     lgb_train = lgb.Dataset(x, label=y)
     params = {"verbose": -1, "objective": "regression", "seed": 0, "linear_tree": True, "num_threads": 2}
@@ -3590,11 +3593,10 @@ def test_linear_trees_num_threads():
     np.testing.assert_allclose(pred1, pred2)
 
 
-def test_linear_trees(tmp_path):
+def test_linear_trees(tmp_path, rng_fixed_seed):
     # check that setting linear_tree=True fits better than ordinary trees when data has linear relationship
-    np.random.seed(0)
     x = np.arange(0, 100, 0.1)
-    y = 2 * x + np.random.normal(0, 0.1, len(x))
+    y = 2 * x + rng_fixed_seed.normal(0, 0.1, len(x))
     x = x[:, np.newaxis]
     lgb_train = lgb.Dataset(x, label=y)
     params = {"verbose": -1, "metric": "mse", "seed": 0, "num_leaves": 2}
@@ -4099,21 +4101,20 @@ def test_record_evaluation_with_cv(train_metric):
            np.testing.assert_allclose(cv_hist[key], eval_result[dataset][f"{metric}-{agg}"])
 
 
-def test_pandas_with_numpy_regular_dtypes():
+def test_pandas_with_numpy_regular_dtypes(rng_fixed_seed):
     pd = pytest.importorskip("pandas")
     uints = ["uint8", "uint16", "uint32", "uint64"]
     ints = ["int8", "int16", "int32", "int64"]
     bool_and_floats = ["bool", "float16", "float32", "float64"]
-    rng = np.random.RandomState(42)
 
     n_samples = 100
     # data as float64
     df = pd.DataFrame(
         {
-            "x1": rng.randint(0, 2, n_samples),
-            "x2": rng.randint(1, 3, n_samples),
-            "x3": 10 * rng.randint(1, 3, n_samples),
-            "x4": 100 * rng.randint(1, 3, n_samples),
+            "x1": rng_fixed_seed.integers(low=0, high=2, size=n_samples),
+            "x2": rng_fixed_seed.integers(low=1, high=3, size=n_samples),
+            "x3": 10 * rng_fixed_seed.integers(low=1, high=3, size=n_samples),
+            "x4": 100 * rng_fixed_seed.integers(low=1, high=3, size=n_samples),
         }
     )
     df = df.astype(np.float64)
@@ -4139,15 +4140,14 @@ def test_pandas_with_numpy_regular_dtypes():
     np.testing.assert_allclose(preds, preds2)
 
 
-def test_pandas_nullable_dtypes():
+def test_pandas_nullable_dtypes(rng_fixed_seed):
     pd = pytest.importorskip("pandas")
-    rng = np.random.RandomState(0)
     df = pd.DataFrame(
         {
-            "x1": rng.randint(1, 3, size=100),
+            "x1": rng_fixed_seed.integers(low=1, high=3, size=100),
             "x2": np.linspace(-1, 1, 100),
-            "x3": pd.arrays.SparseArray(rng.randint(0, 11, size=100)),
-            "x4": rng.rand(100) < 0.5,
+            "x3": pd.arrays.SparseArray(rng_fixed_seed.integers(low=0, high=11, size=100)),
+            "x4": rng_fixed_seed.uniform(size=(100,)) < 0.5,
         }
     )
     # introduce some missing values
@@ -4219,7 +4219,7 @@ def test_boost_from_average_with_single_leaf_trees():
     assert y.min() <= mean_preds <= y.max()
 
 
-def test_cegb_split_buffer_clean():
+def test_cegb_split_buffer_clean(rng_fixed_seed):
     # modified from https://github.com/microsoft/LightGBM/issues/3679#issuecomment-938652811
     # and https://github.com/microsoft/LightGBM/pull/5087
     # test that the ``splits_per_leaf_`` of CEGB is cleaned before training a new tree
@@ -4228,11 +4228,9 @@ def test_cegb_split_buffer_clean():
     # Check failed: (best_split_info.left_count) > (0)
 
     R, C = 1000, 100
-    seed = 29
-    np.random.seed(seed)
-    data = np.random.randn(R, C)
+    data = rng_fixed_seed.standard_normal(size=(R, C))
     for i in range(1, C):
-        data[i] += data[0] * np.random.randn()
+        data[i] += data[0] * rng_fixed_seed.standard_normal()
 
     N = int(0.8 * len(data))
     train_data = data[:N]
@@ -340,7 +340,7 @@ def test_grid_search():
     assert evals_result == grid.best_estimator_.evals_result_
 
 
-def test_random_search():
+def test_random_search(rng):
     X, y = load_iris(return_X_y=True)
     y = y.astype(str)  # utilize label encoder at it's max power
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
@@ -349,8 +349,8 @@ def test_random_search():
     params = {"subsample": 0.8, "subsample_freq": 1}
     param_dist = {
         "boosting_type": ["rf", "gbdt"],
-        "n_estimators": [np.random.randint(low=3, high=10) for i in range(n_iter)],
-        "reg_alpha": [np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)],
+        "n_estimators": rng.integers(low=3, high=10, size=(n_iter,)).tolist(),
+        "reg_alpha": rng.uniform(low=0.01, high=0.06, size=(n_iter,)).tolist(),
     }
     fit_params = {"eval_set": [(X_val, y_val)], "eval_metric": constant_metric, "callbacks": [lgb.early_stopping(2)]}
     rand = RandomizedSearchCV(
@@ -556,29 +556,29 @@ def test_feature_importances_type():
     assert importance_split_top1 != importance_gain_top1
 
 
-def test_pandas_categorical():
+# why fixed seed?
+# sometimes there is no difference how cols are treated (cat or not cat)
+def test_pandas_categorical(rng_fixed_seed):
     pd = pytest.importorskip("pandas")
-    np.random.seed(42)  # sometimes there is no difference how cols are treated (cat or not cat)
     X = pd.DataFrame(
         {
-            "A": np.random.permutation(["a", "b", "c", "d"] * 75),  # str
-            "B": np.random.permutation([1, 2, 3] * 100),  # int
-            "C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60),  # float
-            "D": np.random.permutation([True, False] * 150),  # bool
-            "E": pd.Categorical(np.random.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
+            "A": rng_fixed_seed.permutation(["a", "b", "c", "d"] * 75),  # str
+            "B": rng_fixed_seed.permutation([1, 2, 3] * 100),  # int
+            "C": rng_fixed_seed.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60),  # float
+            "D": rng_fixed_seed.permutation([True, False] * 150),  # bool
+            "E": pd.Categorical(rng_fixed_seed.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
         }
     )  # str and ordered categorical
-    y = np.random.permutation([0, 1] * 150)
+    y = rng_fixed_seed.permutation([0, 1] * 150)
     X_test = pd.DataFrame(
         {
-            "A": np.random.permutation(["a", "b", "e"] * 20),  # unseen category
-            "B": np.random.permutation([1, 3] * 30),
-            "C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15),
-            "D": np.random.permutation([True, False] * 30),
-            "E": pd.Categorical(np.random.permutation(["z", "y"] * 30), ordered=True),
+            "A": rng_fixed_seed.permutation(["a", "b", "e"] * 20),  # unseen category
+            "B": rng_fixed_seed.permutation([1, 3] * 30),
+            "C": rng_fixed_seed.permutation([0.1, -0.1, 0.2, 0.2] * 15),
+            "D": rng_fixed_seed.permutation([True, False] * 30),
+            "E": pd.Categorical(rng_fixed_seed.permutation(["z", "y"] * 30), ordered=True),
         }
     )
-    np.random.seed()  # reset seed
     cat_cols_actual = ["A", "B", "C", "D"]
     cat_cols_to_store = cat_cols_actual + ["E"]
     X[cat_cols_actual] = X[cat_cols_actual].astype("category")
@@ -620,21 +620,21 @@ def test_pandas_categorical():
     assert gbm6.booster_.pandas_categorical == cat_values
 
 
-def test_pandas_sparse():
+def test_pandas_sparse(rng):
     pd = pytest.importorskip("pandas")
     X = pd.DataFrame(
         {
-            "A": pd.arrays.SparseArray(np.random.permutation([0, 1, 2] * 100)),
-            "B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
-            "C": pd.arrays.SparseArray(np.random.permutation([True, False] * 150)),
+            "A": pd.arrays.SparseArray(rng.permutation([0, 1, 2] * 100)),
+            "B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
+            "C": pd.arrays.SparseArray(rng.permutation([True, False] * 150)),
         }
     )
-    y = pd.Series(pd.arrays.SparseArray(np.random.permutation([0, 1] * 150)))
+    y = pd.Series(pd.arrays.SparseArray(rng.permutation([0, 1] * 150)))
     X_test = pd.DataFrame(
         {
-            "A": pd.arrays.SparseArray(np.random.permutation([0, 2] * 30)),
-            "B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
-            "C": pd.arrays.SparseArray(np.random.permutation([True, False] * 30)),
+            "A": pd.arrays.SparseArray(rng.permutation([0, 2] * 30)),
+            "B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
+            "C": pd.arrays.SparseArray(rng.permutation([True, False] * 30)),
         }
     )
     for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]):
@@ -1073,11 +1073,11 @@ def test_multiple_eval_metrics():
     assert "binary_logloss" in gbm.evals_result_["training"]
 
 
-def test_nan_handle():
+def test_nan_handle(rng):
     nrows = 100
     ncols = 10
-    X = np.random.randn(nrows, ncols)
-    y = np.random.randn(nrows) + np.full(nrows, 1e30)
+    X = rng.standard_normal(size=(nrows, ncols))
+    y = rng.standard_normal(size=(nrows,)) + np.full(nrows, 1e30)
     weight = np.zeros(nrows)
     params = {"n_estimators": 20, "verbose": -1}
     params_fit = {"X": X, "y": y, "sample_weight": weight, "eval_set": (X, y), "callbacks": [lgb.early_stopping(5)]}
@@ -1276,6 +1276,20 @@ def test_check_is_fitted():
         check_is_fitted(model)
 
 
+@pytest.mark.parametrize("estimator_class", [lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker])
+@pytest.mark.parametrize("max_depth", [3, 4, 5, 8])
+def test_max_depth_warning_is_never_raised(capsys, estimator_class, max_depth):
+    X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
+    params = {"n_estimators": 1, "max_depth": max_depth, "verbose": 0}
+    if estimator_class is lgb.LGBMModel:
+        estimator_class(**{**params, "objective": "binary"}).fit(X, y)
+    elif estimator_class is lgb.LGBMRanker:
+        estimator_class(**params).fit(X, y, group=np.ones(X.shape[0]))
+    else:
+        estimator_class(**params).fit(X, y)
+    assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
+
+
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
 def test_sklearn_integration(estimator, check):
     estimator.set_params(min_child_samples=1, min_data_in_bin=1)
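The new `test_max_depth_warning_is_never_raised` above leans on pytest's built-in `capsys` fixture to assert that no depth-constraint warning is printed during fitting. A minimal standalone illustration of that capture pattern (the function and message here are made up, not taken from LightGBM) is:

# illustration of the capsys pattern used above; names are hypothetical
def emit_warning():
    print("Provided parameters constrain tree depth")


def test_warning_is_printed(capsys):
    emit_warning()
    captured = capsys.readouterr()   # grabs everything printed so far
    assert "constrain tree depth" in captured.out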
@@ -1410,13 +1424,13 @@ def test_validate_features(task):
 @pytest.mark.parametrize("X_type", ["dt_DataTable", "list2d", "numpy", "scipy_csc", "scipy_csr", "pd_DataFrame"])
 @pytest.mark.parametrize("y_type", ["list1d", "numpy", "pd_Series", "pd_DataFrame"])
 @pytest.mark.parametrize("task", ["binary-classification", "multiclass-classification", "regression"])
-def test_classification_and_regression_minimally_work_with_all_all_accepted_data_types(X_type, y_type, task):
+def test_classification_and_regression_minimally_work_with_all_all_accepted_data_types(X_type, y_type, task, rng):
     if any(t.startswith("pd_") for t in [X_type, y_type]) and not PANDAS_INSTALLED:
         pytest.skip("pandas is not installed")
     if any(t.startswith("dt_") for t in [X_type, y_type]) and not DATATABLE_INSTALLED:
         pytest.skip("datatable is not installed")
     X, y, g = _create_data(task, n_samples=2_000)
-    weights = np.abs(np.random.randn(y.shape[0]))
+    weights = np.abs(rng.standard_normal(size=(y.shape[0],)))
 
     if task == "binary-classification" or task == "regression":
         init_score = np.full_like(y, np.mean(y))
@@ -1487,13 +1501,13 @@ def test_classification_and_regression_minimally_work_with_all_all_accepted_data
 @pytest.mark.parametrize("X_type", ["dt_DataTable", "list2d", "numpy", "scipy_csc", "scipy_csr", "pd_DataFrame"])
 @pytest.mark.parametrize("y_type", ["list1d", "numpy", "pd_DataFrame", "pd_Series"])
 @pytest.mark.parametrize("g_type", ["list1d_float", "list1d_int", "numpy", "pd_Series"])
-def test_ranking_minimally_works_with_all_all_accepted_data_types(X_type, y_type, g_type):
+def test_ranking_minimally_works_with_all_all_accepted_data_types(X_type, y_type, g_type, rng):
     if any(t.startswith("pd_") for t in [X_type, y_type, g_type]) and not PANDAS_INSTALLED:
         pytest.skip("pandas is not installed")
     if any(t.startswith("dt_") for t in [X_type, y_type, g_type]) and not DATATABLE_INSTALLED:
         pytest.skip("datatable is not installed")
     X, y, g = _create_data(task="ranking", n_samples=1_000)
-    weights = np.abs(np.random.randn(y.shape[0]))
+    weights = np.abs(rng.standard_normal(size=(y.shape[0],)))
     init_score = np.full_like(y, np.mean(y))
     X_valid = X * 2