Mirror of https://github.com/microsoft/LightGBM.git

Merge remote-tracking branch 'origin/master' into cuda/metric-binary

Commit 9f13ccc8ec

.ci/setup.sh: 12 lines changed
@@ -42,13 +42,13 @@ else # Linux
         iputils-ping \
         jq \
         libcurl4 \
-        libicu66 \
-        libssl1.1 \
+        libicu-dev \
+        libssl-dev \
         libunwind8 \
         locales \
         netcat \
         unzip \
-        zip
+        zip || exit -1
     if [[ $COMPILER == "clang" ]]; then
         sudo apt-get install --no-install-recommends -y \
             clang \
@@ -60,6 +60,10 @@ else # Linux
         sudo locale-gen ${LANG}
         sudo update-locale
     fi
+    if [[ $TASK == "r-package" ]] && [[ $COMPILER == "clang" ]]; then
+        sudo apt-get install --no-install-recommends -y \
+            libomp-dev
+    fi
    if [[ $TASK == "mpi" ]]; then
        if [[ $IN_UBUNTU_LATEST_CONTAINER == "true" ]]; then
            sudo apt-get update
@@ -75,10 +79,10 @@ else # Linux
    fi
    if [[ $TASK == "gpu" ]]; then
        if [[ $IN_UBUNTU_LATEST_CONTAINER == "true" ]]; then
-            sudo add-apt-repository ppa:mhier/libboost-latest -y
            sudo apt-get update
            sudo apt-get install --no-install-recommends -y \
                libboost1.74-dev \
                libboost-filesystem1.74-dev \
                ocl-icd-opencl-dev
        else  # in manylinux image
            sudo yum update -y

@@ -17,7 +17,7 @@ env:
 jobs:
   check-links:
     timeout-minutes: 60
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout repository
        uses: actions/checkout@v3

@@ -9,7 +9,7 @@ on:

 jobs:
   noResponse:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - uses: lee-dohm/no-response@v0.5.0
        with:

@@ -9,7 +9,7 @@ on:
 jobs:
   all-successful:
     timeout-minutes: 120
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout repository
        uses: actions/checkout@v3

@@ -9,7 +9,7 @@ jobs:
     name: r-configure
     timeout-minutes: 60
     runs-on: ubuntu-latest
-    container: "ubuntu:20.04"
+    container: "ubuntu:22.04"
     steps:
       - name: Install essential software before checkout
        run: |

@@ -33,22 +33,22 @@ jobs:
           ################
           # CMake builds #
           ################
-          - os: ubuntu-latest
+          - os: ubuntu-22.04
             task: r-package
             compiler: gcc
             r_version: 3.6
             build_type: cmake
-          - os: ubuntu-latest
+          - os: ubuntu-22.04
             task: r-package
             compiler: gcc
             r_version: 4.2
             build_type: cmake
-          - os: ubuntu-latest
+          - os: ubuntu-22.04
             task: r-package
             compiler: clang
             r_version: 3.6
             build_type: cmake
-          - os: ubuntu-latest
+          - os: ubuntu-22.04
             task: r-package
             compiler: clang
             r_version: 4.2
@@ -114,7 +114,7 @@ jobs:
             toolchain: MSYS
             r_version: 4.2
             build_type: cran
-          - os: ubuntu-latest
+          - os: ubuntu-22.04
             task: r-package
             compiler: gcc
             r_version: 4.2
@@ -127,7 +127,7 @@ jobs:
           ################
           # Other checks #
           ################
-          - os: ubuntu-latest
+          - os: ubuntu-22.04
             task: r-rchk
             compiler: gcc
             r_version: 4.2
@@ -151,7 +151,7 @@ jobs:
           CTAN_MIRROR: https://ctan.math.illinois.edu/systems/win32/miktex
           TINYTEX_INSTALLER: TinyTeX
       - name: Setup and run tests on Linux and macOS
-        if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest'
+        if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-22.04'
         shell: bash
         run: |
           export TASK="${{ matrix.task }}"
@@ -159,7 +159,7 @@ jobs:
           export GITHUB_ACTIONS="true"
           if [[ "${{ matrix.os }}" == "macOS-latest" ]]; then
             export OS_NAME="macos"
-          elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
+          elif [[ "${{ matrix.os }}" == "ubuntu-22.04" ]]; then
             export OS_NAME="linux"
           fi
           export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
@@ -181,9 +181,9 @@ jobs:
         $env:TASK = "${{ matrix.task }}"
         & "$env:GITHUB_WORKSPACE/.ci/test_windows.ps1"
   test-r-sanitizers:
-    name: r-sanitizers (ubuntu-latest, R-devel, ${{ matrix.compiler }} ASAN/UBSAN)
+    name: r-sanitizers (ubuntu-22.04, R-devel, ${{ matrix.compiler }} ASAN/UBSAN)
     timeout-minutes: 60
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     container: wch1/r-debug
     strategy:
       fail-fast: false
@@ -219,7 +219,7 @@ jobs:
   test-r-debian-clang:
     name: r-package (debian, R-devel, clang)
     timeout-minutes: 60
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     container: rhub/debian-clang-devel
     steps:
       - name: Install Git before checkout
@@ -248,7 +248,7 @@ jobs:
         fi
   all-successful:
     # https://github.community/t/is-it-possible-to-require-all-github-actions-tasks-to-pass-without-enumerating-them/117957/4?u=graingert
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [test, test-r-sanitizers, test-r-debian-clang]
     steps:
       - name: Note that all tests succeeded

@@ -21,7 +21,7 @@ env:
 jobs:
   test:
     name: ${{ matrix.task }}
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     timeout-minutes: 60
     strategy:
       fail-fast: false
@@ -47,7 +47,7 @@ jobs:
   r-check-docs:
     name: r-package-check-docs
     timeout-minutes: 60
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     container: rocker/verse
     steps:
       - name: Trust git cloning LightGBM
@@ -82,7 +82,7 @@ jobs:
         fi
   all-successful:
     # https://github.community/t/is-it-possible-to-require-all-github-actions-tasks-to-pass-without-enumerating-them/117957/4?u=graingert
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [test, r-check-docs]
     steps:
       - name: Note that all tests succeeded

@@ -21,7 +21,7 @@ resources:
   - container: linux-artifact-builder
     image: lightgbm/vsts-agent:manylinux_2_28_x86_64
   - container: ubuntu-latest
-    image: 'ubuntu:20.04'
+    image: 'ubuntu:22.04'
     options: "--name ci-container -v /usr/bin/docker:/tmp/docker:ro"
   - container: rbase
     image: wch1/r-debug
@@ -151,7 +151,7 @@ jobs:
     OS_NAME: 'linux'
     PRODUCES_ARTIFACTS: 'true'
   pool:
-    vmImage: ubuntu-latest
+    vmImage: ubuntu-22.04
   timeoutInMinutes: 180
   strategy:
     matrix:
@@ -299,7 +299,7 @@ jobs:
   ###########################################
   condition: not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))
   pool:
-    vmImage: 'ubuntu-latest'
+    vmImage: 'ubuntu-22.04'
   container: rbase
   steps:
     - script: |
@@ -330,7 +330,7 @@ jobs:
     - R_artifact
   condition: and(succeeded(), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
   pool:
-    vmImage: 'ubuntu-latest'
+    vmImage: 'ubuntu-22.04'
   steps:
   # Create archives with complete source code included (with git submodules)
   - task: ArchiveFiles@2

@@ -1 +1 @@
-2.69-11.1
+2.71-2

@@ -352,22 +352,22 @@ This section briefly explains the key files for building a CRAN package. To update
 At build time, `configure` will be run and used to create a file `Makevars`, using `Makevars.in` as a template.

 1. Edit `configure.ac`.
-2. Create `configure` with `autoconf`. Do not edit it by hand. This file must be generated on Ubuntu 20.04.
+2. Create `configure` with `autoconf`. Do not edit it by hand. This file must be generated on Ubuntu 22.04.

-If you have an Ubuntu 20.04 environment available, run the provided script from the root of the `LightGBM` repository.
+If you have an Ubuntu 22.04 environment available, run the provided script from the root of the `LightGBM` repository.

 ```shell
 ./R-package/recreate-configure.sh
 ```

-If you do not have easy access to an Ubuntu 20.04 environment, the `configure` script can be generated using Docker by running the code below from the root of this repo.
+If you do not have easy access to an Ubuntu 22.04 environment, the `configure` script can be generated using Docker by running the code below from the root of this repo.

 ```shell
 docker run \
     --rm \
     -v $(pwd):/opt/LightGBM \
     -w /opt/LightGBM \
-    -t ubuntu:20.04 \
+    -t ubuntu:22.04 \
     ./R-package/recreate-configure.sh
 ```

The diff for one file is not shown because of its large size.

@@ -1,7 +1,7 @@
 #!/bin/bash

 # recreates 'configure' from 'configure.ac'
-# this script should run on Ubuntu 20.04
+# this script should run on Ubuntu 22.04
 AUTOCONF_VERSION=$(cat R-package/AUTOCONF_UBUNTU_VERSION)

 # R packages cannot have versions like 3.0.0rc1, but

@@ -21,7 +21,7 @@ from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split
 import lightgbm as lgb
 from lightgbm.compat import PANDAS_INSTALLED, pd_DataFrame

-from .utils import (SERIALIZERS, dummy_obj, load_boston, load_breast_cancer, load_digits, load_iris, logistic_sigmoid,
+from .utils import (SERIALIZERS, dummy_obj, load_breast_cancer, load_digits, load_iris, logistic_sigmoid,
                     make_synthetic_regression, mse_obj, pickle_and_unpickle_object, sklearn_multiclass_custom_objective,
                     softmax)

@@ -114,7 +114,8 @@ def test_rf():

 @pytest.mark.parametrize('objective', ['regression', 'regression_l1', 'huber', 'fair', 'poisson'])
 def test_regression(objective):
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
+    y = np.abs(y)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     params = {
         'objective': objective,
@@ -133,13 +134,13 @@ def test_regression(objective):
     )
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     if objective == 'huber':
-        assert ret < 35
+        assert ret < 430
     elif objective == 'fair':
-        assert ret < 17
+        assert ret < 296
     elif objective == 'poisson':
-        assert ret < 8
+        assert ret < 193
     else:
-        assert ret < 7
+        assert ret < 338
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)

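The looser error bounds track the scale of the new synthetic target, whose variance is far larger than the Boston housing target's (roughly 5-50). A quick way to sanity-check such bounds is to compare them against the MSE of always predicting the mean. This is a sketch; the generator settings below (`n_features=4`, `n_informative=2`, `random_state=42`) are assumptions about what `make_synthetic_regression` wraps, not values taken from this diff.

```python
import numpy as np
from sklearn.datasets import make_regression

# assumed equivalent of make_synthetic_regression() in tests/python_package_test/utils.py
X, y = make_regression(n_samples=100, n_features=4, n_informative=2, random_state=42)

# MSE of the constant mean predictor == variance of the target
baseline_mse = np.mean((y - y.mean()) ** 2)
print(f"baseline MSE (always predict the mean): {baseline_mse:.0f}")

# a threshold such as `ret < 338` only proves learning if it sits well below this baseline
```
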
@@ -924,7 +925,7 @@ def test_early_stopping_min_delta(first_only, single_metric, greater_is_better):


 def test_continue_train():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     params = {
         'objective': 'regression',
@@ -948,7 +949,7 @@ def test_continue_train():
         init_model='model.txt'
     )
     ret = mean_absolute_error(y_test, gbm.predict(X_test))
-    assert ret < 2.0
+    assert ret < 13.6
     assert evals_result['valid_0']['l1'][-1] == pytest.approx(ret)
     np.testing.assert_allclose(evals_result['valid_0']['l1'], evals_result['valid_0']['custom_mae'])

@@ -968,7 +969,7 @@ def test_continue_train_reused_dataset():


 def test_continue_train_dart():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     params = {
         'boosting_type': 'dart',
@@ -989,7 +990,7 @@ def test_continue_train_dart():
         init_model=init_gbm
     )
     ret = mean_absolute_error(y_test, gbm.predict(X_test))
-    assert ret < 2.0
+    assert ret < 13.6
     assert evals_result['valid_0']['l1'][-1] == pytest.approx(ret)

@@ -1920,10 +1921,12 @@ def test_refit_dataset_params():
     np.testing.assert_allclose(stored_weights, refit_weight)


-def test_mape_rf():
-    X, y = load_boston(return_X_y=True)
+@pytest.mark.parametrize('boosting_type', ['rf', 'dart'])
+def test_mape_for_specific_boosting_types(boosting_type):
+    X, y = make_synthetic_regression()
+    y = abs(y)
     params = {
-        'boosting_type': 'rf',
+        'boosting_type': boosting_type,
         'objective': 'mape',
         'verbose': -1,
         'bagging_freq': 1,
@@ -1935,25 +1938,9 @@ def test_mape_rf():
     gbm = lgb.train(params, lgb_train, num_boost_round=20)
     pred = gbm.predict(X)
     pred_mean = pred.mean()
-    assert pred_mean > 20
-
-
-def test_mape_dart():
-    X, y = load_boston(return_X_y=True)
-    params = {
-        'boosting_type': 'dart',
-        'objective': 'mape',
-        'verbose': -1,
-        'bagging_freq': 1,
-        'bagging_fraction': 0.8,
-        'feature_fraction': 0.8,
-        'boost_from_average': False
-    }
-    lgb_train = lgb.Dataset(X, y)
-    gbm = lgb.train(params, lgb_train, num_boost_round=40)
-    pred = gbm.predict(X)
-    pred_mean = pred.mean()
-    assert pred_mean > 18
+    # the following checks that dart and rf with mape can predict outside the 0-1 range
+    # https://github.com/microsoft/LightGBM/issues/1579
+    assert pred_mean > 8


 def check_constant_features(y_true, expected_pred, more_params):

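The two near-identical tests above collapse into one parametrized test that keeps the single property both of them asserted: with `objective='mape'` and `boost_from_average=False`, rf and dart predictions are not confined to the 0-1 range (issue #1579). A standalone sketch of that property follows; the dataset construction here is an assumption, only the params dict mirrors the diff.

```python
import numpy as np
import lightgbm as lgb
from sklearn.datasets import make_regression

# positive targets on a scale far above 1, so unsquashed predictions are easy to spot
X, y = make_regression(n_samples=100, n_features=4, n_informative=2, random_state=42)
y = np.abs(y)

for boosting_type in ('rf', 'dart'):
    params = {
        'boosting_type': boosting_type,
        'objective': 'mape',
        'verbose': -1,
        'bagging_freq': 1,
        'bagging_fraction': 0.8,
        'feature_fraction': 0.8,
        'boost_from_average': False,
    }
    booster = lgb.train(params, lgb.Dataset(X, y), num_boost_round=20)
    # mean prediction lands near the target scale, well outside [0, 1]
    print(boosting_type, booster.predict(X).mean())
```
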
@@ -2667,19 +2654,22 @@ def test_model_size():

 @pytest.mark.skipif(getenv('TASK', '') == 'cuda_exp', reason='Skip due to differences in implementation details of CUDA Experimental version')
 def test_get_split_value_histogram():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
+    X = np.repeat(X, 3, axis=0)
+    y = np.repeat(y, 3, axis=0)
+    X[:, 2] = np.random.default_rng(0).integers(0, 20, size=X.shape[0])
     lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
     gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
     # test XGBoost-style return value
     params = {'feature': 0, 'xgboost_style': True}
-    assert gbm.get_split_value_histogram(**params).shape == (9, 2)
-    assert gbm.get_split_value_histogram(bins=999, **params).shape == (9, 2)
+    assert gbm.get_split_value_histogram(**params).shape == (12, 2)
+    assert gbm.get_split_value_histogram(bins=999, **params).shape == (12, 2)
     assert gbm.get_split_value_histogram(bins=-1, **params).shape == (1, 2)
     assert gbm.get_split_value_histogram(bins=0, **params).shape == (1, 2)
     assert gbm.get_split_value_histogram(bins=1, **params).shape == (1, 2)
     assert gbm.get_split_value_histogram(bins=2, **params).shape == (2, 2)
-    assert gbm.get_split_value_histogram(bins=6, **params).shape == (5, 2)
-    assert gbm.get_split_value_histogram(bins=7, **params).shape == (6, 2)
+    assert gbm.get_split_value_histogram(bins=6, **params).shape == (6, 2)
+    assert gbm.get_split_value_histogram(bins=7, **params).shape == (7, 2)
     if lgb.compat.PANDAS_INSTALLED:
         np.testing.assert_allclose(
             gbm.get_split_value_histogram(0, xgboost_style=True).values,
@@ -2700,8 +2690,8 @@ def test_get_split_value_histogram():
     )
     # test numpy-style return value
     hist, bins = gbm.get_split_value_histogram(0)
-    assert len(hist) == 23
-    assert len(bins) == 24
+    assert len(hist) == 20
+    assert len(bins) == 21
     hist, bins = gbm.get_split_value_histogram(0, bins=999)
     assert len(hist) == 999
     assert len(bins) == 1000
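The expected counts change because the histogram is built over the split values the trained model actually used for feature 0, and those differ with the new data. A sketch of the two return styles this test exercises (the data setup is assumed, as above):

```python
import lightgbm as lgb
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100, n_features=4, n_informative=2, random_state=42)
booster = lgb.train({'verbose': -1}, lgb.Dataset(X, y), num_boost_round=20)

# numpy style: (counts, bin_edges), like np.histogram, so len(bins) == len(hist) + 1
hist, bins = booster.get_split_value_histogram(0)
print(len(hist), len(bins))

# xgboost style: a two-column table of (split value, count);
# a pandas DataFrame when pandas is installed, otherwise a numpy array
table = booster.get_split_value_histogram(0, xgboost_style=True)
print(table.shape)
```
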
@@ -2790,7 +2780,7 @@ def test_early_stopping_for_only_first_metric():
         )
         assert assumed_iteration == len(ret[list(ret.keys())[0]])

-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=73)
     lgb_train = lgb.Dataset(X_train, y_train)
@@ -2798,16 +2788,16 @@ def test_early_stopping_for_only_first_metric():
     lgb_valid2 = lgb.Dataset(X_test2, y_test2, reference=lgb_train)

     iter_valid1_l1 = 3
-    iter_valid1_l2 = 14
-    iter_valid2_l1 = 2
+    iter_valid1_l2 = 3
+    iter_valid2_l1 = 3
     iter_valid2_l2 = 15
-    assert len(set([iter_valid1_l1, iter_valid1_l2, iter_valid2_l1, iter_valid2_l2])) == 4
+    assert len(set([iter_valid1_l1, iter_valid1_l2, iter_valid2_l1, iter_valid2_l2])) == 2
     iter_min_l1 = min([iter_valid1_l1, iter_valid2_l1])
     iter_min_l2 = min([iter_valid1_l2, iter_valid2_l2])
     iter_min_valid1 = min([iter_valid1_l1, iter_valid1_l2])

-    iter_cv_l1 = 4
-    iter_cv_l2 = 12
+    iter_cv_l1 = 15
+    iter_cv_l2 = 13
     assert len(set([iter_cv_l1, iter_cv_l2])) == 2
     iter_cv_min = min([iter_cv_l1, iter_cv_l2])

@@ -3153,7 +3143,7 @@ def test_trees_to_dataframe():


 def test_interaction_constraints():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression(n_samples=200)
     num_features = X.shape[1]
     train_data = lgb.Dataset(X, label=y)
     # check that constraint containing all features is equivalent to no constraint
@@ -3166,9 +3156,7 @@ def test_interaction_constraints():
     pred2 = est.predict(X)
     np.testing.assert_allclose(pred1, pred2)
     # check that constraint partitioning the features reduces train accuracy
-    est = lgb.train(dict(params, interaction_constraints=[list(range(num_features // 2)),
-                                                          list(range(num_features // 2, num_features))]),
-                    train_data, num_boost_round=10)
+    est = lgb.train(dict(params, interaction_constraints=[[0, 2], [1, 3]]), train_data, num_boost_round=10)
     pred3 = est.predict(X)
     assert mean_squared_error(y, pred1) < mean_squared_error(y, pred3)
     # check that constraints consisting of single features reduce accuracy further
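The hardcoded groups `[[0, 2], [1, 3]]` fit the four-feature synthetic data; the old `num_features // 2` split was written for Boston's thirteen features. The logic being tested: features from different constraint groups never appear in the same tree, so partitioning the features can only hurt (or at best match) training error. A minimal sketch of that check, under the same assumed generator settings as above:

```python
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

X, y = make_regression(n_samples=200, n_features=4, n_informative=2, random_state=42)
train_data = lgb.Dataset(X, label=y)
params = {'verbose': -1, 'seed': 0}

unconstrained = lgb.train(params, train_data, num_boost_round=10)
partitioned = lgb.train(dict(params, interaction_constraints=[[0, 2], [1, 3]]),
                        train_data, num_boost_round=10)

# expected on this data: the partition cannot beat the unconstrained model on train MSE
assert (mean_squared_error(y, unconstrained.predict(X))
        <= mean_squared_error(y, partitioned.predict(X)))
```
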
@@ -3568,7 +3556,7 @@ def test_dump_model_hook():

 @pytest.mark.skipif(getenv('TASK', '') == 'cuda_exp', reason='Forced splits are not yet supported by CUDA Experimental version')
 def test_force_split_with_feature_fraction(tmp_path):
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)

@@ -3595,7 +3583,7 @@ def test_force_split_with_feature_fraction(tmp_path):

     gbm = lgb.train(params, lgb_train)
     ret = mean_absolute_error(y_test, gbm.predict(X_test))
-    assert ret < 2.0
+    assert ret < 15.7

     tree_info = gbm.dump_model()["tree_info"]
     assert len(tree_info) > 1

@@ -21,8 +21,8 @@ from sklearn.utils.validation import check_is_fitted
 import lightgbm as lgb
 from lightgbm.compat import PANDAS_INSTALLED, pd_DataFrame

-from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking,
-                    make_synthetic_regression, sklearn_multiclass_custom_objective, softmax)
+from .utils import (load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking, make_synthetic_regression,
+                    sklearn_multiclass_custom_objective, softmax)

 decreasing_generator = itertools.count(0, -1)
 task_to_model_factory = {
@@ -112,12 +112,12 @@ def test_binary():


 def test_regression():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1)
     gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], callbacks=[lgb.early_stopping(5)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 7
+    assert ret < 174
     assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)

@@ -226,12 +226,12 @@ def test_objective_aliases(custom_objective):


 def test_regression_with_custom_objective():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1, objective=objective_ls)
     gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], callbacks=[lgb.early_stopping(5)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 7.0
+    assert ret < 174
     assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)

@@ -249,13 +249,12 @@ def test_binary_classification_with_custom_objective():


 def test_dart():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50)
     gbm.fit(X_train, y_train)
     score = gbm.score(X_test, y_test)
-    assert score >= 0.8
-    assert score <= 1.
+    assert 0.8 <= score <= 1.0


 def test_stacking_classifier():
@@ -280,7 +279,9 @@ def test_stacking_classifier():


 def test_stacking_regressor():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression(n_samples=200)
+    n_features = X.shape[1]
+    n_input_models = 2
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
     regressors = [('gbm1', lgb.LGBMRegressor(n_estimators=3)),
                   ('gbm2', lgb.LGBMRegressor(n_estimators=3))]
@@ -291,11 +292,11 @@ def test_stacking_regressor():
     score = reg.score(X_test, y_test)
     assert score >= 0.2
     assert score <= 1.
-    assert reg.n_features_in_ == 13  # number of input features
-    assert len(reg.named_estimators_['gbm1'].feature_importances_) == 13
+    assert reg.n_features_in_ == n_features  # number of input features
+    assert len(reg.named_estimators_['gbm1'].feature_importances_) == n_features
     assert reg.named_estimators_['gbm1'].n_features_in_ == reg.named_estimators_['gbm2'].n_features_in_
-    assert reg.final_estimator_.n_features_in_ == 15  # number of concatenated features
-    assert len(reg.final_estimator_.feature_importances_) == 15
+    assert reg.final_estimator_.n_features_in_ == n_features + n_input_models  # number of concatenated features
+    assert len(reg.final_estimator_.feature_importances_) == n_features + n_input_models


 def test_grid_search():
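The magic numbers 13 and 15 assumed Boston's feature count; deriving them keeps the test valid for any generator. The `+ n_input_models` term arises because, with `passthrough=True`, a stacking final estimator sees the original features plus one out-of-fold prediction per base model. A sketch of that arithmetic (the generator settings and `n_estimators` values are assumptions):

```python
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.ensemble import StackingRegressor

X, y = make_regression(n_samples=200, n_features=4, n_informative=2, random_state=42)
regressors = [('gbm1', lgb.LGBMRegressor(n_estimators=3)),
              ('gbm2', lgb.LGBMRegressor(n_estimators=3))]
reg = StackingRegressor(estimators=regressors,
                        final_estimator=lgb.LGBMRegressor(n_estimators=3),
                        passthrough=True)
reg.fit(X, y)

# original features + one prediction column per base model
assert reg.final_estimator_.n_features_in_ == X.shape[1] + len(regressors)
```
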
@@ -765,7 +766,8 @@ def test_evaluate_train_set():


 def test_metrics():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression()
+    y = abs(y)
     params = {'n_estimators': 2, 'verbose': -1}
     params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}

@@ -1102,7 +1104,7 @@ def test_first_metric_only():
         else:
             assert gbm.n_estimators == gbm.best_iteration_

-    X, y = load_boston(return_X_y=True)
+    X, y = make_synthetic_regression(n_samples=300)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=72)
     params = {'n_estimators': 30,
|
|||
params_fit = {'X': X_train,
|
||||
'y': y_train}
|
||||
|
||||
iter_valid1_l1 = 3
|
||||
iter_valid1_l2 = 18
|
||||
iter_valid2_l1 = 11
|
||||
iter_valid2_l2 = 7
|
||||
assert len(set([iter_valid1_l1, iter_valid1_l2, iter_valid2_l1, iter_valid2_l2])) == 4
|
||||
iter_valid1_l1 = 4
|
||||
iter_valid1_l2 = 4
|
||||
iter_valid2_l1 = 2
|
||||
iter_valid2_l2 = 2
|
||||
assert len(set([iter_valid1_l1, iter_valid1_l2, iter_valid2_l1, iter_valid2_l2])) == 2
|
||||
iter_min_l1 = min([iter_valid1_l1, iter_valid2_l1])
|
||||
iter_min_l2 = min([iter_valid1_l2, iter_valid2_l2])
|
||||
iter_min = min([iter_min_l1, iter_min_l2])
|
||||
|
|
|
@@ -13,11 +13,6 @@ import lightgbm as lgb
 SERIALIZERS = ["pickle", "joblib", "cloudpickle"]


-@lru_cache(maxsize=None)
-def load_boston(**kwargs):
-    return sklearn.datasets.load_boston(**kwargs)
-
-
 @lru_cache(maxsize=None)
 def load_breast_cancer(**kwargs):
     return sklearn.datasets.load_breast_cancer(**kwargs)
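The `load_boston` loader goes away because scikit-learn deprecated the Boston dataset (removed in scikit-learn 1.2), while the remaining loaders stay cached. The replacement helper is not shown in this hunk; a plausible shape for it, with generator parameters that are assumptions rather than values copied from this commit:

```python
# hypothetical sketch of the new helper in tests/python_package_test/utils.py
from functools import lru_cache

import sklearn.datasets


@lru_cache(maxsize=None)
def make_synthetic_regression(n_samples=100):
    # cached like the other loaders, so repeated tests reuse one dataset
    return sklearn.datasets.make_regression(n_samples=n_samples,
                                            n_features=4,
                                            n_informative=2,
                                            random_state=42)
```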