зеркало из https://github.com/microsoft/LightGBM.git
move __version__ import to compat.py, test with all ML tasks
This commit is contained in:
Родитель
722474d07c
Коммит
86b5ab3b7b
|
@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, List
|
|||
# see https://github.com/microsoft/LightGBM/issues/6509
|
||||
"""sklearn"""
|
||||
try:
|
||||
from sklearn import __version__ as _sklearn_version
|
||||
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from sklearn.utils.class_weight import compute_sample_weight
|
||||
|
@ -117,6 +118,7 @@ except ImportError:
|
|||
_LGBMCheckClassificationTargets = None
|
||||
_LGBMComputeSampleWeight = None
|
||||
_LGBMValidateData = None
|
||||
_sklearn_version = None
|
||||
|
||||
# additional scikit-learn imports only for type hints
|
||||
if TYPE_CHECKING:
|
||||
|
|
|
@ -40,6 +40,7 @@ from .compat import (
|
|||
_LGBMModelBase,
|
||||
_LGBMRegressorBase,
|
||||
_LGBMValidateData,
|
||||
_sklearn_version,
|
||||
dt_DataTable,
|
||||
pd_DataFrame,
|
||||
)
|
||||
|
@ -729,11 +730,9 @@ class LGBMModel(_LGBMModelBase):
|
|||
# _LGBMModelBase.__sklearn_tags__() cannot be called unconditionally,
|
||||
# because that method isn't defined for scikit-learn<1.6
|
||||
if not hasattr(_LGBMModelBase, "__sklearn_tags__"):
|
||||
from sklearn import __version__ as sklearn_version
|
||||
|
||||
err_msg = (
|
||||
"__sklearn_tags__() should not be called when using scikit-learn<1.6. "
|
||||
f"detected version: {sklearn_version}"
|
||||
f"detected version: {_sklearn_version}"
|
||||
)
|
||||
raise AttributeError(err_msg)
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@ task_to_model_factory = {
|
|||
"multiclass-classification": lgb.LGBMClassifier,
|
||||
"regression": lgb.LGBMRegressor,
|
||||
}
|
||||
all_tasks = tuple(task_to_model_factory.keys())
|
||||
|
||||
|
||||
def _create_data(task, n_samples=100, n_features=4):
|
||||
|
@ -1459,7 +1460,7 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato
|
|||
assert sklearn_tags._xfail_checks == more_tags["_xfail_checks"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("task", ["binary-classification", "multiclass-classification", "ranking", "regression"])
|
||||
@pytest.mark.parametrize("task", all_tasks)
|
||||
def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task):
|
||||
pd = pytest.importorskip("pandas")
|
||||
X, y, g = _create_data(task)
|
||||
|
@ -1563,7 +1564,7 @@ def test_default_n_jobs(tmp_path):
|
|||
|
||||
|
||||
@pytest.mark.skipif(not PANDAS_INSTALLED, reason="pandas is not installed")
|
||||
@pytest.mark.parametrize("task", ["binary-classification", "multiclass-classification", "ranking", "regression"])
|
||||
@pytest.mark.parametrize("task", all_tasks)
|
||||
def test_validate_features(task):
|
||||
X, y, g = _create_data(task, n_features=4)
|
||||
features = ["x1", "x2", "x3", "x4"]
|
||||
|
@ -1586,24 +1587,49 @@ def test_validate_features(task):
|
|||
|
||||
# LightGBM's 'predict_disable_shape_check' mechanism is intentionally not respected by
|
||||
# its scikit-learn estimators, for consistency with scikit-learn's own behavior.
|
||||
@pytest.mark.parametrize("task", all_tasks)
|
||||
@pytest.mark.parametrize("predict_disable_shape_check", [True, False])
|
||||
def test_predict_rejects_inputs_with_incorrect_number_of_features(predict_disable_shape_check):
|
||||
X, y, _ = _create_data(task="regression", n_features=4)
|
||||
def test_predict_rejects_inputs_with_incorrect_number_of_features(predict_disable_shape_check, task):
|
||||
X, y, g = _create_data(task, n_features=4)
|
||||
model_factory = task_to_model_factory[task]
|
||||
fit_kwargs = {"X": X[:, :-1], "y": y}
|
||||
if task == "ranking":
|
||||
estimator_name = "LGBMRanker"
|
||||
fit_kwargs.update({"group": g})
|
||||
elif task == "regression":
|
||||
estimator_name = "LGBMRegressor"
|
||||
else:
|
||||
estimator_name = "LGBMClassifier"
|
||||
|
||||
# train on the first 3 features
|
||||
model = lgb.LGBMRegressor(n_estimators=5, num_leaves=7, verbose=-1).fit(X[:, :-1], y)
|
||||
model = model_factory(n_estimators=5, num_leaves=7, verbose=-1).fit(**fit_kwargs)
|
||||
|
||||
# more cols in X than features: error
|
||||
with pytest.raises(ValueError, match="X has 4 features, but LGBMRegressor is expecting 3 features as input"):
|
||||
err_msg = f"X has 4 features, but {estimator_name} is expecting 3 features as input"
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
model.predict(X, predict_disable_shape_check=predict_disable_shape_check)
|
||||
|
||||
if estimator_name == "LGBMClassifier":
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
model.predict_proba(X, predict_disable_shape_check=predict_disable_shape_check)
|
||||
|
||||
# fewer cols in X than features: error
|
||||
with pytest.raises(ValueError, match="X has 2 features, but LGBMRegressor is expecting 3 features as input"):
|
||||
err_msg = f"X has 2 features, but {estimator_name} is expecting 3 features as input"
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
model.predict(X[:, :-2], predict_disable_shape_check=predict_disable_shape_check)
|
||||
|
||||
if estimator_name == "LGBMClassifier":
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
model.predict_proba(X[:, :-2], predict_disable_shape_check=predict_disable_shape_check)
|
||||
|
||||
# same number of columns in both: no error
|
||||
preds = model.predict(X[:, :-1], predict_disable_shape_check=predict_disable_shape_check)
|
||||
assert preds.shape == y.shape
|
||||
|
||||
if estimator_name == "LGBMClassifier":
|
||||
preds = model.predict(X[:, :-1], predict_disable_shape_check=predict_disable_shape_check)
|
||||
assert preds.shape == y.shape
|
||||
|
||||
|
||||
@pytest.mark.parametrize("X_type", ["dt_DataTable", "list2d", "numpy", "scipy_csc", "scipy_csr", "pd_DataFrame"])
|
||||
@pytest.mark.parametrize("y_type", ["list1d", "numpy", "pd_Series", "pd_DataFrame"])
|
||||
|
|
Загрузка…
Ссылка в новой задаче