Mirror of https://github.com/microsoft/LightGBM.git
[python-package] Add `feature_names_in_` attribute for scikit-learn estimators (fixes #6279) (#6310)
Parent: 7d9106d209
Commit: f811c82708
@@ -1144,11 +1144,23 @@ class LGBMModel(_LGBMModelBase):
     @property
     def feature_name_(self) -> List[str]:
-        """:obj:`list` of shape = [n_features]: The names of features."""
+        """:obj:`list` of shape = [n_features]: The names of features.
+
+        .. note::
+
+            If input does not contain feature names, they will be added during fitting in the format ``Column_0``, ``Column_1``, ..., ``Column_N``.
+        """
         if not self.__sklearn_is_fitted__():
             raise LGBMNotFittedError("No feature_name found. Need to call fit beforehand.")
         return self._Booster.feature_name()  # type: ignore[union-attr]
 
+    @property
+    def feature_names_in_(self) -> np.ndarray:
+        """:obj:`array` of shape = [n_features]: scikit-learn compatible version of ``.feature_name_``."""
+        if not self.__sklearn_is_fitted__():
+            raise LGBMNotFittedError("No feature_names_in_ found. Need to call fit beforehand.")
+        return np.array(self.feature_name_)
+
 
 class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
     """LightGBM regressor."""
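For reference, a minimal usage sketch of the new attribute (not part of the commit; the data and parameter values are arbitrary):

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))  # numpy input carries no feature names
y = rng.normal(size=100)

model = lgb.LGBMRegressor(n_estimators=2, num_leaves=7).fit(X, y)

# LightGBM generates names during fitting, so both accessors are populated;
# before fit() either one raises LGBMNotFittedError
model.feature_name_      # ['Column_0', 'Column_1', 'Column_2', 'Column_3'] (list of str)
model.feature_names_in_  # array(['Column_0', ..., 'Column_3']) (numpy array)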
@@ -1290,6 +1290,46 @@ def test_max_depth_warning_is_never_raised(capsys, estimator_class, max_depth):
     assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
 
 
+@pytest.mark.parametrize("estimator_class", [lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker])
+def test_getting_feature_names_in_np_input(estimator_class):
+    # input is a numpy array, which doesn't have feature names. LightGBM adds
+    # feature names to the fitted model, which is inconsistent with sklearn's behavior
+    X, y = load_digits(n_class=2, return_X_y=True)
+    params = {"n_estimators": 2, "num_leaves": 7}
+    if estimator_class is lgb.LGBMModel:
+        model = estimator_class(**{**params, "objective": "binary"})
+    else:
+        model = estimator_class(**params)
+    with pytest.raises(lgb.compat.LGBMNotFittedError):
+        check_is_fitted(model)
+    if isinstance(model, lgb.LGBMRanker):
+        model.fit(X, y, group=[X.shape[0]])
+    else:
+        model.fit(X, y)
+    np.testing.assert_array_equal(model.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]))
+
+
+@pytest.mark.parametrize("estimator_class", [lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker])
+def test_getting_feature_names_in_pd_input(estimator_class):
+    X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
+    col_names = X.columns.to_list()
+    assert isinstance(col_names, list) and all(
+        isinstance(c, str) for c in col_names
+    ), "input data must have feature names for this test to cover the expected functionality"
+    params = {"n_estimators": 2, "num_leaves": 7}
+    if estimator_class is lgb.LGBMModel:
+        model = estimator_class(**{**params, "objective": "binary"})
+    else:
+        model = estimator_class(**params)
+    with pytest.raises(lgb.compat.LGBMNotFittedError):
+        check_is_fitted(model)
+    if isinstance(model, lgb.LGBMRanker):
+        model.fit(X, y, group=[X.shape[0]])
+    else:
+        model.fit(X, y)
+    np.testing.assert_array_equal(model.feature_names_in_, X.columns)
+
+
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
 def test_sklearn_integration(estimator, check):
     estimator.set_params(min_child_samples=1, min_data_in_bin=1)
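The comment in the numpy-input test points at a deliberate divergence from scikit-learn: sklearn estimators define feature_names_in_ only when the input carries string column names (e.g. a pandas DataFrame), while LightGBM synthesizes Column_0, Column_1, ... for unnamed input. A rough sketch of the difference (not part of the commit; assumes scikit-learn and pandas are installed):

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 2))
y = rng.normal(size=50)

# scikit-learn leaves the attribute unset for unnamed numpy input
hasattr(LinearRegression().fit(X, y), "feature_names_in_")        # False

# LightGBM (after this commit) always exposes generated names
lgb.LGBMRegressor(n_estimators=2).fit(X, y).feature_names_in_     # ['Column_0' 'Column_1']

# with a DataFrame, both libraries report the real column names
X_df = pd.DataFrame(X, columns=["a", "b"])
LinearRegression().fit(X_df, y).feature_names_in_                 # ['a' 'b']
lgb.LGBMRegressor(n_estimators=2).fit(X_df, y).feature_names_in_  # ['a' 'b']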