зеркало из https://github.com/microsoft/LightGBM.git
[python] Configure choice of `feature_importance_` in sklearn API (#1470)
* ignore vim temporary files * add importance_type arg to sklearn API * update documentation info * remove a trailing space * remove trailing space (again :)) * add instructions on importance choices to sklearn API * drop mention of constructor in the feature type setting * adding a test for different feature types * remove trailing spaces, make shorter assert in feature importance type handling test * fixing style issue introduced with the new test
This commit is contained in:
Родитель
fac4afe099
Коммит
dae7551629
|
@ -181,6 +181,7 @@ BundleArtifacts/
|
|||
ClientBin/
|
||||
~$*
|
||||
*~
|
||||
.*.swp
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.pfx
|
||||
|
|
|
@ -134,7 +134,7 @@ class LGBMModel(_LGBMModelBase):
|
|||
min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
|
||||
subsample=1., subsample_freq=0, colsample_bytree=1.,
|
||||
reg_alpha=0., reg_lambda=0., random_state=None,
|
||||
n_jobs=-1, silent=True, **kwargs):
|
||||
n_jobs=-1, silent=True, importance_type='split', **kwargs):
|
||||
"""Construct a gradient boosting model.
|
||||
|
||||
Parameters
|
||||
|
@ -193,6 +193,10 @@ class LGBMModel(_LGBMModelBase):
|
|||
Number of parallel threads.
|
||||
silent : bool, optional (default=True)
|
||||
Whether to print messages while running boosting.
|
||||
importance_type : str, optional (default='split')
|
||||
The type of feature importance to be filled into ``feature_importances_``.
|
||||
If "split", result contains numbers of times the feature is used in a model.
|
||||
If "gain", result contains total gains of splits which use the feature.
|
||||
**kwargs : other parameters
|
||||
Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.
|
||||
|
||||
|
@ -264,6 +268,7 @@ class LGBMModel(_LGBMModelBase):
|
|||
self.random_state = random_state
|
||||
self.n_jobs = n_jobs
|
||||
self.silent = silent
|
||||
self.importance_type = importance_type
|
||||
self._Booster = None
|
||||
self._evals_result = None
|
||||
self._best_score = None
|
||||
|
@ -399,6 +404,7 @@ class LGBMModel(_LGBMModelBase):
|
|||
if 'verbose' not in params and self.silent:
|
||||
params['verbose'] = 0
|
||||
params.pop('silent', None)
|
||||
params.pop('importance_type', None)
|
||||
params.pop('n_estimators', None)
|
||||
params.pop('class_weight', None)
|
||||
if self._n_classes is not None and self._n_classes > 2:
|
||||
|
@ -606,11 +612,13 @@ class LGBMModel(_LGBMModelBase):
|
|||
Note
|
||||
----
|
||||
Feature importance in sklearn interface used to normalize to 1,
|
||||
it's deprecated after 2.0.4 and same as Booster.feature_importance() now.
|
||||
it's deprecated after 2.0.4 and is the same as Booster.feature_importance() now.
|
||||
``importance_type`` attribute is passed to the function
|
||||
to configure the type of importance values to be extracted.
|
||||
"""
|
||||
if self._n_features is None:
|
||||
raise LGBMNotFittedError('No feature_importances found. Need to call fit beforehand.')
|
||||
return self.booster_.feature_importance()
|
||||
return self.booster_.feature_importance(importance_type=self.importance_type)
|
||||
|
||||
|
||||
class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
|
||||
|
|
|
@ -166,6 +166,19 @@ class TestSklearn(unittest.TestCase):
|
|||
importances = clf.feature_importances_
|
||||
self.assertEqual(len(importances), 4)
|
||||
|
||||
def test_feature_importances_type(self):
    """Check that 'split' and 'gain' importance types give different results.

    Fits one classifier, then reads ``feature_importances_`` twice after
    switching ``importance_type`` via ``set_params``, verifying the attribute
    actually reacts to the configured type.
    """
    clf = lgb.LGBMClassifier(n_estimators=100)
    data = load_iris()
    clf.fit(data.data, data.target)
    clf.set_params(importance_type='split')
    importances_split = clf.feature_importances_
    clf.set_params(importance_type='gain')
    importances_gain = clf.feature_importances_
    # Only the largest importance is compared: the smallest can legitimately
    # be identical under both types (e.g. zero for an unused feature).
    # max() replaces the original sorted(..., reverse=True)[0] — same value,
    # O(n) instead of O(n log n).
    self.assertNotEqual(max(importances_split), max(importances_gain))
|
||||
|
||||
def test_sklearn_backward_compatibility(self):
|
||||
iris = load_iris()
|
||||
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)
|
||||
|
|
Загрузка…
Ссылка в новой задаче