зеркало из https://github.com/microsoft/LightGBM.git
* fix tests * fix tests * fix test comments * simplify tests * Apply suggestions from code review
This commit is contained in:
Родитель
84b519b77c
Коммит
37485fff5d
|
@ -372,7 +372,7 @@ def _predict_part(
|
||||||
|
|
||||||
# dask.DataFrame.map_partitions() expects each call to return a pandas DataFrame or Series
|
# dask.DataFrame.map_partitions() expects each call to return a pandas DataFrame or Series
|
||||||
if isinstance(part, pd_DataFrame):
|
if isinstance(part, pd_DataFrame):
|
||||||
if pred_proba or pred_contrib:
|
if pred_proba or pred_contrib or pred_leaf:
|
||||||
result = pd_DataFrame(result, index=part.index)
|
result = pd_DataFrame(result, index=part.index)
|
||||||
else:
|
else:
|
||||||
result = pd_Series(result, index=part.index, name='predictions')
|
result = pd_Series(result, index=part.index, name='predictions')
|
||||||
|
|
|
@ -223,6 +223,7 @@ def test_classifier(output, centers, client, listen_port):
|
||||||
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw)
|
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw)
|
||||||
p1 = dask_classifier.predict(dX)
|
p1 = dask_classifier.predict(dX)
|
||||||
p1_proba = dask_classifier.predict_proba(dX).compute()
|
p1_proba = dask_classifier.predict_proba(dX).compute()
|
||||||
|
p1_pred_leaf = dask_classifier.predict(dX, pred_leaf=True)
|
||||||
p1_local = dask_classifier.to_local().predict(X)
|
p1_local = dask_classifier.to_local().predict(X)
|
||||||
s1 = _accuracy_score(dy, p1)
|
s1 = _accuracy_score(dy, p1)
|
||||||
p1 = p1.compute()
|
p1 = p1.compute()
|
||||||
|
@ -241,6 +242,17 @@ def test_classifier(output, centers, client, listen_port):
|
||||||
assert_eq(p1_local, p2)
|
assert_eq(p1_local, p2)
|
||||||
assert_eq(y, p1_local)
|
assert_eq(y, p1_local)
|
||||||
|
|
||||||
|
# pred_leaf values should have the right shape
|
||||||
|
# and values that look like valid tree nodes
|
||||||
|
pred_leaf_vals = p1_pred_leaf.compute()
|
||||||
|
assert pred_leaf_vals.shape == (
|
||||||
|
X.shape[0],
|
||||||
|
dask_classifier.booster_.num_trees()
|
||||||
|
)
|
||||||
|
assert np.max(pred_leaf_vals) <= params['num_leaves']
|
||||||
|
assert np.min(pred_leaf_vals) >= 0
|
||||||
|
assert len(np.unique(pred_leaf_vals)) <= params['num_leaves']
|
||||||
|
|
||||||
# be sure LightGBM actually used at least one categorical column,
|
# be sure LightGBM actually used at least one categorical column,
|
||||||
# and that it was correctly treated as a categorical feature
|
# and that it was correctly treated as a categorical feature
|
||||||
if output == 'dataframe-with-categorical':
|
if output == 'dataframe-with-categorical':
|
||||||
|
@ -380,6 +392,8 @@ def test_regressor(output, client, listen_port):
|
||||||
)
|
)
|
||||||
dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw)
|
dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw)
|
||||||
p1 = dask_regressor.predict(dX)
|
p1 = dask_regressor.predict(dX)
|
||||||
|
p1_pred_leaf = dask_regressor.predict(dX, pred_leaf=True)
|
||||||
|
|
||||||
if not output.startswith('dataframe'):
|
if not output.startswith('dataframe'):
|
||||||
s1 = _r2_score(dy, p1)
|
s1 = _r2_score(dy, p1)
|
||||||
p1 = p1.compute()
|
p1 = p1.compute()
|
||||||
|
@ -399,6 +413,17 @@ def test_regressor(output, client, listen_port):
|
||||||
# Predictions should be roughly the same.
|
# Predictions should be roughly the same.
|
||||||
assert_eq(p1, p1_local)
|
assert_eq(p1, p1_local)
|
||||||
|
|
||||||
|
# pred_leaf values should have the right shape
|
||||||
|
# and values that look like valid tree nodes
|
||||||
|
pred_leaf_vals = p1_pred_leaf.compute()
|
||||||
|
assert pred_leaf_vals.shape == (
|
||||||
|
X.shape[0],
|
||||||
|
dask_regressor.booster_.num_trees()
|
||||||
|
)
|
||||||
|
assert np.max(pred_leaf_vals) <= params['num_leaves']
|
||||||
|
assert np.min(pred_leaf_vals) >= 0
|
||||||
|
assert len(np.unique(pred_leaf_vals)) <= params['num_leaves']
|
||||||
|
|
||||||
# The checks below are skipped
|
# The checks below are skipped
|
||||||
# for the categorical data case because it's difficult to get
|
# for the categorical data case because it's difficult to get
|
||||||
# a good fit from just categoricals for a regression problem
|
# a good fit from just categoricals for a regression problem
|
||||||
|
@ -582,6 +607,7 @@ def test_ranker(output, client, listen_port, group):
|
||||||
dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg)
|
dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg)
|
||||||
rnkvec_dask = dask_ranker.predict(dX)
|
rnkvec_dask = dask_ranker.predict(dX)
|
||||||
rnkvec_dask = rnkvec_dask.compute()
|
rnkvec_dask = rnkvec_dask.compute()
|
||||||
|
p1_pred_leaf = dask_ranker.predict(dX, pred_leaf=True)
|
||||||
rnkvec_dask_local = dask_ranker.to_local().predict(X)
|
rnkvec_dask_local = dask_ranker.to_local().predict(X)
|
||||||
|
|
||||||
local_ranker = lgb.LGBMRanker(**params)
|
local_ranker = lgb.LGBMRanker(**params)
|
||||||
|
@ -595,6 +621,17 @@ def test_ranker(output, client, listen_port, group):
|
||||||
assert spearmanr(rnkvec_dask, rnkvec_local).correlation > 0.8
|
assert spearmanr(rnkvec_dask, rnkvec_local).correlation > 0.8
|
||||||
assert_eq(rnkvec_dask, rnkvec_dask_local)
|
assert_eq(rnkvec_dask, rnkvec_dask_local)
|
||||||
|
|
||||||
|
# pred_leaf values should have the right shape
|
||||||
|
# and values that look like valid tree nodes
|
||||||
|
pred_leaf_vals = p1_pred_leaf.compute()
|
||||||
|
assert pred_leaf_vals.shape == (
|
||||||
|
X.shape[0],
|
||||||
|
dask_ranker.booster_.num_trees()
|
||||||
|
)
|
||||||
|
assert np.max(pred_leaf_vals) <= params['num_leaves']
|
||||||
|
assert np.min(pred_leaf_vals) >= 0
|
||||||
|
assert len(np.unique(pred_leaf_vals)) <= params['num_leaves']
|
||||||
|
|
||||||
# be sure LightGBM actually used at least one categorical column,
|
# be sure LightGBM actually used at least one categorical column,
|
||||||
# and that it was correctly treated as a categorical feature
|
# and that it was correctly treated as a categorical feature
|
||||||
if output == 'dataframe-with-categorical':
|
if output == 'dataframe-with-categorical':
|
||||||
|
|
Загрузка…
Ссылка в новой задаче