Mirror of https://github.com/microsoft/LightGBM.git
[ci] [python-package] enable ruff-format on all Python code (#6336)
This commit is contained in:
Parent
2a08565513
Commit
dd31208ab7
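Note: ruff-format is a Black-compatible code formatter, so every change in the diff below is mechanical: string literals are normalized to double quotes, bare float literals like 0. are written as 0.0, multi-line calls and literals keep a trailing comma after the last element (the "magic trailing comma", which holds them one element per line), and statements are collapsed or split to fit the configured line length. A minimal sketch of the rewrite, on hypothetical values rather than code from this commit:

# Before formatting: single quotes, bare floats, no trailing comma.
bounds = {'lower': 0., 'upper': 1.}
names = [
    'train',
    'valid',
]

# After `ruff format`: double quotes, 0.0, and the trailing comma keeps
# the multi-line list expanded one element per line.
bounds = {"lower": 0.0, "upper": 1.0}
names = [
    "train",
    "valid",
]

In CI a commit like this is usually paired with a check-only invocation such as `ruff format --check`; the workflow change itself is not shown in this excerpt.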
@@ -23,14 +23,33 @@ except ImportError:
     pass
 
 
-_version_path = Path(__file__).absolute().parent / 'VERSION.txt'
+_version_path = Path(__file__).absolute().parent / "VERSION.txt"
 if _version_path.is_file():
-    __version__ = _version_path.read_text(encoding='utf-8').strip()
+    __version__ = _version_path.read_text(encoding="utf-8").strip()
 
-__all__ = ['Dataset', 'Booster', 'CVBooster', 'Sequence',
-           'register_logger',
-           'train', 'cv',
-           'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker',
-           'DaskLGBMRegressor', 'DaskLGBMClassifier', 'DaskLGBMRanker',
-           'log_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping', 'EarlyStopException',
-           'plot_importance', 'plot_split_value_histogram', 'plot_metric', 'plot_tree', 'create_tree_digraph']
+__all__ = [
+    "Dataset",
+    "Booster",
+    "CVBooster",
+    "Sequence",
+    "register_logger",
+    "train",
+    "cv",
+    "LGBMModel",
+    "LGBMRegressor",
+    "LGBMClassifier",
+    "LGBMRanker",
+    "DaskLGBMRegressor",
+    "DaskLGBMClassifier",
+    "DaskLGBMRanker",
+    "log_evaluation",
+    "record_evaluation",
+    "reset_parameter",
+    "early_stopping",
+    "EarlyStopException",
+    "plot_importance",
+    "plot_split_value_histogram",
+    "plot_metric",
+    "plot_tree",
+    "create_tree_digraph",
+]
File diff suppressed because it is too large
|
@ -18,21 +18,21 @@ if TYPE_CHECKING:
|
|||
from .engine import CVBooster
|
||||
|
||||
__all__ = [
|
||||
'EarlyStopException',
|
||||
'early_stopping',
|
||||
'log_evaluation',
|
||||
'record_evaluation',
|
||||
'reset_parameter',
|
||||
"EarlyStopException",
|
||||
"early_stopping",
|
||||
"log_evaluation",
|
||||
"record_evaluation",
|
||||
"reset_parameter",
|
||||
]
|
||||
|
||||
_EvalResultDict = Dict[str, Dict[str, List[Any]]]
|
||||
_EvalResultTuple = Union[
|
||||
_LGBM_BoosterEvalMethodResultType,
|
||||
_LGBM_BoosterEvalMethodResultWithStandardDeviationType
|
||||
_LGBM_BoosterEvalMethodResultWithStandardDeviationType,
|
||||
]
|
||||
_ListOfEvalResultTuples = Union[
|
||||
List[_LGBM_BoosterEvalMethodResultType],
|
||||
List[_LGBM_BoosterEvalMethodResultWithStandardDeviationType]
|
||||
List[_LGBM_BoosterEvalMethodResultWithStandardDeviationType],
|
||||
]
|
||||
|
||||
|
||||
|
@ -95,8 +95,8 @@ class _LogEvaluationCallback:
|
|||
|
||||
def __call__(self, env: CallbackEnv) -> None:
|
||||
if self.period > 0 and env.evaluation_result_list and (env.iteration + 1) % self.period == 0:
|
||||
result = '\t'.join([_format_eval_result(x, self.show_stdv) for x in env.evaluation_result_list])
|
||||
_log_info(f'[{env.iteration + 1}]\t{result}')
|
||||
result = "\t".join([_format_eval_result(x, self.show_stdv) for x in env.evaluation_result_list])
|
||||
_log_info(f"[{env.iteration + 1}]\t{result}")
|
||||
|
||||
|
||||
def log_evaluation(period: int = 1, show_stdv: bool = True) -> _LogEvaluationCallback:
|
||||
|
@ -133,7 +133,7 @@ class _RecordEvaluationCallback:
|
|||
self.before_iteration = False
|
||||
|
||||
if not isinstance(eval_result, dict):
|
||||
raise TypeError('eval_result should be a dictionary')
|
||||
raise TypeError("eval_result should be a dictionary")
|
||||
self.eval_result = eval_result
|
||||
|
||||
def _init(self, env: CallbackEnv) -> None:
|
||||
|
@ -152,8 +152,8 @@ class _RecordEvaluationCallback:
|
|||
if len(item) == 4:
|
||||
self.eval_result[data_name].setdefault(eval_name, [])
|
||||
else:
|
||||
self.eval_result[data_name].setdefault(f'{eval_name}-mean', [])
|
||||
self.eval_result[data_name].setdefault(f'{eval_name}-stdv', [])
|
||||
self.eval_result[data_name].setdefault(f"{eval_name}-mean", [])
|
||||
self.eval_result[data_name].setdefault(f"{eval_name}-stdv", [])
|
||||
|
||||
def __call__(self, env: CallbackEnv) -> None:
|
||||
if env.iteration == env.begin_iteration:
|
||||
|
@ -171,8 +171,8 @@ class _RecordEvaluationCallback:
|
|||
data_name, eval_name = item[1].split()
|
||||
res_mean = item[2]
|
||||
res_stdv = item[4] # type: ignore[misc]
|
||||
self.eval_result[data_name][f'{eval_name}-mean'].append(res_mean)
|
||||
self.eval_result[data_name][f'{eval_name}-stdv'].append(res_stdv)
|
||||
self.eval_result[data_name][f"{eval_name}-mean"].append(res_mean)
|
||||
self.eval_result[data_name][f"{eval_name}-stdv"].append(res_stdv)
|
||||
|
||||
|
||||
def record_evaluation(eval_result: Dict[str, Dict[str, List[Any]]]) -> Callable:
|
||||
|
@ -230,8 +230,10 @@ class _ResetParameterCallback:
|
|||
elif callable(value):
|
||||
new_param = value(env.iteration - env.begin_iteration)
|
||||
else:
|
||||
raise ValueError("Only list and callable values are supported "
|
||||
"as a mapping from boosting round index to new parameter value.")
|
||||
raise ValueError(
|
||||
"Only list and callable values are supported "
|
||||
"as a mapping from boosting round index to new parameter value."
|
||||
)
|
||||
if new_param != env.params.get(key, None):
|
||||
new_parameters[key] = new_param
|
||||
if new_parameters:
|
||||
|
@ -276,9 +278,8 @@ class _EarlyStoppingCallback:
|
|||
stopping_rounds: int,
|
||||
first_metric_only: bool = False,
|
||||
verbose: bool = True,
|
||||
min_delta: Union[float, List[float]] = 0.0
|
||||
min_delta: Union[float, List[float]] = 0.0,
|
||||
) -> None:
|
||||
|
||||
if not isinstance(stopping_rounds, int) or stopping_rounds <= 0:
|
||||
raise ValueError(f"stopping_rounds should be an integer and greater than 0. got: {stopping_rounds}")
|
||||
|
||||
|
@ -298,7 +299,7 @@ class _EarlyStoppingCallback:
|
|||
self.best_iter: List[int] = []
|
||||
self.best_score_list: List[_ListOfEvalResultTuples] = []
|
||||
self.cmp_op: List[Callable[[float, float], bool]] = []
|
||||
self.first_metric = ''
|
||||
self.first_metric = ""
|
||||
|
||||
def _gt_delta(self, curr_score: float, best_score: float, delta: float) -> bool:
|
||||
return curr_score > best_score + delta
|
||||
|
@ -321,29 +322,24 @@ class _EarlyStoppingCallback:
|
|||
|
||||
def _init(self, env: CallbackEnv) -> None:
|
||||
if env.evaluation_result_list is None or env.evaluation_result_list == []:
|
||||
raise ValueError(
|
||||
"For early stopping, at least one dataset and eval metric is required for evaluation"
|
||||
)
|
||||
raise ValueError("For early stopping, at least one dataset and eval metric is required for evaluation")
|
||||
|
||||
is_dart = any(env.params.get(alias, "") == 'dart' for alias in _ConfigAliases.get("boosting"))
|
||||
is_dart = any(env.params.get(alias, "") == "dart" for alias in _ConfigAliases.get("boosting"))
|
||||
if is_dart:
|
||||
self.enabled = False
|
||||
_log_warning('Early stopping is not available in dart mode')
|
||||
_log_warning("Early stopping is not available in dart mode")
|
||||
return
|
||||
|
||||
# validation sets are guaranteed to not be identical to the training data in cv()
|
||||
if isinstance(env.model, Booster):
|
||||
only_train_set = (
|
||||
len(env.evaluation_result_list) == 1
|
||||
and self._is_train_set(
|
||||
ds_name=env.evaluation_result_list[0][0],
|
||||
eval_name=env.evaluation_result_list[0][1].split(" ")[0],
|
||||
env=env
|
||||
)
|
||||
only_train_set = len(env.evaluation_result_list) == 1 and self._is_train_set(
|
||||
ds_name=env.evaluation_result_list[0][0],
|
||||
eval_name=env.evaluation_result_list[0][1].split(" ")[0],
|
||||
env=env,
|
||||
)
|
||||
if only_train_set:
|
||||
self.enabled = False
|
||||
_log_warning('Only training set found, disabling early stopping.')
|
||||
_log_warning("Only training set found, disabling early stopping.")
|
||||
return
|
||||
|
||||
if self.verbose:
|
||||
|
@ -355,26 +351,26 @@ class _EarlyStoppingCallback:
|
|||
n_datasets = len(env.evaluation_result_list) // n_metrics
|
||||
if isinstance(self.min_delta, list):
|
||||
if not all(t >= 0 for t in self.min_delta):
|
||||
raise ValueError('Values for early stopping min_delta must be non-negative.')
|
||||
raise ValueError("Values for early stopping min_delta must be non-negative.")
|
||||
if len(self.min_delta) == 0:
|
||||
if self.verbose:
|
||||
_log_info('Disabling min_delta for early stopping.')
|
||||
_log_info("Disabling min_delta for early stopping.")
|
||||
deltas = [0.0] * n_datasets * n_metrics
|
||||
elif len(self.min_delta) == 1:
|
||||
if self.verbose:
|
||||
_log_info(f'Using {self.min_delta[0]} as min_delta for all metrics.')
|
||||
_log_info(f"Using {self.min_delta[0]} as min_delta for all metrics.")
|
||||
deltas = self.min_delta * n_datasets * n_metrics
|
||||
else:
|
||||
if len(self.min_delta) != n_metrics:
|
||||
raise ValueError('Must provide a single value for min_delta or as many as metrics.')
|
||||
raise ValueError("Must provide a single value for min_delta or as many as metrics.")
|
||||
if self.first_metric_only and self.verbose:
|
||||
_log_info(f'Using only {self.min_delta[0]} as early stopping min_delta.')
|
||||
_log_info(f"Using only {self.min_delta[0]} as early stopping min_delta.")
|
||||
deltas = self.min_delta * n_datasets
|
||||
else:
|
||||
if self.min_delta < 0:
|
||||
raise ValueError('Early stopping min_delta must be non-negative.')
|
||||
raise ValueError("Early stopping min_delta must be non-negative.")
|
||||
if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and self.verbose:
|
||||
_log_info(f'Using {self.min_delta} as min_delta for all metrics.')
|
||||
_log_info(f"Using {self.min_delta} as min_delta for all metrics.")
|
||||
deltas = [self.min_delta] * n_datasets * n_metrics
|
||||
|
||||
# split is needed for "<dataset type> <metric>" case (e.g. "train l1")
|
||||
|
@ -382,18 +378,19 @@ class _EarlyStoppingCallback:
|
|||
for eval_ret, delta in zip(env.evaluation_result_list, deltas):
|
||||
self.best_iter.append(0)
|
||||
if eval_ret[3]: # greater is better
|
||||
self.best_score.append(float('-inf'))
|
||||
self.best_score.append(float("-inf"))
|
||||
self.cmp_op.append(partial(self._gt_delta, delta=delta))
|
||||
else:
|
||||
self.best_score.append(float('inf'))
|
||||
self.best_score.append(float("inf"))
|
||||
self.cmp_op.append(partial(self._lt_delta, delta=delta))
|
||||
|
||||
def _final_iteration_check(self, env: CallbackEnv, eval_name_splitted: List[str], i: int) -> None:
|
||||
if env.iteration == env.end_iteration - 1:
|
||||
if self.verbose:
|
||||
best_score_str = '\t'.join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
|
||||
_log_info('Did not meet early stopping. '
|
||||
f'Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}')
|
||||
best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
|
||||
_log_info(
|
||||
"Did not meet early stopping. " f"Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
|
||||
)
|
||||
if self.first_metric_only:
|
||||
_log_info(f"Evaluated only: {eval_name_splitted[-1]}")
|
||||
raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
|
||||
|
@ -409,7 +406,7 @@ class _EarlyStoppingCallback:
|
|||
"Please report it at https://github.com/microsoft/LightGBM/issues"
|
||||
)
|
||||
# self.best_score_list is initialized to an empty list
|
||||
first_time_updating_best_score_list = (self.best_score_list == [])
|
||||
first_time_updating_best_score_list = self.best_score_list == []
|
||||
for i in range(len(env.evaluation_result_list)):
|
||||
score = env.evaluation_result_list[i][2]
|
||||
if first_time_updating_best_score_list or self.cmp_op[i](score, self.best_score[i]):
|
||||
|
@ -426,12 +423,14 @@ class _EarlyStoppingCallback:
|
|||
if self._is_train_set(
|
||||
ds_name=env.evaluation_result_list[i][0],
|
||||
eval_name=eval_name_splitted[0],
|
||||
env=env
|
||||
env=env,
|
||||
):
|
||||
continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train)
|
||||
elif env.iteration - self.best_iter[i] >= self.stopping_rounds:
|
||||
if self.verbose:
|
||||
eval_result_str = '\t'.join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
|
||||
eval_result_str = "\t".join(
|
||||
[_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]]
|
||||
)
|
||||
_log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}")
|
||||
if self.first_metric_only:
|
||||
_log_info(f"Evaluated only: {eval_name_splitted[-1]}")
|
||||
|
@ -439,7 +438,12 @@ class _EarlyStoppingCallback:
|
|||
self._final_iteration_check(env, eval_name_splitted, i)
|
||||
|
||||
|
||||
def early_stopping(stopping_rounds: int, first_metric_only: bool = False, verbose: bool = True, min_delta: Union[float, List[float]] = 0.0) -> _EarlyStoppingCallback:
|
||||
def early_stopping(
|
||||
stopping_rounds: int,
|
||||
first_metric_only: bool = False,
|
||||
verbose: bool = True,
|
||||
min_delta: Union[float, List[float]] = 0.0,
|
||||
) -> _EarlyStoppingCallback:
|
||||
"""Create a callback that activates early stopping.
|
||||
|
||||
Activates early stopping.
|
||||
|
@ -473,4 +477,9 @@ def early_stopping(stopping_rounds: int, first_metric_only: bool = False, verbos
|
|||
callback : _EarlyStoppingCallback
|
||||
The callback that activates early stopping.
|
||||
"""
|
||||
return _EarlyStoppingCallback(stopping_rounds=stopping_rounds, first_metric_only=first_metric_only, verbose=verbose, min_delta=min_delta)
|
||||
return _EarlyStoppingCallback(
|
||||
stopping_rounds=stopping_rounds,
|
||||
first_metric_only=first_metric_only,
|
||||
verbose=verbose,
|
||||
min_delta=min_delta,
|
||||
)
|
||||
|
|
@@ -8,6 +8,7 @@ try:
     from pandas import DataFrame as pd_DataFrame
     from pandas import Series as pd_Series
     from pandas import concat
+
     try:
         from pandas import CategoricalDtype as pd_CategoricalDtype
     except ImportError:
@@ -40,15 +41,18 @@ except ImportError:
 try:
     from numpy.random import Generator as np_random_Generator
 except ImportError:
+
     class np_random_Generator:  # type: ignore
         """Dummy class for np.random.Generator."""
 
         def __init__(self, *args, **kwargs):
             pass
 
+
 """matplotlib"""
 try:
     import matplotlib  # noqa: F401
+
     MATPLOTLIB_INSTALLED = True
 except ImportError:
     MATPLOTLIB_INSTALLED = False
@@ -56,6 +60,7 @@ except ImportError:
 """graphviz"""
 try:
     import graphviz  # noqa: F401
+
     GRAPHVIZ_INSTALLED = True
 except ImportError:
     GRAPHVIZ_INSTALLED = False
@@ -63,6 +68,7 @@ except ImportError:
 """datatable"""
 try:
     import datatable
+
     if hasattr(datatable, "Frame"):
         dt_DataTable = datatable.Frame
     else:
@@ -85,6 +91,7 @@ try:
     from sklearn.utils.class_weight import compute_sample_weight
     from sklearn.utils.multiclass import check_classification_targets
     from sklearn.utils.validation import assert_all_finite, check_array, check_X_y
+
     try:
         from sklearn.exceptions import NotFittedError
         from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold
@@ -155,6 +162,7 @@ try:
     from dask.dataframe import DataFrame as dask_DataFrame
     from dask.dataframe import Series as dask_Series
     from dask.distributed import Client, Future, default_client, wait
+
     DASK_INSTALLED = True
 except ImportError:
     DASK_INSTALLED = False
@@ -195,6 +203,7 @@ except ImportError:
         def __init__(self, *args, **kwargs):
             pass
 
+
 """pyarrow"""
 try:
     import pyarrow.compute as pa_compute
@@ -205,6 +214,7 @@ try:
     from pyarrow.cffi import ffi as arrow_cffi
     from pyarrow.types import is_floating as arrow_is_floating
     from pyarrow.types import is_integer as arrow_is_integer
+
     PYARROW_INSTALLED = True
 except ImportError:
     PYARROW_INSTALLED = False
@@ -266,4 +276,5 @@ except ImportError:
     def _LGBMCpuCount(only_physical_cores: bool = True) -> int:
         return cpu_count()
 
+
 __all__: List[str] = []
@@ -51,9 +51,9 @@ from .sklearn import (
 )
 
 __all__ = [
-    'DaskLGBMClassifier',
-    'DaskLGBMRanker',
-    'DaskLGBMRegressor',
+    "DaskLGBMClassifier",
+    "DaskLGBMRanker",
+    "DaskLGBMRegressor",
 ]
 
 _DaskCollection = Union[dask_Array, dask_DataFrame, dask_Series]
@@ -67,7 +67,7 @@ class _RemoteSocket:
     def acquire(self) -> int:
         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        self.socket.bind(('', 0))
+        self.socket.bind(("", 0))
         return self.socket.getsockname()[1]
 
     def release(self) -> None:
@@ -153,9 +153,11 @@ def _concat(seq: List[_DaskPart]) -> _DaskPart:
     elif isinstance(seq[0], (pd_DataFrame, pd_Series)):
         return concat(seq, axis=0)
     elif isinstance(seq[0], ss.spmatrix):
-        return ss.vstack(seq, format='csr')
+        return ss.vstack(seq, format="csr")
     else:
-        raise TypeError(f'Data must be one of: numpy arrays, pandas dataframes, sparse matrices (from scipy). Got {type(seq[0]).__name__}.')
+        raise TypeError(
+            f"Data must be one of: numpy arrays, pandas dataframes, sparse matrices (from scipy). Got {type(seq[0]).__name__}."
+        )
 
 
 def _remove_list_padding(*args: Any) -> List[List[Any]]:
@@ -186,41 +188,41 @@ def _train_part(
     return_model: bool,
     time_out: int,
     remote_socket: _RemoteSocket,
-    **kwargs: Any
+    **kwargs: Any,
 ) -> Optional[LGBMModel]:
     network_params = {
-        'machines': machines,
-        'local_listen_port': local_listen_port,
-        'time_out': time_out,
-        'num_machines': num_machines
+        "machines": machines,
+        "local_listen_port": local_listen_port,
+        "time_out": time_out,
+        "num_machines": num_machines,
     }
     params.update(network_params)
 
     is_ranker = issubclass(model_factory, LGBMRanker)
 
     # Concatenate many parts into one
-    data = _concat([x['data'] for x in list_of_parts])
-    label = _concat([x['label'] for x in list_of_parts])
+    data = _concat([x["data"] for x in list_of_parts])
+    label = _concat([x["label"] for x in list_of_parts])
 
-    if 'weight' in list_of_parts[0]:
-        weight = _concat([x['weight'] for x in list_of_parts])
+    if "weight" in list_of_parts[0]:
+        weight = _concat([x["weight"] for x in list_of_parts])
     else:
         weight = None
 
-    if 'group' in list_of_parts[0]:
-        group = _concat([x['group'] for x in list_of_parts])
+    if "group" in list_of_parts[0]:
+        group = _concat([x["group"] for x in list_of_parts])
     else:
         group = None
 
-    if 'init_score' in list_of_parts[0]:
-        init_score = _concat([x['init_score'] for x in list_of_parts])
+    if "init_score" in list_of_parts[0]:
+        init_score = _concat([x["init_score"] for x in list_of_parts])
     else:
         init_score = None
 
     # construct local eval_set data.
-    n_evals = max(len(x.get('eval_set', [])) for x in list_of_parts)
-    eval_names = kwargs.pop('eval_names', None)
-    eval_class_weight = kwargs.get('eval_class_weight')
+    n_evals = max(len(x.get("eval_set", [])) for x in list_of_parts)
+    eval_names = kwargs.pop("eval_names", None)
+    eval_class_weight = kwargs.get("eval_class_weight")
     local_eval_set = None
     local_eval_names = None
     local_eval_sample_weight = None
@@ -228,8 +230,8 @@ def _train_part(
     local_eval_group = None
 
     if n_evals:
-        has_eval_sample_weight = any(x.get('eval_sample_weight') is not None for x in list_of_parts)
-        has_eval_init_score = any(x.get('eval_init_score') is not None for x in list_of_parts)
+        has_eval_sample_weight = any(x.get("eval_sample_weight") is not None for x in list_of_parts)
+        has_eval_init_score = any(x.get("eval_init_score") is not None for x in list_of_parts)
 
         local_eval_set = []
         evals_result_names = []
@@ -251,7 +253,7 @@ def _train_part(
             init_score_e = []
             g_e = []
             for part in list_of_parts:
-                if not part.get('eval_set'):
+                if not part.get("eval_set"):
                     continue
 
                 # require that eval_name exists in evaluated result data in case dropped due to padding.
@@ -259,12 +261,12 @@ def _train_part(
                 if eval_names:
                     evals_result_name = eval_names[i]
                 else:
-                    evals_result_name = f'valid_{i}'
+                    evals_result_name = f"valid_{i}"
 
-                eval_set = part['eval_set'][i]
+                eval_set = part["eval_set"][i]
                 if eval_set is _DatasetNames.TRAINSET:
-                    x_e.append(part['data'])
-                    y_e.append(part['label'])
+                    x_e.append(part["data"])
+                    y_e.append(part["label"])
                 else:
                     x_e.extend(eval_set[0])
                     y_e.extend(eval_set[1])
@@ -272,24 +274,24 @@ def _train_part(
                 if evals_result_name not in evals_result_names:
                     evals_result_names.append(evals_result_name)
 
-                eval_weight = part.get('eval_sample_weight')
+                eval_weight = part.get("eval_sample_weight")
                 if eval_weight:
                     if eval_weight[i] is _DatasetNames.SAMPLE_WEIGHT:
-                        w_e.append(part['weight'])
+                        w_e.append(part["weight"])
                     else:
                         w_e.extend(eval_weight[i])
 
-                eval_init_score = part.get('eval_init_score')
+                eval_init_score = part.get("eval_init_score")
                 if eval_init_score:
                     if eval_init_score[i] is _DatasetNames.INIT_SCORE:
-                        init_score_e.append(part['init_score'])
+                        init_score_e.append(part["init_score"])
                     else:
                         init_score_e.extend(eval_init_score[i])
 
-                eval_group = part.get('eval_group')
+                eval_group = part.get("eval_group")
                 if eval_group:
                     if eval_group[i] is _DatasetNames.GROUP:
-                        g_e.append(part['group'])
+                        g_e.append(part["group"])
                     else:
                         g_e.extend(eval_group[i])
 
@@ -313,7 +315,7 @@ def _train_part(
         if eval_names:
             local_eval_names = [eval_names[i] for i in eval_component_idx]
         if eval_class_weight:
-            kwargs['eval_class_weight'] = [eval_class_weight[i] for i in eval_component_idx]
+            kwargs["eval_class_weight"] = [eval_class_weight[i] for i in eval_component_idx]
 
     model = model_factory(**params)
     if remote_socket is not None:
@@ -331,7 +333,7 @@ def _train_part(
                 eval_init_score=local_eval_init_score,
                 eval_group=local_eval_group,
                 eval_names=local_eval_names,
-                **kwargs
+                **kwargs,
             )
         else:
             model.fit(
@@ -343,7 +345,7 @@ def _train_part(
                 eval_sample_weight=local_eval_sample_weight,
                 eval_init_score=local_eval_init_score,
                 eval_names=local_eval_names,
-                **kwargs
+                **kwargs,
             )
 
     finally:
@@ -389,7 +391,9 @@ def _machines_to_worker_map(machines: str, worker_addresses: Iterable[str]) -> D
     machine_addresses = machines.split(",")
 
     if len(set(machine_addresses)) != len(machine_addresses):
-        raise ValueError(f"Found duplicates in 'machines' ({machines}). Each entry in 'machines' must be a unique IP-port combination.")
+        raise ValueError(
+            f"Found duplicates in 'machines' ({machines}). Each entry in 'machines' must be a unique IP-port combination."
+        )
 
     machine_to_port = defaultdict(set)
     for address in machine_addresses:
@@ -423,7 +427,7 @@ def _train(
     eval_group: Optional[List[_DaskVectorLike]] = None,
     eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
     eval_at: Optional[Union[List[int], Tuple[int, ...]]] = None,
-    **kwargs: Any
+    **kwargs: Any,
 ) -> LGBMModel:
     """Inner train routine.
 
@@ -512,36 +516,34 @@ def _train(
     params = deepcopy(params)
 
     # capture whether local_listen_port or its aliases were provided
-    listen_port_in_params = any(
-        alias in params for alias in _ConfigAliases.get("local_listen_port")
-    )
+    listen_port_in_params = any(alias in params for alias in _ConfigAliases.get("local_listen_port"))
 
     # capture whether machines or its aliases were provided
-    machines_in_params = any(
-        alias in params for alias in _ConfigAliases.get("machines")
-    )
+    machines_in_params = any(alias in params for alias in _ConfigAliases.get("machines"))
 
     params = _choose_param_value(
         main_param_name="tree_learner",
         params=params,
-        default_value="data"
+        default_value="data",
     )
     allowed_tree_learners = {
-        'data',
-        'data_parallel',
-        'feature',
-        'feature_parallel',
-        'voting',
-        'voting_parallel'
+        "data",
+        "data_parallel",
+        "feature",
+        "feature_parallel",
+        "voting",
+        "voting_parallel",
     }
     if params["tree_learner"] not in allowed_tree_learners:
-        _log_warning(f'Parameter tree_learner set to {params["tree_learner"]}, which is not allowed. Using "data" as default')
-        params['tree_learner'] = 'data'
+        _log_warning(
+            f'Parameter tree_learner set to {params["tree_learner"]}, which is not allowed. Using "data" as default'
+        )
+        params["tree_learner"] = "data"
 
     # Some passed-in parameters can be removed:
     # * 'num_machines': set automatically from Dask worker list
     # * 'num_threads': overridden to match nthreads on each Dask process
-    for param_alias in _ConfigAliases.get('num_machines', 'num_threads'):
+    for param_alias in _ConfigAliases.get("num_machines", "num_threads"):
         if param_alias in params:
             _log_warning(f"Parameter {param_alias} will be ignored.")
             params.pop(param_alias)
@@ -549,23 +551,23 @@ def _train(
     # Split arrays/dataframes into parts. Arrange parts into dicts to enforce co-locality
     data_parts = _split_to_parts(data=data, is_matrix=True)
     label_parts = _split_to_parts(data=label, is_matrix=False)
-    parts = [{'data': x, 'label': y} for (x, y) in zip(data_parts, label_parts)]
+    parts = [{"data": x, "label": y} for (x, y) in zip(data_parts, label_parts)]
     n_parts = len(parts)
 
     if sample_weight is not None:
         weight_parts = _split_to_parts(data=sample_weight, is_matrix=False)
         for i in range(n_parts):
-            parts[i]['weight'] = weight_parts[i]
+            parts[i]["weight"] = weight_parts[i]
 
     if group is not None:
         group_parts = _split_to_parts(data=group, is_matrix=False)
         for i in range(n_parts):
-            parts[i]['group'] = group_parts[i]
+            parts[i]["group"] = group_parts[i]
 
     if init_score is not None:
         init_score_parts = _split_to_parts(data=init_score, is_matrix=False)
         for i in range(n_parts):
-            parts[i]['init_score'] = init_score_parts[i]
+            parts[i]["init_score"] = init_score_parts[i]
 
     # evals_set will to be re-constructed into smaller lists of (X, y) tuples, where
     # X and y are each delayed sub-lists of original eval dask Collections.
@@ -575,47 +577,16 @@ def _train(
         n_largest_eval_parts = max(x[0].npartitions for x in eval_set)
 
         eval_sets: Dict[
-            int,
-            List[
-                Union[
-                    _DatasetNames,
-                    Tuple[
-                        List[Optional[_DaskMatrixLike]],
-                        List[Optional[_DaskVectorLike]]
-                    ]
-                ]
-            ]
+            int, List[Union[_DatasetNames, Tuple[List[Optional[_DaskMatrixLike]], List[Optional[_DaskVectorLike]]]]]
         ] = defaultdict(list)
         if eval_sample_weight:
-            eval_sample_weights: Dict[
-                int,
-                List[
-                    Union[
-                        _DatasetNames,
-                        List[Optional[_DaskVectorLike]]
-                    ]
-                ]
-            ] = defaultdict(list)
+            eval_sample_weights: Dict[int, List[Union[_DatasetNames, List[Optional[_DaskVectorLike]]]]] = defaultdict(
+                list
+            )
         if eval_group:
-            eval_groups: Dict[
-                int,
-                List[
-                    Union[
-                        _DatasetNames,
-                        List[Optional[_DaskVectorLike]]
-                    ]
-                ]
-            ] = defaultdict(list)
+            eval_groups: Dict[int, List[Union[_DatasetNames, List[Optional[_DaskVectorLike]]]]] = defaultdict(list)
        if eval_init_score:
-            eval_init_scores: Dict[
-                int,
-                List[
-                    Union[
-                        _DatasetNames,
-                        List[Optional[_DaskMatrixLike]]
-                    ]
-                ]
-            ] = defaultdict(list)
+            eval_init_scores: Dict[int, List[Union[_DatasetNames, List[Optional[_DaskMatrixLike]]]]] = defaultdict(list)
 
         for i, (X_eval, y_eval) in enumerate(eval_set):
            n_this_eval_parts = X_eval.npartitions
@@ -704,13 +675,13 @@ def _train(
 
         # assign sub-eval_set components to worker parts.
         for parts_idx, e_set in eval_sets.items():
-            parts[parts_idx]['eval_set'] = e_set
+            parts[parts_idx]["eval_set"] = e_set
             if eval_sample_weight:
-                parts[parts_idx]['eval_sample_weight'] = eval_sample_weights[parts_idx]
+                parts[parts_idx]["eval_sample_weight"] = eval_sample_weights[parts_idx]
             if eval_init_score:
-                parts[parts_idx]['eval_init_score'] = eval_init_scores[parts_idx]
+                parts[parts_idx]["eval_init_score"] = eval_init_scores[parts_idx]
             if eval_group:
-                parts[parts_idx]['eval_group'] = eval_groups[parts_idx]
+                parts[parts_idx]["eval_group"] = eval_groups[parts_idx]
 
     # Start computation in the background
     parts = list(map(delayed, parts))
@@ -718,7 +689,7 @@ def _train(
     wait(parts)
 
     for part in parts:
-        if part.status == 'error':  # type: ignore
+        if part.status == "error":  # type: ignore
             # trigger error locally
             return part  # type: ignore[return-value]
 
@@ -735,7 +706,7 @@ def _train(
     for worker in worker_map:
         has_eval_set = False
         for part in worker_map[worker]:
-            if 'eval_set' in part.result():  # type: ignore[attr-defined]
+            if "eval_set" in part.result():  # type: ignore[attr-defined]
                 has_eval_set = True
                 break
 
@@ -747,13 +718,13 @@ def _train(
 
     # assign general validation set settings to fit kwargs.
     if eval_names:
-        kwargs['eval_names'] = eval_names
+        kwargs["eval_names"] = eval_names
     if eval_class_weight:
-        kwargs['eval_class_weight'] = eval_class_weight
+        kwargs["eval_class_weight"] = eval_class_weight
     if eval_metric:
-        kwargs['eval_metric'] = eval_metric
+        kwargs["eval_metric"] = eval_metric
     if eval_at:
-        kwargs['eval_at'] = eval_at
+        kwargs["eval_at"] = eval_at
 
     master_worker = next(iter(worker_map))
     worker_ncores = client.ncores()
@@ -763,14 +734,14 @@ def _train(
     params = _choose_param_value(
         main_param_name="local_listen_port",
         params=params,
-        default_value=12400
+        default_value=12400,
     )
     local_listen_port = params.pop("local_listen_port")
 
     params = _choose_param_value(
         main_param_name="machines",
         params=params,
-        default_value=None
+        default_value=None,
     )
     machines = params.pop("machines")
 
@@ -781,7 +752,7 @@ def _train(
         _log_info("Using passed-in 'machines' parameter")
         worker_address_to_port = _machines_to_worker_map(
             machines=machines,
-            worker_addresses=worker_addresses
+            worker_addresses=worker_addresses,
         )
     else:
         if listen_port_in_params:
@@ -795,19 +766,16 @@ def _train(
             )
            raise LightGBMError(msg)
 
-        worker_address_to_port = {
-            address: local_listen_port
-            for address in worker_addresses
-        }
+        worker_address_to_port = {address: local_listen_port for address in worker_addresses}
     else:
         _log_info("Finding random open ports for workers")
-        worker_to_socket_future, worker_address_to_port = _assign_open_ports_to_workers(client, list(worker_map.keys()))
+        worker_to_socket_future, worker_address_to_port = _assign_open_ports_to_workers(
+            client, list(worker_map.keys())
+        )
 
-    machines = ','.join([
-        f'{urlparse(worker_address).hostname}:{port}'
-        for worker_address, port
-        in worker_address_to_port.items()
-    ])
+    machines = ",".join(
+        [f"{urlparse(worker_address).hostname}:{port}" for worker_address, port in worker_address_to_port.items()]
+    )
 
     num_machines = len(worker_address_to_port)
 
@@ -823,18 +791,18 @@ def _train(
         client.submit(
             _train_part,
             model_factory=model_factory,
-            params={**params, 'num_threads': worker_ncores[worker]},
+            params={**params, "num_threads": worker_ncores[worker]},
             list_of_parts=list_of_parts,
             machines=machines,
             local_listen_port=worker_address_to_port[worker],
             num_machines=num_machines,
-            time_out=params.get('time_out', 120),
+            time_out=params.get("time_out", 120),
             remote_socket=worker_to_socket_future.get(worker, None),
             return_model=(worker == master_worker),
             workers=[worker],
             allow_other_workers=False,
             pure=False,
-            **kwargs
+            **kwargs,
         )
         for worker, list_of_parts in worker_map.items()
     ]
@@ -848,14 +816,14 @@ def _train(
    # on the Dask cluster you're connected to and which workers have pieces of
     # the training data
     if not listen_port_in_params:
-        for param in _ConfigAliases.get('local_listen_port'):
+        for param in _ConfigAliases.get("local_listen_port"):
            model._other_params.pop(param, None)
 
     if not machines_in_params:
-        for param in _ConfigAliases.get('machines'):
+        for param in _ConfigAliases.get("machines"):
             model._other_params.pop(param, None)
 
-    for param in _ConfigAliases.get('num_machines', 'timeout'):
+    for param in _ConfigAliases.get("num_machines", "timeout"):
         model._other_params.pop(param, None)
 
     return model
@@ -868,9 +836,8 @@ def _predict_part(
     pred_proba: bool,
     pred_leaf: bool,
     pred_contrib: bool,
-    **kwargs: Any
+    **kwargs: Any,
 ) -> _DaskPart:
-
     result: _DaskPart
     if part.shape[0] == 0:
         result = np.array([])
@@ -880,7 +847,7 @@ def _predict_part(
             raw_score=raw_score,
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
-            **kwargs
+            **kwargs,
         )
     else:
         result = model.predict(
@@ -888,7 +855,7 @@ def _predict_part(
             raw_score=raw_score,
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
-            **kwargs
+            **kwargs,
         )
 
     # dask.DataFrame.map_partitions() expects each call to return a pandas DataFrame or Series
@@ -896,7 +863,7 @@ def _predict_part(
         if len(result.shape) == 2:
             result = pd_DataFrame(result, index=part.index)
         else:
-            result = pd_Series(result, index=part.index, name='predictions')
+            result = pd_Series(result, index=part.index, name="predictions")
 
     return result
 
@@ -910,7 +877,7 @@ def _predict(
     pred_leaf: bool = False,
     pred_contrib: bool = False,
     dtype: _PredictionDtype = np.float32,
-    **kwargs: Any
+    **kwargs: Any,
 ) -> Union[dask_Array, List[dask_Array]]:
     """Inner predict routine.
 
@@ -943,7 +910,7 @@ def _predict(
         If ``pred_contrib=True``, the feature contributions for each sample.
     """
     if not all((DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED)):
-        raise LightGBMError('dask, pandas and scikit-learn are required for lightgbm.dask')
+        raise LightGBMError("dask, pandas and scikit-learn are required for lightgbm.dask")
     if isinstance(data, dask_DataFrame):
         return data.map_partitions(
             _predict_part,
@@ -952,19 +919,14 @@ def _predict(
             pred_proba=pred_proba,
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
-            **kwargs
+            **kwargs,
         ).values
     elif isinstance(data, dask_Array):
         # for multi-class classification with sparse matrices, pred_contrib predictions
         # are returned as a list of sparse matrices (one per class)
         num_classes = model._n_classes
 
-        if (
-            num_classes > 2
-            and pred_contrib
-            and isinstance(data._meta, ss.spmatrix)
-        ):
-
+        if num_classes > 2 and pred_contrib and isinstance(data._meta, ss.spmatrix):
             predict_function = partial(
                 _predict_part,
                 model=model,
@@ -972,7 +934,7 @@ def _predict(
                 pred_proba=pred_proba,
                 pred_leaf=False,
                 pred_contrib=True,
-                **kwargs
+                **kwargs,
             )
 
             delayed_chunks = data.to_delayed()
@@ -999,16 +961,16 @@ def _predict(
                     part = dask_array_from_delayed(
                         value=_extract(partition, i),
                         shape=(nrows_per_chunk[j], num_cols),
-                        meta=pred_meta
+                        meta=pred_meta,
                     )
                     out[i].append(part)
 
             # by default, dask.array.concatenate() concatenates sparse arrays into a COO matrix
             # the code below is used instead to ensure that the sparse type is preserved during concatentation
             if isinstance(pred_meta, ss.csr_matrix):
-                concat_fn = partial(ss.vstack, format='csr')
+                concat_fn = partial(ss.vstack, format="csr")
             elif isinstance(pred_meta, ss.csc_matrix):
-                concat_fn = partial(ss.vstack, format='csc')
+                concat_fn = partial(ss.vstack, format="csc")
             else:
                 concat_fn = ss.vstack
 
@@ -1020,7 +982,7 @@ def _predict(
                 dask_array_from_delayed(
                     value=delayed(concat_fn)(out[i]),
                     shape=(data.shape[0], num_cols),
-                    meta=pred_meta
+                    meta=pred_meta,
                 )
             )
 
@@ -1042,7 +1004,7 @@ def _predict(
         if len(pred_row.shape) > 1:
             chunks += (pred_row.shape[1],)
         else:
-            map_blocks_kwargs['drop_axis'] = 1
+            map_blocks_kwargs["drop_axis"] = 1
         return data.map_blocks(
             predict_fn,
             chunks=chunks,
@@ -1051,11 +1013,10 @@ def _predict(
             **map_blocks_kwargs,
         )
     else:
-        raise TypeError(f'Data must be either Dask Array or Dask DataFrame. Got {type(data).__name__}.')
+        raise TypeError(f"Data must be either Dask Array or Dask DataFrame. Got {type(data).__name__}.")
 
 
 class _DaskLGBMModel:
-
     @property
     def client_(self) -> Client:
         """:obj:`dask.distributed.Client`: Dask client.
@@ -1064,7 +1025,7 @@ class _DaskLGBMModel:
         with ``model.set_params(client=client)``.
         """
         if not getattr(self, "fitted_", False):
-            raise LGBMNotFittedError('Cannot access property client_ before calling fit().')
+            raise LGBMNotFittedError("Cannot access property client_ before calling fit().")
 
         return _get_dask_client(client=self.client)
 
@@ -1093,12 +1054,12 @@ class _DaskLGBMModel:
         eval_group: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
         eval_at: Optional[Union[List[int], Tuple[int, ...]]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "_DaskLGBMModel":
         if not DASK_INSTALLED:
-            raise LightGBMError('dask is required for lightgbm.dask')
+            raise LightGBMError("dask is required for lightgbm.dask")
         if not all((DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED)):
-            raise LightGBMError('dask, pandas and scikit-learn are required for lightgbm.dask')
+            raise LightGBMError("dask, pandas and scikit-learn are required for lightgbm.dask")
 
         params = self.get_params(True)  # type: ignore[attr-defined]
         params.pop("client", None)
@@ -1120,7 +1081,7 @@ class _DaskLGBMModel:
             eval_group=eval_group,
             eval_metric=eval_metric,
             eval_at=eval_at,
-            **kwargs
+            **kwargs,
         )
 
         self.set_params(**model.get_params())  # type: ignore[attr-defined]
@@ -1137,7 +1098,10 @@ class _DaskLGBMModel:
         return model
 
     @staticmethod
-    def _lgb_dask_copy_extra_params(source: Union["_DaskLGBMModel", LGBMModel], dest: Union["_DaskLGBMModel", LGBMModel]) -> None:
+    def _lgb_dask_copy_extra_params(
+        source: Union["_DaskLGBMModel", LGBMModel],
+        dest: Union["_DaskLGBMModel", LGBMModel],
+    ) -> None:
         params = source.get_params()  # type: ignore[union-attr]
         attributes = source.__dict__
         extra_param_names = set(attributes.keys()).difference(params.keys())
@@ -1150,7 +1114,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
 
     def __init__(
         self,
-        boosting_type: str = 'gbdt',
+        boosting_type: str = "gbdt",
         num_leaves: int = 31,
         max_depth: int = -1,
         learning_rate: float = 0.1,
@@ -1158,19 +1122,19 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
         subsample_for_bin: int = 200000,
         objective: Optional[Union[str, _LGBM_ScikitCustomObjectiveFunction]] = None,
         class_weight: Optional[Union[dict, str]] = None,
-        min_split_gain: float = 0.,
+        min_split_gain: float = 0.0,
         min_child_weight: float = 1e-3,
         min_child_samples: int = 20,
-        subsample: float = 1.,
+        subsample: float = 1.0,
         subsample_freq: int = 0,
-        colsample_bytree: float = 1.,
-        reg_alpha: float = 0.,
-        reg_lambda: float = 0.,
-        random_state: Optional[Union[int, np.random.RandomState, 'np.random.Generator']] = None,
+        colsample_bytree: float = 1.0,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+        random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
         n_jobs: Optional[int] = None,
-        importance_type: str = 'split',
+        importance_type: str = "split",
         client: Optional[Client] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ):
         """Docstring is inherited from the lightgbm.LGBMClassifier.__init__."""
         self.client = client
@@ -1194,11 +1158,11 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
             random_state=random_state,
             n_jobs=n_jobs,
             importance_type=importance_type,
-            **kwargs
+            **kwargs,
         )
 
     _base_doc = LGBMClassifier.__init__.__doc__
-    _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition('**kwargs')  # type: ignore
+    _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs")  # type: ignore
     __init__.__doc__ = f"""
         {_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
         {' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
@@ -1220,7 +1184,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
         eval_class_weight: Optional[List[Union[dict, str]]] = None,
         eval_init_score: Optional[List[_DaskCollection]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "DaskLGBMClassifier":
         """Docstring is inherited from the lightgbm.LGBMClassifier.fit."""
         self._lgb_dask_fit(
@@ -1235,7 +1199,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
             eval_class_weight=eval_class_weight,
             eval_init_score=eval_init_score,
             eval_metric=eval_metric,
-            **kwargs
+            **kwargs,
         )
         return self
 
@@ -1247,15 +1211,13 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
         group_shape="Dask Array or Dask Series or None, optional (default=None)",
         eval_sample_weight_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
         eval_init_score_shape="list of Dask Array, Dask Series or Dask DataFrame (for multi-class task), or None, optional (default=None)",
-        eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)"
+        eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
     )
 
     # DaskLGBMClassifier does not support group, eval_group.
-    _base_doc = (_base_doc[:_base_doc.find('group :')]
-                 + _base_doc[_base_doc.find('eval_set :'):])
+    _base_doc = _base_doc[: _base_doc.find("group :")] + _base_doc[_base_doc.find("eval_set :") :]
 
-    _base_doc = (_base_doc[:_base_doc.find('eval_group :')]
-                 + _base_doc[_base_doc.find('eval_metric :'):])
+    _base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
 
     # DaskLGBMClassifier support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
@@ -1278,7 +1240,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
         pred_leaf: bool = False,
         pred_contrib: bool = False,
         validate_features: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMClassifier.predict."""
         return _predict(
@@ -1292,7 +1254,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
             validate_features=validate_features,
-            **kwargs
+            **kwargs,
         )
 
     predict.__doc__ = _lgbmmodel_doc_predict.format(
@@ -1301,7 +1263,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
         output_name="predicted_result",
         predicted_result_shape="Dask Array of shape = [n_samples] or shape = [n_samples, n_classes]",
         X_leaves_shape="Dask Array of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
-        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or (if multi-class and using sparse inputs) a list of ``n_classes`` Dask Arrays of shape = [n_samples, n_features + 1]"
+        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or (if multi-class and using sparse inputs) a list of ``n_classes`` Dask Arrays of shape = [n_samples, n_features + 1]",
     )
 
     def predict_proba(
@@ -1313,7 +1275,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
         pred_leaf: bool = False,
         pred_contrib: bool = False,
         validate_features: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMClassifier.predict_proba."""
         return _predict(
@@ -1327,7 +1289,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
             validate_features=validate_features,
-            **kwargs
+            **kwargs,
        )
 
     predict_proba.__doc__ = _lgbmmodel_doc_predict.format(
@@ -1336,7 +1298,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
         output_name="predicted_probability",
         predicted_result_shape="Dask Array of shape = [n_samples] or shape = [n_samples, n_classes]",
         X_leaves_shape="Dask Array of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
-        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or (if multi-class and using sparse inputs) a list of ``n_classes`` Dask Arrays of shape = [n_samples, n_features + 1]"
+        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or (if multi-class and using sparse inputs) a list of ``n_classes`` Dask Arrays of shape = [n_samples, n_features + 1]",
     )
 
     def to_local(self) -> LGBMClassifier:
@@ -1355,7 +1317,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
 
     def __init__(
         self,
-        boosting_type: str = 'gbdt',
+        boosting_type: str = "gbdt",
         num_leaves: int = 31,
         max_depth: int = -1,
         learning_rate: float = 0.1,
@@ -1363,19 +1325,19 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
         subsample_for_bin: int = 200000,
         objective: Optional[Union[str, _LGBM_ScikitCustomObjectiveFunction]] = None,
         class_weight: Optional[Union[dict, str]] = None,
-        min_split_gain: float = 0.,
+        min_split_gain: float = 0.0,
         min_child_weight: float = 1e-3,
         min_child_samples: int = 20,
-        subsample: float = 1.,
+        subsample: float = 1.0,
         subsample_freq: int = 0,
-        colsample_bytree: float = 1.,
-        reg_alpha: float = 0.,
-        reg_lambda: float = 0.,
-        random_state: Optional[Union[int, np.random.RandomState, 'np.random.Generator']] = None,
+        colsample_bytree: float = 1.0,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+        random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
         n_jobs: Optional[int] = None,
-        importance_type: str = 'split',
+        importance_type: str = "split",
         client: Optional[Client] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ):
         """Docstring is inherited from the lightgbm.LGBMRegressor.__init__."""
         self.client = client
@@ -1399,11 +1361,11 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
             random_state=random_state,
             n_jobs=n_jobs,
             importance_type=importance_type,
-            **kwargs
+            **kwargs,
        )
 
     _base_doc = LGBMRegressor.__init__.__doc__
-    _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition('**kwargs')  # type: ignore
+    _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs")  # type: ignore
     __init__.__doc__ = f"""
         {_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
        {' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
@@ -1424,7 +1386,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
         eval_sample_weight: Optional[List[_DaskVectorLike]] = None,
         eval_init_score: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "DaskLGBMRegressor":
         """Docstring is inherited from the lightgbm.LGBMRegressor.fit."""
         self._lgb_dask_fit(
@@ -1438,7 +1400,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
             eval_sample_weight=eval_sample_weight,
             eval_init_score=eval_init_score,
             eval_metric=eval_metric,
-            **kwargs
+            **kwargs,
         )
         return self
 
@@ -1450,18 +1412,15 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
         group_shape="Dask Array or Dask Series or None, optional (default=None)",
         eval_sample_weight_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
         eval_init_score_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
-        eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)"
+        eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
     )
 
     # DaskLGBMRegressor does not support group, eval_class_weight, eval_group.
-    _base_doc = (_base_doc[:_base_doc.find('group :')]
-                 + _base_doc[_base_doc.find('eval_set :'):])
+    _base_doc = _base_doc[: _base_doc.find("group :")] + _base_doc[_base_doc.find("eval_set :") :]
 
-    _base_doc = (_base_doc[:_base_doc.find('eval_class_weight :')]
-                 + _base_doc[_base_doc.find('eval_init_score :'):])
+    _base_doc = _base_doc[: _base_doc.find("eval_class_weight :")] + _base_doc[_base_doc.find("eval_init_score :") :]
 
-    _base_doc = (_base_doc[:_base_doc.find('eval_group :')]
-                 + _base_doc[_base_doc.find('eval_metric :'):])
+    _base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
 
     # DaskLGBMRegressor support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
@@ -1484,7 +1443,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
         pred_leaf: bool = False,
         pred_contrib: bool = False,
         validate_features: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMRegressor.predict."""
         return _predict(
@@ -1497,7 +1456,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
             validate_features=validate_features,
-            **kwargs
+            **kwargs,
        )
 
     predict.__doc__ = _lgbmmodel_doc_predict.format(
@@ -1506,7 +1465,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
         output_name="predicted_result",
         predicted_result_shape="Dask Array of shape = [n_samples]",
         X_leaves_shape="Dask Array of shape = [n_samples, n_trees]",
-        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1]"
+        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1]",
     )
 
     def to_local(self) -> LGBMRegressor:
@@ -1525,7 +1484,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
 
     def __init__(
         self,
-        boosting_type: str = 'gbdt',
+        boosting_type: str = "gbdt",
         num_leaves: int = 31,
         max_depth: int = -1,
         learning_rate: float = 0.1,
@@ -1533,19 +1492,19 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
         subsample_for_bin: int = 200000,
         objective: Optional[Union[str, _LGBM_ScikitCustomObjectiveFunction]] = None,
         class_weight: Optional[Union[dict, str]] = None,
-        min_split_gain: float = 0.,
+        min_split_gain: float = 0.0,
         min_child_weight: float = 1e-3,
         min_child_samples: int = 20,
-        subsample: float = 1.,
+        subsample: float = 1.0,
         subsample_freq: int = 0,
-        colsample_bytree: float = 1.,
-        reg_alpha: float = 0.,
-        reg_lambda: float = 0.,
-        random_state: Optional[Union[int, np.random.RandomState, 'np.random.Generator']] = None,
+        colsample_bytree: float = 1.0,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+        random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
         n_jobs: Optional[int] = None,
-        importance_type: str = 'split',
+        importance_type: str = "split",
         client: Optional[Client] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ):
         """Docstring is inherited from the lightgbm.LGBMRanker.__init__."""
         self.client = client
@@ -1569,11 +1528,11 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
             random_state=random_state,
             n_jobs=n_jobs,
             importance_type=importance_type,
-            **kwargs
+            **kwargs,
        )
 
     _base_doc = LGBMRanker.__init__.__doc__
-    _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition('**kwargs')  # type: ignore
+    _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs")  # type: ignore
     __init__.__doc__ = f"""
         {_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
         {' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
@@ -1597,7 +1556,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
         eval_group: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
         eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5),
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "DaskLGBMRanker":
         """Docstring is inherited from the lightgbm.LGBMRanker.fit."""
         self._lgb_dask_fit(
@@ -1614,7 +1573,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
             eval_group=eval_group,
             eval_metric=eval_metric,
             eval_at=eval_at,
-            **kwargs
+            **kwargs,
         )
         return self
 
@@ -1626,17 +1585,18 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
         group_shape="Dask Array or Dask Series or None, optional (default=None)",
         eval_sample_weight_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
         eval_init_score_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
-        eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)"
+        eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)",
    )
 
     # DaskLGBMRanker does not support eval_class_weight or early stopping
-    _base_doc = (_base_doc[:_base_doc.find('eval_class_weight :')]
-                 + _base_doc[_base_doc.find('eval_init_score :'):])
+    _base_doc = _base_doc[: _base_doc.find("eval_class_weight :")] + _base_doc[_base_doc.find("eval_init_score :") :]
 
-    _base_doc = (_base_doc[:_base_doc.find('feature_name :')]
-                 + "eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))\n"
-                 + f"{' ':8}The evaluation positions of the specified metric.\n"
-                 + f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}")
+    _base_doc = (
+        _base_doc[: _base_doc.find("feature_name :")]
+        + "eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))\n"
+        + f"{' ':8}The evaluation positions of the specified metric.\n"
+        + f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}"
+    )
 
     # DaskLGBMRanker support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
@@ -1659,7 +1619,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
         pred_leaf: bool = False,
         pred_contrib: bool = False,
         validate_features: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> dask_Array:
         """Docstring is inherited from the lightgbm.LGBMRanker.predict."""
         return _predict(
@@ -1672,7 +1632,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
             validate_features=validate_features,
-            **kwargs
+            **kwargs,
        )
 
     predict.__doc__ = _lgbmmodel_doc_predict.format(
@@ -1681,7 +1641,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
         output_name="predicted_result",
         predicted_result_shape="Dask Array of shape = [n_samples]",
         X_leaves_shape="Dask Array of shape = [n_samples, n_trees]",
-        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1]"
+        X_SHAP_values_shape="Dask Array of shape = [n_samples, n_features + 1]",
    )
 
     def to_local(self) -> LGBMRanker:
|
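For orientation, the reformatted estimator above keeps the same public API. A minimal usage sketch of DaskLGBMRanker follows; the cluster setup, array names, and group layout are illustrative assumptions, not part of this commit:

    import dask.array as da
    import numpy as np
    from distributed import Client, LocalCluster
    import lightgbm as lgb

    # illustrative local cluster; any dask.distributed client works
    client = Client(LocalCluster(n_workers=2))

    X = np.random.rand(100, 10)
    y = np.random.rand(100)
    dX = da.from_array(X, chunks=(10, 10))
    dy = da.from_array(y, chunks=10)
    # one query group of 10 documents per data partition
    dg = da.from_array(np.full(10, 10), chunks=1)

    ranker = lgb.DaskLGBMRanker(client=client, n_estimators=10)
    ranker.fit(dX, dy, group=dg)
    preds = ranker.predict(dX).compute()
    local_model = ranker.to_local()  # plain LGBMRanker, no client attached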
@@ -28,9 +28,9 @@ from .basic import (
from .compat import SKLEARN_INSTALLED, _LGBMBaseCrossValidator, _LGBMGroupKFold, _LGBMStratifiedKFold

__all__ = [
'cv',
'CVBooster',
'train',
"cv",
"CVBooster",
"train",
]

@@ -41,13 +41,13 @@ _LGBM_CustomMetricFunction = Union[
],
Callable[
[np.ndarray, Dataset],
List[_LGBM_EvalFunctionResultType]
List[_LGBM_EvalFunctionResultType],
],
]

_LGBM_PreprocFunction = Callable[
[Dataset, Dataset, Dict[str, Any]],
Tuple[Dataset, Dataset, Dict[str, Any]]
Tuple[Dataset, Dataset, Dict[str, Any]],
]

@@ -59,10 +59,10 @@ def train(
valid_names: Optional[List[str]] = None,
feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None,
init_model: Optional[Union[str, Path, Booster]] = None,
feature_name: _LGBM_FeatureNameConfiguration = 'auto',
categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
feature_name: _LGBM_FeatureNameConfiguration = "auto",
categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto",
keep_training_booster: bool = False,
callbacks: Optional[List[Callable]] = None
callbacks: Optional[List[Callable]] = None,
) -> Booster:
"""Perform the training with given parameters.

@@ -169,14 +169,14 @@ def train(
# create predictor first
params = copy.deepcopy(params)
params = _choose_param_value(
main_param_name='objective',
main_param_name="objective",
params=params,
default_value=None
default_value=None,
)
fobj: Optional[_LGBM_CustomObjectiveFunction] = None
if callable(params["objective"]):
fobj = params["objective"]
params["objective"] = 'none'
params["objective"] = "none"
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
num_boost_round = params.pop(alias)

@@ -186,33 +186,26 @@ def train(
params = _choose_param_value(
main_param_name="early_stopping_round",
params=params,
default_value=None
default_value=None,
)
if params["early_stopping_round"] is None:
params.pop("early_stopping_round")
first_metric_only = params.get('first_metric_only', False)
first_metric_only = params.get("first_metric_only", False)

predictor: Optional[_InnerPredictor] = None
if isinstance(init_model, (str, Path)):
predictor = _InnerPredictor.from_model_file(
model_file=init_model,
pred_parameter=params
)
predictor = _InnerPredictor.from_model_file(model_file=init_model, pred_parameter=params)
elif isinstance(init_model, Booster):
predictor = _InnerPredictor.from_booster(
booster=init_model,
pred_parameter=dict(init_model.params, **params)
)
predictor = _InnerPredictor.from_booster(booster=init_model, pred_parameter=dict(init_model.params, **params))

if predictor is not None:
init_iteration = predictor.current_iteration()
else:
init_iteration = 0

train_set._update_params(params) \
._set_predictor(predictor) \
.set_feature_name(feature_name) \
.set_categorical_feature(categorical_feature)
train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(
categorical_feature
)

is_valid_contain_train = False
train_data_name = "training"

@@ -234,13 +227,13 @@ def train(
if valid_names is not None and len(valid_names) > i:
name_valid_sets.append(valid_names[i])
else:
name_valid_sets.append(f'valid_{i}')
name_valid_sets.append(f"valid_{i}")
# process callbacks
if callbacks is None:
callbacks_set = set()
else:
for i, cb in enumerate(callbacks):
cb.__dict__.setdefault('order', i - len(callbacks))
cb.__dict__.setdefault("order", i - len(callbacks))
callbacks_set = set(callbacks)

if "early_stopping_round" in params:

@@ -251,15 +244,16 @@ def train(
verbose=_choose_param_value(
main_param_name="verbosity",
params=params,
default_value=1
).pop("verbosity") > 0
default_value=1,
).pop("verbosity")
> 0,
)
)

callbacks_before_iter_set = {cb for cb in callbacks_set if getattr(cb, 'before_iteration', False)}
callbacks_before_iter_set = {cb for cb in callbacks_set if getattr(cb, "before_iteration", False)}
callbacks_after_iter_set = callbacks_set - callbacks_before_iter_set
callbacks_before_iter = sorted(callbacks_before_iter_set, key=attrgetter('order'))
callbacks_after_iter = sorted(callbacks_after_iter_set, key=attrgetter('order'))
callbacks_before_iter = sorted(callbacks_before_iter_set, key=attrgetter("order"))
callbacks_after_iter = sorted(callbacks_after_iter_set, key=attrgetter("order"))

# construct booster
try:

@@ -277,12 +271,16 @@ def train(
# start training
for i in range(init_iteration, init_iteration + num_boost_round):
for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=booster,
params=params,
iteration=i,
begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round,
evaluation_result_list=None))
cb(
callback.CallbackEnv(
model=booster,
params=params,
iteration=i,
begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round,
evaluation_result_list=None,
)
)

booster.update(fobj=fobj)

@@ -294,12 +292,16 @@ def train(
evaluation_result_list.extend(booster.eval_valid(feval))
try:
for cb in callbacks_after_iter:
cb(callback.CallbackEnv(model=booster,
params=params,
iteration=i,
begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round,
evaluation_result_list=evaluation_result_list))
cb(
callback.CallbackEnv(
model=booster,
params=params,
iteration=i,
begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round,
evaluation_result_list=evaluation_result_list,
)
)
except callback.EarlyStopException as earlyStopException:
booster.best_iteration = earlyStopException.best_iteration + 1
evaluation_result_list = earlyStopException.best_score
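The train() loop reformatted above is exercised through the public entry point. A minimal sketch with synthetic data; parameter values are chosen only for illustration:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(500, 5)
    y = np.random.rand(500)
    train_set = lgb.Dataset(X[:400], label=y[:400])
    valid_set = lgb.Dataset(X[400:], label=y[400:], reference=train_set)

    evals = {}
    booster = lgb.train(
        params={"objective": "regression", "verbosity": -1},
        train_set=train_set,
        num_boost_round=50,
        valid_sets=[valid_set],
        callbacks=[
            lgb.early_stopping(stopping_rounds=5),  # raises EarlyStopException internally, caught above
            lgb.log_evaluation(period=10),
            lgb.record_evaluation(evals),
        ],
    )
    print(booster.best_iteration)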
@@ -334,7 +336,7 @@ class CVBooster:

def __init__(
self,
model_file: Optional[Union[str, Path]] = None
model_file: Optional[Union[str, Path]] = None,
):
"""Initialize the CVBooster.

@@ -361,18 +363,23 @@ class CVBooster:
"""Serialize CVBooster to dict."""
models_str = []
for booster in self.boosters:
models_str.append(booster.model_to_string(num_iteration=num_iteration, start_iteration=start_iteration,
importance_type=importance_type))
models_str.append(
booster.model_to_string(
num_iteration=num_iteration, start_iteration=start_iteration, importance_type=importance_type
)
)
return {"boosters": models_str, "best_iteration": self.best_iteration}

def __getattr__(self, name: str) -> Callable[[Any, Any], List[Any]]:
"""Redirect methods call of CVBooster."""

def handler_function(*args: Any, **kwargs: Any) -> List[Any]:
"""Call methods with each booster, and concatenate their results."""
ret = []
for booster in self.boosters:
ret.append(getattr(booster, name)(*args, **kwargs))
return ret

return handler_function

def __getstate__(self) -> Dict[str, Any]:

@@ -401,7 +408,7 @@ class CVBooster:
self,
num_iteration: Optional[int] = None,
start_iteration: int = 0,
importance_type: str = 'split'
importance_type: str = "split",
) -> str:
"""Save CVBooster to JSON string.

@@ -430,7 +437,7 @@ class CVBooster:
filename: Union[str, Path],
num_iteration: Optional[int] = None,
start_iteration: int = 0,
importance_type: str = 'split'
importance_type: str = "split",
) -> "CVBooster":
"""Save CVBooster to a file as JSON text.

@@ -469,16 +476,18 @@ def _make_n_folds(
fpreproc: Optional[_LGBM_PreprocFunction],
stratified: bool,
shuffle: bool,
eval_train_metric: bool
eval_train_metric: bool,
) -> CVBooster:
"""Make a n-fold list of Booster from random indices."""
full_data = full_data.construct()
num_data = full_data.num_data()
if folds is not None:
if not hasattr(folds, '__iter__') and not hasattr(folds, 'split'):
raise AttributeError("folds should be a generator or iterator of (train_idx, test_idx) tuples "
"or scikit-learn splitter object with split method")
if hasattr(folds, 'split'):
if not hasattr(folds, "__iter__") and not hasattr(folds, "split"):
raise AttributeError(
"folds should be a generator or iterator of (train_idx, test_idx) tuples "
"or scikit-learn splitter object with split method"
)
if hasattr(folds, "split"):
group_info = full_data.get_group()
if group_info is not None:
group_info = np.array(group_info, dtype=np.int32, copy=False)

@@ -487,11 +496,13 @@ def _make_n_folds(
flatted_group = np.zeros(num_data, dtype=np.int32)
folds = folds.split(X=np.empty(num_data), y=full_data.get_label(), groups=flatted_group)
else:
if any(params.get(obj_alias, "") in {"lambdarank", "rank_xendcg", "xendcg",
"xe_ndcg", "xe_ndcg_mart", "xendcg_mart"}
for obj_alias in _ConfigAliases.get("objective")):
if any(
params.get(obj_alias, "")
in {"lambdarank", "rank_xendcg", "xendcg", "xe_ndcg", "xe_ndcg_mart", "xendcg_mart"}
for obj_alias in _ConfigAliases.get("objective")
):
if not SKLEARN_INSTALLED:
raise LightGBMError('scikit-learn is required for ranking cv')
raise LightGBMError("scikit-learn is required for ranking cv")
# ranking task, split according to groups
group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)

@@ -499,7 +510,7 @@ def _make_n_folds(
folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)
elif stratified:
if not SKLEARN_INSTALLED:
raise LightGBMError('scikit-learn is required for stratified cv')
raise LightGBMError("scikit-learn is required for stratified cv")
skf = _LGBMStratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
folds = skf.split(X=np.empty(num_data), y=full_data.get_label())
else:

@@ -508,7 +519,7 @@ def _make_n_folds(
else:
randidx = np.arange(num_data)
kstep = int(num_data / nfold)
test_id = [randidx[i: i + kstep] for i in range(0, num_data, kstep)]
test_id = [randidx[i : i + kstep] for i in range(0, num_data, kstep)]
train_id = [np.concatenate([test_id[i] for i in range(nfold) if k != i]) for k in range(nfold)]
folds = zip(train_id, test_id)

@@ -523,14 +534,14 @@ def _make_n_folds(
tparam = params
booster_for_fold = Booster(tparam, train_set)
if eval_train_metric:
booster_for_fold.add_valid(train_set, 'train')
booster_for_fold.add_valid(valid_set, 'valid')
booster_for_fold.add_valid(train_set, "train")
booster_for_fold.add_valid(valid_set, "valid")
ret.boosters.append(booster_for_fold)
return ret

def _agg_cv_result(
raw_results: List[List[_LGBM_BoosterEvalMethodResultType]]
raw_results: List[List[_LGBM_BoosterEvalMethodResultType]],
) -> List[_LGBM_BoosterEvalMethodResultWithStandardDeviationType]:
"""Aggregate cross-validation results."""
cvmap: Dict[str, List[float]] = OrderedDict()

@@ -541,7 +552,7 @@ def _agg_cv_result(
metric_type[key] = one_line[3]
cvmap.setdefault(key, [])
cvmap[key].append(one_line[2])
return [('cv_agg', k, float(np.mean(v)), metric_type[k], float(np.std(v))) for k, v in cvmap.items()]
return [("cv_agg", k, float(np.mean(v)), metric_type[k], float(np.std(v))) for k, v in cvmap.items()]

def cv(

@@ -555,13 +566,13 @@ def cv(
metrics: Optional[Union[str, List[str]]] = None,
feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None,
init_model: Optional[Union[str, Path, Booster]] = None,
feature_name: _LGBM_FeatureNameConfiguration = 'auto',
categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
feature_name: _LGBM_FeatureNameConfiguration = "auto",
categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto",
fpreproc: Optional[_LGBM_PreprocFunction] = None,
seed: int = 0,
callbacks: Optional[List[Callable]] = None,
eval_train_metric: bool = False,
return_cvbooster: bool = False
return_cvbooster: bool = False,
) -> Dict[str, Union[List[float], CVBooster]]:
"""Perform the cross-validation with given parameters.

@@ -683,14 +694,14 @@ def cv(

params = copy.deepcopy(params)
params = _choose_param_value(
main_param_name='objective',
main_param_name="objective",
params=params,
default_value=None
default_value=None,
)
fobj: Optional[_LGBM_CustomObjectiveFunction] = None
if callable(params["objective"]):
fobj = params["objective"]
params["objective"] = 'none'
params["objective"] = "none"
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
_log_warning(f"Found '{alias}' in params. Will use it instead of 'num_boost_round' argument")

@@ -700,21 +711,21 @@ def cv(
params = _choose_param_value(
main_param_name="early_stopping_round",
params=params,
default_value=None
default_value=None,
)
if params["early_stopping_round"] is None:
params.pop("early_stopping_round")
first_metric_only = params.get('first_metric_only', False)
first_metric_only = params.get("first_metric_only", False)

if isinstance(init_model, (str, Path)):
predictor = _InnerPredictor.from_model_file(
model_file=init_model,
pred_parameter=params
pred_parameter=params,
)
elif isinstance(init_model, Booster):
predictor = _InnerPredictor.from_booster(
booster=init_model,
pred_parameter=dict(init_model.params, **params)
pred_parameter=dict(init_model.params, **params),
)
else:
predictor = None

@@ -722,25 +733,31 @@ def cv(
if metrics is not None:
for metric_alias in _ConfigAliases.get("metric"):
params.pop(metric_alias, None)
params['metric'] = metrics
params["metric"] = metrics

train_set._update_params(params) \
._set_predictor(predictor) \
.set_feature_name(feature_name) \
.set_categorical_feature(categorical_feature)
train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(
categorical_feature
)

results = defaultdict(list)
cvfolds = _make_n_folds(full_data=train_set, folds=folds, nfold=nfold,
params=params, seed=seed, fpreproc=fpreproc,
stratified=stratified, shuffle=shuffle,
eval_train_metric=eval_train_metric)
cvfolds = _make_n_folds(
full_data=train_set,
folds=folds,
nfold=nfold,
params=params,
seed=seed,
fpreproc=fpreproc,
stratified=stratified,
shuffle=shuffle,
eval_train_metric=eval_train_metric,
)

# setup callbacks
if callbacks is None:
callbacks_set = set()
else:
for i, cb in enumerate(callbacks):
cb.__dict__.setdefault('order', i - len(callbacks))
cb.__dict__.setdefault("order", i - len(callbacks))
callbacks_set = set(callbacks)

if "early_stopping_round" in params:

@@ -751,46 +768,55 @@ def cv(
verbose=_choose_param_value(
main_param_name="verbosity",
params=params,
default_value=1
).pop("verbosity") > 0
default_value=1,
).pop("verbosity")
> 0,
)
)

callbacks_before_iter_set = {cb for cb in callbacks_set if getattr(cb, 'before_iteration', False)}
callbacks_before_iter_set = {cb for cb in callbacks_set if getattr(cb, "before_iteration", False)}
callbacks_after_iter_set = callbacks_set - callbacks_before_iter_set
callbacks_before_iter = sorted(callbacks_before_iter_set, key=attrgetter('order'))
callbacks_after_iter = sorted(callbacks_after_iter_set, key=attrgetter('order'))
callbacks_before_iter = sorted(callbacks_before_iter_set, key=attrgetter("order"))
callbacks_after_iter = sorted(callbacks_after_iter_set, key=attrgetter("order"))

for i in range(num_boost_round):
for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=cvfolds,
params=params,
iteration=i,
begin_iteration=0,
end_iteration=num_boost_round,
evaluation_result_list=None))
cb(
callback.CallbackEnv(
model=cvfolds,
params=params,
iteration=i,
begin_iteration=0,
end_iteration=num_boost_round,
evaluation_result_list=None,
)
)
cvfolds.update(fobj=fobj)  # type: ignore[call-arg]
res = _agg_cv_result(cvfolds.eval_valid(feval))  # type: ignore[call-arg]
for _, key, mean, _, std in res:
results[f'{key}-mean'].append(mean)
results[f'{key}-stdv'].append(std)
results[f"{key}-mean"].append(mean)
results[f"{key}-stdv"].append(std)
try:
for cb in callbacks_after_iter:
cb(callback.CallbackEnv(model=cvfolds,
params=params,
iteration=i,
begin_iteration=0,
end_iteration=num_boost_round,
evaluation_result_list=res))
cb(
callback.CallbackEnv(
model=cvfolds,
params=params,
iteration=i,
begin_iteration=0,
end_iteration=num_boost_round,
evaluation_result_list=res,
)
)
except callback.EarlyStopException as earlyStopException:
cvfolds.best_iteration = earlyStopException.best_iteration + 1
for bst in cvfolds.boosters:
bst.best_iteration = cvfolds.best_iteration
for k in results:
results[k] = results[k][:cvfolds.best_iteration]
results[k] = results[k][: cvfolds.best_iteration]
break

if return_cvbooster:
results['cvbooster'] = cvfolds  # type: ignore[assignment]
results["cvbooster"] = cvfolds  # type: ignore[assignment]

return dict(results)
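The cv() changes above preserve behavior, including the `{key}-mean` / `{key}-stdv` result keys and the optional CVBooster return. A small sketch with synthetic data (names illustrative):

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(500, 5)
    y = np.random.rand(500)
    train_set = lgb.Dataset(X, label=y)

    cv_results = lgb.cv(
        params={"objective": "regression", "verbosity": -1},
        train_set=train_set,
        num_boost_round=30,
        nfold=5,
        stratified=False,  # stratification applies to classification tasks
        return_cvbooster=True,
    )
    cvbooster = cv_results["cvbooster"]
    print({k: v[-1] for k, v in cv_results.items() if k != "cvbooster"})
    per_fold_preds = cvbooster.predict(X)  # list with one array per fold, via __getattr__ above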
@@ -16,17 +16,19 @@ def find_lib_path() -> List[str]:
List of all found library paths to LightGBM.
"""
curr_path = Path(__file__).absolute()
dll_path = [curr_path.parents[1],
curr_path.parents[0] / 'bin',
curr_path.parents[0] / 'lib']
if system() in ('Windows', 'Microsoft'):
dll_path.append(curr_path.parents[1] / 'Release')
dll_path.append(curr_path.parents[1] / 'windows' / 'x64' / 'DLL')
dll_path = [p / 'lib_lightgbm.dll' for p in dll_path]
dll_path = [
curr_path.parents[1],
curr_path.parents[0] / "bin",
curr_path.parents[0] / "lib",
]
if system() in ("Windows", "Microsoft"):
dll_path.append(curr_path.parents[1] / "Release")
dll_path.append(curr_path.parents[1] / "windows" / "x64" / "DLL")
dll_path = [p / "lib_lightgbm.dll" for p in dll_path]
else:
dll_path = [p / 'lib_lightgbm.so' for p in dll_path]
dll_path = [p / "lib_lightgbm.so" for p in dll_path]
lib_path = [str(p) for p in dll_path if p.is_file()]
if not lib_path:
dll_path_joined = '\n'.join(map(str, dll_path))
raise Exception(f'Cannot find lightgbm library file in following paths:\n{dll_path_joined}')
dll_path_joined = "\n".join(map(str, dll_path))
raise Exception(f"Cannot find lightgbm library file in following paths:\n{dll_path_joined}")
return lib_path
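To see which candidate paths the search above would consider in a given environment, the same logic can be replayed outside the package; a sketch, assuming `lightgbm` is importable:

    from pathlib import Path
    from platform import system
    import lightgbm

    # mirror the search order of find_lib_path() for inspection only
    pkg_dir = Path(lightgbm.__file__).absolute().parent
    candidates = [pkg_dir.parent, pkg_dir / "bin", pkg_dir / "lib"]
    ext = ".dll" if system() in ("Windows", "Microsoft") else ".so"
    for p in candidates:
        target = p / f"lib_lightgbm{ext}"
        print(target, target.is_file())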
@@ -12,11 +12,11 @@ from .compat import GRAPHVIZ_INSTALLED, MATPLOTLIB_INSTALLED, pd_DataFrame
from .sklearn import LGBMModel

__all__ = [
'create_tree_digraph',
'plot_importance',
'plot_metric',
'plot_split_value_histogram',
'plot_tree',
"create_tree_digraph",
"plot_importance",
"plot_metric",
"plot_split_value_histogram",
"plot_tree",
]

@@ -27,9 +27,7 @@ def _check_not_tuple_of_2_elements(obj: Any, obj_name: str) -> None:

def _float2str(value: float, precision: Optional[int]) -> str:
return (f"{value:.{precision}f}"
if precision is not None and not isinstance(value, str)
else str(value))
return f"{value:.{precision}f}" if precision is not None and not isinstance(value, str) else str(value)

def plot_importance(

@@ -38,17 +36,17 @@ def plot_importance(
height: float = 0.2,
xlim: Optional[Tuple[float, float]] = None,
ylim: Optional[Tuple[float, float]] = None,
title: Optional[str] = 'Feature importance',
xlabel: Optional[str] = 'Feature importance',
ylabel: Optional[str] = 'Features',
importance_type: str = 'auto',
title: Optional[str] = "Feature importance",
xlabel: Optional[str] = "Feature importance",
ylabel: Optional[str] = "Features",
importance_type: str = "auto",
max_num_features: Optional[int] = None,
ignore_zero: bool = True,
figsize: Optional[Tuple[float, float]] = None,
dpi: Optional[int] = None,
grid: bool = True,
precision: Optional[int] = 3,
**kwargs: Any
**kwargs: Any,
) -> Any:
"""Plot model's feature importances.

@@ -104,7 +102,7 @@ def plot_importance(
if MATPLOTLIB_INSTALLED:
import matplotlib.pyplot as plt
else:
raise ImportError('You must install matplotlib and restart your session to plot importance.')
raise ImportError("You must install matplotlib and restart your session to plot importance.")

if isinstance(booster, LGBMModel):
if importance_type == "auto":

@@ -114,7 +112,7 @@ def plot_importance(
if importance_type == "auto":
importance_type = "split"
else:
raise TypeError('booster must be Booster or LGBMModel.')
raise TypeError("booster must be Booster or LGBMModel.")

importance = booster.feature_importance(importance_type=importance_type)
feature_name = booster.feature_name()

@@ -131,28 +129,26 @@ def plot_importance(

if ax is None:
if figsize is not None:
_check_not_tuple_of_2_elements(figsize, 'figsize')
_check_not_tuple_of_2_elements(figsize, "figsize")
_, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)

ylocs = np.arange(len(values))
ax.barh(ylocs, values, align='center', height=height, **kwargs)
ax.barh(ylocs, values, align="center", height=height, **kwargs)

for x, y in zip(values, ylocs):
ax.text(x + 1, y,
_float2str(x, precision) if importance_type == 'gain' else x,
va='center')
ax.text(x + 1, y, _float2str(x, precision) if importance_type == "gain" else x, va="center")

ax.set_yticks(ylocs)
ax.set_yticklabels(labels)

if xlim is not None:
_check_not_tuple_of_2_elements(xlim, 'xlim')
_check_not_tuple_of_2_elements(xlim, "xlim")
else:
xlim = (0, max(values) * 1.1)
ax.set_xlim(xlim)

if ylim is not None:
_check_not_tuple_of_2_elements(ylim, 'ylim')
_check_not_tuple_of_2_elements(ylim, "ylim")
else:
ylim = (-1, len(values))
ax.set_ylim(ylim)

@@ -160,7 +156,7 @@ def plot_importance(
if title is not None:
ax.set_title(title)
if xlabel is not None:
xlabel = xlabel.replace('@importance_type@', importance_type)
xlabel = xlabel.replace("@importance_type@", importance_type)
ax.set_xlabel(xlabel)
if ylabel is not None:
ax.set_ylabel(ylabel)
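A short usage sketch for the function above (matplotlib required; the data and file name are illustrative):

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(200, 5)
    y = np.random.rand(200)
    booster = lgb.train({"objective": "regression", "verbosity": -1}, lgb.Dataset(X, label=y), num_boost_round=20)

    ax = lgb.plot_importance(booster, importance_type="gain", max_num_features=5, precision=2)
    ax.figure.savefig("importance.png")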
@@ -176,13 +172,13 @@ def plot_split_value_histogram(
width_coef: float = 0.8,
xlim: Optional[Tuple[float, float]] = None,
ylim: Optional[Tuple[float, float]] = None,
title: Optional[str] = 'Split value histogram for feature with @index/name@ @feature@',
xlabel: Optional[str] = 'Feature split value',
ylabel: Optional[str] = 'Count',
title: Optional[str] = "Split value histogram for feature with @index/name@ @feature@",
xlabel: Optional[str] = "Feature split value",
ylabel: Optional[str] = "Count",
figsize: Optional[Tuple[float, float]] = None,
dpi: Optional[int] = None,
grid: bool = True,
**kwargs: Any
**kwargs: Any,
) -> Any:
"""Plot split value histogram for the specified feature of the model.

@@ -238,29 +234,28 @@ def plot_split_value_histogram(
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
else:
raise ImportError('You must install matplotlib and restart your session to plot split value histogram.')
raise ImportError("You must install matplotlib and restart your session to plot split value histogram.")

if isinstance(booster, LGBMModel):
booster = booster.booster_
elif not isinstance(booster, Booster):
raise TypeError('booster must be Booster or LGBMModel.')
raise TypeError("booster must be Booster or LGBMModel.")

hist, split_bins = booster.get_split_value_histogram(feature=feature, bins=bins, xgboost_style=False)
if np.count_nonzero(hist) == 0:
raise ValueError('Cannot plot split value histogram, '
f'because feature {feature} was not used in splitting')
raise ValueError("Cannot plot split value histogram, " f"because feature {feature} was not used in splitting")
width = width_coef * (split_bins[1] - split_bins[0])
centred = (split_bins[:-1] + split_bins[1:]) / 2

if ax is None:
if figsize is not None:
_check_not_tuple_of_2_elements(figsize, 'figsize')
_check_not_tuple_of_2_elements(figsize, "figsize")
_, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)

ax.bar(centred, hist, align='center', width=width, **kwargs)
ax.bar(centred, hist, align="center", width=width, **kwargs)

if xlim is not None:
_check_not_tuple_of_2_elements(xlim, 'xlim')
_check_not_tuple_of_2_elements(xlim, "xlim")
else:
range_result = split_bins[-1] - split_bins[0]
xlim = (split_bins[0] - range_result * 0.2, split_bins[-1] + range_result * 0.2)

@@ -268,14 +263,14 @@ def plot_split_value_histogram(

ax.yaxis.set_major_locator(MaxNLocator(integer=True))
if ylim is not None:
_check_not_tuple_of_2_elements(ylim, 'ylim')
_check_not_tuple_of_2_elements(ylim, "ylim")
else:
ylim = (0, max(hist) * 1.1)
ax.set_ylim(ylim)

if title is not None:
title = title.replace('@feature@', str(feature))
title = title.replace('@index/name@', ('name' if isinstance(feature, str) else 'index'))
title = title.replace("@feature@", str(feature))
title = title.replace("@index/name@", ("name" if isinstance(feature, str) else "index"))
ax.set_title(title)
if xlabel is not None:
ax.set_xlabel(xlabel)
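Usage of the function above, assuming `booster` was trained as in the previous sketch; the chosen feature must actually appear in at least one split, or the ValueError shown above is raised:

    ax = lgb.plot_split_value_histogram(booster, feature=0)
    ax.figure.savefig("splits_feature0.png")  # file name illustrative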
@@ -292,12 +287,12 @@ def plot_metric(
ax=None,
xlim: Optional[Tuple[float, float]] = None,
ylim: Optional[Tuple[float, float]] = None,
title: Optional[str] = 'Metric during training',
xlabel: Optional[str] = 'Iterations',
ylabel: Optional[str] = '@metric@',
title: Optional[str] = "Metric during training",
xlabel: Optional[str] = "Iterations",
ylabel: Optional[str] = "@metric@",
figsize: Optional[Tuple[float, float]] = None,
dpi: Optional[int] = None,
grid: bool = True
grid: bool = True,
) -> Any:
"""Plot one metric during training.

@@ -345,31 +340,33 @@ def plot_metric(
if MATPLOTLIB_INSTALLED:
import matplotlib.pyplot as plt
else:
raise ImportError('You must install matplotlib and restart your session to plot metric.')
raise ImportError("You must install matplotlib and restart your session to plot metric.")

if isinstance(booster, LGBMModel):
eval_results = deepcopy(booster.evals_result_)
elif isinstance(booster, dict):
eval_results = deepcopy(booster)
elif isinstance(booster, Booster):
raise TypeError("booster must be dict or LGBMModel. To use plot_metric with Booster type, first record the metrics using record_evaluation callback then pass that to plot_metric as argument `booster`")
raise TypeError(
"booster must be dict or LGBMModel. To use plot_metric with Booster type, first record the metrics using record_evaluation callback then pass that to plot_metric as argument `booster`"
)
else:
raise TypeError('booster must be dict or LGBMModel.')
raise TypeError("booster must be dict or LGBMModel.")

num_data = len(eval_results)

if not num_data:
raise ValueError('eval results cannot be empty.')
raise ValueError("eval results cannot be empty.")

if ax is None:
if figsize is not None:
_check_not_tuple_of_2_elements(figsize, 'figsize')
_check_not_tuple_of_2_elements(figsize, "figsize")
_, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)

if dataset_names is None:
dataset_names_iter = iter(eval_results.keys())
elif not isinstance(dataset_names, (list, tuple, set)) or not dataset_names:
raise ValueError('dataset_names should be iterable and cannot be empty')
raise ValueError("dataset_names should be iterable and cannot be empty")
else:
dataset_names_iter = iter(dataset_names)

@@ -382,7 +379,7 @@ def plot_metric(
metric, results = metrics_for_one.popitem()
else:
if metric not in metrics_for_one:
raise KeyError('No given metric in eval results.')
raise KeyError("No given metric in eval results.")
results = metrics_for_one[metric]
num_iteration = len(results)
max_result = max(results)

@@ -397,16 +394,16 @@ def plot_metric(
min_result = min(*results, min_result)
ax.plot(x_, results, label=name)

ax.legend(loc='best')
ax.legend(loc="best")

if xlim is not None:
_check_not_tuple_of_2_elements(xlim, 'xlim')
_check_not_tuple_of_2_elements(xlim, "xlim")
else:
xlim = (0, num_iteration)
ax.set_xlim(xlim)

if ylim is not None:
_check_not_tuple_of_2_elements(ylim, 'ylim')
_check_not_tuple_of_2_elements(ylim, "ylim")
else:
range_result = max_result - min_result
ylim = (min_result - range_result * 0.2, max_result + range_result * 0.2)

@@ -417,7 +414,7 @@ def plot_metric(
if xlabel is not None:
ax.set_xlabel(xlabel)
if ylabel is not None:
ylabel = ylabel.replace('@metric@', metric)
ylabel = ylabel.replace("@metric@", metric)
ax.set_ylabel(ylabel)
ax.grid(grid)
return ax
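As the TypeError message above spells out, a raw Booster is not accepted; metrics recorded with the record_evaluation callback are. A sketch with synthetic data and illustrative names:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(500, 5)
    y = np.random.rand(500)
    train_set = lgb.Dataset(X[:400], label=y[:400])
    valid_set = lgb.Dataset(X[400:], label=y[400:], reference=train_set)

    evals = {}
    lgb.train(
        {"objective": "regression", "metric": "l2", "verbosity": -1},
        train_set,
        num_boost_round=30,
        valid_sets=[valid_set],
        callbacks=[lgb.record_evaluation(evals)],  # fills the dict consumed by plot_metric
    )
    ax = lgb.plot_metric(evals, metric="l2")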
@@ -432,19 +429,20 @@ def _determine_direction_for_numeric_split(
missing_type = _MissingType(missing_type_str)
if math.isnan(fval) and missing_type != _MissingType.NAN:
fval = 0.0
if ((missing_type == _MissingType.ZERO and _is_zero(fval))
or (missing_type == _MissingType.NAN and math.isnan(fval))):
direction = 'left' if default_left else 'right'
if (missing_type == _MissingType.ZERO and _is_zero(fval)) or (
missing_type == _MissingType.NAN and math.isnan(fval)
):
direction = "left" if default_left else "right"
else:
direction = 'left' if fval <= threshold else 'right'
direction = "left" if fval <= threshold else "right"
return direction

def _determine_direction_for_categorical_split(fval: float, thresholds: str) -> str:
if math.isnan(fval) or int(fval) < 0:
return 'right'
int_thresholds = {int(t) for t in thresholds.split('||')}
return 'left' if int(fval) in int_thresholds else 'right'
return "right"
int_thresholds = {int(t) for t in thresholds.split("||")}
return "left" if int(fval) in int_thresholds else "right"

def _to_graphviz(

@@ -456,7 +454,7 @@ def _to_graphviz(
constraints: Optional[List[int]],
example_case: Optional[Union[np.ndarray, pd_DataFrame]],
max_category_values: int,
**kwargs: Any
**kwargs: Any,
) -> Any:
"""Convert specified tree to graphviz instance.

@@ -466,120 +464,124 @@ def _to_graphviz(
if GRAPHVIZ_INSTALLED:
from graphviz import Digraph
else:
raise ImportError('You must install graphviz and restart your session to plot tree.')
raise ImportError("You must install graphviz and restart your session to plot tree.")

def add(
root: Dict[str, Any],
total_count: int,
parent: Optional[str],
decision: Optional[str],
highlight: bool
root: Dict[str, Any], total_count: int, parent: Optional[str], decision: Optional[str], highlight: bool
) -> None:
"""Recursively add node or edge."""
fillcolor = 'white'
style = ''
fillcolor = "white"
style = ""
tooltip = None
if highlight:
color = 'blue'
penwidth = '3'
color = "blue"
penwidth = "3"
else:
color = 'black'
penwidth = '1'
if 'split_index' in root:  # non-leaf
color = "black"
penwidth = "1"
if "split_index" in root:  # non-leaf
shape = "rectangle"
l_dec = 'yes'
r_dec = 'no'
threshold = root['threshold']
if root['decision_type'] == '<=':
l_dec = "yes"
r_dec = "no"
threshold = root["threshold"]
if root["decision_type"] == "<=":
operator = "≤"
elif root['decision_type'] == '==':
elif root["decision_type"] == "==":
operator = "="
else:
raise ValueError('Invalid decision type in tree model.')
raise ValueError("Invalid decision type in tree model.")
name = f"split{root['split_index']}"
split_feature = root['split_feature']
split_feature = root["split_feature"]
if feature_names is not None:
label = f"<B>{feature_names[split_feature]}</B> {operator}"
else:
label = f"feature <B>{split_feature}</B> {operator} "
direction = None
if example_case is not None:
if root['decision_type'] == '==':
if root["decision_type"] == "==":
direction = _determine_direction_for_categorical_split(
fval=example_case[split_feature],
thresholds=root['threshold']
fval=example_case[split_feature], thresholds=root["threshold"]
)
else:
direction = _determine_direction_for_numeric_split(
fval=example_case[split_feature],
threshold=root['threshold'],
missing_type_str=root['missing_type'],
default_left=root['default_left']
threshold=root["threshold"],
missing_type_str=root["missing_type"],
default_left=root["default_left"],
)
if root['decision_type'] == '==':
category_values = root['threshold'].split('||')
if root["decision_type"] == "==":
category_values = root["threshold"].split("||")
if len(category_values) > max_category_values:
tooltip = root['threshold']
threshold = '||'.join(category_values[:2]) + '||...||' + category_values[-1]
tooltip = root["threshold"]
threshold = "||".join(category_values[:2]) + "||...||" + category_values[-1]

label += f"<B>{_float2str(threshold, precision)}</B>"
for info in ['split_gain', 'internal_value', 'internal_weight', "internal_count", "data_percentage"]:
for info in ["split_gain", "internal_value", "internal_weight", "internal_count", "data_percentage"]:
if info in show_info:
output = info.split('_')[-1]
if info in {'split_gain', 'internal_value', 'internal_weight'}:
output = info.split("_")[-1]
if info in {"split_gain", "internal_value", "internal_weight"}:
label += f"<br/>{_float2str(root[info], precision)} {output}"
elif info == 'internal_count':
elif info == "internal_count":
label += f"<br/>{output}: {root[info]}"
elif info == "data_percentage":
label += f"<br/>{_float2str(root['internal_count'] / total_count * 100, 2)}% of data"

if constraints:
if constraints[root['split_feature']] == 1:
if constraints[root["split_feature"]] == 1:
fillcolor = "#ddffdd"  # light green
if constraints[root['split_feature']] == -1:
if constraints[root["split_feature"]] == -1:
fillcolor = "#ffdddd"  # light red
style = "filled"
label = f"<{label}>"
add(
root=root['left_child'],
root=root["left_child"],
total_count=total_count,
parent=name,
decision=l_dec,
highlight=highlight and direction == "left"
highlight=highlight and direction == "left",
)
add(
root=root['right_child'],
root=root["right_child"],
total_count=total_count,
parent=name,
decision=r_dec,
highlight=highlight and direction == "right"
highlight=highlight and direction == "right",
)
else:  # leaf
shape = "ellipse"
name = f"leaf{root['leaf_index']}"
label = f"leaf {root['leaf_index']}: "
label += f"<B>{_float2str(root['leaf_value'], precision)}</B>"
if 'leaf_weight' in show_info:
if "leaf_weight" in show_info:
label += f"<br/>{_float2str(root['leaf_weight'], precision)} weight"
if 'leaf_count' in show_info:
if "leaf_count" in show_info:
label += f"<br/>count: {root['leaf_count']}"
if "data_percentage" in show_info:
label += f"<br/>{_float2str(root['leaf_count'] / total_count * 100, 2)}% of data"
label = f"<{label}>"
graph.node(name, label=label, shape=shape, style=style, fillcolor=fillcolor, color=color, penwidth=penwidth, tooltip=tooltip)
graph.node(
name,
label=label,
shape=shape,
style=style,
fillcolor=fillcolor,
color=color,
penwidth=penwidth,
tooltip=tooltip,
)
if parent is not None:
graph.edge(parent, name, decision, color=color, penwidth=penwidth)

graph = Digraph(**kwargs)
rankdir = "LR" if orientation == "horizontal" else "TB"
graph.attr("graph", nodesep="0.05", ranksep="0.3", rankdir=rankdir)
if "internal_count" in tree_info['tree_structure']:
if "internal_count" in tree_info["tree_structure"]:
add(
root=tree_info['tree_structure'],
total_count=tree_info['tree_structure']["internal_count"],
root=tree_info["tree_structure"],
total_count=tree_info["tree_structure"]["internal_count"],
parent=None,
decision=None,
highlight=example_case is not None
highlight=example_case is not None,
)
else:
raise Exception("Cannot plot trees with no split")

@@ -610,10 +612,10 @@ def create_tree_digraph(
tree_index: int = 0,
show_info: Optional[List[str]] = None,
precision: Optional[int] = 3,
orientation: str = 'horizontal',
orientation: str = "horizontal",
example_case: Optional[Union[np.ndarray, pd_DataFrame]] = None,
max_category_values: int = 10,
**kwargs: Any
**kwargs: Any,
) -> Any:
"""Create a digraph representation of specified tree.

@@ -689,32 +691,32 @@ def create_tree_digraph(
if isinstance(booster, LGBMModel):
booster = booster.booster_
elif not isinstance(booster, Booster):
raise TypeError('booster must be Booster or LGBMModel.')
raise TypeError("booster must be Booster or LGBMModel.")

model = booster.dump_model()
tree_infos = model['tree_info']
feature_names = model.get('feature_names', None)
monotone_constraints = model.get('monotone_constraints', None)
tree_infos = model["tree_info"]
feature_names = model.get("feature_names", None)
monotone_constraints = model.get("monotone_constraints", None)

if tree_index < len(tree_infos):
tree_info = tree_infos[tree_index]
else:
raise IndexError('tree_index is out of range.')
raise IndexError("tree_index is out of range.")

if show_info is None:
show_info = []

if example_case is not None:
if not isinstance(example_case, (np.ndarray, pd_DataFrame)) or example_case.ndim != 2:
raise ValueError('example_case must be a numpy 2-D array or a pandas DataFrame')
raise ValueError("example_case must be a numpy 2-D array or a pandas DataFrame")
if example_case.shape[0] != 1:
raise ValueError('example_case must have a single row.')
raise ValueError("example_case must have a single row.")
if isinstance(example_case, pd_DataFrame):
example_case = _data_from_pandas(
data=example_case,
feature_name="auto",
categorical_feature="auto",
pandas_categorical=booster.pandas_categorical
pandas_categorical=booster.pandas_categorical,
)[0]
example_case = example_case[0]

@@ -727,7 +729,7 @@ def create_tree_digraph(
constraints=monotone_constraints,
example_case=example_case,
max_category_values=max_category_values,
**kwargs
**kwargs,
)
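A usage sketch for create_tree_digraph (graphviz required; the show_info values come from the lists handled above, the output file name is illustrative):

    graph = lgb.create_tree_digraph(
        booster,  # a trained Booster or LGBMModel, as in the earlier sketches
        tree_index=0,
        show_info=["internal_count", "leaf_count", "data_percentage"],
        orientation="vertical",  # rendered top-to-bottom ("TB"), per the rankdir logic above
    )
    graph.render("tree0", format="png")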
@@ -739,9 +741,9 @@ def plot_tree(
dpi: Optional[int] = None,
show_info: Optional[List[str]] = None,
precision: Optional[int] = 3,
orientation: str = 'horizontal',
orientation: str = "horizontal",
example_case: Optional[Union[np.ndarray, pd_DataFrame]] = None,
**kwargs: Any
**kwargs: Any,
) -> Any:
"""Plot specified tree.

@@ -807,22 +809,28 @@ def plot_tree(
import matplotlib.image
import matplotlib.pyplot as plt
else:
raise ImportError('You must install matplotlib and restart your session to plot tree.')
raise ImportError("You must install matplotlib and restart your session to plot tree.")

if ax is None:
if figsize is not None:
_check_not_tuple_of_2_elements(figsize, 'figsize')
_check_not_tuple_of_2_elements(figsize, "figsize")
_, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)

graph = create_tree_digraph(booster=booster, tree_index=tree_index,
show_info=show_info, precision=precision,
orientation=orientation, example_case=example_case, **kwargs)
graph = create_tree_digraph(
booster=booster,
tree_index=tree_index,
show_info=show_info,
precision=precision,
orientation=orientation,
example_case=example_case,
**kwargs,
)

s = BytesIO()
s.write(graph.pipe(format='png'))
s.write(graph.pipe(format="png"))
s.seek(0)
img = matplotlib.image.imread(s)

ax.imshow(img)
ax.axis('off')
ax.axis("off")
return ax
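plot_tree wraps the digraph above and rasterizes it onto a matplotlib Axes, as the BytesIO / graph.pipe(format="png") round trip shows. Given a trained booster as in the earlier sketches, usage is a one-liner:

    ax = lgb.plot_tree(booster, tree_index=0, show_info=["internal_count"])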
@@ -46,10 +46,10 @@ from .compat import (
from .engine import train

__all__ = [
'LGBMClassifier',
'LGBMModel',
'LGBMRanker',
'LGBMRegressor',
"LGBMClassifier",
"LGBMModel",
"LGBMRanker",
"LGBMRegressor",
]

_LGBM_ScikitMatrixLike = Union[

@@ -57,58 +57,58 @@ _LGBM_ScikitMatrixLike = Union[
List[Union[List[float], List[int]]],
np.ndarray,
pd_DataFrame,
scipy.sparse.spmatrix
scipy.sparse.spmatrix,
]
_LGBM_ScikitCustomObjectiveFunction = Union[
# f(labels, preds)
Callable[
[Optional[np.ndarray], np.ndarray],
Tuple[np.ndarray, np.ndarray]
Tuple[np.ndarray, np.ndarray],
],
# f(labels, preds, weights)
Callable[
[Optional[np.ndarray], np.ndarray, Optional[np.ndarray]],
Tuple[np.ndarray, np.ndarray]
Tuple[np.ndarray, np.ndarray],
],
# f(labels, preds, weights, group)
Callable[
[Optional[np.ndarray], np.ndarray, Optional[np.ndarray], Optional[np.ndarray]],
Tuple[np.ndarray, np.ndarray]
Tuple[np.ndarray, np.ndarray],
],
]
_LGBM_ScikitCustomEvalFunction = Union[
# f(labels, preds)
Callable[
[Optional[np.ndarray], np.ndarray],
_LGBM_EvalFunctionResultType
_LGBM_EvalFunctionResultType,
],
Callable[
[Optional[np.ndarray], np.ndarray],
List[_LGBM_EvalFunctionResultType]
List[_LGBM_EvalFunctionResultType],
],
# f(labels, preds, weights)
Callable[
[Optional[np.ndarray], np.ndarray, Optional[np.ndarray]],
_LGBM_EvalFunctionResultType
_LGBM_EvalFunctionResultType,
],
Callable[
[Optional[np.ndarray], np.ndarray, Optional[np.ndarray]],
List[_LGBM_EvalFunctionResultType]
List[_LGBM_EvalFunctionResultType],
],
# f(labels, preds, weights, group)
Callable[
[Optional[np.ndarray], np.ndarray, Optional[np.ndarray], Optional[np.ndarray]],
_LGBM_EvalFunctionResultType
_LGBM_EvalFunctionResultType,
],
Callable[
[Optional[np.ndarray], np.ndarray, Optional[np.ndarray], Optional[np.ndarray]],
List[_LGBM_EvalFunctionResultType]
]
List[_LGBM_EvalFunctionResultType],
],
]
_LGBM_ScikitEvalMetricType = Union[
str,
_LGBM_ScikitCustomEvalFunction,
List[Union[str, _LGBM_ScikitCustomEvalFunction]]
List[Union[str, _LGBM_ScikitCustomEvalFunction]],
]
_LGBM_ScikitValidSet = Tuple[_LGBM_ScikitMatrixLike, _LGBM_LabelType]
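The overloads above describe the callables the scikit-learn API accepts. A minimal sketch matching the two-argument f(labels, preds) forms (constant factors in the L2 gradient are omitted for brevity; all names are illustrative):

    import numpy as np
    import lightgbm as lgb

    def l2_objective(y_true, y_pred):
        # f(labels, preds) -> (grad, hess)
        grad = y_pred - y_true
        hess = np.ones_like(y_true)
        return grad, hess

    def rmse_eval(y_true, y_pred):
        # f(labels, preds) -> (eval_name, eval_result, is_higher_better)
        return "rmse", float(np.sqrt(np.mean((y_pred - y_true) ** 2))), False

    X = np.random.rand(200, 5)
    y = np.random.rand(200)
    model = lgb.LGBMRegressor(objective=l2_objective, n_estimators=20)
    model.fit(X, y, eval_set=[(X, y)], eval_metric=rmse_eval)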
|
@ -119,7 +119,7 @@ def _get_group_from_constructed_dataset(dataset: Dataset) -> Optional[np.ndarray
|
|||
"Estimators in lightgbm.sklearn should only retrieve query groups from a constructed Dataset. "
|
||||
"If you're seeing this message, it's a bug in lightgbm. Please report it at https://github.com/microsoft/LightGBM/issues."
|
||||
)
|
||||
assert (group is None or isinstance(group, np.ndarray)), error_msg
|
||||
assert group is None or isinstance(group, np.ndarray), error_msg
|
||||
return group
|
||||
|
||||
|
||||
|
@ -139,7 +139,7 @@ def _get_weight_from_constructed_dataset(dataset: Dataset) -> Optional[np.ndarra
|
|||
"Estimators in lightgbm.sklearn should only retrieve weights from a constructed Dataset. "
|
||||
"If you're seeing this message, it's a bug in lightgbm. Please report it at https://github.com/microsoft/LightGBM/issues."
|
||||
)
|
||||
assert (weight is None or isinstance(weight, np.ndarray)), error_msg
|
||||
assert weight is None or isinstance(weight, np.ndarray), error_msg
|
||||
return weight
|
||||
|
||||
|
||||
|
@ -189,7 +189,11 @@ class _ObjectiveFunctionWrapper:
|
|||
"""
|
||||
self.func = func
|
||||
|
||||
def __call__(self, preds: np.ndarray, dataset: Dataset) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def __call__(
|
||||
self,
|
||||
preds: np.ndarray,
|
||||
dataset: Dataset,
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Call passed function with appropriate arguments.
|
||||
|
||||
Parameters
|
||||
|
@ -271,7 +275,7 @@ class _EvalFunctionWrapper:
|
|||
def __call__(
|
||||
self,
|
||||
preds: np.ndarray,
|
||||
dataset: Dataset
|
||||
dataset: Dataset,
|
||||
) -> Union[_LGBM_EvalFunctionResultType, List[_LGBM_EvalFunctionResultType]]:
|
||||
"""Call passed function with appropriate arguments.
|
||||
|
||||
|
@ -310,8 +314,7 @@ class _EvalFunctionWrapper:
|
|||
# documentation templates for LGBMModel methods are shared between the classes in
|
||||
# this module and those in the ``dask`` module
|
||||
|
||||
_lgbmmodel_doc_fit = (
|
||||
"""
|
||||
_lgbmmodel_doc_fit = """
|
||||
Build a gradient boosting model from the training set (X, y).
|
||||
|
||||
Parameters
|
||||
|
@ -372,7 +375,6 @@ _lgbmmodel_doc_fit = (
|
|||
self : LGBMModel
|
||||
Returns self.
|
||||
"""
|
||||
)
|
||||
|
||||
_lgbmmodel_doc_custom_eval_note = """
|
||||
Note
|
||||
|
@ -405,8 +407,7 @@ _lgbmmodel_doc_custom_eval_note = """
|
|||
Is eval result higher better, e.g. AUC is ``is_higher_better``.
|
||||
"""
|
||||
|
||||
_lgbmmodel_doc_predict = (
|
||||
"""
|
||||
_lgbmmodel_doc_predict = """
|
||||
{description}
|
||||
|
||||
Parameters
|
||||
|
@ -451,7 +452,6 @@ _lgbmmodel_doc_predict = (
|
|||
X_SHAP_values : {X_SHAP_values_shape}
|
||||
If ``pred_contrib=True``, the feature contributions for each sample.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
class LGBMModel(_LGBMModelBase):
|
||||
|
@ -459,7 +459,7 @@ class LGBMModel(_LGBMModelBase):
|
|||
|
||||
def __init__(
|
||||
self,
|
||||
boosting_type: str = 'gbdt',
|
||||
boosting_type: str = "gbdt",
|
||||
num_leaves: int = 31,
|
||||
max_depth: int = -1,
|
||||
learning_rate: float = 0.1,
|
||||
|
@ -467,18 +467,18 @@ class LGBMModel(_LGBMModelBase):
|
|||
subsample_for_bin: int = 200000,
|
||||
objective: Optional[Union[str, _LGBM_ScikitCustomObjectiveFunction]] = None,
|
||||
class_weight: Optional[Union[Dict, str]] = None,
|
||||
min_split_gain: float = 0.,
|
||||
min_split_gain: float = 0.0,
|
||||
min_child_weight: float = 1e-3,
|
||||
min_child_samples: int = 20,
|
||||
subsample: float = 1.,
|
||||
subsample: float = 1.0,
|
||||
subsample_freq: int = 0,
|
||||
colsample_bytree: float = 1.,
|
||||
reg_alpha: float = 0.,
|
||||
reg_lambda: float = 0.,
|
||||
random_state: Optional[Union[int, np.random.RandomState, 'np.random.Generator']] = None,
|
||||
colsample_bytree: float = 1.0,
|
||||
reg_alpha: float = 0.0,
|
||||
reg_lambda: float = 0.0,
|
||||
random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
|
||||
n_jobs: Optional[int] = None,
|
||||
importance_type: str = 'split',
|
||||
**kwargs
|
||||
importance_type: str = "split",
|
||||
**kwargs,
|
||||
):
|
||||
r"""Construct a gradient boosting model.
|
||||
|
||||
|
@ -598,8 +598,10 @@ class LGBMModel(_LGBMModelBase):
|
|||
and grad and hess should be returned in the same format.
|
||||
"""
|
||||
if not SKLEARN_INSTALLED:
|
||||
raise LightGBMError('scikit-learn is required for lightgbm.sklearn. '
|
||||
'You must install scikit-learn and restart your session to use this module.')
|
||||
raise LightGBMError(
|
||||
"scikit-learn is required for lightgbm.sklearn. "
|
||||
"You must install scikit-learn and restart your session to use this module."
|
||||
)
|
||||
|
||||
self.boosting_type = boosting_type
|
||||
self.objective = objective
|
||||
|
@ -636,14 +638,13 @@ class LGBMModel(_LGBMModelBase):
|
|||
|
||||
def _more_tags(self) -> Dict[str, Any]:
|
||||
return {
|
||||
'allow_nan': True,
|
||||
'X_types': ['2darray', 'sparse', '1dlabels'],
|
||||
'_xfail_checks': {
|
||||
'check_no_attributes_set_in_init':
|
||||
'scikit-learn incorrectly asserts that private attributes '
|
||||
'cannot be set in __init__: '
|
||||
'(see https://github.com/microsoft/LightGBM/issues/2628)'
|
||||
}
|
||||
"allow_nan": True,
|
||||
"X_types": ["2darray", "sparse", "1dlabels"],
|
||||
"_xfail_checks": {
|
||||
"check_no_attributes_set_in_init": "scikit-learn incorrectly asserts that private attributes "
|
||||
"cannot be set in __init__: "
|
||||
"(see https://github.com/microsoft/LightGBM/issues/2628)"
|
||||
},
|
||||
}
|
||||
|
||||
def __sklearn_is_fitted__(self) -> bool:
|
||||
|
@@ -703,8 +704,8 @@ class LGBMModel(_LGBMModelBase):
         assert stage in {"fit", "predict"}
         params = self.get_params()
 
-        params.pop('objective', None)
-        for alias in _ConfigAliases.get('objective'):
+        params.pop("objective", None)
+        for alias in _ConfigAliases.get("objective"):
             if alias in params:
                 obj = params.pop(alias)
                 _log_warning(f"Found '{alias}' in params. Will use it instead of 'objective' argument")
@@ -725,33 +726,31 @@ class LGBMModel(_LGBMModelBase):
             raise ValueError("Unknown LGBMModel type.")
         if callable(self._objective):
             if stage == "fit":
-                params['objective'] = _ObjectiveFunctionWrapper(self._objective)
+                params["objective"] = _ObjectiveFunctionWrapper(self._objective)
             else:
-                params['objective'] = 'None'
+                params["objective"] = "None"
         else:
-            params['objective'] = self._objective
+            params["objective"] = self._objective
 
-        params.pop('importance_type', None)
-        params.pop('n_estimators', None)
-        params.pop('class_weight', None)
+        params.pop("importance_type", None)
+        params.pop("n_estimators", None)
+        params.pop("class_weight", None)
 
-        if isinstance(params['random_state'], np.random.RandomState):
-            params['random_state'] = params['random_state'].randint(np.iinfo(np.int32).max)
-        elif isinstance(params['random_state'], np_random_Generator):
-            params['random_state'] = int(
-                params['random_state'].integers(np.iinfo(np.int32).max)
-            )
+        if isinstance(params["random_state"], np.random.RandomState):
+            params["random_state"] = params["random_state"].randint(np.iinfo(np.int32).max)
+        elif isinstance(params["random_state"], np_random_Generator):
+            params["random_state"] = int(params["random_state"].integers(np.iinfo(np.int32).max))
         if self._n_classes > 2:
-            for alias in _ConfigAliases.get('num_class'):
+            for alias in _ConfigAliases.get("num_class"):
                 params.pop(alias, None)
-            params['num_class'] = self._n_classes
-        if hasattr(self, '_eval_at'):
+            params["num_class"] = self._n_classes
+        if hasattr(self, "_eval_at"):
             eval_at = self._eval_at
-            for alias in _ConfigAliases.get('eval_at'):
+            for alias in _ConfigAliases.get("eval_at"):
                 if alias in params:
                     _log_warning(f"Found '{alias}' in params. Will use it instead of 'eval_at' argument")
                     eval_at = params.pop(alias)
-            params['eval_at'] = eval_at
+            params["eval_at"] = eval_at
 
         # register default metric for consistency with callable eval_metric case
         original_metric = self._objective if isinstance(self._objective, str) else None
@@ -809,10 +808,10 @@ class LGBMModel(_LGBMModelBase):
         eval_init_score: Optional[List[_LGBM_InitScoreType]] = None,
         eval_group: Optional[List[_LGBM_GroupType]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
-        feature_name: _LGBM_FeatureNameConfiguration = 'auto',
-        categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
+        feature_name: _LGBM_FeatureNameConfiguration = "auto",
+        categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto",
         callbacks: Optional[List[Callable]] = None,
-        init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None
+        init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None,
     ) -> "LGBMModel":
         """Docstring is set after definition, using a template."""
         params = self._process_params(stage="fit")
@@ -832,9 +831,9 @@ class LGBMModel(_LGBMModelBase):
         eval_metrics_builtin = [m for m in eval_metric_list if isinstance(m, str)]
 
         # concatenate metric from params (or default if not provided in params) and eval_metric
-        params['metric'] = [params['metric']] if isinstance(params['metric'], (str, type(None))) else params['metric']
-        params['metric'] = [e for e in eval_metrics_builtin if e not in params['metric']] + params['metric']
-        params['metric'] = [metric for metric in params['metric'] if metric is not None]
+        params["metric"] = [params["metric"]] if isinstance(params["metric"], (str, type(None))) else params["metric"]
+        params["metric"] = [e for e in eval_metrics_builtin if e not in params["metric"]] + params["metric"]
+        params["metric"] = [metric for metric in params["metric"] if metric is not None]
 
         if not isinstance(X, (pd_DataFrame, dt_DataTable)):
             _X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
@@ -856,9 +855,15 @@ class LGBMModel(_LGBMModelBase):
         # copy for consistency
         self._n_features_in = self._n_features
 
-        train_set = Dataset(data=_X, label=_y, weight=sample_weight, group=group,
-                            init_score=init_score, categorical_feature=categorical_feature,
-                            params=params)
+        train_set = Dataset(
+            data=_X,
+            label=_y,
+            weight=sample_weight,
+            group=group,
+            init_score=init_score,
+            categorical_feature=categorical_feature,
+            params=params,
+        )
 
         valid_sets: List[Dataset] = []
         if eval_set is not None:
@@ -880,8 +885,8 @@ class LGBMModel(_LGBMModelBase):
                 if valid_data[0] is X and valid_data[1] is y:
                     valid_set = train_set
                 else:
-                    valid_weight = _get_meta_data(eval_sample_weight, 'eval_sample_weight', i)
-                    valid_class_weight = _get_meta_data(eval_class_weight, 'eval_class_weight', i)
+                    valid_weight = _get_meta_data(eval_sample_weight, "eval_sample_weight", i)
+                    valid_class_weight = _get_meta_data(eval_class_weight, "eval_class_weight", i)
                     if valid_class_weight is not None:
                         if isinstance(valid_class_weight, dict) and self._class_map is not None:
                             valid_class_weight = {self._class_map[k]: v for k, v in valid_class_weight.items()}
@@ -890,11 +895,17 @@ class LGBMModel(_LGBMModelBase):
                             valid_weight = valid_class_sample_weight
                         else:
                             valid_weight = np.multiply(valid_weight, valid_class_sample_weight)
-                    valid_init_score = _get_meta_data(eval_init_score, 'eval_init_score', i)
-                    valid_group = _get_meta_data(eval_group, 'eval_group', i)
-                    valid_set = Dataset(data=valid_data[0], label=valid_data[1], weight=valid_weight,
-                                        group=valid_group, init_score=valid_init_score,
-                                        categorical_feature='auto', params=params)
+                    valid_init_score = _get_meta_data(eval_init_score, "eval_init_score", i)
+                    valid_group = _get_meta_data(eval_group, "eval_group", i)
+                    valid_set = Dataset(
+                        data=valid_data[0],
+                        label=valid_data[1],
+                        weight=valid_weight,
+                        group=valid_group,
+                        init_score=valid_init_score,
+                        categorical_feature="auto",
+                        params=params,
+                    )
 
                 valid_sets.append(valid_set)
 
@@ -918,7 +929,7 @@ class LGBMModel(_LGBMModelBase):
             feval=eval_metrics_callable,  # type: ignore[arg-type]
             init_model=init_model,
             feature_name=feature_name,
-            callbacks=callbacks
+            callbacks=callbacks,
         )
 
         self._evals_result = evals_result
@@ -932,16 +943,20 @@ class LGBMModel(_LGBMModelBase):
         del train_set, valid_sets
         return self
 
-    fit.__doc__ = _lgbmmodel_doc_fit.format(
-        X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
-        y_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples]",
-        sample_weight_shape="numpy array, pandas Series, list of int or float of shape = [n_samples] or None, optional (default=None)",
-        init_score_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) or shape = [n_samples, n_classes] (for multi-class task) or None, optional (default=None)",
-        group_shape="numpy array, pandas Series, list of int or float, or None, optional (default=None)",
-        eval_sample_weight_shape="list of array (same types as ``sample_weight`` supports), or None, optional (default=None)",
-        eval_init_score_shape="list of array (same types as ``init_score`` supports), or None, optional (default=None)",
-        eval_group_shape="list of array (same types as ``group`` supports), or None, optional (default=None)"
-    ) + "\n\n" + _lgbmmodel_doc_custom_eval_note
+    fit.__doc__ = (
+        _lgbmmodel_doc_fit.format(
+            X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
+            y_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples]",
+            sample_weight_shape="numpy array, pandas Series, list of int or float of shape = [n_samples] or None, optional (default=None)",
+            init_score_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) or shape = [n_samples, n_classes] (for multi-class task) or None, optional (default=None)",
+            group_shape="numpy array, pandas Series, list of int or float, or None, optional (default=None)",
+            eval_sample_weight_shape="list of array (same types as ``sample_weight`` supports), or None, optional (default=None)",
+            eval_init_score_shape="list of array (same types as ``init_score`` supports), or None, optional (default=None)",
+            eval_group_shape="list of array (same types as ``group`` supports), or None, optional (default=None)",
+        )
+        + "\n\n"
+        + _lgbmmodel_doc_custom_eval_note
+    )
 
     def predict(
         self,
@@ -952,7 +967,7 @@ class LGBMModel(_LGBMModelBase):
         pred_leaf: bool = False,
         pred_contrib: bool = False,
         validate_features: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
    ):
         """Docstring is set after definition, using a template."""
         if not self.__sklearn_is_fitted__():
@@ -961,9 +976,11 @@ class LGBMModel(_LGBMModelBase):
             X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False)
         n_features = X.shape[1]
         if self._n_features != n_features:
-            raise ValueError("Number of features of the model must "
-                             f"match the input. Model n_features_ is {self._n_features} and "
-                             f"input n_features is {n_features}")
+            raise ValueError(
+                "Number of features of the model must "
+                f"match the input. Model n_features_ is {self._n_features} and "
+                f"input n_features is {n_features}"
+            )
         # retrive original params that possibly can be used in both training and prediction
         # and then overwrite them (considering aliases) with params that were passed directly in prediction
         predict_params = self._process_params(stage="predict")
@@ -975,7 +992,7 @@ class LGBMModel(_LGBMModelBase):
             "num_iteration",
             "pred_leaf",
             "pred_contrib",
-            *kwargs.keys()
+            *kwargs.keys(),
         ):
             predict_params.pop(alias, None)
         predict_params.update(kwargs)
@@ -986,9 +1003,14 @@ class LGBMModel(_LGBMModelBase):
         predict_params["num_threads"] = self._process_n_jobs(predict_params["num_threads"])
 
         return self._Booster.predict(  # type: ignore[union-attr]
-            X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
-            pred_leaf=pred_leaf, pred_contrib=pred_contrib, validate_features=validate_features,
-            **predict_params
+            X,
+            raw_score=raw_score,
+            start_iteration=start_iteration,
+            num_iteration=num_iteration,
+            pred_leaf=pred_leaf,
+            pred_contrib=pred_contrib,
+            validate_features=validate_features,
+            **predict_params,
         )
 
     predict.__doc__ = _lgbmmodel_doc_predict.format(
@@ -997,42 +1019,44 @@ class LGBMModel(_LGBMModelBase):
         output_name="predicted_result",
         predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]",
         X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
-        X_SHAP_values_shape="array-like of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects"
+        X_SHAP_values_shape="array-like of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects",
     )
 
     @property
     def n_features_(self) -> int:
         """:obj:`int`: The number of features of fitted model."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No n_features found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No n_features found. Need to call fit beforehand.")
         return self._n_features
 
     @property
     def n_features_in_(self) -> int:
         """:obj:`int`: The number of features of fitted model."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No n_features_in found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No n_features_in found. Need to call fit beforehand.")
         return self._n_features_in
 
     @property
     def best_score_(self) -> _LGBM_BoosterBestScoreType:
         """:obj:`dict`: The best score of fitted model."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No best_score found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No best_score found. Need to call fit beforehand.")
         return self._best_score
 
     @property
     def best_iteration_(self) -> int:
         """:obj:`int`: The best iteration of fitted model if ``early_stopping()`` callback has been specified."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No best_iteration found. Need to call fit with early_stopping callback beforehand.')
+            raise LGBMNotFittedError(
+                "No best_iteration found. Need to call fit with early_stopping callback beforehand."
+            )
         return self._best_iteration
 
     @property
     def objective_(self) -> Union[str, _LGBM_ScikitCustomObjectiveFunction]:
         """:obj:`str` or :obj:`callable`: The concrete objective used while fitting this model."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No objective found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No objective found. Need to call fit beforehand.")
         return self._objective  # type: ignore[return-value]
 
     @property
@@ -1041,11 +1065,11 @@ class LGBMModel(_LGBMModelBase):
 
         This might be less than parameter ``n_estimators`` if early stopping was enabled or
         if boosting stopped early due to limits on complexity like ``min_gain_to_split``.
 
         .. versionadded:: 4.0.0
         """
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No n_estimators found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No n_estimators found. Need to call fit beforehand.")
         return self._Booster.current_iteration()  # type: ignore
 
     @property
@@ -1054,25 +1078,25 @@ class LGBMModel(_LGBMModelBase):
 
         This might be less than parameter ``n_estimators`` if early stopping was enabled or
         if boosting stopped early due to limits on complexity like ``min_gain_to_split``.
 
         .. versionadded:: 4.0.0
         """
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No n_iter found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No n_iter found. Need to call fit beforehand.")
         return self._Booster.current_iteration()  # type: ignore
 
     @property
     def booster_(self) -> Booster:
         """Booster: The underlying Booster of this model."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No booster found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No booster found. Need to call fit beforehand.")
         return self._Booster  # type: ignore[return-value]
 
     @property
     def evals_result_(self) -> _EvalResultDict:
         """:obj:`dict`: The evaluation results if validation sets have been specified."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No results found. Need to call fit with eval_set beforehand.')
+            raise LGBMNotFittedError("No results found. Need to call fit with eval_set beforehand.")
         return self._evals_result
 
     @property
@@ -1085,14 +1109,14 @@ class LGBMModel(_LGBMModelBase):
         to configure the type of importance values to be extracted.
         """
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No feature_importances found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No feature_importances found. Need to call fit beforehand.")
         return self._Booster.feature_importance(importance_type=self.importance_type)  # type: ignore[union-attr]
 
     @property
     def feature_name_(self) -> List[str]:
         """:obj:`list` of shape = [n_features]: The names of features."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No feature_name found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No feature_name found. Need to call fit beforehand.")
         return self._Booster.feature_name()  # type: ignore[union-attr]
 
 
@@ -1110,10 +1134,10 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
         eval_sample_weight: Optional[List[_LGBM_WeightType]] = None,
         eval_init_score: Optional[List[_LGBM_InitScoreType]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
-        feature_name: _LGBM_FeatureNameConfiguration = 'auto',
-        categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
+        feature_name: _LGBM_FeatureNameConfiguration = "auto",
+        categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto",
         callbacks: Optional[List[Callable]] = None,
-        init_model: Optional[Union[str, Path, Booster, LGBMModel]] = None
+        init_model: Optional[Union[str, Path, Booster, LGBMModel]] = None,
     ) -> "LGBMRegressor":
         """Docstring is inherited from the LGBMModel."""
         super().fit(
@@ -1129,17 +1153,17 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
             feature_name=feature_name,
             categorical_feature=categorical_feature,
             callbacks=callbacks,
-            init_model=init_model
+            init_model=init_model,
         )
         return self
 
     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMRegressor")  # type: ignore
-    _base_doc = (_base_doc[:_base_doc.find('group :')]  # type: ignore
-                 + _base_doc[_base_doc.find('eval_set :'):])  # type: ignore
-    _base_doc = (_base_doc[:_base_doc.find('eval_class_weight :')]
-                 + _base_doc[_base_doc.find('eval_init_score :'):])
-    fit.__doc__ = (_base_doc[:_base_doc.find('eval_group :')]
-                   + _base_doc[_base_doc.find('eval_metric :'):])
+    _base_doc = (
+        _base_doc[: _base_doc.find("group :")]  # type: ignore
+        + _base_doc[_base_doc.find("eval_set :") :]
+    )  # type: ignore
+    _base_doc = _base_doc[: _base_doc.find("eval_class_weight :")] + _base_doc[_base_doc.find("eval_init_score :") :]
+    fit.__doc__ = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
 
 
 class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
@@ -1157,10 +1181,10 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
         eval_class_weight: Optional[List[float]] = None,
         eval_init_score: Optional[List[_LGBM_InitScoreType]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
-        feature_name: _LGBM_FeatureNameConfiguration = 'auto',
-        categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
+        feature_name: _LGBM_FeatureNameConfiguration = "auto",
+        categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto",
         callbacks: Optional[List[Callable]] = None,
-        init_model: Optional[Union[str, Path, Booster, LGBMModel]] = None
+        init_model: Optional[Union[str, Path, Booster, LGBMModel]] = None,
     ) -> "LGBMClassifier":
         """Docstring is inherited from the LGBMModel."""
         _LGBMAssertAllFinite(y)
@@ -1187,16 +1211,16 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
                 eval_metric_list = []
             if self._n_classes > 2:
                 for index, metric in enumerate(eval_metric_list):
-                    if metric in {'logloss', 'binary_logloss'}:
+                    if metric in {"logloss", "binary_logloss"}:
                         eval_metric_list[index] = "multi_logloss"
-                    elif metric in {'error', 'binary_error'}:
+                    elif metric in {"error", "binary_error"}:
                         eval_metric_list[index] = "multi_error"
             else:
                 for index, metric in enumerate(eval_metric_list):
-                    if metric in {'logloss', 'multi_logloss'}:
-                        eval_metric_list[index] = 'binary_logloss'
-                    elif metric in {'error', 'multi_error'}:
-                        eval_metric_list[index] = 'binary_error'
+                    if metric in {"logloss", "multi_logloss"}:
+                        eval_metric_list[index] = "binary_logloss"
+                    elif metric in {"error", "multi_error"}:
+                        eval_metric_list[index] = "binary_error"
             eval_metric = eval_metric_list
 
         # do not modify args, as it causes errors in model selection tools
@@ -1225,15 +1249,16 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
             feature_name=feature_name,
             categorical_feature=categorical_feature,
             callbacks=callbacks,
-            init_model=init_model
+            init_model=init_model,
         )
         return self
 
     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMClassifier")  # type: ignore
-    _base_doc = (_base_doc[:_base_doc.find('group :')]  # type: ignore
-                 + _base_doc[_base_doc.find('eval_set :'):])  # type: ignore
-    fit.__doc__ = (_base_doc[:_base_doc.find('eval_group :')]
-                   + _base_doc[_base_doc.find('eval_metric :'):])
+    _base_doc = (
+        _base_doc[: _base_doc.find("group :")]  # type: ignore
+        + _base_doc[_base_doc.find("eval_set :") :]
+    )  # type: ignore
+    fit.__doc__ = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
 
     def predict(
         self,
@@ -1244,7 +1269,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
         pred_leaf: bool = False,
         pred_contrib: bool = False,
         validate_features: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ):
         """Docstring is inherited from the LGBMModel."""
         result = self.predict_proba(
@@ -1255,7 +1280,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
             validate_features=validate_features,
-            **kwargs
+            **kwargs,
         )
         if callable(self._objective) or raw_score or pred_leaf or pred_contrib:
             return result
@@ -1274,7 +1299,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
         pred_leaf: bool = False,
         pred_contrib: bool = False,
         validate_features: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ):
         """Docstring is set after definition, using a template."""
         result = super().predict(
@@ -1285,17 +1310,19 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
             pred_leaf=pred_leaf,
             pred_contrib=pred_contrib,
             validate_features=validate_features,
-            **kwargs
+            **kwargs,
         )
         if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib):
-            _log_warning("Cannot compute class probabilities or labels "
-                         "due to the usage of customized objective function.\n"
-                         "Returning raw scores instead.")
+            _log_warning(
+                "Cannot compute class probabilities or labels "
+                "due to the usage of customized objective function.\n"
+                "Returning raw scores instead."
+            )
             return result
         elif self._n_classes > 2 or raw_score or pred_leaf or pred_contrib:  # type: ignore [operator]
             return result
         else:
-            return np.vstack((1. - result, result)).transpose()
+            return np.vstack((1.0 - result, result)).transpose()
 
     predict_proba.__doc__ = _lgbmmodel_doc_predict.format(
         description="Return the predicted probability for each class for each sample.",
@@ -1303,21 +1330,21 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
         output_name="predicted_probability",
         predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]",
         X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
-        X_SHAP_values_shape="array-like of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects"
+        X_SHAP_values_shape="array-like of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects",
     )
 
     @property
     def classes_(self) -> np.ndarray:
         """:obj:`array` of shape = [n_classes]: The class label array."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No classes found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No classes found. Need to call fit beforehand.")
         return self._classes  # type: ignore[return-value]
 
     @property
     def n_classes_(self) -> int:
         """:obj:`int`: The number of classes."""
         if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No classes found. Need to call fit beforehand.')
+            raise LGBMNotFittedError("No classes found. Need to call fit beforehand.")
         return self._n_classes
 
 
@@ -1345,10 +1372,10 @@ class LGBMRanker(LGBMModel):
         eval_group: Optional[List[_LGBM_GroupType]] = None,
         eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None,
         eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5),
-        feature_name: _LGBM_FeatureNameConfiguration = 'auto',
-        categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
+        feature_name: _LGBM_FeatureNameConfiguration = "auto",
+        categorical_feature: _LGBM_CategoricalFeatureConfiguration = "auto",
         callbacks: Optional[List[Callable]] = None,
-        init_model: Optional[Union[str, Path, Booster, LGBMModel]] = None
+        init_model: Optional[Union[str, Path, Booster, LGBMModel]] = None,
     ) -> "LGBMRanker":
         """Docstring is inherited from the LGBMModel."""
         # check group data
@@ -1360,12 +1387,16 @@ class LGBMRanker(LGBMModel):
                 raise ValueError("Eval_group cannot be None when eval_set is not None")
             elif len(eval_group) != len(eval_set):
                 raise ValueError("Length of eval_group should be equal to eval_set")
-            elif (isinstance(eval_group, dict)
-                  and any(i not in eval_group or eval_group[i] is None for i in range(len(eval_group)))
-                  or isinstance(eval_group, list)
-                  and any(group is None for group in eval_group)):
-                raise ValueError("Should set group for all eval datasets for ranking task; "
-                                 "if you use dict, the index should start from 0")
+            elif (
+                isinstance(eval_group, dict)
+                and any(i not in eval_group or eval_group[i] is None for i in range(len(eval_group)))
+                or isinstance(eval_group, list)
+                and any(group is None for group in eval_group)
+            ):
+                raise ValueError(
+                    "Should set group for all eval datasets for ranking task; "
+                    "if you use dict, the index should start from 0"
+                )
 
         self._eval_at = eval_at
         super().fit(
@@ -1383,15 +1414,17 @@ class LGBMRanker(LGBMModel):
             feature_name=feature_name,
             categorical_feature=categorical_feature,
             callbacks=callbacks,
-            init_model=init_model
+            init_model=init_model,
         )
         return self
 
     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMRanker")  # type: ignore
-    fit.__doc__ = (_base_doc[:_base_doc.find('eval_class_weight :')]  # type: ignore
-                   + _base_doc[_base_doc.find('eval_init_score :'):])  # type: ignore
+    fit.__doc__ = (
+        _base_doc[: _base_doc.find("eval_class_weight :")]  # type: ignore
+        + _base_doc[_base_doc.find("eval_init_score :") :]
+    )  # type: ignore
     _base_doc = fit.__doc__
-    _before_feature_name, _feature_name, _after_feature_name = _base_doc.partition('feature_name :')
+    _before_feature_name, _feature_name, _after_feature_name = _base_doc.partition("feature_name :")
     fit.__doc__ = f"""{_before_feature_name}eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))
         The evaluation positions of the specified metric.
     {_feature_name}{_after_feature_name}"""
@@ -114,7 +114,6 @@ exclude = [
     "compile/*.py",
     "external_libs/*.py",
     "lightgbm-python/*.py",
-    "python-package/*.py",
 ]
 indent-style = "space"
 quote-style = "double"