Mirror of https://github.com/microsoft/LightGBM.git
fix RF's bug with MAPE & support RF with multi-class (#1637)
* fix RF's bug with MAPE
* simplify RF's code & support multi-class RF
* fix bug & add test
* add more tests
* Update test_engine.py
This commit is contained in:
Parent: ae34ad3efd
Commit: 83565f01ea
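
Both fixes are exercised by the tests added to test_engine.py below. As a rough usage sketch, not part of the commit (parameters are copied from the new test_multiclass_rf test; load_digits follows the sklearn signature of that era):

import lightgbm as lgb
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Train a multi-class random forest, which this commit enables
# (previously RF asserted num_tree_per_iteration_ == 1).
X, y = load_digits(10, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                    random_state=42)
params = {
    'boosting_type': 'rf',
    'objective': 'multiclass',
    'num_class': 10,
    'bagging_freq': 1,        # RF requires bagging to be enabled
    'bagging_fraction': 0.6,
    'feature_fraction': 0.6,
}
gbm = lgb.train(params, lgb.Dataset(X_train, y_train), num_boost_round=100)
print(gbm.predict(X_test).shape)  # (n_samples, 10) class probabilities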
src/boosting/rf.hpp

@@ -38,16 +38,16 @@ public:
       CHECK(train_data->metadata().init_score() == nullptr);
     }
-    // cannot use RF for multi-class.
-    CHECK(num_tree_per_iteration_ == 1);
+    CHECK(num_tree_per_iteration_ == num_class_);
     // not shrinkage rate for the RF
     shrinkage_rate_ = 1.0f;
     // only boosting one time
-    Boosting();
+    GetRFTargets(train_data);
     if (is_use_subset_ && bag_data_cnt_ < num_data_) {
-      tmp_grad_.resize(num_data_);
-      tmp_hess_.resize(num_data_);
+      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+      tmp_grad_.resize(total_size);
+      tmp_hess_.resize(total_size);
     }
+    tmp_score_.resize(num_data_, 0.0f);
   }

   void ResetConfig(const Config* config) override {
@@ -67,72 +67,80 @@ public:
       }
     }
-    // cannot use RF for multi-class.
-    CHECK(num_tree_per_iteration_ == 1);
+    CHECK(num_tree_per_iteration_ == num_class_);
     // only boosting one time
-    Boosting();
+    GetRFTargets(train_data);
     if (is_use_subset_ && bag_data_cnt_ < num_data_) {
-      tmp_grad_.resize(num_data_);
-      tmp_hess_.resize(num_data_);
+      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+      tmp_grad_.resize(total_size);
+      tmp_hess_.resize(total_size);
     }
+    tmp_score_.resize(num_data_, 0.0f);
   }

+  void GetRFTargets(const Dataset* train_data) {
+    auto label_ptr = train_data->metadata().label();
+    std::fill(hessians_.begin(), hessians_.end(), 1);
+    if (num_tree_per_iteration_ == 1) {
+      OMP_INIT_EX();
+      #pragma omp parallel for schedule(static,1)
+      for (data_size_t i = 0; i < train_data->num_data(); ++i) {
+        OMP_LOOP_EX_BEGIN();
+        double label = label_ptr[i];
+        gradients_[i] = static_cast<score_t>(-label);
+        OMP_LOOP_EX_END();
+      }
+      OMP_THROW_EX();
+    }
+    else {
+      std::fill(gradients_.begin(), gradients_.end(), 0);
+      OMP_INIT_EX();
+      #pragma omp parallel for schedule(static,1)
+      for (data_size_t i = 0; i < train_data->num_data(); ++i) {
+        OMP_LOOP_EX_BEGIN();
+        double label = label_ptr[i];
+        gradients_[i + static_cast<int>(label) * num_data_] = -1;
+        OMP_LOOP_EX_END();
+      }
+      OMP_THROW_EX();
+    }
+  }

   void Boosting() override {
     if (objective_function_ == nullptr) {
       Log::Fatal("No object function provided");
     }
-    std::vector<double> tmp_score(num_tree_per_iteration_ * num_data_, 0.0f);
-    objective_function_->
-      GetGradients(tmp_score.data(), gradients_.data(), hessians_.data());
   }

   bool TrainOneIter(const score_t* gradients, const score_t* hessians) override {
     // bagging logic
     Bagging(iter_);
-    if (gradients == nullptr || hessians == nullptr) {
-      gradients = gradients_.data();
-      hessians = hessians_.data();
-    }
+    CHECK(gradients == nullptr);
+    CHECK(hessians == nullptr);
+
+    gradients = gradients_.data();
+    hessians = hessians_.data();
     for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
       std::unique_ptr<Tree> new_tree(new Tree(2));
       if (class_need_train_[cur_tree_id]) {
         size_t bias = static_cast<size_t>(cur_tree_id) * num_data_;
         auto grad = gradients + bias;
         auto hess = hessians + bias;

         // need to copy gradients for bagging subset.
         if (is_use_subset_ && bag_data_cnt_ < num_data_) {
           for (int i = 0; i < bag_data_cnt_; ++i) {
-            tmp_grad_[i] = grad[bag_data_indices_[i]];
-            tmp_hess_[i] = hess[bag_data_indices_[i]];
+            tmp_grad_[bias + i] = grad[bag_data_indices_[i]];
+            tmp_hess_[bias + i] = hess[bag_data_indices_[i]];
           }
-          grad = tmp_grad_.data();
-          hess = tmp_hess_.data();
+          grad = tmp_grad_.data() + bias;
+          hess = tmp_hess_.data() + bias;
         }

         new_tree.reset(tree_learner_->Train(grad, hess, is_constant_hessian_,
                                             forced_splits_json_));
       }
       if (new_tree->num_leaves() > 1) {
         tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, tmp_score_.data(),
                                        num_data_, bag_data_indices_.data(), bag_data_cnt_);
         // update score
         MultiplyScore(cur_tree_id, (iter_ + num_init_iteration_));
         ConvertTreeOutput(new_tree.get());
         UpdateScore(new_tree.get(), cur_tree_id);
         MultiplyScore(cur_tree_id, 1.0 / (iter_ + num_init_iteration_ + 1));
       } else {
         // only add default score one-time
         if (!class_need_train_[cur_tree_id] && models_.size() < static_cast<size_t>(num_tree_per_iteration_)) {
           double output = class_default_output_[cur_tree_id];
           objective_function_->ConvertOutput(&output, &output);
           new_tree->AsConstantTree(output);
           train_score_updater_->AddScore(output, cur_tree_id);
           for (auto& score_updater : valid_score_updater_) {
             score_updater->AddScore(output, cur_tree_id);
           }
         }
       }
       // add model
       models_.push_back(std::move(new_tree));
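The GetRFTargets function added above replaces the old Boosting() path, which asked the objective for gradients at an all-zero score (the removed tmp_score vector) and appears to be what broke RF with score-dependent objectives such as MAPE. RF now fixes its regression targets once from the labels and lets RenewTreeOutput refit each leaf with the true objective. A small sketch of that target encoding, not part of the commit (NumPy stands in for the class-major gradients_ buffer; labels are assumed to be 0..num_class-1):

import numpy as np

# Sketch of the targets GetRFTargets builds: unit hessians everywhere, and
# gradients laid out class-major (class k occupies
# gradients[k * num_data : (k + 1) * num_data], as in LightGBM).
def rf_targets(y, num_class):
    num_data = len(y)
    hessians = np.ones(num_class * num_data)   # std::fill(hessians_, ..., 1)
    if num_class == 1:
        gradients = -y.astype(np.float64)      # gradients_[i] = -label
    else:
        gradients = np.zeros(num_class * num_data)
        # gradients_[i + label * num_data_] = -1: a negative one-hot indicator
        gradients[np.arange(num_data) + y.astype(int) * num_data] = -1.0
    return gradients, hessians

# With unit hessians, the Newton leaf value -sum(g)/sum(h) over a leaf is the
# mean target in that leaf, i.e. the in-leaf frequency of class k for
# multi-class -- exactly what a random forest leaf should output.
y = np.array([0, 2, 1, 2, 2])
g, h = rf_targets(y, num_class=3)
leaf = np.array([1, 3, 4])                     # hypothetical leaf membership
print(-g[2 * len(y) + leaf].sum() / h[2 * len(y) + leaf].sum())  # 1.0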
@@ -169,15 +177,6 @@ public:
       }
     }

-  void ConvertTreeOutput(Tree* tree) {
-    tree->Shrinkage(1.0f);
-    for (int i = 0; i < tree->num_leaves(); ++i) {
-      double output = tree->LeafOutput(i);
-      objective_function_->ConvertOutput(&output, &output);
-      tree->SetLeafOutput(i, output);
-    }
-  }
-
   void AddValidDataset(const Dataset* valid_data,
                        const std::vector<const Metric*>& valid_metrics) override {
     GBDT::AddValidDataset(valid_data, valid_metrics);
@@ -201,6 +200,7 @@ private:

   std::vector<score_t> tmp_grad_;
   std::vector<score_t> tmp_hess_;
+  std::vector<double> tmp_score_;

 };
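For reference, the MultiplyScore calls bracketing UpdateScore in TrainOneIter keep the stored score equal to the running average of the trees trained so far: scale the average back up to a sum, add the new tree's output, then renormalize. A minimal sketch of that recurrence, not from the commit (t plays the role of iter_ + num_init_iteration_):

import numpy as np

def add_tree_to_average(avg_score, tree_output, t):
    scores = avg_score * t           # MultiplyScore(cur_tree_id, t)
    scores += tree_output            # UpdateScore(new_tree, cur_tree_id)
    return scores / (t + 1)          # MultiplyScore(cur_tree_id, 1.0 / (t + 1))

rng = np.random.RandomState(42)
tree_outputs = [rng.normal(size=4) for _ in range(8)]
avg = np.zeros(4)
for t, out in enumerate(tree_outputs):
    avg = add_tree_to_average(avg, out, t)
# the maintained score equals the plain average of the tree outputs
assert np.allclose(avg, np.mean(tree_outputs, axis=0))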
tests/python_package_test/test_engine.py

@@ -294,6 +294,33 @@ class TestEngine(unittest.TestCase):
         self.assertLess(ret, 0.2)
         self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)

+    def test_multiclass_rf(self):
+        X, y = load_digits(10, True)
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+        params = {
+            'boosting_type': 'rf',
+            'objective': 'multiclass',
+            'metric': 'multi_logloss',
+            'bagging_freq': 1,
+            'bagging_fraction': 0.6,
+            'feature_fraction': 0.6,
+            'num_class': 10,
+            'num_leaves': 50,
+            'min_data': 1,
+            'verbose': -1
+        }
+        lgb_train = lgb.Dataset(X_train, y_train, params=params)
+        lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
+        evals_result = {}
+        gbm = lgb.train(params, lgb_train,
+                        num_boost_round=100,
+                        valid_sets=lgb_eval,
+                        verbose_eval=False,
+                        evals_result=evals_result)
+        ret = multi_logloss(y_test, gbm.predict(X_test))
+        self.assertLess(ret, 0.4)
+        self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
+
     def test_multiclass_prediction_early_stopping(self):
         X, y = load_digits(10, True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
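The multi_logloss helper asserted against above is defined near the top of test_engine.py and is not shown in this diff; a functionally equivalent stand-in, assuming predict returns one row of class probabilities per sample:

import math
import numpy as np

def multi_logloss(y_true, y_pred):
    # mean negative log-likelihood of the true class
    return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])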
@@ -652,9 +679,44 @@ class TestEngine(unittest.TestCase):
         }
         lgb_train = lgb.Dataset(X_train, y_train)
         gbm = lgb.train(params, lgb_train,
-                        num_boost_round=20,
-                        verbose_eval=False)
+                        num_boost_round=20)
         err_pred = log_loss(y_test, gbm.predict(X_test))
         new_gbm = gbm.refit(X_test, y_test)
         new_err_pred = log_loss(y_test, new_gbm.predict(X_test))
         self.assertGreater(err_pred, new_err_pred)

+    def test_mape_rf(self):
+        X, y = load_boston(True)
+        params = {
+            'boosting_type': 'rf',
+            'objective': 'mape',
+            'verbose': -1,
+            'bagging_freq': 1,
+            'bagging_fraction': 0.8,
+            'feature_fraction': 0.8,
+            'boost_from_average': False
+        }
+        lgb_train = lgb.Dataset(X, y)
+        gbm = lgb.train(params, lgb_train,
+                        num_boost_round=20)
+        pred = gbm.predict(X)
+        pred_mean = pred.mean()
+        self.assertGreater(pred_mean, 20)
+
+    def test_mape_dart(self):
+        X, y = load_boston(True)
+        params = {
+            'boosting_type': 'dart',
+            'objective': 'mape',
+            'verbose': -1,
+            'bagging_freq': 1,
+            'bagging_fraction': 0.8,
+            'feature_fraction': 0.8,
+            'boost_from_average': False
+        }
+        lgb_train = lgb.Dataset(X, y)
+        gbm = lgb.train(params, lgb_train,
+                        num_boost_round=40)
+        pred = gbm.predict(X)
+        pred_mean = pred.mean()
+        self.assertGreater(pred_mean, 18)