зеркало из https://github.com/microsoft/LightGBM.git
fix mismatch of number of features in feature_importances
This commit is contained in:
Родитель
b8e38ec1eb
Коммит
03979ad44a
|
@ -104,7 +104,16 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration, int feature_
|
||||||
for (size_t i = 0; i < feature_importances.size(); ++i) {
|
for (size_t i = 0; i < feature_importances.size(); ++i) {
|
||||||
size_t feature_importances_int = static_cast<size_t>(feature_importances[i]);
|
size_t feature_importances_int = static_cast<size_t>(feature_importances[i]);
|
||||||
if (feature_importances_int > 0) {
|
if (feature_importances_int > 0) {
|
||||||
pairs.emplace_back(feature_importances_int, feature_names_[i]);
|
Log::Warning("i = %d, feature_names_.size() = %d", i, feature_names_.size());
|
||||||
|
if (i < feature_names_.size()) {
|
||||||
|
pairs.emplace_back(feature_importances_int, feature_names_[i]);
|
||||||
|
} else {
|
||||||
|
// with LibSVM format and continual training, the number of features in dataset can be fewer than in the initial model
|
||||||
|
// in that case FeatureImportance returns with the number of features in the initial model
|
||||||
|
std::stringstream str_buf;
|
||||||
|
str_buf << "Column_" << i << "_from_init_model";
|
||||||
|
pairs.emplace_back(feature_importances_int, str_buf.str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
str_buf << '\n' << "\"feature_importances\":" << "{";
|
str_buf << '\n' << "\"feature_importances\":" << "{";
|
||||||
|
@ -377,7 +386,15 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int
|
||||||
for (size_t i = 0; i < feature_importances.size(); ++i) {
|
for (size_t i = 0; i < feature_importances.size(); ++i) {
|
||||||
size_t feature_importances_int = static_cast<size_t>(feature_importances[i]);
|
size_t feature_importances_int = static_cast<size_t>(feature_importances[i]);
|
||||||
if (feature_importances_int > 0) {
|
if (feature_importances_int > 0) {
|
||||||
pairs.emplace_back(feature_importances_int, feature_names_[i]);
|
if (i < feature_names_.size()) {
|
||||||
|
pairs.emplace_back(feature_importances_int, feature_names_[i]);
|
||||||
|
} else {
|
||||||
|
// with LibSVM format and continual training, the number of features in dataset can be fewer than in the initial model
|
||||||
|
// in that case FeatureImportance returns with the number of features in the initial model
|
||||||
|
std::stringstream str_buf;
|
||||||
|
str_buf << "Column_" << i << "_from_init_model";
|
||||||
|
pairs.emplace_back(feature_importances_int, str_buf.str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// sort the importance
|
// sort the importance
|
||||||
|
@ -609,10 +626,14 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
|
||||||
for (int iter = 0; iter < num_used_model; ++iter) {
|
for (int iter = 0; iter < num_used_model; ++iter) {
|
||||||
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
|
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
|
||||||
if (models_[iter]->split_gain(split_idx) > 0) {
|
if (models_[iter]->split_gain(split_idx) > 0) {
|
||||||
|
const int real_feature_index = models_[iter]->split_feature(split_idx);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
CHECK_GE(models_[iter]->split_feature(split_idx), 0);
|
CHECK_GE(real_feature_index, 0);
|
||||||
#endif
|
#endif
|
||||||
feature_importances[models_[iter]->split_feature(split_idx)] += 1.0;
|
if (static_cast<size_t>(real_feature_index) >= feature_importances.size()) {
|
||||||
|
feature_importances.resize(real_feature_index + 1);
|
||||||
|
}
|
||||||
|
feature_importances[real_feature_index] += 1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -620,10 +641,14 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
|
||||||
for (int iter = 0; iter < num_used_model; ++iter) {
|
for (int iter = 0; iter < num_used_model; ++iter) {
|
||||||
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
|
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
|
||||||
if (models_[iter]->split_gain(split_idx) > 0) {
|
if (models_[iter]->split_gain(split_idx) > 0) {
|
||||||
|
const int real_feature_index = models_[iter]->split_feature(split_idx);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
CHECK_GE(models_[iter]->split_feature(split_idx), 0);
|
CHECK_GE(real_feature_index, 0);
|
||||||
#endif
|
#endif
|
||||||
feature_importances[models_[iter]->split_feature(split_idx)] += models_[iter]->split_gain(split_idx);
|
if (static_cast<size_t>(real_feature_index) >= feature_importances.size()) {
|
||||||
|
feature_importances.resize(real_feature_index + 1);
|
||||||
|
}
|
||||||
|
feature_importances[real_feature_index] += models_[iter]->split_gain(split_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче