diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 7d613919d..93c241bce 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -51,13 +51,13 @@ Core Parameters - **Note**: can be used only in CLI version; for language-specific packages you can use the correspondent functions -- ``objective`` :raw-html:`🔗︎`, default = ``regression``, type = enum, options: ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``, ``gamma``, ``tweedie``, ``binary``, ``multiclass``, ``multiclassova``, ``xentropy``, ``xentlambda``, ``lambdarank``, aliases: ``objective_type``, ``app``, ``application`` +- ``objective`` :raw-html:`🔗︎`, default = ``regression``, type = enum, options: ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``, ``gamma``, ``tweedie``, ``binary``, ``multiclass``, ``multiclassova``, ``cross_entropy``, ``cross_entropy_lambda``, ``lambdarank``, aliases: ``objective_type``, ``app``, ``application`` - regression application - - ``regression_l2``, L2 loss, aliases: ``regression``, ``mean_squared_error``, ``mse``, ``l2_root``, ``root_mean_squared_error``, ``rmse`` + - ``regression``, L2 loss, aliases: ``regression_l2``, ``l2``, ``mean_squared_error``, ``mse``, ``l2_root``, ``root_mean_squared_error``, ``rmse`` - - ``regression_l1``, L1 loss, aliases: ``mean_absolute_error``, ``mae`` + - ``regression_l1``, L1 loss, aliases: ``l1``, ``mean_absolute_error``, ``mae`` - ``huber``, `Huber loss `__ @@ -85,9 +85,9 @@ Core Parameters - cross-entropy application - - ``xentropy``, objective function for cross-entropy (with optional linear weights), aliases: ``cross_entropy`` + - ``cross_entropy``, objective function for cross-entropy (with optional linear weights), aliases: ``xentropy`` - - ``xentlambda``, alternative parameterization of cross-entropy, aliases: ``cross_entropy_lambda`` + - ``cross_entropy_lambda``, alternative parameterization of cross-entropy, aliases: ``xentlambda`` - label 
is anything in interval [0, 1] @@ -857,11 +857,11 @@ Metric Parameters - ``multi_error``, error rate for multi-class classification - - ``xentropy``, cross-entropy (with optional linear weights), aliases: ``cross_entropy`` + - ``cross_entropy``, cross-entropy (with optional linear weights), aliases: ``xentropy`` - - ``xentlambda``, "intensity-weighted" cross-entropy, aliases: ``cross_entropy_lambda`` + - ``cross_entropy_lambda``, "intensity-weighted" cross-entropy, aliases: ``xentlambda`` - - ``kldiv``, `Kullback-Leibler divergence `__, aliases: ``kullback_leibler`` + - ``kullback_leibler``, `Kullback-Leibler divergence `__, aliases: ``kldiv`` - support multiple metrics, separated by ``,`` diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 74ae1aa19..315c0d78d 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -102,11 +102,11 @@ struct Config { // [doc-only] // type = enum - // options = regression, regression_l1, huber, fair, poisson, quantile, mape, gamma, tweedie, binary, multiclass, multiclassova, xentropy, xentlambda, lambdarank + // options = regression, regression_l1, huber, fair, poisson, quantile, mape, gamma, tweedie, binary, multiclass, multiclassova, cross_entropy, cross_entropy_lambda, lambdarank // alias = objective_type, app, application // desc = regression application - // descl2 = ``regression_l2``, L2 loss, aliases: ``regression``, ``mean_squared_error``, ``mse``, ``l2_root``, ``root_mean_squared_error``, ``rmse`` - // descl2 = ``regression_l1``, L1 loss, aliases: ``mean_absolute_error``, ``mae`` + // descl2 = ``regression``, L2 loss, aliases: ``regression_l2``, ``l2``, ``mean_squared_error``, ``mse``, ``l2_root``, ``root_mean_squared_error``, ``rmse`` + // descl2 = ``regression_l1``, L1 loss, aliases: ``l1``, ``mean_absolute_error``, ``mae`` // descl2 = ``huber``, `Huber loss `__ // descl2 = ``fair``, `Fair loss `__ // descl2 = ``poisson``, `Poisson regression `__ @@ -120,8 +120,8 @@ struct Config { 
// descl2 = ``multiclassova``, `One-vs-All `__ binary objective function, aliases: ``multiclass_ova``, ``ova``, ``ovr`` // descl2 = ``num_class`` should be set as well // desc = cross-entropy application - // descl2 = ``xentropy``, objective function for cross-entropy (with optional linear weights), aliases: ``cross_entropy`` - // descl2 = ``xentlambda``, alternative parameterization of cross-entropy, aliases: ``cross_entropy_lambda`` + // descl2 = ``cross_entropy``, objective function for cross-entropy (with optional linear weights), aliases: ``xentropy`` + // descl2 = ``cross_entropy_lambda``, alternative parameterization of cross-entropy, aliases: ``xentlambda`` // descl2 = label is anything in interval [0, 1] // desc = ``lambdarank``, `lambdarank `__ application // descl2 = label should be ``int`` type in lambdarank tasks, and larger number represents the higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect) @@ -754,9 +754,9 @@ struct Config { // descl2 = ``binary_error``, for one sample: ``0`` for correct classification, ``1`` for error classification // descl2 = ``multi_logloss``, log loss for multi-class classification, aliases: ``multiclass``, ``softmax``, ``multiclassova``, ``multiclass_ova``, ``ova``, ``ovr`` // descl2 = ``multi_error``, error rate for multi-class classification - // descl2 = ``xentropy``, cross-entropy (with optional linear weights), aliases: ``cross_entropy`` - // descl2 = ``xentlambda``, "intensity-weighted" cross-entropy, aliases: ``cross_entropy_lambda`` - // descl2 = ``kldiv``, `Kullback-Leibler divergence `__, aliases: ``kullback_leibler`` + // descl2 = ``cross_entropy``, cross-entropy (with optional linear weights), aliases: ``xentropy`` + // descl2 = ``cross_entropy_lambda``, "intensity-weighted" cross-entropy, aliases: ``xentlambda`` + // descl2 = ``kullback_leibler``, `Kullback-Leibler divergence `__, aliases: ``kldiv`` // desc = support multiple metrics, separated by ``,`` std::vector metric; diff --git a/src/io/config.cpp 
b/src/io/config.cpp index 77fc69e69..e981aeaa7 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -63,41 +63,91 @@ void GetBoostingType(const std::unordered_map<std::string, std::string>& params, } } +std::string ParseObjectiveAlias(const std::string& type) { + if (type == std::string("regression") || type == std::string("regression_l2") + || type == std::string("mean_squared_error") || type == std::string("mse") || type == std::string("l2") + || type == std::string("l2_root") || type == std::string("root_mean_squared_error") || type == std::string("rmse")) { + return "regression"; + } else if (type == std::string("regression_l1") || type == std::string("mean_absolute_error") + || type == std::string("l1") || type == std::string("mae")) { + return "regression_l1"; + } else if (type == std::string("multiclass") || type == std::string("softmax")) { + return "multiclass"; + } else if (type == std::string("multiclassova") || type == std::string("multiclass_ova") || type == std::string("ova") || type == std::string("ovr")) { + return "multiclassova"; + } else if (type == std::string("xentropy") || type == std::string("cross_entropy")) { + return "cross_entropy"; + } else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) { + return "cross_entropy_lambda"; + } else if (type == std::string("mean_absolute_percentage_error") || type == std::string("mape")) { + return "mape"; + } else if (type == std::string("none") || type == std::string("null") || type == std::string("custom") || type == std::string("na")) { + return "custom"; + } + return type; +} + +std::string ParseMetricAlias(const std::string& type) { + if (type == std::string("regression") || type == std::string("regression_l2") || type == std::string("l2") || type == std::string("mean_squared_error") || type == std::string("mse")) { + return "l2"; + } else if (type == std::string("l2_root") || type == std::string("root_mean_squared_error") || type == std::string("rmse")) { + return "rmse"; + } else
if (type == std::string("regression_l1") || type == std::string("l1") || type == std::string("mean_absolute_error") || type == std::string("mae")) { + return "l1"; + } else if (type == std::string("binary_logloss") || type == std::string("binary")) { + return "binary_logloss"; + } else if (type == std::string("ndcg") || type == std::string("lambdarank")) { + return "ndcg"; + } else if (type == std::string("map") || type == std::string("mean_average_precision")) { + return "map"; + } else if (type == std::string("multi_logloss") || type == std::string("multiclass") || type == std::string("softmax") || type == std::string("multiclassova") || type == std::string("multiclass_ova") || type == std::string("ova") || type == std::string("ovr")) { + return "multi_logloss"; + } else if (type == std::string("xentropy") || type == std::string("cross_entropy")) { + return "cross_entropy"; + } else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) { + return "cross_entropy_lambda"; + } else if (type == std::string("kldiv") || type == std::string("kullback_leibler")) { + return "kullback_leibler"; + } else if (type == std::string("mean_absolute_percentage_error") || type == std::string("mape")) { + return "mape"; + } else if (type == std::string("none") || type == std::string("null") || type == std::string("custom") || type == std::string("na")) { + return "custom"; + } + return type; +} + +void ParseMetrics(const std::string& value, std::vector<std::string>* out_metric) { + std::unordered_set<std::string> metric_sets; + out_metric->clear(); + std::vector<std::string> metrics = Common::Split(value.c_str(), ','); + for (auto& met : metrics) { + auto type = ParseMetricAlias(met); + if (metric_sets.count(type) <= 0) { + out_metric->push_back(type); + metric_sets.insert(type); + } + } +} + void GetObjectiveType(const std::unordered_map<std::string, std::string>& params, std::string* objective) { std::string value; if (Config::GetString(params, "objective", &value)) { std::transform(value.begin(), value.end(),
value.begin(), Common::tolower); - *objective = value; + *objective = ParseObjectiveAlias(value); } } void GetMetricType(const std::unordered_map<std::string, std::string>& params, std::vector<std::string>* metric) { std::string value; if (Config::GetString(params, "metric", &value)) { - // clear old metrics - metric->clear(); - // to lower std::transform(value.begin(), value.end(), value.begin(), Common::tolower); - // split - std::vector<std::string> metrics = Common::Split(value.c_str(), ','); - // remove duplicate - std::unordered_set<std::string> metric_sets; - for (auto& met : metrics) { - std::transform(met.begin(), met.end(), met.begin(), Common::tolower); - if (metric_sets.count(met) <= 0) { - metric_sets.insert(met); - } - } - for (auto& met : metric_sets) { - metric->push_back(met); - } - metric->shrink_to_fit(); + ParseMetrics(value, metric); } // add names of objective function if not providing metric if (metric->empty() && value.size() == 0) { if (Config::GetString(params, "objective", &value)) { std::transform(value.begin(), value.end(), value.begin(), Common::tolower); - metric->push_back(value); + ParseMetrics(value, metric); } } } @@ -196,20 +246,13 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) { } bool CheckMultiClassObjective(const std::string& objective) { - return (objective == std::string("multiclass") - || objective == std::string("multiclassova") - || objective == std::string("softmax") - || objective == std::string("multiclass_ova") - || objective == std::string("ova") - || objective == std::string("ovr")); + return (objective == std::string("multiclass") || objective == std::string("multiclassova")); } void Config::CheckParamConflict() { // check if objective, metric, and num_class match int num_class_check = num_class; - bool objective_custom = objective == std::string("none") || objective == std::string("null") - || objective == std::string("custom") || objective == std::string("na"); - bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective_custom &&
num_class_check > 1); + bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective == std::string("custom") && num_class_check > 1); if (objective_type_multiclass) { if (num_class_check <= 1) { @@ -221,12 +264,10 @@ void Config::CheckParamConflict() { } } for (std::string metric_type : metric) { - bool metric_custom_or_none = metric_type == std::string("none") || metric_type == std::string("null") - || metric_type == std::string("custom") || metric_type == std::string("na"); bool metric_type_multiclass = (CheckMultiClassObjective(metric_type) || metric_type == std::string("multi_logloss") || metric_type == std::string("multi_error") - || (metric_custom_or_none && num_class_check > 1)); + || (metric_type == std::string("custom") && num_class_check > 1)); if ((objective_type_multiclass && !metric_type_multiclass) || (!objective_type_multiclass && metric_type_multiclass)) { Log::Fatal("Multiclass objective and metrics don't match"); diff --git a/src/metric/metric.cpp b/src/metric/metric.cpp index 0cfd7ab99..715c78910 100644 --- a/src/metric/metric.cpp +++ b/src/metric/metric.cpp @@ -14,11 +14,11 @@ namespace LightGBM { Metric* Metric::CreateMetric(const std::string& type, const Config& config) { - if (type == std::string("regression") || type == std::string("regression_l2") || type == std::string("l2") || type == std::string("mean_squared_error") || type == std::string("mse")) { + if (type == std::string("l2")) { return new L2Metric(config); - } else if (type == std::string("l2_root") || type == std::string("root_mean_squared_error") || type == std::string("rmse")) { + } else if (type == std::string("rmse")) { return new RMSEMetric(config); - } else if (type == std::string("regression_l1") || type == std::string("l1") || type == std::string("mean_absolute_error") || type == std::string("mae")) { + } else if (type == std::string("l1")) { return new L1Metric(config); } else if (type == std::string("quantile")) { return new QuantileMetric(config); 
@@ -28,27 +28,27 @@ Metric* Metric::CreateMetric(const std::string& type, const Config& config) { return new FairLossMetric(config); } else if (type == std::string("poisson")) { return new PoissonMetric(config); - } else if (type == std::string("binary_logloss") || type == std::string("binary")) { + } else if (type == std::string("binary_logloss")) { return new BinaryLoglossMetric(config); } else if (type == std::string("binary_error")) { return new BinaryErrorMetric(config); } else if (type == std::string("auc")) { return new AUCMetric(config); - } else if (type == std::string("ndcg") || type == std::string("lambdarank")) { + } else if (type == std::string("ndcg")) { return new NDCGMetric(config); - } else if (type == std::string("map") || type == std::string("mean_average_precision")) { + } else if (type == std::string("map")) { return new MapMetric(config); - } else if (type == std::string("multi_logloss") || type == std::string("multiclass") || type == std::string("softmax") || type == std::string("multiclassova") || type == std::string("multiclass_ova") || type == std::string("ova") || type == std::string("ovr")) { + } else if (type == std::string("multi_logloss")) { return new MultiSoftmaxLoglossMetric(config); } else if (type == std::string("multi_error")) { return new MultiErrorMetric(config); - } else if (type == std::string("xentropy") || type == std::string("cross_entropy")) { + } else if (type == std::string("cross_entropy")) { return new CrossEntropyMetric(config); - } else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) { + } else if (type == std::string("cross_entropy_lambda")) { return new CrossEntropyLambdaMetric(config); - } else if (type == std::string("kldiv") || type == std::string("kullback_leibler")) { + } else if (type == std::string("kullback_leibler")) { return new KullbackLeiblerDivergence(config); - } else if (type == std::string("mean_absolute_percentage_error") || type == std::string("mape")) { + } 
else if (type == std::string("mape")) { return new MAPEMetric(config); } else if (type == std::string("gamma")) { return new GammaMetric(config); diff --git a/src/metric/regression_metric.hpp b/src/metric/regression_metric.hpp index 680a2f75f..246e82c32 100644 --- a/src/metric/regression_metric.hpp +++ b/src/metric/regression_metric.hpp @@ -287,7 +287,7 @@ class GammaDevianceMetric : public RegressionMetric { return tmp - Common::SafeLog(tmp) - 1; } inline static const char* Name() { - return "gamma-deviance"; + return "gamma_deviance"; } inline static double AverageLoss(double sum_loss, double) { return sum_loss * 2; diff --git a/src/metric/xentropy_metric.hpp b/src/metric/xentropy_metric.hpp index 14c6ce15e..bec611d28 100644 --- a/src/metric/xentropy_metric.hpp +++ b/src/metric/xentropy_metric.hpp @@ -74,7 +74,7 @@ class CrossEntropyMetric : public Metric { virtual ~CrossEntropyMetric() {} void Init(const Metadata& metadata, data_size_t num_data) override { - name_.emplace_back("xentropy"); + name_.emplace_back("cross_entropy"); num_data_ = num_data; label_ = metadata.label(); weights_ = metadata.weights(); @@ -169,7 +169,7 @@ class CrossEntropyLambdaMetric : public Metric { virtual ~CrossEntropyLambdaMetric() {} void Init(const Metadata& metadata, data_size_t num_data) override { - name_.emplace_back("xentlambda"); + name_.emplace_back("cross_entropy_lambda"); num_data_ = num_data; label_ = metadata.label(); weights_ = metadata.weights(); @@ -252,7 +252,7 @@ class KullbackLeiblerDivergence : public Metric { virtual ~KullbackLeiblerDivergence() {} void Init(const Metadata& metadata, data_size_t num_data) override { - name_.emplace_back("kldiv"); + name_.emplace_back("kullback_leibler"); num_data_ = num_data; label_ = metadata.label(); weights_ = metadata.weights(); diff --git a/src/objective/objective_function.cpp b/src/objective/objective_function.cpp index 9cf030a8f..09fb139a2 100644 --- a/src/objective/objective_function.cpp +++ 
b/src/objective/objective_function.cpp @@ -13,11 +13,9 @@ namespace LightGBM { ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const Config& config) { - if (type == std::string("regression") || type == std::string("regression_l2") - || type == std::string("mean_squared_error") || type == std::string("mse") - || type == std::string("l2_root") || type == std::string("root_mean_squared_error") || type == std::string("rmse")) { + if (type == std::string("regression")) { return new RegressionL2loss(config); - } else if (type == std::string("regression_l1") || type == std::string("mean_absolute_error") || type == std::string("mae")) { + } else if (type == std::string("regression_l1")) { return new RegressionL1loss(config); } else if (type == std::string("quantile")) { return new RegressionQuantileloss(config); @@ -31,21 +29,21 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& return new BinaryLogloss(config); } else if (type == std::string("lambdarank")) { return new LambdarankNDCG(config); - } else if (type == std::string("multiclass") || type == std::string("softmax")) { + } else if (type == std::string("multiclass")) { return new MulticlassSoftmax(config); - } else if (type == std::string("multiclassova") || type == std::string("multiclass_ova") || type == std::string("ova") || type == std::string("ovr")) { + } else if (type == std::string("multiclassova")) { return new MulticlassOVA(config); - } else if (type == std::string("xentropy") || type == std::string("cross_entropy")) { + } else if (type == std::string("cross_entropy")) { return new CrossEntropy(config); - } else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) { + } else if (type == std::string("cross_entropy_lambda")) { return new CrossEntropyLambda(config); - } else if (type == std::string("mean_absolute_percentage_error") || type == std::string("mape")) { + } else if (type == std::string("mape")) 
{ return new RegressionMAPELOSS(config); } else if (type == std::string("gamma")) { return new RegressionGammaLoss(config); } else if (type == std::string("tweedie")) { return new RegressionTweedieLoss(config); - } else if (type == std::string("none") || type == std::string("null") || type == std::string("custom") || type == std::string("na")) { + } else if (type == std::string("custom")) { return nullptr; } Log::Fatal("Unknown objective type name: %s", type.c_str()); @@ -74,17 +72,17 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& return new MulticlassSoftmax(strs); } else if (type == std::string("multiclassova")) { return new MulticlassOVA(strs); - } else if (type == std::string("xentropy") || type == std::string("cross_entropy")) { + } else if (type == std::string("cross_entropy")) { return new CrossEntropy(strs); - } else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) { + } else if (type == std::string("cross_entropy_lambda")) { return new CrossEntropyLambda(strs); - } else if (type == std::string("mean_absolute_percentage_error") || type == std::string("mape")) { + } else if (type == std::string("mape")) { return new RegressionMAPELOSS(strs); } else if (type == std::string("gamma")) { return new RegressionGammaLoss(strs); } else if (type == std::string("tweedie")) { return new RegressionTweedieLoss(strs); - } else if (type == std::string("none") || type == std::string("null") || type == std::string("custom") || type == std::string("na")) { + } else if (type == std::string("custom")) { return nullptr; } Log::Fatal("Unknown objective type name: %s", type.c_str()); diff --git a/src/objective/xentropy_objective.hpp b/src/objective/xentropy_objective.hpp index f4e83f226..cad2b8faa 100644 --- a/src/objective/xentropy_objective.hpp +++ b/src/objective/xentropy_objective.hpp @@ -94,7 +94,7 @@ class CrossEntropy: public ObjectiveFunction { } const char* GetName() const override { - return 
"xentropy"; + return "cross_entropy"; } // convert score to a probability @@ -213,7 +213,7 @@ class CrossEntropyLambda: public ObjectiveFunction { } const char* GetName() const override { - return "xentlambda"; + return "cross_entropy_lambda"; } //