diff --git a/docs/Experiments.rst b/docs/Experiments.rst index c314321e7..4440a2c0c 100644 --- a/docs/Experiments.rst +++ b/docs/Experiments.rst @@ -18,19 +18,19 @@ Data We used 5 datasets to conduct our comparison experiments. Details of data are listed in the following table: -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Data | Task | Link | #Train\_Set | #Feature | Comments | -+===========+=======================+========================================================================+=============+==========+==============================================+ -| Higgs | Binary classification | `link `__ | 10,500,000 | 28 | last 500,000 samples were used as test set | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Yahoo LTR | Learning to rank | `link `__ | 473,134 | 700 | set1.train as train, set1.test as test | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| MS LTR | Learning to rank | `link `__ | 2,270,296 | 137 | {S1,S2,S3} as train set, {S5} as test set | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Expo | Binary classification | `link `__ | 11,000,000 | 700 | last 1,000,000 samples were used as test set | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Allstate | Binary classification | `link `__ | 13,184,290 | 4228 | last 1,000,000 samples were used as test set | 
-+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Data | Task | Link | #Train\_Set | #Feature | Comments | ++===========+=======================+=================================================================================+=============+==========+==============================================+ +| Higgs | Binary classification | `link `__ | 10,500,000 | 28 | last 500,000 samples were used as test set | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Yahoo LTR | Learning to rank | `link `__ | 473,134 | 700 | set1.train as train, set1.test as test | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| MS LTR | Learning to rank | `link `__ | 2,270,296 | 137 | {S1,S2,S3} as train set, {S5} as test set | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Expo | Binary classification | `link `__ | 11,000,000 | 700 | last 1,000,000 samples were used as test set | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Allstate | Binary classification | `link `__ | 13,184,290 | 4228 | last 1,000,000 samples were used as test set | 
++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ Environment ^^^^^^^^^^^ diff --git a/docs/Features.rst b/docs/Features.rst index a7db86ec2..89b566465 100644 --- a/docs/Features.rst +++ b/docs/Features.rst @@ -291,7 +291,7 @@ References .. _On Grouping for Maximum Homogeneity: https://www.tandfonline.com/doi/abs/10.1080/01621459.1958.10501479 -.. _Optimization of collective communication operations in MPICH: https://www.mcs.anl.gov/~thakur/papers/ijhpca-coll.pdf +.. _Optimization of collective communication operations in MPICH: https://web.cels.anl.gov/~thakur/papers/ijhpca-coll.pdf .. _A Communication-Efficient Parallel Algorithm for Decision Tree: http://papers.nips.cc/paper/6381-a-communication-efficient-parallel-algorithm-for-decision-tree diff --git a/docs/GPU-Performance.rst b/docs/GPU-Performance.rst index be1c1051b..64cd78eb4 100644 --- a/docs/GPU-Performance.rst +++ b/docs/GPU-Performance.rst @@ -194,7 +194,7 @@ following article: Huan Zhang, Si Si and Cho-Jui Hsieh. `GPU Acceleration for Large-scale Tree Boosting`_. SysML Conference, 2018. -.. _link1: https://archive.ics.uci.edu/ml/datasets/HIGGS +.. _link1: https://archive.ics.uci.edu/dataset/280/higgs .. _link2: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html @@ -202,9 +202,9 @@ Huan Zhang, Si Si and Cho-Jui Hsieh. `GPU Acceleration for Large-scale Tree Boos .. _link4: https://webscope.sandbox.yahoo.com/catalog.php?datatype=c -.. _link5: http://research.microsoft.com/en-us/projects/mslr/ +.. _link5: https://www.microsoft.com/en-us/research/project/mslr/ -.. _link6: http://stat-computing.org/dataexpo/2009/ +.. _link6: https://community.amstat.org/jointscsg-section/dataexpo/dataexpo2009 .. 
_0bb4a82: https://github.com/microsoft/LightGBM/commit/0bb4a82 diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst index 1acfbcefa..564fa7304 100644 --- a/docs/Installation-Guide.rst +++ b/docs/Installation-Guide.rst @@ -950,7 +950,7 @@ gcc .. _RDMA: https://en.wikipedia.org/wiki/Remote_direct_memory_access -.. _MS MPI: https://docs.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes +.. _MS MPI: https://learn.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes .. _Open MPI: https://www.open-mpi.org/ diff --git a/docs/Parallel-Learning-Guide.rst b/docs/Parallel-Learning-Guide.rst index a347be942..cbc7b1012 100644 --- a/docs/Parallel-Learning-Guide.rst +++ b/docs/Parallel-Learning-Guide.rst @@ -518,7 +518,7 @@ See `the mars documentation`_ for usage examples. .. _the Dask DataFrame documentation: https://docs.dask.org/en/latest/dataframe.html -.. _the Dask prediction example: https://github.com/microsoft/lightgbm/tree/master/examples/python-guide/dask/prediction.py +.. _the Dask prediction example: https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/dask/prediction.py .. _the Dask worker documentation: https://distributed.dask.org/en/stable/worker-memory.html @@ -536,7 +536,7 @@ See `the mars documentation`_ for usage examples. .. _lightgbm_ray: https://github.com/ray-project/lightgbm_ray -.. _Ray: https://ray.io/ +.. _Ray: https://www.ray.io/ .. _the lightgbm_ray documentation: https://docs.ray.io/en/latest/tune/api_docs/integration.html#lightgbm-tune-integration-lightgbm diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 86104ba5b..329f9c386 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -119,7 +119,7 @@ Core Parameters - ranking application - - ``lambdarank``, `lambdarank `__ objective. 
`label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` + - ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` - ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart`` @@ -536,15 +536,15 @@ Learning Control Parameters - ``basic``, the most basic monotone constraints method. It does not slow the library at all, but over-constrains the predictions - - ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results + - ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results - - ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results + - ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results - ``monotone_penalty`` :raw-html:`🔗︎`, default = ``0.0``, type = double, aliases: ``monotone_splits_penalty``, ``ms_penalty``, ``mc_penalty``, constraints: ``monotone_penalty >= 0.0`` - used only if ``monotone_constraints`` is set - - `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. 
The penalty applied to monotone splits on a given depth is a continuous, increasing function the penalization parameter + - `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits on a given depth is a continuous, increasing function of the penalization parameter - if ``0.0`` (the default), no penalization is applied @@ -564,7 +564,7 @@ Learning Control Parameters - **Note**: the forced split logic will be ignored, if the split makes gain worse - - see `this file `__ as an example + - see `this file `__ as an example - ``refit_decay_rate`` :raw-html:`🔗︎`, default = ``0.9``, type = double, constraints: ``0.0 <= refit_decay_rate <= 1.0`` @@ -770,7 +770,7 @@ Dataset Parameters - ``enable_bundle`` :raw-html:`🔗︎`, default = ``true``, type = bool, aliases: ``is_enable_bundle``, ``bundle`` - - set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ + - set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ - **Note**: disabling this may cause the slow training speed for sparse datasets @@ -894,7 +894,7 @@ Dataset Parameters - ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bound`` (array of thresholds for binning) - - see `this file `__ as an example + - see `this file `__ as an example - ``save_binary`` :raw-html:`🔗︎`, default = ``false``, type = bool, aliases: ``is_save_binary``, ``is_save_binary_file`` @@ -961,7 +961,7 @@ Predict Parameters - produces ``#features + 1`` values where the last value is the expected value of the model output over the training data - - **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, 
you can install `shap package `__ + - **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, you can install `shap package `__ - **Note**: unlike the shap package, with ``predict_contrib`` we return a matrix with an extra column, where the last column is the expected value diff --git a/docs/Quick-Start.rst b/docs/Quick-Start.rst index 04e64beb1..30b0b3c22 100644 --- a/docs/Quick-Start.rst +++ b/docs/Quick-Start.rst @@ -85,4 +85,4 @@ Examples .. _LibSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ -.. _Expo data: http://stat-computing.org/dataexpo/2009/ +.. _Expo data: https://community.amstat.org/jointscsg-section/dataexpo/dataexpo2009 diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 6d61bc764..6500cb772 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -150,7 +150,7 @@ struct Config { // descl2 = ``cross_entropy_lambda``, alternative parameterization of cross-entropy, aliases: ``xentlambda`` // descl2 = label is anything in interval [0, 1] // desc = ranking application - // descl2 = ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` + // descl2 = ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` // descl2 = ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart`` // descl2 = ``rank_xendcg`` is faster than and achieves the similar performance as ``lambdarank`` // descl2 = label should be ``int`` type, and larger number represents the higher relevance (e.g. 
0:bad, 1:fair, 2:good, 3:perfect) @@ -501,14 +501,14 @@ struct Config { // desc = used only if ``monotone_constraints`` is set // desc = monotone constraints method // descl2 = ``basic``, the most basic monotone constraints method. It does not slow the library at all, but over-constrains the predictions - // descl2 = ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results - // descl2 = ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results + // descl2 = ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results + // descl2 = ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results std::string monotone_constraints_method = "basic"; // alias = monotone_splits_penalty, ms_penalty, mc_penalty // check = >=0.0 // desc = used only if ``monotone_constraints`` is set - // desc = `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits on a given depth is a continuous, increasing function the penalization parameter + // desc = `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. 
The penalty applied to monotone splits on a given depth is a continuous, increasing function of the penalization parameter // desc = if ``0.0`` (the default), no penalization is applied double monotone_penalty = 0.0; @@ -524,7 +524,7 @@ struct Config { // desc = ``.json`` file can be arbitrarily nested, and each split contains ``feature``, ``threshold`` fields, as well as ``left`` and ``right`` fields representing subsplits // desc = categorical splits are forced in a one-hot fashion, with ``left`` representing the split containing the feature value and ``right`` representing other values // desc = **Note**: the forced split logic will be ignored, if the split makes gain worse - // desc = see `this file `__ as an example + // desc = see `this file `__ as an example std::string forcedsplits_filename = ""; // check = >=0.0 @@ -683,7 +683,7 @@ struct Config { bool is_enable_sparse = true; // alias = is_enable_bundle, bundle - // desc = set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ + // desc = set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ // desc = **Note**: disabling this may cause the slow training speed for sparse datasets bool enable_bundle = true; @@ -770,7 +770,7 @@ struct Config { // desc = path to a ``.json`` file that specifies bin upper bounds for some or all features // desc = ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bound`` (array of thresholds for binning) - // desc = see `this file `__ as an example + // desc = see `this file `__ as an example std::string forcedbins_filename = ""; // [no-save] @@ -826,7 +826,7 @@ struct Config { // desc = used only in ``prediction`` task // desc = set this to ``true`` to estimate `SHAP values `__, which represent how each 
feature contributes to each prediction // desc = produces ``#features + 1`` values where the last value is the expected value of the model output over the training data - // desc = **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, you can install `shap package `__ + // desc = **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, you can install `shap package `__ // desc = **Note**: unlike the shap package, with ``predict_contrib`` we return a matrix with an extra column, where the last column is the expected value // desc = **Note**: this feature is not implemented for linear trees bool predict_contrib = false;