[ci][docs] generate docs for C API (#2059)

* use file to install deps for docs * added C_API docs * use breathe without exhale * added missed params descriptions and make Doxygen fail for warnings * escape char hotfix * ignore unknown directive for rstcheck * better handle env variable * hotfix for 'Unknown directive type' error with C_API=NO * Update .gitignore * fixed pylint * use already defined constants in conf.py * do not suppress Doxygen's output * addressed review comments * removed unneeded import
2019-05-05 14:54:37 +03:00 · 2019-05-05 14:54:37 +03:00 · cfcc020e58
--- a/.ci/test.sh
+++ b/.ci/test.sh
@ -37,16 +37,14 @@ source activate $CONDA_ENV
 cd $BUILD_DIRECTORY

 if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then
-    if [[ $PYTHON_VERSION == "2.7" ]]; then
-        conda -q -y -n $CONDA_ENV mock
-    fi
-    conda install -q -y -n $CONDA_ENV sphinx "sphinx_rtd_theme>=0.3"
-    pip install --user rstcheck
+    cd $BUILD_DIRECTORY/docs
+    conda install -q -y -n $CONDA_ENV -c conda-forge doxygen
+    pip install --user -r requirements.txt rstcheck
    # check reStructuredText formatting
    cd $BUILD_DIRECTORY/python-package
    rstcheck --report warning `find . -type f -name "*.rst"` || exit -1
    cd $BUILD_DIRECTORY/docs
-    rstcheck --report warning --ignore-directives=autoclass,autofunction `find . -type f -name "*.rst"` || exit -1
+    rstcheck --report warning --ignore-directives=autoclass,autofunction,doxygenfile `find . -type f -name "*.rst"` || exit -1
    # build docs and check them for broken links
    make html || exit -1
    find ./_build/html/ -type f -name '*.html' -exec \
--- a/.gitignore
+++ b/.gitignore
@ -340,6 +340,9 @@ instance/
 # Sphinx documentation
 docs/_build/

+# Doxygen documentation
+docs/doxyoutput/
+
 # PyBuilder
 target/

--- a/docs/C-API.rst
+++ b/docs/C-API.rst
@ -0,0 +1,4 @@
+C API
+=====
+
+.. doxygenfile:: c_api.h
--- a/docs/Development-Guide.rst
+++ b/docs/Development-Guide.rst
@ -4,7 +4,7 @@ Development Guide
 Algorithms
 ----------

-Refer to `Features <./Features.rst>`__ to understand important algorithms used in LightGBM.
+Refer to `Features <./Features.rst>`__ for an understanding of the important algorithms used in LightGBM.

 Classes and Code Structure
 --------------------------
@ -73,7 +73,7 @@ Refer to `docs README <./README.rst>`__.
 C API
 -----

-Refer to the comments in `c\_api.h <https://github.com/Microsoft/LightGBM/blob/master/include/LightGBM/c_api.h>`__.
+Refer to `C API <./C-API.rst>`__ or the comments in `c\_api.h <https://github.com/Microsoft/LightGBM/blob/master/include/LightGBM/c_api.h>`__ file, from which the documentation is generated.

 High Level Language Package
 ---------------------------
--- a/docs/README.rst
+++ b/docs/README.rst
@ -1,7 +1,8 @@
 Documentation
 =============

-Documentation for LightGBM is generated using `Sphinx <http://www.sphinx-doc.org/>`__.
+Documentation for LightGBM is generated using `Sphinx <http://www.sphinx-doc.org/>`__
+and `Breathe <https://breathe.readthedocs.io/>`__, which works on top of `Doxygen <http://www.doxygen.nl/index.html>`__ output.

 List of parameters and their descriptions in `Parameters.rst <./Parameters.rst>`__
 is generated automatically from comments in `config file <https://github.com/Microsoft/LightGBM/blob/master/include/LightGBM/config.h>`__
@ -12,19 +13,18 @@ After each commit on ``master``, documentation is updated and published to `Read
 Build
 -----

-You can build the documentation locally. Just run in ``docs`` folder
-
-for Python 3.x:
+You can build the documentation locally. Just install Doxygen and run the following commands in the ``docs`` folder:

 .. code:: sh

-    pip install sphinx "sphinx_rtd_theme>=0.3"
+    pip install -r requirements.txt
    make html

- 
-for Python 2.x:
+If you face any problems with Doxygen installation or you simply do not need documentation for C code,
+it is possible to build the documentation without it:

 .. code:: sh

-    pip install mock sphinx "sphinx_rtd_theme>=0.3"
+    pip install -r requirements_base.txt
+    export C_API=NO || set C_API=NO
    make html
--- a/docs/conf.py
+++ b/docs/conf.py
@ -21,11 +21,14 @@ import datetime
 import os
 import sys
 import sphinx
-from sphinx.errors import VersionRequirementError

-curr_path = os.path.dirname(os.path.realpath(__file__))
-libpath = os.path.join(curr_path, '../python-package/')
-sys.path.insert(0, libpath)
+from docutils.parsers.rst import Directive
+from sphinx.errors import VersionRequirementError
+from subprocess import PIPE, Popen
+
+CURR_PATH = os.path.abspath(os.path.dirname(__file__))
+LIB_PATH = os.path.join(CURR_PATH, os.path.pardir, 'python-package')
+sys.path.insert(0, LIB_PATH)

 # -- mock out modules
 try:
@ -38,9 +41,21 @@ MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse',
 for mod_name in MOCK_MODULES:
    sys.modules[mod_name] = Mock()

+
+class IgnoredDirective(Directive):
+    """Stub for unknown directives."""
+
+    has_content = True
+
+    def run(self):
+        """Do nothing."""
+        return []
+
+
 # -- General configuration ------------------------------------------------

 os.environ['LIGHTGBM_BUILD_DOC'] = '1'
+C_API = os.environ.get('C_API', '').lower().strip() != 'no'

 # If your documentation needs a minimal Sphinx version, state it here.
 needs_sphinx = '1.3'  # Due to sphinx.ext.napoleon
@ -76,12 +91,7 @@ author = 'Microsoft Corporation'
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
-#
-BASE_DIR = os.path.abspath('../')
-VERSION_FILE = os.path.join(BASE_DIR, "VERSION.txt")
-
-with open(VERSION_FILE, 'r') as f:
-
+with open(os.path.join(CURR_PATH, os.path.pardir, 'VERSION.txt'), 'r') as f:
    # The short X.Y version.
    version = f.read().strip()

@ -109,6 +119,21 @@ todo_include_todos = False
 # Both the class' and the __init__ method's docstring are concatenated and inserted.
 autoclass_content = 'both'

+# -- Configuration for C API docs generation ------------------------------
+
+if C_API:
+    extensions.extend([
+        'breathe',
+    ])
+    breathe_projects = {
+        "LightGBM": os.path.join(CURR_PATH, 'doxyoutput', 'xml')
+    }
+    breathe_default_project = "LightGBM"
+    breathe_domain_by_extension = {
+        "h": "c",
+    }
+    breathe_show_define_initializer = True
+
 # -- Options for HTML output ----------------------------------------------

 # The theme to use for HTML and HTML Help pages.  See the documentation for
@ -133,6 +158,56 @@ html_static_path = ['_static']
 htmlhelp_basename = 'LightGBMdoc'


+def generate_doxygen_xml(app):
+    """Generate XML documentation for C API by Doxygen.
+
+    Parameters
+    ----------
+    app : object
+        The application object representing the Sphinx process.
+    """
+    doxygen_args = [
+        "INPUT={}".format(os.path.join(CURR_PATH, os.path.pardir,
+                                       'include', 'LightGBM', 'c_api.h')),
+        "OUTPUT_DIRECTORY={}".format(os.path.join(CURR_PATH, 'doxyoutput')),
+        "GENERATE_HTML=NO",
+        "GENERATE_LATEX=NO",
+        "GENERATE_XML=YES",
+        "XML_OUTPUT=xml",
+        "XML_PROGRAMLISTING=YES",
+        r'ALIASES="rst=\verbatim embed:rst:leading-asterisk"',
+        r'ALIASES+="endrst=\endverbatim"',
+        "ENABLE_PREPROCESSING=YES",
+        "MACRO_EXPANSION=YES",
+        "EXPAND_ONLY_PREDEF=NO",
+        "SKIP_FUNCTION_MACROS=NO",
+        "SORT_BRIEF_DOCS=YES",
+        "WARN_AS_ERROR=YES",
+    ]
+    doxygen_input = '\n'.join(doxygen_args)
+    is_py3 = sys.version[0] == "3"
+    if is_py3:
+        doxygen_input = bytes(doxygen_input, "utf-8")
+    if not os.path.exists(os.path.join(CURR_PATH, 'doxyoutput')):
+        os.makedirs(os.path.join(CURR_PATH, 'doxyoutput'))
+    try:
+        # Warning! The following code can cause buffer overflows on RTD.
+        # Consider suppressing output completely if RTD project silently fails.
+        # Refer to https://github.com/svenevs/exhale
+        # /blob/fe7644829057af622e467bb529db6c03a830da99/exhale/deploy.py#L99-L111
+        process = Popen(["doxygen", "-"],
+                        stdin=PIPE, stdout=PIPE, stderr=PIPE)
+        stdout, stderr = process.communicate(doxygen_input)
+        output = '\n'.join([i.decode('utf-8') if is_py3 else i
+                            for i in (stdout, stderr) if i is not None])
+        if process.returncode != 0:
+            raise RuntimeError(output)
+        else:
+            print(output)
+    except BaseException as e:
+        raise Exception("An error has occurred while executing Doxygen\n" + str(e))
+
+
 def setup(app):
    """Add new elements at Sphinx initialization time.

@ -141,4 +216,8 @@ def setup(app):
    app : object
        The application object representing the Sphinx process.
    """
+    if C_API:
+        app.connect("builder-inited", generate_doxygen_xml)
+    else:
+        app.add_directive('doxygenfile', IgnoredDirective)
    app.add_javascript("js/script.js")
--- a/docs/index.rst
+++ b/docs/index.rst
@ -27,6 +27,7 @@ For more details, please refer to `Features <./Features.rst>`__.
   Experiments <Experiments>
   Parameters <Parameters>
   Parameters Tuning <Parameters-Tuning>
+   C API <C-API>
   Python API <Python-API>
   Parallel Learning Guide <Parallel-Learning-Guide>
   GPU Tutorial <GPU-Tutorial>
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@ -0,0 +1,2 @@
+-r requirements_base.txt
+breathe
--- a/docs/requirements_base.txt
+++ b/docs/requirements_base.txt
@ -0,0 +1,3 @@
+sphinx
+sphinx_rtd_theme >= 0.3
+mock; python_version < '3'
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@ -225,10 +225,12 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromMat(const void* data,

 /*!
 * \brief create dataset from array of dense matrices
+* \param nmat number of matrices
 * \param data pointer to the data space
 * \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
 * \param nrow number of rows
 * \param ncol number columns
+* \param is_row_major 1 for row major, 0 for column major
 * \param parameters additional parameters
 * \param reference used to align bin mapper with other dataset, nullptr means don't used
 * \param out created dataset
@ -381,7 +383,7 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetAddFeaturesFrom(DatasetHandle target,
 * \brief create an new boosting learner
 * \param train_data training data set
 * \param parameters format: 'key1=value1 key2=value2'
-* \prama out handle of created Booster
+* \param out handle of created Booster
 * \return 0 when succeed, -1 when failure happens
 */
 LIGHTGBM_C_EXPORT int LGBM_BoosterCreate(const DatasetHandle train_data,
@ -481,7 +483,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterUpdateOneIter(BoosterHandle handle,
 /*!
 * \brief Refit the tree model using the new data (online learning)
 * \param handle handle
-* \param leaf_preds 
+* \param leaf_preds
 * \param nrow number of rows of leaf_preds
 * \param ncol number of columns of leaf_preds
 * \return 0 when succeed, -1 when failure happens
@ -514,6 +516,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterRollbackOneIter(BoosterHandle handle);

 /*!
 * \brief Get iteration of current boosting rounds
+* \param handle handle
 * \param out_iteration iteration of boosting rounds
 * \return 0 when succeed, -1 when failure happens
 */
@ -522,6 +525,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetCurrentIteration(BoosterHandle handle,

 /*!
 * \brief Get number of tree per iteration
+* \param handle handle
 * \param out_tree_per_iteration number of tree per iteration
 * \return 0 when succeed, -1 when failure happens
 */
@ -530,6 +534,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterNumModelPerIteration(BoosterHandle handle,

 /*!
 * \brief Get number of weak sub-models
+* \param handle handle
 * \param out_models number of weak sub-models
 * \return 0 when succeed, -1 when failure happens
 */
@ -538,6 +543,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterNumberOfTotalModel(BoosterHandle handle,

 /*!
 * \brief Get number of eval
+* \param handle handle
 * \param out_len total number of eval results
 * \return 0 when succeed, -1 when failure happens
 */
@ -546,6 +552,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle,

 /*!
 * \brief Get name of eval
+* \param handle handle
 * \param out_len total number of eval results
 * \param out_strs names of eval result, need to pre-allocate memory before call this
 * \return 0 when succeed, -1 when failure happens
@ -556,6 +563,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle,

 /*!
 * \brief Get name of features
+* \param handle handle
 * \param out_len total number of features
 * \param out_strs names of features, need to pre-allocate memory before call this
 * \return 0 when succeed, -1 when failure happens
@ -566,6 +574,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetFeatureNames(BoosterHandle handle,

 /*!
 * \brief Get number of features
+* \param handle handle
 * \param out_len total number of features
 * \return 0 when succeed, -1 when failure happens
 */
@ -579,7 +588,7 @@ Note: 1. you should call LGBM_BoosterGetEvalNames first to get the name of evalu
 * \param handle handle
 * \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
 * \param out_len len of output result
-* \param out_result float arrary contains result
+* \param out_results float array that contains the result
 * \return 0 when succeed, -1 when failure happens
 */
 LIGHTGBM_C_EXPORT int LGBM_BoosterGetEval(BoosterHandle handle,
@ -695,7 +704,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSR(BoosterHandle handle,
                                                double* out_result);

 /*!
-* \brief make prediction for an new data set. This method re-uses the internal predictor structure 
+* \brief make prediction for an new data set. This method re-uses the internal predictor structure
 *        from previous calls and is optimized for single row invocation.
 *        Note:  should pre-allocate memory for out_result,
 *               for normal and raw score: its length is equal to num_class * num_data
@ -808,7 +817,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle,
                                                double* out_result);

 /*!
-* \brief make prediction for an new data set. This method re-uses the internal predictor structure 
+* \brief make prediction for an new data set. This method re-uses the internal predictor structure
 *        from previous calls and is optimized for single row invocation.
 *        Note:  should pre-allocate memory for out_result,
 *               for normal and raw score: its length is equal to num_class * num_data
@ -816,7 +825,6 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle,
 * \param handle handle
 * \param data pointer to the data space
 * \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
-* \param nrow number of rows
 * \param ncol number columns
 * \param is_row_major 1 for row major, 0 for column major
 * \param predict_type
@ -873,6 +881,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMats(BoosterHandle handle,
 /*!
 * \brief save model into file
 * \param handle handle
+* \param start_iteration start iteration that should be saved
 * \param num_iteration, <= 0 means save all
 * \param filename file name
 * \return 0 when succeed, -1 when failure happens
@ -885,6 +894,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterSaveModel(BoosterHandle handle,
 /*!
 * \brief save model to string
 * \param handle handle
+* \param start_iteration start iteration that should be saved
 * \param num_iteration, <= 0 means save all
 * \param buffer_len string buffer length, if buffer_len < out_len, re-allocate buffer
 * \param out_len actual output length
@ -901,6 +911,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterSaveModelToString(BoosterHandle handle,
 /*!
 * \brief dump model to json
 * \param handle handle
+* \param start_iteration start iteration that should be dumped
 * \param num_iteration, <= 0 means save all
 * \param buffer_len string buffer length, if buffer_len < out_len, re-allocate buffer
 * \param out_len actual output length