This commit is contained in:
Yu Shi 2024-09-04 02:36:39 +00:00
Parents 1f59f8525b 13fa0a3e17
Commit b8427b0d4e
207 changed files with 4920 additions and 3273 deletions

View file

@@ -1,16 +1,14 @@
version: 4.3.0.99.{build}
version: 4.5.0.99.{build}
image: Visual Studio 2015
platform: x64
configuration: # a trick to construct a build matrix with multiple Python versions
configuration:
- '3.8'
# only build pull requests and
# commits to 'master' or any branch starting with 'release'
# only build on 'master' and pull requests targeting it
branches:
only:
- master
- /^release/
environment:
matrix:
@@ -25,12 +23,13 @@ install:
- git submodule update --init --recursive # get `external_libs` folder
- set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH%
- set PYTHON_VERSION=%CONFIGURATION%
- set CONDA_ENV="test-env"
- ps: |
$env:ALLOW_SKIP_ARROW_TESTS = "1"
$env:APPVEYOR = "true"
$env:CMAKE_BUILD_PARALLEL_LEVEL = 4
$env:MINICONDA = "C:\Miniconda3-x64"
$env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH"
$env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER"
$env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim()
build: false
@@ -40,4 +39,4 @@ test_script:
- conda config --add channels conda-forge
- conda config --set channel_priority strict
- conda init powershell
- powershell.exe -ExecutionPolicy Bypass -File %APPVEYOR_BUILD_FOLDER%\.ci\test_windows.ps1
- powershell.exe -ExecutionPolicy Bypass -File %APPVEYOR_BUILD_FOLDER%\.ci\test-windows.ps1
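Aside: the `configuration` key above doubles as the build matrix. AppVeyor starts one job per listed value and exposes it to the job as %CONFIGURATION%, which the `install` step reuses as the Python version; a minimal sketch of the pattern (values illustrative):

    configuration:
      - '3.8'
    install:
      - set PYTHON_VERSION=%CONFIGURATION%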

View file

@@ -4,13 +4,13 @@
# Update a comment by appending a given body to the specified original comment.
#
# [usage]
# append_comment.sh <COMMENT_ID> <BODY>
# append-comment.sh <COMMENT_ID> <BODY>
#
# COMMENT_ID: ID of comment that should be modified.
#
# BODY: Text that will be appended to the original comment body.
set -e
set -e -E -u -o pipefail
if [ -z "$GITHUB_ACTIONS" ]; then
echo "Must be run inside GitHub Actions CI"

View file

@@ -12,6 +12,7 @@ Version history for these symbols can be found at the following:
* GLIBCXX: https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
* OMP/GOMP: https://github.com/gcc-mirror/gcc/blob/master/libgomp/libgomp.map
"""
import re
import sys
from pathlib import Path

View file

@@ -1,7 +1,13 @@
#!/bin/sh
set -e -u
DIST_DIR=${1}
# defaults
METHOD=${METHOD:-""}
TASK=${TASK:-""}
echo "checking Python package distributions in '${DIST_DIR}'"
pip install \
@@ -18,31 +24,32 @@ if { test "${TASK}" = "bdist" || test "${METHOD}" = "wheel"; }; then
fi
PY_MINOR_VER=$(python -c "import sys; print(sys.version_info.minor)")
if [ $PY_MINOR_VER -gt 7 ]; then
if [ "$PY_MINOR_VER" -gt 7 ]; then
echo "pydistcheck..."
pip install pydistcheck
pip install 'pydistcheck>=0.7.0'
if { test "${TASK}" = "cuda" || test "${METHOD}" = "wheel"; }; then
pydistcheck \
--inspect \
--ignore 'compiled-objects-have-debug-symbols,distro-too-large-compressed' \
--ignore 'compiled-objects-have-debug-symbols' \
--ignore 'distro-too-large-compressed' \
--max-allowed-size-uncompressed '100M' \
--max-allowed-files 800 \
${DIST_DIR}/* || exit 1
elif { test $(uname -m) = "aarch64"; }; then
"$(echo ${DIST_DIR}/*)" || exit 1
elif { test "$(uname -m)" = "aarch64"; }; then
pydistcheck \
--inspect \
--ignore 'compiled-objects-have-debug-symbols' \
--max-allowed-size-compressed '5M' \
--max-allowed-size-uncompressed '15M' \
--max-allowed-files 800 \
${DIST_DIR}/* || exit 1
"$(echo ${DIST_DIR}/*)" || exit 1
else
pydistcheck \
--inspect \
--max-allowed-size-compressed '5M' \
--max-allowed-size-uncompressed '15M' \
--max-allowed-files 800 \
${DIST_DIR}/* || exit 1
"$(echo ${DIST_DIR}/*)" || exit 1
fi
else
echo "skipping pydistcheck (does not support Python 3.${PY_MINOR_VER})"

.ci/conda-envs/README.md (Normal file, +11)
View file

@@ -0,0 +1,11 @@
# conda-envs
This directory contains files used to create `conda` environments for development
and testing of LightGBM.
The `.txt` files here are intended to be used with `conda create --file`.
For details on that, see the `conda` docs:
* `conda create` docs ([link](https://conda.io/projects/conda/en/latest/commands/create.html))
* "Managing Environments" ([link](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html))

View file

@@ -0,0 +1,56 @@
# [description]
#
# Similar to ci-core.txt, but specific to Python 3.7.
#
# Unlike ci-core.txt, this includes a Python version and uses
# `=` and `<=` pins to make solves faster and guard against
# issues like https://github.com/microsoft/LightGBM/pull/6370.
#
# [usage]
#
# conda create \
# --name test-env \
# --file ./.ci/conda-envs/ci-core-py37.txt
#
# python
python=3.7.*
# direct imports
cffi=1.15.*
# older versions of Dask are incompatible with pandas>=2.0, but not all conda packages' metadata accurately reflects that
#
# ref: https://github.com/microsoft/LightGBM/issues/6030
dask=2022.2.*
distributed=2022.2.*
joblib=1.3.*
matplotlib-base=3.5.*
numpy=1.21.*
pandas=1.3.*
pyarrow=9.0.*
# python-graphviz 0.20.2 is not compatible with Python 3.7
# ref: https://github.com/microsoft/LightGBM/pull/6370
python-graphviz=0.20.1
scikit-learn=1.0.*
scipy=1.7.*
# testing-only dependencies
cloudpickle=2.2.*
pluggy=1.0.*
psutil=5.9.3
pytest=7.4.*
# other recursive dependencies, just
# pinned here to help speed up solves
bokeh=2.4.*
fsspec=2023.1.*
msgpack-python=1.0.*
pluggy=1.0.*
pytz=2024.1
setuptools=59.8.*
snappy=1.1.*
tomli=2.0.*
tornado=6.1.*
wheel=0.42.*
zict=2.2.*
zipp=3.15.*
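Aside: a quick reference for the conda match-spec pins used in these files:

    # python=3.7.*  any 3.7.x release (prefix match)
    # psutil=5.9.3  pinned to 5.9.3 (conda's '=' is a prefix match, unlike '==')
    # pytz=2024.1   likewise matches 2024.1 and any 2024.1.x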

View file

@@ -0,0 +1,51 @@
# [description]
#
# Similar to ci-core.txt, but specific to Python 3.8.
#
# Unlike ci-core.txt, this includes a Python version and uses
# `=` and `<=` pins to make solves faster and guard against
# issues like https://github.com/microsoft/LightGBM/pull/6370.
#
# [usage]
#
# conda create \
# --name test-env \
# --file ./.ci/conda-envs/ci-core-py38.txt
#
# python
python=3.8.*
# direct imports
cffi=1.15.*
dask=2023.5.*
distributed=2023.5.*
joblib=1.4.*
matplotlib-base=3.7.*
numpy=1.24.*
pandas=1.5.*
pyarrow-core=16.1.*
python-graphviz=0.20.*
scikit-learn=1.3.*
scipy=1.10.*
# testing-only dependencies
cloudpickle=3.0.*
pluggy=1.5.*
psutil=5.9.8
pytest=8.2.*
# other recursive dependencies, just
# pinned here to help speed up solves
bokeh=3.1.*
fsspec=2024.5.*
msgpack-python=1.0.*
pluggy=1.5.*
pytz=2024.1
setuptools=69.5.*
snappy=1.2.*
tomli=2.0.*
tornado=6.4.*
wheel=0.43.*
zict=3.0.*
zipp=3.17.*

View file

@@ -0,0 +1,40 @@
# [description]
#
# Core dependencies used across most LightGBM continuous integration (CI) jobs.
#
# 'python' constraint is intentionally omitted, so this file can be reused across
# Python versions.
#
# These floors are not the oldest versions LightGBM supports... they're here just to make conda
# solves faster, and should generally be the latest versions that work for all CI jobs using this.
#
# [usage]
#
# conda create \
# --name test-env \
# --file ./.ci/conda-envs/ci-core.txt \
# python=3.10
#
# direct imports
cffi>=1.16
dask>=2023.5.0
joblib>=1.3.2
matplotlib-base>=3.7.3
numpy>=1.24.4
pandas>2.0
pyarrow>=6.0
python-graphviz>=0.20.3
scikit-learn>=1.3.2
scipy>=1.1
# testing-only dependencies
cloudpickle>=3.0.0
psutil>=5.9.8
pytest>=8.1.1
# other recursive dependencies, just
# pinned here to help speed up solves
pluggy>=1.4.0
setuptools>=69.2
wheel>=0.43

View file

@@ -1,5 +1,6 @@
# coding: utf-8
"""Script for generating files with NuGet package metadata."""
import datetime
import sys
from pathlib import Path
@@ -7,20 +8,20 @@ from shutil import copyfile
if __name__ == "__main__":
source = Path(sys.argv[1])
current_dir = Path(__file__).absolute().parent
linux_folder_path = current_dir / "runtimes" / "linux-x64" / "native"
nuget_dir = Path(__file__).absolute().parent / "nuget"
linux_folder_path = nuget_dir / "runtimes" / "linux-x64" / "native"
linux_folder_path.mkdir(parents=True, exist_ok=True)
osx_folder_path = current_dir / "runtimes" / "osx-x64" / "native"
osx_folder_path = nuget_dir / "runtimes" / "osx-x64" / "native"
osx_folder_path.mkdir(parents=True, exist_ok=True)
windows_folder_path = current_dir / "runtimes" / "win-x64" / "native"
windows_folder_path = nuget_dir / "runtimes" / "win-x64" / "native"
windows_folder_path.mkdir(parents=True, exist_ok=True)
build_folder_path = current_dir / "build"
build_folder_path = nuget_dir / "build"
build_folder_path.mkdir(parents=True, exist_ok=True)
copyfile(source / "lib_lightgbm.so", linux_folder_path / "lib_lightgbm.so")
copyfile(source / "lib_lightgbm.dylib", osx_folder_path / "lib_lightgbm.dylib")
copyfile(source / "lib_lightgbm.dll", windows_folder_path / "lib_lightgbm.dll")
copyfile(source / "lightgbm.exe", windows_folder_path / "lightgbm.exe")
version = (current_dir.parent / "VERSION.txt").read_text(encoding="utf-8").strip().replace("rc", "-rc")
version = (nuget_dir.parents[1] / "VERSION.txt").read_text(encoding="utf-8").strip().replace("rc", "-rc")
nuget_str = rf"""<?xml version="1.0"?>
<package xmlns="http://schemas.microsoft.com/packaging/2013/05/nuspec.xsd">
<metadata>
@@ -75,6 +76,6 @@ if __name__ == "__main__":
</Target>
</Project>
"""
(current_dir / "LightGBM.nuspec").write_text(nuget_str, encoding="utf-8")
(current_dir / "build" / "LightGBM.props").write_text(prop_str, encoding="utf-8")
(current_dir / "build" / "LightGBM.targets").write_text(target_str, encoding="utf-8")
(nuget_dir / "LightGBM.nuspec").write_text(nuget_str, encoding="utf-8")
(nuget_dir / "build" / "LightGBM.props").write_text(prop_str, encoding="utf-8")
(nuget_dir / "build" / "LightGBM.targets").write_text(target_str, encoding="utf-8")

View file

@@ -2,19 +2,16 @@
"""Get the most recent status of workflow for the current PR.
[usage]
python get_workflow_status.py TRIGGER_PHRASE
python get-workflow-status.py TRIGGER_PHRASE
TRIGGER_PHRASE: Code phrase that triggers workflow.
"""
import json
from os import environ
from sys import argv, exit
from time import sleep
try:
from urllib import request
except ImportError:
import urllib2 as request
from urllib import request
def get_runs(trigger_phrase):

View file

@@ -1,74 +0,0 @@
#!/bin/bash
# [description]
#
# Installs a development version of clang and the other LLVM tools.
#
set -e -E -u -o pipefail
CLANG_VERSION=${1}
apt-get autoremove -y --purge \
clang-* \
libclang-* \
libunwind-* \
llvm-*
apt-get update -y
apt-get install --no-install-recommends -y \
gnupg \
lsb-release \
software-properties-common \
wget
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
# ref: https://apt.llvm.org/
add-apt-repository -y "deb http://apt.llvm.org/unstable/ llvm-toolchain main"
add-apt-repository -y "deb-src http://apt.llvm.org/unstable/ llvm-toolchain main"
add-apt-repository -y "deb http://apt.llvm.org/unstable/ llvm-toolchain-${CLANG_VERSION} main" || true
add-apt-repository -y "deb-src http://apt.llvm.org/unstable/ llvm-toolchain-${CLANG_VERSION} main" || true
apt-get update -y
apt-get install -y --no-install-recommends \
clang-${CLANG_VERSION} \
clangd-${CLANG_VERSION} \
clang-format-${CLANG_VERSION} \
clang-tidy-${CLANG_VERSION} \
clang-tools-${CLANG_VERSION} \
lldb-${CLANG_VERSION} \
lld-${CLANG_VERSION} \
llvm-${CLANG_VERSION}-dev \
llvm-${CLANG_VERSION}-tools \
libomp-${CLANG_VERSION}-dev \
libc++-${CLANG_VERSION}-dev \
libc++abi-${CLANG_VERSION}-dev \
libclang-common-${CLANG_VERSION}-dev \
libclang-${CLANG_VERSION}-dev \
libclang-cpp${CLANG_VERSION}-dev \
libunwind-${CLANG_VERSION}-dev
# overwriting the stuff in /usr/bin is simpler and more reliable than
# updating PATH, LD_LIBRARY_PATH, etc.
cp --remove-destination /usr/lib/llvm-${CLANG_VERSION}/bin/* /usr/bin/
# per https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang
#
# clang was built to use libc++: for a version built to default to libstdc++
# (as shipped by Fedora/Debian/Ubuntu), add -stdlib=libc++ to CXX
# and install the libcxx-devel/libc++-dev package.
mkdir -p "${HOME}/.R"
cat << EOF > "${HOME}/.R/Makevars"
CXX += -stdlib=libc++
CXX11 += -stdlib=libc++
CXX14 += -stdlib=libc++
CXX17 += -stdlib=libc++
CXX20 += -stdlib=libc++
EOF
echo ""
echo "done installing clang"
clang --version
echo ""

View file

View file

@@ -1,4 +1,6 @@
#!/bin/sh
#!/bin/bash
set -e -E -u -o pipefail
echo "running cpplint"
cpplint \
@@ -32,9 +34,17 @@ get_omp_pragmas_without_num_threads() {
'pragma omp parallel' \
| grep -v ' num_threads'
}
# 'grep' returns a non-0 exit code if 0 lines were found.
# Turning off '-e -o pipefail' options here so that bash doesn't
# consider this a failure and stop execution of the script.
#
# ref: https://www.gnu.org/software/grep/manual/html_node/Exit-Status.html
set +e +o pipefail
PROBLEMATIC_LINES=$(
get_omp_pragmas_without_num_threads
)
set -e -o pipefail
if test "${PROBLEMATIC_LINES}" != ""; then
get_omp_pragmas_without_num_threads
echo "Found '#pragma omp parallel' not using explicit num_threads() configuration. Fix those."

View file

@@ -1,4 +1,6 @@
#!/bin/sh
#!/bin/bash
set -e -E -u -o pipefail
echo "running pre-commit checks"
pre-commit run --all-files || exit 1

View file

@@ -52,6 +52,8 @@ LINTERS_TO_USE <- list(
, "inner_combine" = lintr::inner_combine_linter()
, "is_numeric" = lintr::is_numeric_linter()
, "lengths" = lintr::lengths_linter()
, "length_levels" = lintr::length_levels_linter()
, "length_test" = lintr::length_test_linter()
, "line_length" = lintr::line_length_linter(length = 120L)
, "literal_coercion" = lintr::literal_coercion_linter()
, "matrix" = lintr::matrix_apply_linter()
@@ -66,6 +68,7 @@ LINTERS_TO_USE <- list(
, "redundant_equals" = lintr::redundant_equals_linter()
, "regex_subset" = lintr::regex_subset_linter()
, "routine_registration" = lintr::routine_registration_linter()
, "scalar_in" = lintr::scalar_in_linter()
, "semicolon" = lintr::semicolon_linter()
, "seq" = lintr::seq_linter()
, "spaces_inside" = lintr::spaces_inside_linter()

View file

@@ -6,6 +6,7 @@ with list of all parameters, aliases table and other routines
along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file.
"""
import re
from collections import defaultdict
from pathlib import Path
@@ -285,7 +286,7 @@ def gen_parameter_code(
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*
* \note
* This file is auto generated by LightGBM\helpers\parameter_generator.py from LightGBM\include\LightGBM\config.h file.
* This file is auto generated by LightGBM\.ci\parameter-generator.py from LightGBM\include\LightGBM\config.h file.
*/
"""
str_to_write += "#include<LightGBM/config.h>\nnamespace LightGBM {\n"

View file

@@ -4,7 +4,7 @@
# Rerun specified workflow for given pull request.
#
# [usage]
# rerun_workflow.sh <WORKFLOW_ID> <PR_NUMBER> <PR_BRANCH>
# rerun-workflow.sh <WORKFLOW_ID> <PR_NUMBER> <PR_BRANCH>
#
# WORKFLOW_ID: Identifier (config name or ID) of a workflow to be rerun.
#
@@ -12,7 +12,7 @@
#
# PR_BRANCH: Name of pull request's branch.
set -e
set -e -E -u -o pipefail
if [ -z "$GITHUB_ACTIONS" ]; then
echo "Must be run inside GitHub Actions CI"

.ci/run-r-cmd-check.sh (Executable file, +46)
View file

@@ -0,0 +1,46 @@
#!/bin/bash
set -e -u -o pipefail
PKG_TARBALL="${1}"
declare -i ALLOWED_CHECK_NOTES=${2}
# 'R CMD check' redirects installation logs to a file, and returns
# a non-0 exit code if ERRORs are raised.
#
# The '||' here gives us an opportunity to echo out the installation
# logs prior to exiting the script.
check_succeeded="yes"
R CMD check "${PKG_TARBALL}" \
--as-cran \
--run-donttest \
|| check_succeeded="no"
CHECK_LOG_FILE=lightgbm.Rcheck/00check.log
BUILD_LOG_FILE=lightgbm.Rcheck/00install.out
echo "R CMD check build logs:"
cat "${BUILD_LOG_FILE}"
if [[ $check_succeeded == "no" ]]; then
echo "R CMD check failed"
exit 1
fi
# WARNINGs or ERRORs should be treated as a failure
if grep -q -E "WARNING|ERROR" "${CHECK_LOG_FILE}"; then
echo "WARNINGs or ERRORs have been found by R CMD check"
exit 1
fi
# Allow a configurable number of NOTEs.
# Sometimes NOTEs are raised in CI that wouldn't show up on an actual CRAN submission.
set +e
NUM_CHECK_NOTES=$(
grep -o -E '[0-9]+ NOTE' "${CHECK_LOG_FILE}" \
| sed 's/[^0-9]*//g'
)
if [[ ${NUM_CHECK_NOTES} -gt ${ALLOWED_CHECK_NOTES} ]]; then
echo "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed"
exit 1
fi
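This helper is invoked from .ci/test-r-package.sh (shown later in this diff), roughly as:

    declare -i allowed_notes=0
    bash ./.ci/run-r-cmd-check.sh \
        "lightgbm_$(head -1 VERSION.txt).tar.gz" \
        "${allowed_notes}"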

View file

@@ -4,7 +4,7 @@
# Set a status with a given name to the specified commit.
#
# [usage]
# set_commit_status.sh <NAME> <STATUS> <SHA>
# set-commit-status.sh <NAME> <STATUS> <SHA>
#
# NAME: Name of status.
# Status with existing name overwrites a previous one.
@@ -14,7 +14,7 @@
#
# SHA: SHA of a commit to set a status on.
set -e
set -e -E -u -o pipefail
if [ -z "$GITHUB_ACTIONS" ]; then
echo "Must be run inside GitHub Actions CI"

View file

@@ -1,16 +1,26 @@
#!/bin/bash
set -e -E -u -o pipefail
# defaults
AZURE=${AZURE:-"false"}
IN_UBUNTU_BASE_CONTAINER=${IN_UBUNTU_BASE_CONTAINER:-"false"}
SETUP_CONDA=${SETUP_CONDA:-"true"}
ARCH=$(uname -m)
if [[ $OS_NAME == "macos" ]]; then
if [[ $COMPILER == "clang" ]]; then
brew install libomp
if [[ $AZURE == "true" ]]; then
sudo xcode-select -s /Applications/Xcode_11.7.app/Contents/Developer || exit 1
sudo xcode-select -s /Applications/Xcode_13.1.0.app/Contents/Developer || exit 1
fi
else # gcc
sudo xcode-select -s /Applications/Xcode_14.1.app/Contents/Developer || exit 1
if [[ $TASK != "mpi" ]]; then
brew install gcc
fi
# Check https://github.com/actions/runner-images/tree/main/images/macos for available
# versions of Xcode
sudo xcode-select -s /Applications/Xcode_14.3.1.app/Contents/Developer || exit 1
brew install 'gcc@12'
fi
if [[ $TASK == "mpi" ]]; then
brew install open-mpi
@@ -18,11 +28,26 @@ if [[ $OS_NAME == "macos" ]]; then
if [[ $TASK == "swig" ]]; then
brew install swig
fi
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-x86_64.sh
else # Linux
if type -f apt 2>&1 > /dev/null; then
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
ca-certificates \
curl
else
sudo yum update -y
sudo yum install -y \
ca-certificates \
curl
fi
CMAKE_VERSION="3.30.0"
curl -O -L \
https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${ARCH}.sh \
|| exit 1
sudo mkdir /opt/cmake || exit 1
sudo sh cmake-${CMAKE_VERSION}-linux-${ARCH}.sh --skip-license --prefix=/opt/cmake || exit 1
sudo ln -sf /opt/cmake/bin/cmake /usr/local/bin/cmake || exit 1
if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then
# fixes error "unable to initialize frontend: Dialog"
# https://github.com/moby/moby/issues/27988#issuecomment-462809153
@@ -33,35 +58,27 @@ else # Linux
software-properties-common
sudo apt-get install --no-install-recommends -y \
apt-utils \
build-essential \
ca-certificates \
cmake \
curl \
git \
iputils-ping \
jq \
libcurl4 \
libicu-dev \
libssl-dev \
libunwind8 \
locales \
locales-all \
netcat \
unzip \
zip || exit 1
locales-all || exit 1
if [[ $COMPILER == "clang" ]]; then
sudo apt-get install --no-install-recommends -y \
clang \
libomp-dev
elif [[ $COMPILER == "clang-17" ]]; then
sudo apt-get install wget
sudo apt-get install --no-install-recommends -y \
wget
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
sudo apt-add-repository deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main
sudo apt-add-repository deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main
sudo apt-get update
sudo apt-get install -y clang-17
sudo apt-get install --no-install-recommends -y libomp-17-dev
sudo apt-get install -y \
clang-17 \
libomp-17-dev
fi
export LANG="en_US.UTF-8"
@@ -116,33 +133,21 @@ else # Linux
fi
if [[ $TASK == "cuda" ]]; then
echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
apt-get update
apt-get install --no-install-recommends -y \
curl \
lsb-release \
software-properties-common
if [[ $COMPILER == "clang" ]]; then
apt-get update
apt-get install --no-install-recommends -y \
clang \
libomp-dev
fi
curl -sL https://apt.kitware.com/keys/kitware-archive-latest.asc | apt-key add -
apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" -y
apt-get update
apt-get install --no-install-recommends -y \
cmake
fi
if [[ $SETUP_CONDA != "false" ]]; then
ARCH=$(uname -m)
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${ARCH}.sh
fi
fi
if [[ "${TASK}" != "r-package" ]] && [[ "${TASK}" != "r-rchk" ]]; then
if [[ $SETUP_CONDA != "false" ]]; then
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-${ARCH}.sh
sh miniforge.sh -b -p $CONDA
fi
conda config --set always_yes yes --set changeps1 no

.ci/test-python-latest.sh (Executable file, +50)
View file

@@ -0,0 +1,50 @@
#!/bin/bash
set -e -E -u -o pipefail
# latest versions of lightgbm's dependencies,
# including pre-releases and nightlies
#
# ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml
echo "installing testing dependencies"
python -m pip install \
cloudpickle \
psutil \
pytest
echo "done installing testing dependencies"
echo "installing lightgbm's dependencies"
python -m pip install \
--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
--prefer-binary \
--pre \
--upgrade \
'numpy>=2.0.0.dev0' \
'matplotlib>=3.10.0.dev0' \
'pandas>=3.0.0.dev0' \
'scikit-learn>=1.6.dev0' \
'scipy>=1.15.0.dev0'
python -m pip install \
--extra-index-url https://pypi.fury.io/arrow-nightlies/ \
--prefer-binary \
--pre \
--upgrade \
'pyarrow>=17.0.0.dev0'
python -m pip install \
'cffi>=1.15.1'
echo "done installing lightgbm's dependencies"
echo "installing lightgbm"
pip install --no-deps dist/*.whl
echo "done installing lightgbm"
echo "installed package versions:"
pip freeze
echo ""
echo "running tests"
pytest tests/c_api_test/
pytest tests/python_package_test/

View file

@@ -1,19 +1,22 @@
#!/bin/bash
set -e -E -u -o pipefail
# oldest versions of dependencies published after
# minimum supported Python version's first release
# minimum supported Python version's first release,
# for which there are wheels compatible with the
# python:{version} image
#
# see https://devguide.python.org/versions/
#
echo "installing lightgbm's dependencies"
pip install \
'cffi==1.15.1' \
'dataclasses' \
'numpy==1.16.6' \
'pandas==0.24.0' \
'numpy==1.19.0' \
'pandas==1.1.3' \
'pyarrow==6.0.1' \
'scikit-learn==0.18.2' \
'scipy==0.19.0' \
'scikit-learn==0.24.0' \
'scipy==1.6.0' \
|| exit 1
echo "done installing lightgbm's dependencies"

View file

@@ -1,5 +1,7 @@
#!/bin/bash
set -e -E -u -o pipefail
RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com')" || exit 1
sh build-cran-package.sh \
--r-executable=RDvalgrind \
@@ -70,10 +72,14 @@ bytes_possibly_lost=$(
| tr -d ","
)
echo "valgrind found ${bytes_possibly_lost} bytes possibly lost"
if [[ ${bytes_possibly_lost} -gt 1056 ]]; then
if [[ ${bytes_possibly_lost} -gt 1104 ]]; then
exit 1
fi
# ensure 'grep --count' doesn't cause failures
set +e
echo "checking for invalid reads"
invalid_reads=$(
cat ${VALGRIND_LOGS_FILE} \
| grep --count -i "Invalid read"
@@ -83,6 +89,7 @@ if [[ ${invalid_reads} -gt 0 ]]; then
exit 1
fi
echo "checking for invalid writes"
invalid_writes=$(
cat ${VALGRIND_LOGS_FILE} \
| grep --count -i "Invalid write"
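Aside: the `bytes_possibly_lost` value checked above is scraped from valgrind's leak summary, whose lines look like `==1234== possibly lost: 1,104 bytes in 4 blocks`; the trailing `tr -d ","` strips the thousands separator so the numeric comparison works. A sketch of such an extraction (the upstream part of the pipeline is truncated in this diff):

    bytes_possibly_lost=$(
        grep -E 'possibly lost: .* bytes' "${VALGRIND_LOGS_FILE}" \
        | sed 's/^.*possibly lost: //' \
        | awk '{print $1}' \
        | tr -d ','
    )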

View file

@@ -45,6 +45,8 @@ Remove-From-Path ".*android.*"
Remove-From-Path ".*Android.*"
Remove-From-Path ".*chocolatey.*"
Remove-From-Path ".*Chocolatey.*"
Remove-From-Path ".*cmake.*"
Remove-From-Path ".*CMake.*"
Remove-From-Path ".*\\Git\\.*"
Remove-From-Path "(?!.*pandoc.*).*hostedtoolcache.*"
Remove-From-Path ".*Microsoft SDKs.*"
@@ -87,11 +89,17 @@ if ($env:R_MAJOR_VERSION -eq "3") {
Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION"
Check-Output $false
}
$env:CMAKE_VERSION = "3.30.0"
$env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/'
$env:R_LIBS = "$env:R_LIB_PATH"
$env:PATH = "$env:RTOOLS_BIN;" + "$env:RTOOLS_MINGW_BIN;" + "$env:R_LIB_PATH/R/bin/x64;"+ $env:PATH
$env:CRAN_MIRROR = "https://cran.rstudio.com"
$env:CMAKE_PATH = "$env:BUILD_SOURCESDIRECTORY/CMake_installation"
$env:PATH = "$env:RTOOLS_BIN;" + "$env:RTOOLS_MINGW_BIN;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:CMAKE_PATH/cmake-$env:CMAKE_VERSION-windows-x86_64/bin;" + $env:PATH
if ([version]$env:R_VERSION -lt [version]"4.0") {
$env:CRAN_MIRROR = "https://cran-archive.r-project.org"
} else {
$env:CRAN_MIRROR = "https://cran.rstudio.com"
}
$env:MIKTEX_EXCEPTION_PATH = "$env:TEMP\miktex"
# don't fail builds for long-running examples unless they're very long.
@@ -108,11 +116,13 @@ if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) {
cd $env:BUILD_SOURCESDIRECTORY
tzutil /s "GMT Standard Time"
[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH)
[Void][System.IO.Directory]::CreateDirectory($env:CMAKE_PATH)
# download R and RTools
Write-Output "Downloading R and Rtools"
# download R, RTools and CMake
Write-Output "Downloading R, Rtools and CMake"
Download-File-With-Retries -url "$env:CRAN_MIRROR/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe"
Download-File-With-Retries -url "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe"
Download-File-With-Retries -url "https://github.com/Kitware/CMake/releases/download/v$env:CMAKE_VERSION/cmake-$env:CMAKE_VERSION-windows-x86_64.zip" -destfile "$env:CMAKE_PATH/cmake.zip"
# Install R
Write-Output "Installing R"
@@ -123,6 +133,13 @@ Write-Output "Installing Rtools"
Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /SUPPRESSMSGBOXES /DIR=$RTOOLS_INSTALL_PATH" ; Check-Output $?
Write-Output "Done installing Rtools"
Write-Output "Installing CMake"
Add-Type -AssemblyName System.IO.Compression.FileSystem
[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:CMAKE_PATH/cmake.zip", "$env:CMAKE_PATH") ; Check-Output $?
# Remove old CMake shipped with RTools
Remove-Item "$env:RTOOLS_MINGW_BIN/cmake.exe" -Force -ErrorAction Ignore
Write-Output "Done installing CMake"
Write-Output "Installing dependencies"
$packages = "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), dependencies = c('Imports', 'Depends', 'LinkingTo')"
Run-R-Code-Redirect-Stderr "options(install.packages.check.source = 'no'); install.packages($packages, repos = '$env:CRAN_MIRROR', type = 'binary', lib = '$env:R_LIB_PATH', Ncpus = parallel::detectCores())" ; Check-Output $?
@@ -191,7 +208,6 @@ if ($env:COMPILER -ne "MSVC") {
}
} else {
$env:TMPDIR = $env:USERPROFILE # to avoid warnings about incremental builds inside a temp directory
$INSTALL_LOG_FILE_NAME = "$env:BUILD_SOURCESDIRECTORY\00install_out.txt"
Run-R-Code-Redirect-Stderr "source('build_r.R')" 1> $INSTALL_LOG_FILE_NAME ; $install_succeeded = $?
Write-Output "----- build and install logs -----"

View file

@@ -1,5 +1,10 @@
#!/bin/bash
set -e -E -u -o pipefail
# defaults
ARCH=$(uname -m)
# set up R environment
CRAN_MIRROR="https://cran.rstudio.com"
R_LIB_PATH=~/Rlib
@@ -22,7 +27,7 @@ if [[ "${R_MAJOR_VERSION}" == "3" ]]; then
export R_APT_REPO="bionic-cran35/"
elif [[ "${R_MAJOR_VERSION}" == "4" ]]; then
export R_MAC_VERSION=4.3.1
export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/big-sur-x86_64/base/R-${R_MAC_VERSION}-x86_64.pkg
export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/big-sur-${ARCH}/base/R-${R_MAC_VERSION}-${ARCH}.pkg
export R_LINUX_VERSION="4.3.1-1.2204.0"
export R_APT_REPO="jammy-cran40/"
else
@@ -68,19 +73,12 @@ if [[ $OS_NAME == "linux" ]]; then
automake \
|| exit 1
fi
if [[ $INSTALL_CMAKE_FROM_RELEASES == "true" ]]; then
curl -O -L \
https://github.com/Kitware/CMake/releases/download/v3.25.1/cmake-3.25.1-linux-x86_64.sh \
|| exit 1
sudo mkdir /opt/cmake || exit 1
sudo sh cmake-3.25.1-linux-x86_64.sh --skip-license --prefix=/opt/cmake || exit 1
sudo ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake || exit 1
fi
fi
# Installing R precompiled for Mac OS 10.11 or higher
if [[ $OS_NAME == "macos" ]]; then
brew update-reset --auto-update
brew update --auto-update
if [[ $R_BUILD_TYPE == "cran" ]]; then
brew install automake || exit 1
fi
@@ -96,21 +94,42 @@ if [[ $OS_NAME == "macos" ]]; then
sudo installer \
-pkg $(pwd)/R.pkg \
-target / || exit 1
# install tidy v5.8.0
# ref: https://groups.google.com/g/r-sig-mac/c/7u_ivEj4zhM
TIDY_URL=https://github.com/htacg/tidy-html5/releases/download/5.8.0/tidy-5.8.0-macos-x86_64+arm64.pkg
curl -sL ${TIDY_URL} -o tidy.pkg
sudo installer \
-pkg $(pwd)/tidy.pkg \
-target /
# ensure that this newer version of 'tidy' is used by 'R CMD check'
# ref: https://cran.r-project.org/doc/manuals/R-exts.html#Checking-packages
export R_TIDYCMD=/usr/local/bin/tidy
fi
# fix for issue where CRAN was not returning {lattice} when using R 3.6
# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6
# "Warning: dependency lattice is not available"
if [[ "${R_MAJOR_VERSION}" == "3" ]]; then
Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', repos = NULL, lib = '${R_LIB_PATH}')"
Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')"
else
# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}.
# This should be unnecessary on R >=4.4.0
# ref: https://github.com/microsoft/LightGBM/issues/6433
Rscript --vanilla -e "install.packages('lattice', repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}')"
fi
# manually install {Matrix}, as {Matrix}=1.7-0 raised its R floor all the way to R 4.4.0
# ref: https://github.com/microsoft/LightGBM/issues/6433
Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.6-5.tar.gz', repos = NULL, lib = '${R_LIB_PATH}')"
# Manually install Depends and Imports libraries + 'knitr', 'markdown', 'RhpcBLASctl', 'testthat'
# to avoid a CI-time dependency on devtools (for devtools::install_deps())
# NOTE: testthat is not required when running rchk
if [[ "${TASK}" == "r-rchk" ]]; then
packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'R6', 'RhpcBLASctl')"
packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl')"
else
packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'R6', 'RhpcBLASctl', 'testthat')"
packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')"
fi
compile_from_source="both"
if [[ $OS_NAME == "macos" ]]; then
@@ -119,9 +138,9 @@ if [[ $OS_NAME == "macos" ]]; then
fi
Rscript --vanilla -e "options(install.packages.compile.from.source = '${compile_from_source}'); install.packages(${packages}, repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), Ncpus = parallel::detectCores())" || exit 1
cd ${BUILD_DIRECTORY}
PKG_TARBALL="lightgbm_*.tar.gz"
cd "${BUILD_DIRECTORY}"
PKG_TARBALL="lightgbm_$(head -1 VERSION.txt).tar.gz"
BUILD_LOG_FILE="lightgbm.Rcheck/00install.out"
LOG_FILE_NAME="lightgbm.Rcheck/00check.log"
if [[ $R_BUILD_TYPE == "cmake" ]]; then
Rscript build_r.R -j4 --skip-install || exit 1
@@ -130,7 +149,7 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then
# on Linux, we recreate configure in CI to test if
# a change in a PR has changed configure.ac
if [[ $OS_NAME == "linux" ]]; then
${BUILD_DIRECTORY}/R-package/recreate-configure.sh
./R-package/recreate-configure.sh
num_files_changed=$(
git diff --name-only | wc -l
@@ -180,33 +199,13 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then
cd ${R_CMD_CHECK_DIR}
fi
# fails tests if either ERRORs or WARNINGs are thrown by
# R CMD CHECK
check_succeeded="yes"
(
R CMD check ${PKG_TARBALL} \
--as-cran \
--run-donttest \
|| check_succeeded="no"
) &
declare -i allowed_notes=0
bash "${BUILD_DIRECTORY}/.ci/run-r-cmd-check.sh" \
"${PKG_TARBALL}" \
"${allowed_notes}"
# R CMD check suppresses output, some CIs kill builds after
# a few minutes with no output. This trick gives R CMD check more time
# * https://github.com/travis-ci/travis-ci/issues/4190#issuecomment-169987525
# * https://stackoverflow.com/a/29890106/3986677
CHECK_PID=$!
while kill -0 ${CHECK_PID} >/dev/null 2>&1; do
echo -n -e " \b"
sleep 5
done
echo "R CMD check build logs:"
BUILD_LOG_FILE=lightgbm.Rcheck/00install.out
cat ${BUILD_LOG_FILE}
if [[ $check_succeeded == "no" ]]; then
exit 1
fi
# ensure 'grep --count' doesn't cause failures
set +e
used_correct_r_version=$(
cat $LOG_FILE_NAME \
@@ -222,18 +221,12 @@ if [[ $R_BUILD_TYPE == "cmake" ]]; then
cat $BUILD_LOG_FILE \
| grep --count "R version passed into FindLibR.cmake: ${R_VERSION}"
)
if [[ $used_correct_r_version -ne 1 ]]; then
if [[ $passed_correct_r_version_to_cmake -ne 1 ]]; then
echo "Unexpected R version was passed into cmake. Expected '${R_VERSION}'."
exit 1
fi
fi
if grep -q -E "NOTE|WARNING|ERROR" "$LOG_FILE_NAME"; then
echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check"
exit 1
fi
# this check makes sure that CI builds of the package actually use OpenMP
if [[ $OS_NAME == "macos" ]] && [[ $R_BUILD_TYPE == "cran" ]]; then
omp_working=$(
@@ -255,20 +248,25 @@ fi
# this check makes sure that CI builds of the package
# actually use MM_PREFETCH preprocessor definition
if [[ $R_BUILD_TYPE == "cran" ]]; then
mm_prefetch_working=$(
cat $BUILD_LOG_FILE \
| grep --count -E "checking whether MM_PREFETCH work.*yes"
)
else
mm_prefetch_working=$(
cat $BUILD_LOG_FILE \
| grep --count -E ".*Performing Test MM_PREFETCH - Success"
)
fi
if [[ $mm_prefetch_working -ne 1 ]]; then
echo "MM_PREFETCH test was not passed"
exit 1
#
# _mm_prefetch will not work on arm64 architecture
# ref: https://github.com/microsoft/LightGBM/issues/4124
if [[ $ARCH != "arm64" ]]; then
if [[ $R_BUILD_TYPE == "cran" ]]; then
mm_prefetch_working=$(
cat $BUILD_LOG_FILE \
| grep --count -E "checking whether MM_PREFETCH work.*yes"
)
else
mm_prefetch_working=$(
cat $BUILD_LOG_FILE \
| grep --count -E ".*Performing Test MM_PREFETCH - Success"
)
fi
if [[ $mm_prefetch_working -ne 1 ]]; then
echo "MM_PREFETCH test was not passed"
exit 1
fi
fi
# this check makes sure that CI builds of the package

View file

@@ -6,36 +6,50 @@ function Check-Output {
}
}
# unify environment variable for Azure DevOps and AppVeyor
if (Test-Path env:APPVEYOR) {
$env:APPVEYOR = "true"
}
$env:CONDA_ENV = "test-env"
$env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim()
# Use custom temp directory to avoid
# > warning MSB8029: The Intermediate directory or Output directory cannot reside under the Temporary directory
# > as it could lead to issues with incremental build.
# And make sure this directory is always clean
$env:TMPDIR = "$env:USERPROFILE\tmp"
Remove-Item $env:TMPDIR -Force -Recurse -ErrorAction Ignore
[Void][System.IO.Directory]::CreateDirectory($env:TMPDIR)
if ($env:TASK -eq "r-package") {
& $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 ; Check-Output $?
& .\.ci\test-r-package-windows.ps1 ; Check-Output $?
Exit 0
}
if ($env:TASK -eq "cpp-tests") {
mkdir $env:BUILD_SOURCESDIRECTORY/build; cd $env:BUILD_SOURCESDIRECTORY/build
cmake -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF -DUSE_DEBUG=ON -A x64 ..
cmake --build . --target testlightgbm --config Debug ; Check-Output $?
cd ../Debug
.\testlightgbm.exe ; Check-Output $?
cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF -DUSE_DEBUG=ON -A x64
cmake --build build --target testlightgbm --config Debug ; Check-Output $?
.\Debug\testlightgbm.exe ; Check-Output $?
Exit 0
}
if ($env:TASK -eq "swig") {
$env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Eclipse Temurin 8 somewhere
$ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed
Invoke-WebRequest -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/swigwin-4.0.2.zip" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "NativeHost"
Invoke-WebRequest -Uri "https://sourceforge.net/projects/swig/files/latest/download" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "curl"
Add-Type -AssemblyName System.IO.Compression.FileSystem
[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig")
$env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/swigwin-4.0.2;" + $env:PATH
mkdir $env:BUILD_SOURCESDIRECTORY/build; cd $env:BUILD_SOURCESDIRECTORY/build
cmake -A x64 -DUSE_SWIG=ON .. ; cmake --build . --target ALL_BUILD --config Release ; Check-Output $?
[System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") ; Check-Output $?
$SwigFolder = Get-ChildItem -Directory -Name -Path "$env:BUILD_SOURCESDIRECTORY/swig"
$env:PATH = "$env:BUILD_SOURCESDIRECTORY/swig/$SwigFolder;" + $env:PATH
$BuildLogFileName = "$env:BUILD_SOURCESDIRECTORY\cmake_build.log"
cmake -B build -S . -A x64 -DUSE_SWIG=ON *> "$BuildLogFileName" ; $build_succeeded = $?
Write-Output "CMake build logs:"
Get-Content -Path "$BuildLogFileName"
Check-Output $build_succeeded
$checks = Select-String -Path "${BuildLogFileName}" -Pattern "-- Found SWIG.*${SwigFolder}/swig.exe"
$checks_cnt = $checks.Matches.length
if ($checks_cnt -eq 0) {
Write-Output "Wrong SWIG version was found (expected '${SwigFolder}'). Check the build logs."
Check-Output $False
}
cmake --build build --target ALL_BUILD --config Release ; Check-Output $?
if ($env:AZURE -eq "true") {
cp $env:BUILD_SOURCESDIRECTORY/build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $?
cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $?
}
Exit 0
}
@@ -44,44 +58,37 @@ if ($env:TASK -eq "swig") {
conda init powershell
conda activate
conda config --set always_yes yes --set changeps1 no
conda update -q -y conda "python=$env:PYTHON_VERSION[build=*cpython]"
# ref:
# * https://stackoverflow.com/a/62897729/3986677
# * https://github.com/microsoft/LightGBM/issues/5899
conda install brotlipy
if ($env:PYTHON_VERSION -eq "3.7") {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt"
} elseif ($env:PYTHON_VERSION -eq "3.8") {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt"
} else {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt"
}
conda update -q -y conda
conda create -q -y -n $env:CONDA_ENV `
cffi `
cloudpickle `
joblib `
matplotlib `
numpy `
pandas `
psutil `
pyarrow `
pytest `
"python=$env:PYTHON_VERSION[build=*cpython]" `
python-graphviz `
scikit-learn `
scipy ; Check-Output $?
conda create `
-y `
-n $env:CONDA_ENV `
--file $env:CONDA_REQUIREMENT_FILE `
"python=$env:PYTHON_VERSION[build=*cpython]" ; Check-Output $?
if ($env:TASK -ne "bdist") {
conda activate $env:CONDA_ENV
}
cd $env:BUILD_SOURCESDIRECTORY
if ($env:TASK -eq "regular") {
mkdir $env:BUILD_SOURCESDIRECTORY/build; cd $env:BUILD_SOURCESDIRECTORY/build
cmake -A x64 .. ; cmake --build . --target ALL_BUILD --config Release ; Check-Output $?
cd $env:BUILD_SOURCESDIRECTORY
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --precompile ; Check-Output $?
cp $env:BUILD_SOURCESDIRECTORY/Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY
cp $env:BUILD_SOURCESDIRECTORY/Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY
cmake -B build -S . -A x64 ; Check-Output $?
cmake --build build --target ALL_BUILD --config Release ; Check-Output $?
sh ./build-python.sh install --precompile ; Check-Output $?
cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY
cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY
}
elseif ($env:TASK -eq "sdist") {
cd $env:BUILD_SOURCESDIRECTORY
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh sdist ; Check-Output $?
sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $?
sh ./build-python.sh sdist ; Check-Output $?
sh ./.ci/check-python-dists.sh ./dist ; Check-Output $?
cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $?
}
elseif ($env:TASK -eq "bdist") {
@@ -95,17 +102,15 @@ elseif ($env:TASK -eq "bdist") {
Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors
conda activate $env:CONDA_ENV
cd $env:BUILD_SOURCESDIRECTORY
sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $?
sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $?
cd dist; pip install --user @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $?
sh ./.ci/check-python-dists.sh ./dist ; Check-Output $?
cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $?
cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY
} elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) {
cd $env:BUILD_SOURCESDIRECTORY
if ($env:COMPILER -eq "MINGW") {
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user --mingw ; Check-Output $?
sh ./build-python.sh install --mingw ; Check-Output $?
} else {
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user; Check-Output $?
sh ./build-python.sh install; Check-Output $?
}
}
@@ -126,7 +131,7 @@ if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -e
cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide
@("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py"
(Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode
conda install -q -y -n $env:CONDA_ENV "h5py>3.0" ipywidgets notebook
conda install -y -n $env:CONDA_ENV "h5py>=3.10" "ipywidgets>=8.1.2" "notebook>=7.1.2"
foreach ($file in @(Get-ChildItem *.py)) {
@("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file
python $file ; Check-Output $?
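Aside: a recurring change in this file (and in .ci/test.sh below) replaces the old `mkdir build; cd build; cmake ..` dance with CMake's explicit source/build-tree flags, which leave the working directory untouched:

    cmake -B build -S . -A x64                                # configure: build tree ./build, source tree .
    cmake --build build --target ALL_BUILD --config Release   # build from anywhere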

View file

@@ -1,8 +1,21 @@
#!/bin/bash
set -e -E -u -o pipefail
# defaults
CONDA_ENV="test-env"
IN_UBUNTU_BASE_CONTAINER=${IN_UBUNTU_BASE_CONTAINER:-"false"}
METHOD=${METHOD:-""}
PRODUCES_ARTIFACTS=${PRODUCES_ARTIFACTS:-"false"}
SANITIZERS=${SANITIZERS:-""}
ARCH=$(uname -m)
LGB_VER=$(head -n 1 "${BUILD_DIRECTORY}/VERSION.txt")
if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "gcc" ]]; then
export CXX=g++-11
export CC=gcc-11
export CXX=g++-12
export CC=gcc-12
elif [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "clang" ]]; then
export CXX=clang++
export CC=clang
@@ -16,13 +29,25 @@ if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then
export LC_ALL="en_US.UTF-8"
fi
# Setting MACOSX_DEPLOYMENT_TARGET prevents CMake from building against too-new
# macOS features, and helps tools like Python build tools determine the appropriate
# wheel compatibility tags.
#
# ref:
# * https://cmake.org/cmake/help/latest/envvar/MACOSX_DEPLOYMENT_TARGET.html
# * https://github.com/scikit-build/scikit-build-core/blob/acb7d0346e4a05bcb47a4ea3939c705ab71e3145/src/scikit_build_core/builder/macos.py#L36
if [[ $ARCH == "x86_64" ]]; then
export MACOSX_DEPLOYMENT_TARGET=10.15
else
export MACOSX_DEPLOYMENT_TARGET=12.0
fi
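# (note) the value exported above also determines the platform tag of wheels
# built later in this script, e.g. MACOSX_DEPLOYMENT_TARGET=10.15 yields names
# like lightgbm-<ver>-py3-none-macosx_10_15_x86_64.whl (illustrative name)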
if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then
bash ${BUILD_DIRECTORY}/.ci/test_r_package.sh || exit 1
bash "${BUILD_DIRECTORY}/.ci/test-r-package.sh" || exit 1
exit 0
fi
if [[ "$TASK" == "cpp-tests" ]]; then
mkdir $BUILD_DIRECTORY/build && cd $BUILD_DIRECTORY/build
if [[ $METHOD == "with-sanitizers" ]]; then
extra_cmake_opts="-DUSE_SANITIZER=ON"
if [[ -n $SANITIZERS ]]; then
@@ -31,65 +56,66 @@ if [[ "$TASK" == "cpp-tests" ]]; then
else
extra_cmake_opts=""
fi
cmake -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF -DUSE_DEBUG=ON $extra_cmake_opts ..
make testlightgbm -j4 || exit 1
./../testlightgbm || exit 1
cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF -DUSE_DEBUG=ON $extra_cmake_opts
cmake --build build --target testlightgbm -j4 || exit 1
./testlightgbm || exit 1
exit 0
fi
# including python=version[build=*cpython] to ensure that conda doesn't fall back to pypy
CONDA_PYTHON_REQUIREMENT="python=$PYTHON_VERSION[build=*cpython]"
if [[ $TASK == "if-else" ]]; then
mamba create -q -y -n $CONDA_ENV ${CONDA_PYTHON_REQUIREMENT} numpy
source activate $CONDA_ENV
mkdir $BUILD_DIRECTORY/build && cd $BUILD_DIRECTORY/build && cmake .. && make lightgbm -j4 || exit 1
cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp && ../../lightgbm config=predict.conf output_result=origin.pred || exit 1
cd $BUILD_DIRECTORY/build && make lightgbm -j4 || exit 1
cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=predict.conf output_result=ifelse.pred && python test.py || exit 1
cmake -B build -S . || exit 1
cmake --build build --target lightgbm -j4 || exit 1
cd "$BUILD_DIRECTORY/tests/cpp_tests"
../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp
../../lightgbm config=predict.conf output_result=origin.pred
../../lightgbm config=predict.conf output_result=ifelse.pred
python test.py
exit 0
fi
cd "${BUILD_DIRECTORY}"
if [[ $TASK == "swig" ]]; then
mkdir $BUILD_DIRECTORY/build && cd $BUILD_DIRECTORY/build
if [[ $OS_NAME == "macos" ]]; then
cmake -DUSE_SWIG=ON -DAPPLE_OUTPUT_DYLIB=ON ..
else
cmake -DUSE_SWIG=ON ..
fi
make -j4 || exit 1
cmake -B build -S . -DUSE_SWIG=ON
cmake --build build -j4 || exit 1
if [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "gcc" ]]; then
objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T $BUILD_DIRECTORY/lib_lightgbm_swig.so >> $BUILD_DIRECTORY/objdump.log || exit 1
python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1
objdump -T ./lib_lightgbm_swig.so >> ./objdump.log || exit 1
python ./.ci/check-dynamic-dependencies.py ./objdump.log || exit 1
fi
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp $BUILD_DIRECTORY/build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar
cp ./build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar
fi
exit 0
fi
if [[ $TASK == "lint" ]]; then
cd ${BUILD_DIRECTORY}
mamba create -q -y -n $CONDA_ENV \
${CONDA_PYTHON_REQUIREMENT} \
cmakelint \
cpplint \
mypy \
'pre-commit>=3.6.0' \
'pyarrow>=14.0' \
'r-lintr>=3.1'
'cmakelint>=1.4.3' \
'cpplint>=1.6.0' \
'matplotlib-base>=3.9.1' \
'mypy>=1.11.1' \
'pre-commit>=3.8.0' \
'pyarrow-core>=17.0' \
'r-lintr>=3.1.2'
source activate $CONDA_ENV
echo "Linting Python code"
sh ${BUILD_DIRECTORY}/.ci/lint-python.sh || exit 1
bash ./.ci/lint-python.sh || exit 1
echo "Linting R code"
Rscript ${BUILD_DIRECTORY}/.ci/lint_r_code.R ${BUILD_DIRECTORY} || exit 1
Rscript ./.ci/lint-r-code.R "${BUILD_DIRECTORY}" || exit 1
echo "Linting C++ code"
sh ${BUILD_DIRECTORY}/.ci/lint-cpp.sh || exit 1
bash ./.ci/lint-cpp.sh || exit 1
exit 0
fi
if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
cd $BUILD_DIRECTORY/docs
cd "${BUILD_DIRECTORY}/docs"
mamba env create \
-n $CONDA_ENV \
--file ./env.yml || exit 1
@@ -97,234 +123,205 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
-q \
-y \
-n $CONDA_ENV \
doxygen \
'rstcheck>=6.0.0' || exit 1
'doxygen>=1.10.0' \
'rstcheck>=6.2.4' || exit 1
source activate $CONDA_ENV
# check reStructuredText formatting
cd $BUILD_DIRECTORY/python-package
cd "${BUILD_DIRECTORY}/python-package"
rstcheck --report-level warning $(find . -type f -name "*.rst") || exit 1
cd $BUILD_DIRECTORY/docs
cd "${BUILD_DIRECTORY}/docs"
rstcheck --report-level warning --ignore-directives=autoclass,autofunction,autosummary,doxygenfile $(find . -type f -name "*.rst") || exit 1
# build docs
make html || exit 1
if [[ $TASK == "check-links" ]]; then
# check docs for broken links
pip install --user linkchecker
pip install linkchecker
linkchecker --config=.linkcheckerrc ./_build/html/*.html || exit 1
exit 0
fi
# check the consistency of parameters' descriptions and other stuff
cp $BUILD_DIRECTORY/docs/Parameters.rst $BUILD_DIRECTORY/docs/Parameters-backup.rst
cp $BUILD_DIRECTORY/src/io/config_auto.cpp $BUILD_DIRECTORY/src/io/config_auto-backup.cpp
python $BUILD_DIRECTORY/helpers/parameter_generator.py || exit 1
diff $BUILD_DIRECTORY/docs/Parameters-backup.rst $BUILD_DIRECTORY/docs/Parameters.rst || exit 1
diff $BUILD_DIRECTORY/src/io/config_auto-backup.cpp $BUILD_DIRECTORY/src/io/config_auto.cpp || exit 1
cd "${BUILD_DIRECTORY}"
cp ./docs/Parameters.rst ./docs/Parameters-backup.rst
cp ./src/io/config_auto.cpp ./src/io/config_auto-backup.cpp
python ./.ci/parameter-generator.py || exit 1
diff ./docs/Parameters-backup.rst ./docs/Parameters.rst || exit 1
diff ./src/io/config_auto-backup.cpp ./src/io/config_auto.cpp || exit 1
exit 0
fi
# older versions of Dask are incompatible with pandas>=2.0, but not all conda packages' metadata accurately reflects that
#
# ref: https://github.com/microsoft/LightGBM/issues/6030
CONSTRAINED_DEPENDENCIES="'dask-core>=2023.5.0' 'distributed>=2023.5.0' 'pandas>=2.0'"
if [[ $PYTHON_VERSION == "3.7" ]]; then
CONSTRAINED_DEPENDENCIES="'dask-core' 'distributed' 'pandas<2.0'"
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py37.txt"
elif [[ $PYTHON_VERSION == "3.8" ]]; then
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py38.txt"
else
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core.txt"
fi
# including python=version[build=*cpython] to ensure that conda doesn't fall back to pypy
mamba create -q -y -n $CONDA_ENV \
${CONSTRAINED_DEPENDENCIES} \
cffi \
cloudpickle \
joblib \
matplotlib \
numpy \
psutil \
pyarrow \
pytest \
mamba create \
-y \
-n $CONDA_ENV \
${CONDA_REQUIREMENT_FILES} \
${CONDA_PYTHON_REQUIREMENT} \
python-graphviz \
scikit-learn \
scipy || exit 1
|| exit 1
source activate $CONDA_ENV
cd $BUILD_DIRECTORY
if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then
# fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL)
for LIBOMP_ALIAS in libgomp.dylib libiomp5.dylib libomp.dylib; do sudo ln -sf "$(brew --cellar libomp)"/*/lib/libomp.dylib $CONDA_PREFIX/lib/$LIBOMP_ALIAS || exit 1; done
fi
cd "${BUILD_DIRECTORY}"
if [[ $TASK == "sdist" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz -v || exit 1
sh ./build-python.sh sdist || exit 1
sh .ci/check-python-dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER.tar.gz -v || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
cp ./dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $TASK == "bdist" ]]; then
if [[ $OS_NAME == "macos" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
mv \
./dist/*.whl \
./dist/tmp.whl || exit 1
mv \
./dist/tmp.whl \
dist/lightgbm-$LGB_VER-py3-none-macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64.whl || exit 1
sh ./build-python.sh bdist_wheel || exit 1
sh .ci/check-python-dists.sh ./dist || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
else
ARCH=$(uname -m)
if [[ $ARCH == "x86_64" ]]; then
PLATFORM="manylinux_2_28_x86_64"
else
PLATFORM="manylinux2014_$ARCH"
fi
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1
sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1
# rename wheel, to fix scikit-build-core choosing the platform 'linux_aarch64' instead of
# a manylinux tag
mv \
./dist/*.whl \
./dist/tmp.whl || exit 1
mv \
./dist/tmp.whl \
./dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh .ci/check-python-dists.sh ./dist || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
# Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py
export LIGHTGBM_TEST_DUAL_CPU_GPU=1
fi
pip install --user $BUILD_DIRECTORY/dist/*.whl || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
pip install -v ./dist/*.whl || exit 1
pytest ./tests || exit 1
exit 0
fi
if [[ $TASK == "gpu" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "gpu"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ./include/LightGBM/config.h
grep -q 'std::string device_type = "gpu"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh .ci/check-python-dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_GPU=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --gpu || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --gpu || exit 1
sh ./.ci/check-python-dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
mkdir $BUILD_DIRECTORY/build
cd $BUILD_DIRECTORY/build
cmake -DUSE_GPU=ON ..
cmake -B build -S . -DUSE_GPU=ON
fi
elif [[ $TASK == "cuda" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' ./include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
# by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' ./include/LightGBM/config.h
grep -q 'gpu_use_dp = true' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh ./.ci/check-python-dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_CUDA=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --cuda || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --cuda || exit 1
sh ./.ci/check-python-dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
mkdir $BUILD_DIRECTORY/build
cd $BUILD_DIRECTORY/build
cmake -DUSE_CUDA=ON ..
cmake -B build -S . -DUSE_CUDA=ON
fi
elif [[ $TASK == "mpi" ]]; then
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh ./.ci/check-python-dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_MPI=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --mpi || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --mpi || exit 1
sh ./.ci/check-python-dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
mkdir $BUILD_DIRECTORY/build
cd $BUILD_DIRECTORY/build
cmake -DUSE_MPI=ON -DUSE_DEBUG=ON ..
cmake -B build -S . -DUSE_MPI=ON -DUSE_DEBUG=ON
fi
else
mkdir $BUILD_DIRECTORY/build
cd $BUILD_DIRECTORY/build
cmake ..
cmake -B build -S .
fi
make _lightgbm -j4 || exit 1
cmake --build build --target _lightgbm -j4 || exit 1
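# Aside (illustrative, not part of this diff): the new '-B/-S' configure plus
# 'cmake --build' pair is generator-agnostic, so the same two commands work
# whether CMake generated Makefiles, Ninja files, or an MSBuild project:
#   cmake -B build -S . [-DUSE_GPU=ON|-DUSE_CUDA=ON|-DUSE_MPI=ON]
#   cmake --build build --target _lightgbm -j4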
cd $BUILD_DIRECTORY && sh ./build-python.sh install --precompile --user || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh install --precompile || exit 1
pytest ./tests || exit 1
if [[ $TASK == "regular" ]]; then
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
if [[ $OS_NAME == "macos" ]]; then
cp $BUILD_DIRECTORY/lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib
cp ./lib_lightgbm.dylib $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib
else
if [[ $COMPILER == "gcc" ]]; then
objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1
python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1
python ./.ci/check-dynamic-dependencies.py ./objdump.log || exit 1
fi
cp $BUILD_DIRECTORY/lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so
cp ./lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so
fi
fi
cd $BUILD_DIRECTORY/examples/python-guide
cd "$BUILD_DIRECTORY/examples/python-guide"
sed -i'.bak' '/import lightgbm as lgb/a\
import matplotlib\
matplotlib.use\(\"Agg\"\)\
' plot_example.py # prevent interactive window mode
sed -i'.bak' 's/graph.render(view=True)/graph.render(view=False)/' plot_example.py
# requirements for examples
mamba install -q -y -n $CONDA_ENV \
h5py \
ipywidgets \
notebook
mamba install -y -n $CONDA_ENV \
'h5py>=3.10' \
'ipywidgets>=8.1.2' \
'notebook>=7.1.2'
for f in *.py **/*.py; do python $f || exit 1; done # run all examples
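# Aside (illustrative): '**/*.py' only recurses when bash's globstar option is on
# ('shopt -s globstar'); without it, '**' behaves like '*' and matches a single
# directory level. A find-based equivalent sketch that also fails fast:
#   while IFS= read -r -d '' f; do
#       python "$f" || exit 1
#   done < <(find . -name '*.py' -print0)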
cd $BUILD_DIRECTORY/examples/python-guide/notebooks
cd "$BUILD_DIRECTORY/examples/python-guide/notebooks"
sed -i'.bak' 's/INTERACTIVE = False/assert False, \\"Interactive mode disabled\\"/' interactive_plot_example.ipynb
jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb || exit 1 # run all notebooks
# importing the library should succeed even if all optional dependencies are not present
conda uninstall -n $CONDA_ENV --force --yes \
cffi \
dask-core \
dask \
distributed \
joblib \
matplotlib \
matplotlib-base \
psutil \
pyarrow \
python-graphviz \
.ci/trigger-dispatch-run.sh
@ -4,7 +4,7 @@
# Trigger manual workflow run by a dispatch event.
#
# [usage]
# trigger_dispatch_run.sh <PR_URL> <COMMENT_ID> <DISPATCH_NAME>
# trigger-dispatch-run.sh <PR_URL> <COMMENT_ID> <DISPATCH_NAME>
#
# PR_URL: URL of pull request from which dispatch is triggering.
#
@ -12,7 +12,7 @@
#
# DISPATCH_NAME: Name of a dispatch to be triggered.
set -e
set -e -E -u -o pipefail
if [ -z "$GITHUB_ACTIONS" ]; then
echo "Must be run inside GitHub Actions CI"
.github/CODEOWNERS
@ -7,4 +7,4 @@
# offer a reasonable automatic best-guess
# catch-all rule (this only gets matched if no rules below match)
* @guolinke @jameslamb @shiyu1994 @jmoralez @borchero
* @guolinke @jameslamb @shiyu1994 @jmoralez @borchero @StrikerRUS
.github/dependabot.yml (new file)
@ -0,0 +1,14 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: monthly
groups:
ci-dependencies:
patterns:
- "*"
commit-message:
prefix: "[ci]"
labels:
- maintenance
.github/workflows/cuda.yml
@ -7,54 +7,41 @@ on:
pull_request:
branches:
- master
- release/*
# Run manually by clicking a button in the UI
workflow_dispatch:
inputs:
restart_docker:
description: 'Restart nvidia-docker on the runner before building?'
required: true
type: boolean
default: false
# automatically cancel in-progress builds if another commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
github_actions: 'true'
os_name: linux
conda_env: test-env
jobs:
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
# Optionally reinstall + restart docker on the runner before building.
# This is safe as long as only 1 of these jobs runs at a time.
restart-docker:
name: set up docker
runs-on: [self-hosted, linux]
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.11"
cuda_version: "11.8.0"
task: cuda
- method: source
compiler: gcc
python_version: "3.9"
cuda_version: "12.2.0"
task: cuda
- method: pip
compiler: clang
python_version: "3.10"
cuda_version: "11.8.0"
task: cuda
timeout-minutes: 30
steps:
- name: Setup or update software on host machine
if: ${{ inputs.restart_docker }}
run: |
# install core packages
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
apt-transport-https \
ca-certificates \
curl \
git \
gnupg-agent \
lsb-release \
software-properties-common
# set up nvidia-docker
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
@ -67,43 +54,77 @@ jobs:
nvidia-docker2
sudo chmod a+rw /var/run/docker.sock
sudo systemctl restart docker
- name: Remove old folder with repository
run: sudo rm -rf $GITHUB_WORKSPACE
- name: mark job successful
run: |
exit 0
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (${{ matrix.linux_version }}, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
runs-on: [self-hosted, linux]
needs: [restart-docker]
container:
image: nvcr.io/nvidia/cuda:${{ matrix.cuda_version }}-devel-${{ matrix.linux_version }}
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
COMPILER: ${{ matrix.compiler }}
CONDA: /tmp/miniforge
DEBIAN_FRONTEND: noninteractive
METHOD: ${{ matrix.method }}
OS_NAME: linux
PYTHON_VERSION: ${{ matrix.python_version }}
TASK: ${{ matrix.task }}
SKBUILD_STRICT_CONFIG: true
options: --gpus all
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.10"
cuda_version: "11.8.0"
linux_version: "ubuntu20.04"
task: cuda
- method: source
compiler: gcc
python_version: "3.12"
cuda_version: "12.2.0"
linux_version: "ubuntu22.04"
task: cuda
- method: pip
compiler: clang
python_version: "3.11"
cuda_version: "11.8.0"
linux_version: "ubuntu20.04"
task: cuda
steps:
- name: Install latest git and sudo
run: |
apt-get update
apt-get install --no-install-recommends -y \
ca-certificates \
software-properties-common
add-apt-repository ppa:git-core/ppa -y
apt-get update
apt-get install --no-install-recommends -y \
git \
sudo
- name: Checkout repository
uses: actions/checkout@v1
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Setup and run tests
run: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
GITHUB_ACTIONS=${{ env.github_actions }}
OS_NAME=${{ env.os_name }}
COMPILER=${{ matrix.compiler }}
TASK=${{ matrix.task }}
METHOD=${{ matrix.method }}
CONDA_ENV=${{ env.conda_env }}
PYTHON_VERSION=${{ matrix.python_version }}
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:\$PATH
nvidia-smi
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
EOF
cuda_version="${{ matrix.cuda_version }}"
cuda_major=${cuda_version%%.*}
docker_img="nvcr.io/nvidia/cuda:${cuda_version}-devel"
if [[ ${cuda_major} -eq 11 ]]; then
docker_img="${docker_img}-ubuntu18.04"
elif [[ ${cuda_major} -ge 12 ]]; then
docker_img="${docker_img}-ubuntu20.04"
fi
docker run --env-file docker.env -v "$GITHUB_WORKSPACE":"$ROOT_DOCKER_FOLDER" --rm --gpus all "$docker_img" /bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export PATH=$CONDA/bin:$PATH
# check GPU usage
nvidia-smi
# build and test
$GITHUB_WORKSPACE/.ci/setup.sh
$GITHUB_WORKSPACE/.ci/test.sh
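# Aside (illustrative): the '--gpus all' option in 'container.options' above relies on
# the NVIDIA container toolkit being installed on the self-hosted runner; a quick
# host-side smoke test (hypothetical image tag):
#   docker run --rm --gpus all nvcr.io/nvidia/cuda:11.8.0-base-ubuntu20.04 nvidia-smi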
all-cuda-jobs-successful:
if: always()
runs-on: ubuntu-latest
.github/workflows/linkchecker.yml
@ -8,10 +8,9 @@ on:
- cron: '0 8 * * *'
env:
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
COMPILER: gcc
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'
TASK: 'check-links'
jobs:
@ -20,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false
.github/workflows/optional_checks.yml
@ -4,7 +4,6 @@ on:
pull_request:
branches:
- master
- release/*
jobs:
all-optional-checks-successful:
@ -12,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false
@ -24,7 +23,7 @@ jobs:
for i in "${workflows[@]}"; do
workflow_name=${i%;*}
trigger_phrase=${i#*;}
python "$GITHUB_WORKSPACE/.ci/get_workflow_status.py" "$trigger_phrase" \
python "$GITHUB_WORKSPACE/.ci/get-workflow-status.py" "$trigger_phrase" \
|| { echo "The last reported status from workflow \"$workflow_name\" is failure. Commit fixes and rerun the workflow."; \
exit 1; }
done
.github/workflows/python_package.yml
@ -7,7 +7,6 @@ on:
pull_request:
branches:
- master
- release/*
# automatically cancel in-progress builds if another commit is pushed
concurrency:
@ -15,8 +14,8 @@ concurrency:
cancel-in-progress: true
env:
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
CMAKE_BUILD_PARALLEL_LEVEL: 4
SKBUILD_STRICT_CONFIG: true
jobs:
test:
@ -27,33 +26,39 @@ jobs:
fail-fast: false
matrix:
include:
- os: macOS-latest
- os: macos-13
task: regular
python_version: '3.9'
- os: macOS-latest
task: sdist
python_version: '3.10'
- os: macOS-latest
- os: macos-13
task: sdist
python_version: '3.11'
- os: macos-13
task: bdist
python_version: '3.7'
- os: macOS-latest
python_version: '3.8'
- os: macos-13
task: if-else
python_version: '3.9'
- os: macOS-latest
task: mpi
method: source
python_version: '3.10'
- os: macOS-latest
task: mpi
method: pip
python_version: '3.11'
- os: macOS-latest
task: mpi
- os: macos-14
task: bdist
method: wheel
python_version: '3.8'
python_version: '3.10'
# We're currently skipping MPI jobs on macOS, see https://github.com/microsoft/LightGBM/pull/6425
# for further details.
# - os: macos-13
# task: mpi
# method: source
# python_version: '3.11'
# - os: macos-13
# task: mpi
# method: pip
# python_version: '3.12'
# - os: macos-13
# task: mpi
# method: wheel
# python_version: '3.9'
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
@ -63,7 +68,11 @@ jobs:
export TASK="${{ matrix.task }}"
export METHOD="${{ matrix.method }}"
export PYTHON_VERSION="${{ matrix.python_version }}"
if [[ "${{ matrix.os }}" == "macOS-latest" ]]; then
if [[ "${{ matrix.os }}" == "macos-14" ]]; then
# use clang when creating macOS release artifacts
export COMPILER="clang"
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "macos-13" ]]; then
export COMPILER="gcc"
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
@ -71,18 +80,23 @@ jobs:
export OS_NAME="linux"
fi
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export LGB_VER=$(head -n 1 VERSION.txt)
export CONDA=${HOME}/miniforge
export PATH=${CONDA}/bin:${PATH}
$GITHUB_WORKSPACE/.ci/setup.sh || exit 1
$GITHUB_WORKSPACE/.ci/test.sh || exit 1
test-oldest-versions:
name: Python - oldest supported versions (ubuntu-latest)
- name: upload wheels
if: ${{ matrix.method == 'wheel' && matrix.os == 'macos-14' }}
uses: actions/upload-artifact@v4
with:
name: macosx-arm64-wheel
path: dist/*.whl
test-latest-versions:
name: Python - latest versions (ubuntu-latest)
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
@ -90,6 +104,7 @@ jobs:
run: |
docker run \
--rm \
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
@ -100,12 +115,39 @@ jobs:
--rm \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
python:3.6 \
python:3.11 \
/bin/bash ./.ci/test-python-latest.sh
test-oldest-versions:
name: Python - oldest supported versions (ubuntu-latest)
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Create wheel
run: |
docker run \
--rm \
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
/bin/bash -c 'PATH=/opt/miniforge/bin:$PATH sh ./build-python.sh bdist_wheel --nomp'
- name: Test compatibility
run: |
docker run \
--rm \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
python:3.7 \
/bin/bash ./.ci/test-python-oldest.sh
all-python-package-jobs-successful:
if: always()
runs-on: ubuntu-latest
needs: [test, test-oldest-versions]
needs: [test, test-latest-versions, test-oldest-versions]
steps:
- name: Note that all tests succeeded
uses: re-actors/alls-green@v1.2.2
.github/workflows/r_configure.yml
@ -21,7 +21,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
.github/workflows/r_package.yml
@ -7,7 +7,6 @@ on:
pull_request:
branches:
- master
- release/*
# automatically cancel in-progress builds if another commit is pushed
concurrency:
@ -15,6 +14,16 @@ concurrency:
cancel-in-progress: true
env:
# https://github.com/actions/checkout/issues/1590#issuecomment-2207052044
#
# this could be removed (hopefully) when R 3.6 support is removed
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
# in CMake-driven builds, parallelize compilation
CMAKE_BUILD_PARALLEL_LEVEL: 4
# on Debian-based images, avoid interactive prompts
DEBIAN_FRONTEND: noninteractive
# parallelize compilation (extra important for Linux, where CRAN doesn't supply pre-compiled binaries)
MAKEFLAGS: "-j4"
# hack to get around this:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
_R_CHECK_SYSTEM_CLOCK_: 0
@ -51,25 +60,19 @@ jobs:
r_version: 4.3
build_type: cmake
container: 'ubuntu:22.04'
- os: ubuntu-latest
task: r-package
compiler: clang
r_version: 3.6
build_type: cmake
container: 'ubuntu:18.04'
- os: ubuntu-latest
task: r-package
compiler: clang
r_version: 4.3
build_type: cmake
container: 'ubuntu:22.04'
- os: macOS-latest
- os: macos-13
task: r-package
compiler: gcc
r_version: 4.3
build_type: cmake
container: null
- os: macOS-latest
- os: macos-13
task: r-package
compiler: clang
r_version: 4.3
@ -128,21 +131,19 @@ jobs:
r_version: 4.3
build_type: cran
container: 'ubuntu:22.04'
- os: macOS-latest
- os: macos-13
task: r-package
compiler: clang
r_version: 4.3
build_type: cran
container: null
################
# Other checks #
################
- os: ubuntu-latest
task: r-rchk
compiler: gcc
# macos-14 = arm64
- os: macos-14
task: r-package
compiler: clang
r_version: 4.3
build_type: cran
container: 'ubuntu:22.04'
container: null
steps:
- name: Prevent conversion of line endings on Windows
if: startsWith(matrix.os, 'windows')
@ -180,35 +181,29 @@ jobs:
- name: Install pandoc
uses: r-lib/actions/setup-pandoc@v2
if: matrix.container != 'ubuntu:18.04'
# R 3.6 binary isn't easily available on buntu 18.04,
# R 3.6 binary isn't easily available on Ubuntu 18.04,
# but setup-pandoc>=2.7.1 uses a too-new glibc for it.
# ref: https://github.com/microsoft/LightGBM/issues/6298
- name: Install pandoc
uses: r-lib/actions/setup-pandoc@v2.6.0
if: matrix.container == 'ubuntu:18.04'
- name: install tinytex
- name: Install tinytex
if: startsWith(matrix.os, 'windows')
uses: r-lib/actions/setup-tinytex@v2
env:
CTAN_MIRROR: https://ctan.math.illinois.edu/systems/win32/miktex
TINYTEX_INSTALLER: TinyTeX
- name: Setup and run tests on Linux and macOS
if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest'
if: startsWith(matrix.os, 'macos') || startsWith(matrix.os, 'ubuntu')
shell: bash
run: |
export TASK="${{ matrix.task }}"
export COMPILER="${{ matrix.compiler }}"
export GITHUB_ACTIONS="true"
if [[ "${{ matrix.os }}" == "macOS-latest" ]]; then
if [[ "${{ matrix.os }}" =~ ^macos ]]; then
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
export OS_NAME="linux"
export IN_UBUNTU_BASE_CONTAINER="true"
# the default version of cmake provided on Ubuntu 18.04 (v3.10.2), is not supported by LightGBM
# see https://github.com/microsoft/LightGBM/issues/5642
if [[ "${{ matrix.container }}" == "ubuntu:18.04" ]]; then
export INSTALL_CMAKE_FROM_RELEASES="true"
fi
fi
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export R_VERSION="${{ matrix.r_version }}"
@ -225,9 +220,8 @@ jobs:
$env:R_VERSION = "${{ matrix.r_version }}"
$env:R_BUILD_TYPE = "${{ matrix.build_type }}"
$env:COMPILER = "${{ matrix.compiler }}"
$env:GITHUB_ACTIONS = "true"
$env:TASK = "${{ matrix.task }}"
& "$env:GITHUB_WORKSPACE/.ci/test_windows.ps1"
& "$env:GITHUB_WORKSPACE/.ci/test-windows.ps1"
test-r-sanitizers:
name: r-sanitizers (ubuntu-latest, R-devel, ${{ matrix.compiler }} ASAN/UBSAN)
timeout-minutes: 60
@ -246,7 +240,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
@ -264,59 +258,85 @@ jobs:
RDscript${{ matrix.r_customization }} testthat.R >> tests.log 2>&1 || exit_code=-1
cat ./tests.log
exit ${exit_code}
test-r-debian-clang:
name: r-package (debian, R-devel, clang-${{ matrix.clang-version }})
test-r-extra-checks:
name: r-package (${{ matrix.image }}, R-devel)
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
# list of versions tested in CRAN "Additional Checks":
# https://cran.r-project.org/web/checks/check_issue_kinds.html
clang-version:
- 16
- 17
- 18
# references:
# * CRAN "additional checks": https://cran.r-project.org/web/checks/check_issue_kinds.html
# * images: https://r-hub.github.io/containers/containers.html
image:
# clang16 should be re-enabled once it's fixed upstream
# ref: https://github.com/microsoft/LightGBM/issues/6607
#- clang16
- clang17
- clang18
- clang19
- gcc14
- intel
runs-on: ubuntu-latest
container: rhub/debian-clang-devel
env:
DEBIAN_FRONTEND: noninteractive
container: ghcr.io/r-hub/containers/${{ matrix.image }}:latest
steps:
- name: Install Git before checkout
shell: bash
run: |
apt-get update --allow-releaseinfo-change
apt-get install --no-install-recommends -y git
- name: Trust git cloning LightGBM
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: install clang
- name: Install pandoc
uses: r-lib/actions/setup-pandoc@v2
- name: Install LaTeX
shell: bash
run: |
./.ci/install-clang-devel.sh ${{ matrix.clang-version }}
if type -f apt 2>&1 > /dev/null; then
apt-get update
apt-get install --no-install-recommends -y \
devscripts \
texinfo \
texlive-latex-extra \
texlive-latex-recommended \
texlive-fonts-recommended \
texlive-fonts-extra \
tidy \
qpdf
else
yum update -y
yum install -y \
devscripts \
qpdf \
texinfo \
texinfo-tex \
texlive-latex \
tidy
fi
- name: Install packages and run tests
shell: bash
run: |
export PATH=/opt/R-devel/bin/:${PATH}
Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
sh build-cran-package.sh
R CMD check --as-cran --run-donttest lightgbm_*.tar.gz || exit 1
echo ""
echo "install logs:"
echo ""
cat lightgbm.Rcheck/00install.out
echo ""
if grep -q -E "NOTE|WARNING|ERROR" lightgbm.Rcheck/00check.log; then
echo "NOTEs, WARNINGs, or ERRORs have been found by R CMD check"
exit 1
if [[ "${{ matrix.image }}" =~ "clang" ]]; then
# allowing the following NOTEs (produced by default in the clang images):
#
# * checking compilation flags used ... NOTE
# Compilation used the following non-portable flag(s):
# -Wp,-D_FORTIFY_SOURCE=3
#
# even though CRAN itself sets that:
# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang
#
declare -i allowed_notes=1
else
declare -i allowed_notes=0
fi
bash .ci/run-r-cmd-check.sh \
"$(echo lightgbm_$(head -1 VERSION.txt).tar.gz)" \
"${allowed_notes}"
all-r-package-jobs-successful:
if: always()
runs-on: ubuntu-latest
needs: [test, test-r-sanitizers, test-r-debian-clang]
needs: [test, test-r-sanitizers, test-r-extra-checks]
steps:
- name: Note that all tests succeeded
uses: re-actors/alls-green@v1.2.2
.github/workflows/r_valgrind.yml
@ -24,7 +24,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
@ -33,24 +33,24 @@ jobs:
- name: Send init status
if: ${{ always() }}
run: |
$GITHUB_WORKSPACE/.ci/set_commit_status.sh "${{ github.workflow }}" "pending" "${{ github.event.client_payload.pr_sha }}"
$GITHUB_WORKSPACE/.ci/append_comment.sh \
$GITHUB_WORKSPACE/.ci/set-commit-status.sh "${{ github.workflow }}" "pending" "${{ github.event.client_payload.pr_sha }}"
$GITHUB_WORKSPACE/.ci/append-comment.sh \
"${{ github.event.client_payload.comment_number }}" \
"Workflow **${{ github.workflow }}** has been triggered! 🚀\r\n${GITHUB_SERVER_URL}/microsoft/LightGBM/actions/runs/${GITHUB_RUN_ID}"
- name: Run tests with valgrind
shell: bash
run: ./.ci/test_r_package_valgrind.sh
run: ./.ci/test-r-package-valgrind.sh
- name: Send final status
if: ${{ always() }}
run: |
$GITHUB_WORKSPACE/.ci/set_commit_status.sh "${{ github.workflow }}" "${{ job.status }}" "${{ github.event.client_payload.pr_sha }}"
$GITHUB_WORKSPACE/.ci/append_comment.sh \
$GITHUB_WORKSPACE/.ci/set-commit-status.sh "${{ github.workflow }}" "${{ job.status }}" "${{ github.event.client_payload.pr_sha }}"
$GITHUB_WORKSPACE/.ci/append-comment.sh \
"${{ github.event.client_payload.comment_number }}" \
"Status: ${{ job.status }}."
- name: Rerun workflow-indicator
if: ${{ always() }}
run: |
bash $GITHUB_WORKSPACE/.ci/rerun_workflow.sh \
bash $GITHUB_WORKSPACE/.ci/rerun-workflow.sh \
"optional_checks.yml" \
"${{ github.event.client_payload.pr_number }}" \
"${{ github.event.client_payload.pr_branch }}" \
.github/workflows/static_analysis.yml
@ -9,7 +9,6 @@ on:
pull_request:
branches:
- master
- release/*
# automatically cancel in-progress builds if another commit is pushed
concurrency:
@ -18,15 +17,14 @@ concurrency:
env:
COMPILER: 'gcc'
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
MAKEFLAGS: '-j4'
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'
jobs:
test:
name: ${{ matrix.task }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
timeout-minutes: 60
strategy:
fail-fast: false
@ -36,7 +34,7 @@ jobs:
- task: check-docs
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false
@ -59,7 +57,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
.github/workflows/triggering_comments.yml
@ -12,7 +12,7 @@ jobs:
SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false
@ -20,7 +20,7 @@ jobs:
- name: Trigger R valgrind tests
if: github.event.comment.body == '/gha run r-valgrind'
run: |
$GITHUB_WORKSPACE/.ci/trigger_dispatch_run.sh \
$GITHUB_WORKSPACE/.ci/trigger-dispatch-run.sh \
"${{ github.event.issue.pull_request.url }}" \
"${{ github.event.comment.id }}" \
"gha_run_r_valgrind"
@ -28,7 +28,7 @@ jobs:
- name: Trigger update R configure
if: github.event.comment.body == '/gha run r-configure'
run: |
$GITHUB_WORKSPACE/.ci/trigger_dispatch_run.sh \
$GITHUB_WORKSPACE/.ci/trigger-dispatch-run.sh \
"${{ github.event.issue.pull_request.url }}" \
"${{ github.event.comment.id }}" \
"gha_run_r_configure"
.gitignore
@ -405,7 +405,7 @@ python-package/lightgbm/VERSION.txt
# R build artefacts
**/autom4te.cache/
conftest*
R-package/conftest*
R-package/config.status
!R-package/data/agaricus.test.rda
!R-package/data/agaricus.train.rda
.pre-commit-config.yaml
@ -1,3 +1,4 @@
# exclude files which are auto-generated by build tools
exclude: |
(?x)^(
build|
@ -5,8 +6,17 @@ exclude: |
lightgbm-python|
lightgbm_r|
)$
|R-package/configure$
|R-package/inst/Makevars$
|R-package/inst/Makevars.win$
|R-package/man/.*Rd$
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
@ -15,7 +25,7 @@ repos:
args: ["--settings-path", "python-package/pyproject.toml"]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.5.7
hooks:
# Run the linter.
- id: ruff
.vsts-ci.yml
@ -7,24 +7,47 @@ trigger:
- v*
pr:
- master
- release/*
variables:
AZURE: 'true'
PYTHON_VERSION: '3.11'
CONDA_ENV: test-env
CMAKE_BUILD_PARALLEL_LEVEL: 4
PYTHON_VERSION: '3.12'
runCodesignValidationInjection: false
skipComponentGovernanceDetection: true
Codeql.Enabled: false
Codeql.SkipTaskAutoInjection: true
DOTNET_CLI_TELEMETRY_OPTOUT: true
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
SKBUILD_STRICT_CONFIG: true
resources:
# The __work/ directory, where Azure DevOps writes the source files, needs to be read-write because
# LightGBM's CI jobs write files in the source directory.
#
# For all the containers included here, all other directories that Azure mounts in are mounted as read-only
# to minimize the risk of side effects from one run affecting future runs.
# ref: https://learn.microsoft.com/en-us/azure/devops/pipelines/yaml-schema/resources-containers-container
containers:
- container: linux-artifact-builder
image: lightgbm/vsts-agent:manylinux_2_28_x86_64
mountReadOnly:
work: false
externals: true
tools: true
tasks: true
- container: ubuntu-latest
image: 'ubuntu:22.04'
options: "--name ci-container -v /usr/bin/docker:/tmp/docker:ro"
mountReadOnly:
work: false
externals: true
tools: true
tasks: true
- container: rbase
image: wch1/r-debug
mountReadOnly:
work: false
externals: true
tools: true
tasks: true
jobs:
###########################################
- job: Linux
@ -34,25 +57,25 @@ jobs:
SETUP_CONDA: 'false'
OS_NAME: 'linux'
PRODUCES_ARTIFACTS: 'true'
pool: sh-mariner
pool: mariner-20240410-0
container: linux-artifact-builder
strategy:
matrix:
regular:
TASK: regular
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
sdist:
TASK: sdist
PYTHON_VERSION: '3.7'
PYTHON_VERSION: '3.8'
bdist:
TASK: bdist
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
inference:
TASK: if-else
mpi_source:
TASK: mpi
METHOD: source
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
gpu_source:
TASK: gpu
METHOD: source
@ -61,17 +84,25 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
echo "##vso[task.prependpath]/usr/lib64/openmpi/bin"
echo "##vso[task.prependpath]$CONDA/bin"
displayName: 'Set variables'
- script: |
git clean -d -f -x
displayName: 'Clean source directory'
- script: |
echo '$(Build.SourceVersion)' > '$(Build.ArtifactStagingDirectory)/commit.txt'
displayName: 'Add commit hash to artifacts archive'
- bash: $(Build.SourcesDirectory)/.ci/setup.sh
- task: Bash@3
displayName: Setup
- bash: $(Build.SourcesDirectory)/.ci/test.sh
inputs:
filePath: $(Build.SourcesDirectory)/.ci/setup.sh
targetType: filePath
- task: Bash@3
displayName: Test
inputs:
filePath: $(Build.SourcesDirectory)/.ci/test.sh
targetType: filePath
- task: PublishBuildArtifacts@1
condition: and(succeeded(), in(variables['TASK'], 'regular', 'sdist', 'bdist', 'swig'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
inputs:
@ -87,7 +118,7 @@ jobs:
IN_UBUNTU_BASE_CONTAINER: 'true'
OS_NAME: 'linux'
SETUP_CONDA: 'true'
pool: sh-mariner
pool: mariner-20240410-0
container: ubuntu-latest
strategy:
matrix:
@ -97,7 +128,7 @@ jobs:
TASK: sdist
bdist:
TASK: bdist
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
inference:
TASK: if-else
mpi_source:
@ -106,30 +137,29 @@ jobs:
mpi_pip:
TASK: mpi
METHOD: pip
PYTHON_VERSION: '3.10'
PYTHON_VERSION: '3.11'
mpi_wheel:
TASK: mpi
METHOD: wheel
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
gpu_source:
TASK: gpu
METHOD: source
PYTHON_VERSION: '3.10'
PYTHON_VERSION: '3.11'
gpu_pip:
TASK: gpu
METHOD: pip
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
gpu_wheel:
TASK: gpu
METHOD: wheel
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
cpp_tests:
TASK: cpp-tests
METHOD: with-sanitizers
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$HOME/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
@ -139,16 +169,27 @@ jobs:
/tmp/docker exec -t -u 0 ci-container \
sh -c "apt-get update && apt-get -o Dpkg::Options::="--force-confold" -y install sudo"
displayName: 'Install sudo'
- bash: $(Build.SourcesDirectory)/.ci/setup.sh
- script: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends git
git clean -d -f -x
displayName: 'Clean source directory'
- task: Bash@3
displayName: Setup
- bash: $(Build.SourcesDirectory)/.ci/test.sh
inputs:
filePath: $(Build.SourcesDirectory)/.ci/setup.sh
targetType: 'filePath'
- task: Bash@3
displayName: Test
inputs:
filePath: $(Build.SourcesDirectory)/.ci/test.sh
targetType: 'filePath'
###########################################
- job: QEMU_multiarch
###########################################
variables:
BUILD_DIRECTORY: /LightGBM
COMPILER: gcc
OS_NAME: 'linux'
PRODUCES_ARTIFACTS: 'true'
pool:
vmImage: ubuntu-22.04
@ -171,26 +212,15 @@ jobs:
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
displayName: 'Enable Docker multi-architecture support'
- script: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
AZURE=$AZURE
OS_NAME=$OS_NAME
COMPILER=$COMPILER
TASK=$TASK
METHOD=$METHOD
CONDA_ENV=$CONDA_ENV
PYTHON_VERSION=$PYTHON_VERSION
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS
BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY
EOF
git clean -d -f -x
displayName: 'Clean source directory'
- script: |
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:/opt/rh/llvm-toolset-7.0/root/usr/bin:\$PATH
export LD_LIBRARY_PATH=/opt/rh/llvm-toolset-7.0/root/usr/lib64:\$LD_LIBRARY_PATH
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
\$BUILD_DIRECTORY/.ci/setup.sh || exit 1
\$BUILD_DIRECTORY/.ci/test.sh || exit 1
EOF
IMAGE_URI="lightgbm/vsts-agent:manylinux2014_aarch64"
docker pull "${IMAGE_URI}" || exit 1
@ -199,11 +229,19 @@ jobs:
docker run \
--platform "${PLATFORM}" \
--rm \
--env-file docker.env \
-v "$(Build.SourcesDirectory)":"$ROOT_DOCKER_FOLDER" \
--env AZURE=true \
--env BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY \
--env BUILD_DIRECTORY=$BUILD_DIRECTORY \
--env COMPILER=$COMPILER \
--env METHOD=$METHOD \
--env OS_NAME=linux \
--env PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS \
--env PYTHON_VERSION=$PYTHON_VERSION \
--env TASK=$TASK \
-v "$(Build.SourcesDirectory)":"$BUILD_DIRECTORY" \
-v "$(Build.ArtifactStagingDirectory)":"$(Build.ArtifactStagingDirectory)" \
"${IMAGE_URI}" \
/bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
/bin/bash $BUILD_DIRECTORY/docker-script.sh
displayName: 'Setup and run tests'
- task: PublishBuildArtifacts@1
condition: and(succeeded(), in(variables['TASK'], 'bdist'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
@ -219,7 +257,7 @@ jobs:
OS_NAME: 'macos'
PRODUCES_ARTIFACTS: 'true'
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-12'
strategy:
matrix:
regular:
@ -239,16 +277,24 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$AGENT_HOMEDIRECTORY/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_8_X64"
displayName: 'Set variables'
- bash: $(Build.SourcesDirectory)/.ci/setup.sh
- script: |
git clean -d -f -x
displayName: 'Clean source directory'
- task: Bash@3
displayName: Setup
- bash: $(Build.SourcesDirectory)/.ci/test.sh
inputs:
filePath: $(Build.SourcesDirectory)/.ci/setup.sh
targetType: filePath
- task: Bash@3
displayName: Test
inputs:
filePath: $(Build.SourcesDirectory)/.ci/test.sh
targetType: filePath
- task: PublishBuildArtifacts@1
condition: and(succeeded(), in(variables['TASK'], 'regular', 'bdist', 'swig'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
inputs:
@ -279,7 +325,10 @@ jobs:
Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
displayName: 'Set Variables'
- script: |
cmd /c "powershell -ExecutionPolicy Bypass -File %BUILD_SOURCESDIRECTORY%/.ci/install_opencl.ps1"
git clean -d -f -x
displayName: 'Clean source directory'
- script: |
cmd /c "powershell -ExecutionPolicy Bypass -File %BUILD_SOURCESDIRECTORY%/.ci/install-opencl.ps1"
condition: eq(variables['TASK'], 'bdist')
displayName: 'Install OpenCL'
- script: |
@ -288,7 +337,7 @@ jobs:
cmd /c "conda config --add channels conda-forge"
cmd /c "conda config --set channel_priority strict"
cmd /c "conda init powershell"
cmd /c "powershell -ExecutionPolicy Bypass -File %BUILD_SOURCESDIRECTORY%/.ci/test_windows.ps1"
cmd /c "powershell -ExecutionPolicy Bypass -File %BUILD_SOURCESDIRECTORY%/.ci/test-windows.ps1"
displayName: Test
- task: PublishBuildArtifacts@1
condition: and(succeeded(), in(variables['TASK'], 'regular', 'bdist', 'swig'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
@ -304,6 +353,9 @@ jobs:
vmImage: 'ubuntu-22.04'
container: rbase
steps:
- script: |
git clean -d -f -x
displayName: 'Clean source directory'
- script: |
LGB_VER=$(head -n 1 VERSION.txt | sed "s/rc/-/g")
R_LIB_PATH=~/Rlib
@ -367,12 +419,12 @@ jobs:
artifactName: R-package
downloadPath: $(Build.SourcesDirectory)/R
- script: |
python "$(Build.SourcesDirectory)/.nuget/create_nuget.py" "$(Build.SourcesDirectory)/binaries/PackageAssets"
python "$(Build.SourcesDirectory)/.ci/create-nuget.py" "$(Build.SourcesDirectory)/binaries/PackageAssets"
displayName: 'Create NuGet configuration files'
- task: NuGetCommand@2
inputs:
command: pack
packagesToPack: '$(Build.SourcesDirectory)/.nuget/*.nuspec'
packagesToPack: '$(Build.SourcesDirectory)/.ci/nuget/*.nuspec'
packDestination: '$(Build.ArtifactStagingDirectory)/nuget'
- task: PublishBuildArtifacts@1
inputs:
CMakeLists.txt
@ -2,11 +2,11 @@ option(USE_MPI "Enable MPI-based distributed learning" OFF)
option(USE_OPENMP "Enable OpenMP" ON)
option(USE_GPU "Enable GPU-accelerated training" OFF)
option(USE_SWIG "Enable SWIG to generate Java API" OFF)
option(USE_HDFS "Enable HDFS support (EXPERIMENTAL)" OFF)
option(USE_TIMETAG "Set to ON to output time costs" OFF)
option(USE_CUDA "Enable CUDA-accelerated training " OFF)
option(USE_DEBUG "Set to ON for Debug mode" OFF)
option(USE_SANITIZER "Use santizer flags" OFF)
option(USE_HOMEBREW_FALLBACK "(macOS-only) also look in 'brew --prefix' for libraries (e.g. OpenMP)" ON)
set(
ENABLED_SANITIZERS
"address" "leak" "undefined"
@ -23,11 +23,15 @@ option(__BUILD_FOR_PYTHON "Set to ON if building lib_lightgbm for use with the P
option(__BUILD_FOR_R "Set to ON if building lib_lightgbm for use with the R package" OFF)
option(__INTEGRATE_OPENCL "Set to ON if building LightGBM with the OpenCL ICD Loader and its dependencies included" OFF)
if(APPLE)
option(APPLE_OUTPUT_DYLIB "Output dylib shared library" OFF)
endif()
cmake_minimum_required(VERSION 3.28)
cmake_minimum_required(VERSION 3.18)
# If using Visual Studio generators, always target v10.x of the Windows SDK.
# Doing this avoids lookups that could fall back to very old versions, e.g. by finding
# outdated registry entries.
# ref: https://cmake.org/cmake/help/latest/variable/CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION.html
if(CMAKE_GENERATOR MATCHES "Visual Studio")
set(CMAKE_SYSTEM_VERSION 10.0 CACHE INTERNAL "target Windows SDK version" FORCE)
endif()
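# Aside (illustrative): with a Visual Studio generator the pin above takes effect at
# configure time, e.g.
#   cmake -B build -S . -G "Visual Studio 17 2022" -A x64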
project(lightgbm LANGUAGES C CXX)
@ -118,6 +122,12 @@ include_directories(${EIGEN_DIR})
add_definitions(-DEIGEN_MPL2_ONLY)
add_definitions(-DEIGEN_DONT_PARALLELIZE)
set(FAST_DOUBLE_PARSER_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external_libs/fast_double_parser/include")
include_directories(${FAST_DOUBLE_PARSER_INCLUDE_DIR})
set(FMT_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external_libs/fmt/include")
include_directories(${FMT_INCLUDE_DIR})
if(__BUILD_FOR_R)
find_package(LibR REQUIRED)
message(STATUS "LIBR_EXECUTABLE: ${LIBR_EXECUTABLE}")
@ -152,16 +162,18 @@ if(USE_OPENMP)
if(APPLE)
find_package(OpenMP)
if(NOT OpenMP_FOUND)
# libomp 15.0+ from brew is keg-only, so have to search in other locations.
# See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927.
execute_process(COMMAND brew --prefix libomp
if(USE_HOMEBREW_FALLBACK)
# libomp 15.0+ from brew is keg-only, so have to search in other locations.
# See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927.
execute_process(COMMAND brew --prefix libomp
OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
OUTPUT_STRIP_TRAILING_WHITESPACE)
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_C_LIB_NAMES omp)
set(OpenMP_CXX_LIB_NAMES omp)
set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_C_LIB_NAMES omp)
set(OpenMP_CXX_LIB_NAMES omp)
set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
endif()
find_package(OpenMP REQUIRED)
endif()
else()
@ -197,33 +209,34 @@ if(__INTEGRATE_OPENCL)
endif()
endif()
if(BUILD_CPP_TEST AND MSVC)
# Use /MT flag to statically link the C runtime
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()
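# Aside (illustrative): the generator expression above selects /MT in Release and /MTd in
# Debug builds, so no extra runtime flags are needed when building the tests:
#   cmake -B build -S . -DBUILD_CPP_TEST=ON
#   cmake --build build --target testlightgbm --config Release -j4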
if(USE_CUDA)
find_package(CUDA 11.0 REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
find_package(CUDAToolkit 11.0 REQUIRED)
include_directories(${CUDAToolkit_INCLUDE_DIRS})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS} -Xcompiler=-fPIC -Xcompiler=-Wall")
# reference for mapping of CUDA toolkit component versions to supported architectures ("compute capabilities"):
# https://en.wikipedia.org/wiki/CUDA#GPUs_supported
set(CUDA_ARCHS "6.0" "6.1" "6.2" "7.0" "7.5")
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
list(APPEND CUDA_ARCHS "8.0")
set(CUDA_ARCHS "60" "61" "62" "70" "75")
if(CUDA_VERSION VERSION_GREATER_EQUAL "110")
list(APPEND CUDA_ARCHS "80")
endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
list(APPEND CUDA_ARCHS "8.6")
if(CUDA_VERSION VERSION_GREATER_EQUAL "111")
list(APPEND CUDA_ARCHS "86")
endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.5")
list(APPEND CUDA_ARCHS "8.7")
if(CUDA_VERSION VERSION_GREATER_EQUAL "115")
list(APPEND CUDA_ARCHS "87")
endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
list(APPEND CUDA_ARCHS "8.9")
list(APPEND CUDA_ARCHS "9.0")
if(CUDA_VERSION VERSION_GREATER_EQUAL "118")
list(APPEND CUDA_ARCHS "89")
list(APPEND CUDA_ARCHS "90")
endif()
list(POP_BACK CUDA_ARCHS CUDA_LAST_SUPPORTED_ARCH)
list(APPEND CUDA_ARCHS "${CUDA_LAST_SUPPORTED_ARCH}+PTX")
cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS ${CUDA_ARCHS})
string(REPLACE ";" " " CUDA_ARCH_FLAGS "${CUDA_ARCH_FLAGS}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_ARCH_FLAGS}")
if(USE_DEBUG)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g")
else()
@ -259,8 +272,12 @@ if(USE_CUDA)
function(add_histogram hsize hname hadd hconst hdir)
add_library(histo${hsize}${hname} OBJECT src/treelearner/kernels/histogram${hsize}.cu)
set_target_properties(histo${hsize}${hname} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(histo${hsize}${hname} PROPERTIES CUDA_ARCHITECTURES OFF)
set_target_properties(
histo${hsize}${hname}
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_ARCHITECTURES "${CUDA_ARCHS}"
)
if(hadd)
list(APPEND histograms histo${hsize}${hname})
set(histograms ${histograms} PARENT_SCOPE)
@ -283,15 +300,6 @@ if(USE_CUDA)
endforeach()
endif()
if(USE_HDFS)
find_package(JNI REQUIRED)
find_path(HDFS_INCLUDE_DIR hdfs.h REQUIRED)
find_library(HDFS_LIB NAMES hdfs REQUIRED)
include_directories(${HDFS_INCLUDE_DIR})
add_definitions(-DUSE_HDFS)
set(HDFS_CXX_LIBRARIES ${HDFS_LIB} ${JAVA_JVM_LIBRARY})
endif()
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <xmmintrin.h>
@ -407,52 +415,89 @@ set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR})
include_directories(${LightGBM_HEADER_DIR})
if(APPLE)
if(APPLE_OUTPUT_DYLIB)
set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib")
else()
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
endif()
endif()
if(USE_MPI)
include_directories(${MPI_CXX_INCLUDE_PATH})
endif()
file(
GLOB
SOURCES
src/boosting/*.cpp
src/io/*.cpp
src/metric/*.cpp
src/objective/*.cpp
src/network/*.cpp
src/treelearner/*.cpp
src/utils/*.cpp
set(
LGBM_SOURCES
src/boosting/boosting.cpp
src/boosting/gbdt_model_text.cpp
src/boosting/gbdt_prediction.cpp
src/boosting/gbdt.cpp
src/boosting/prediction_early_stop.cpp
src/boosting/sample_strategy.cpp
src/io/bin.cpp
src/io/config_auto.cpp
src/io/config.cpp
src/io/dataset_loader.cpp
src/io/dataset.cpp
src/io/file_io.cpp
src/io/json11.cpp
src/io/metadata.cpp
src/io/parser.cpp
src/io/train_share_states.cpp
src/io/tree.cpp
src/metric/dcg_calculator.cpp
src/metric/metric.cpp
src/network/linker_topo.cpp
src/network/linkers_mpi.cpp
src/network/linkers_socket.cpp
src/network/network.cpp
src/objective/objective_function.cpp
src/treelearner/data_parallel_tree_learner.cpp
src/treelearner/feature_histogram.cpp
src/treelearner/feature_parallel_tree_learner.cpp
src/treelearner/gpu_tree_learner.cpp
src/treelearner/gradient_discretizer.cpp
src/treelearner/linear_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
src/treelearner/tree_learner.cpp
src/treelearner/voting_parallel_tree_learner.cpp
src/utils/openmp_wrapper.cpp
)
file(
GLOB
set(
LGBM_CUDA_SOURCES
src/treelearner/*.cu
src/boosting/cuda/*.cpp
src/boosting/cuda/*.cu
src/metric/cuda/*.cpp
src/metric/cuda/*.cu
src/objective/cuda/*.cpp
src/objective/cuda/*.cu
src/treelearner/cuda/*.cpp
src/treelearner/cuda/*.cu
src/io/cuda/*.cu
src/io/cuda/*.cpp
src/cuda/*.cpp
src/cuda/*.cu
src/boosting/cuda/cuda_score_updater.cpp
src/boosting/cuda/cuda_score_updater.cu
src/metric/cuda/cuda_binary_metric.cpp
src/metric/cuda/cuda_pointwise_metric.cpp
src/metric/cuda/cuda_regression_metric.cpp
src/metric/cuda/cuda_pointwise_metric.cu
src/objective/cuda/cuda_binary_objective.cpp
src/objective/cuda/cuda_multiclass_objective.cpp
src/objective/cuda/cuda_rank_objective.cpp
src/objective/cuda/cuda_regression_objective.cpp
src/objective/cuda/cuda_binary_objective.cu
src/objective/cuda/cuda_multiclass_objective.cu
src/objective/cuda/cuda_rank_objective.cu
src/objective/cuda/cuda_regression_objective.cu
src/treelearner/cuda/cuda_best_split_finder.cpp
src/treelearner/cuda/cuda_data_partition.cpp
src/treelearner/cuda/cuda_histogram_constructor.cpp
src/treelearner/cuda/cuda_leaf_splits.cpp
src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
src/treelearner/cuda/cuda_best_split_finder.cu
src/treelearner/cuda/cuda_data_partition.cu
src/treelearner/cuda/cuda_gradient_discretizer.cu
src/treelearner/cuda/cuda_histogram_constructor.cu
src/treelearner/cuda/cuda_leaf_splits.cu
src/treelearner/cuda/cuda_single_gpu_tree_learner.cu
src/io/cuda/cuda_column_data.cu
src/io/cuda/cuda_tree.cu
src/io/cuda/cuda_column_data.cpp
src/io/cuda/cuda_metadata.cpp
src/io/cuda/cuda_row_data.cpp
src/io/cuda/cuda_tree.cpp
src/cuda/cuda_utils.cpp
src/cuda/cuda_algorithms.cu
)
if(USE_CUDA)
list(APPEND SOURCES ${LGBM_CUDA_SOURCES})
list(APPEND LGBM_SOURCES ${LGBM_CUDA_SOURCES})
endif()
add_library(lightgbm_objs OBJECT ${SOURCES})
add_library(lightgbm_objs OBJECT ${LGBM_SOURCES})
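# Aside (illustrative): with explicit source lists (unlike the old file(GLOB ...)), any
# newly added .cpp must also be registered above. A quick consistency check from a shell:
#   for f in $(git ls-files src | grep '\.cpp$'); do
#       grep -q "$f" CMakeLists.txt || echo "not listed in CMakeLists.txt: $f"
#   done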
if(BUILD_CLI)
add_executable(lightgbm src/main.cpp src/application/application.cpp)
@ -473,11 +518,22 @@ if(BUILD_STATIC_LIB)
else()
add_library(_lightgbm SHARED)
endif()
# R expects libraries of the form <project>.{dll,dylib,so}, not lib_<project>.{dll,dylib,so}
if(__BUILD_FOR_R)
set_target_properties(
_lightgbm
PROPERTIES
PREFIX ""
OUTPUT_NAME "lightgbm"
)
endif()
# LightGBM headers include openmp, cuda, R etc. headers,
# thus PUBLIC is required for building _lightgbm_swig target.
target_link_libraries(_lightgbm PUBLIC lightgbm_capi_objs lightgbm_objs)
if(MSVC)
if(MSVC AND NOT __BUILD_FOR_R)
set_target_properties(_lightgbm PROPERTIES OUTPUT_NAME "lib_lightgbm")
endif()
@ -487,10 +543,14 @@ if(USE_SWIG)
set_property(SOURCE swig/lightgbmlib.i PROPERTY SWIG_FLAGS "${swig_options}")
swig_add_library(_lightgbm_swig LANGUAGE java SOURCES swig/lightgbmlib.i)
swig_link_libraries(_lightgbm_swig _lightgbm)
# needed to ensure Linux build does not have lib prefix specified twice, e.g. liblib_lightgbm_swig
set_target_properties(_lightgbm_swig PROPERTIES PREFIX "")
# needed in some versions of CMake for VS and MinGW builds to ensure output dll has lib prefix
set_target_properties(_lightgbm_swig PROPERTIES OUTPUT_NAME "lib_lightgbm_swig")
set_target_properties(
_lightgbm_swig
PROPERTIES
# needed to ensure Linux build does not have lib prefix specified twice, e.g. liblib_lightgbm_swig
PREFIX ""
# needed in some versions of CMake for VS and MinGW builds to ensure output dll has lib prefix
OUTPUT_NAME "lib_lightgbm_swig"
)
if(WIN32)
if(MINGW OR CYGWIN)
add_custom_command(
@ -561,6 +621,12 @@ if(USE_SWIG)
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm.so"
com/microsoft/ml/lightgbm/linux/x86_64
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.so"
com/microsoft/ml/lightgbm/linux/x86_64
COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com
)
endif()
@ -571,7 +637,7 @@ if(USE_MPI)
endif()
if(USE_OPENMP)
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
target_link_libraries(lightgbm_objs PUBLIC OpenMP::OpenMP_CXX)
# c_api headers also includes OpenMP headers, thus compiling
# lightgbm_capi_objs needs include directory for OpenMP.
@ -597,23 +663,29 @@ if(__INTEGRATE_OPENCL)
endif()
if(USE_CUDA)
# Disable cmake warning about policy CMP0104. Refer to issue #3754 and PR #4268.
# Custom target properties does not propagate, thus we need to specify for
# each target that contains or depends on cuda source.
set_target_properties(lightgbm_objs PROPERTIES CUDA_ARCHITECTURES OFF)
set_target_properties(_lightgbm PROPERTIES CUDA_ARCHITECTURES OFF)
if(BUILD_CLI)
set_target_properties(lightgbm PROPERTIES CUDA_ARCHITECTURES OFF)
endif()
set_target_properties(lightgbm_objs PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(
lightgbm_objs
PROPERTIES
CUDA_ARCHITECTURES "${CUDA_ARCHS}"
CUDA_SEPARABLE_COMPILATION ON
)
set_target_properties(
_lightgbm
PROPERTIES
CUDA_ARCHITECTURES "${CUDA_ARCHS}"
CUDA_RESOLVE_DEVICE_SYMBOLS ON
)
# Device linking is not supported for object libraries.
# Thus we have to specify them on final targets.
if(BUILD_CLI)
set_target_properties(lightgbm PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
set_target_properties(
lightgbm
PROPERTIES
CUDA_ARCHITECTURES "${CUDA_ARCHS}"
CUDA_RESOLVE_DEVICE_SYMBOLS ON
)
endif()
set_target_properties(_lightgbm PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
# histograms are list of object libraries. Linking object library to other
# object libraries only gets usage requirements, the linked objects won't be
@ -624,10 +696,6 @@ if(USE_CUDA)
target_link_libraries(_lightgbm PRIVATE ${histograms})
endif()
if(USE_HDFS)
target_link_libraries(lightgbm_objs PUBLIC ${HDFS_CXX_LIBRARIES})
endif()
if(WIN32)
if(MINGW OR CYGWIN)
target_link_libraries(lightgbm_objs PUBLIC ws2_32 iphlpapi)
@ -664,21 +732,21 @@ if(BUILD_CPP_TEST)
set(LightGBM_TEST_HEADER_DIR ${PROJECT_SOURCE_DIR}/tests/cpp_tests)
include_directories(${LightGBM_TEST_HEADER_DIR})
file(GLOB CPP_TEST_SOURCES tests/cpp_tests/*.cpp)
set(
CPP_TEST_SOURCES
tests/cpp_tests/test_array_args.cpp
tests/cpp_tests/test_arrow.cpp
tests/cpp_tests/test_byte_buffer.cpp
tests/cpp_tests/test_chunked_array.cpp
tests/cpp_tests/test_common.cpp
tests/cpp_tests/test_main.cpp
tests/cpp_tests/test_serialize.cpp
tests/cpp_tests/test_single_row.cpp
tests/cpp_tests/test_stream.cpp
tests/cpp_tests/testutils.cpp
)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /permissive-")
set(
CompilerFlags
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
)
foreach(CompilerFlag ${CompilerFlags})
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
endforeach()
endif()
add_executable(testlightgbm ${CPP_TEST_SOURCES})
target_link_libraries(testlightgbm PRIVATE lightgbm_objs lightgbm_capi_objs GTest::GTest)
@ -695,6 +763,90 @@ if(__BUILD_FOR_PYTHON)
set(CMAKE_INSTALL_PREFIX "lightgbm")
endif()
# The macOS linker puts an absolute path to linked libraries in lib_lightgbm.dylib.
# This block overrides that information for LightGBM's OpenMP dependency, to allow
# finding that library in more places.
#
# This reduces the risk of runtime issues resulting from multiple {libgomp,libiomp,libomp}.dylib being loaded.
#
if(APPLE AND USE_OPENMP AND NOT BUILD_STATIC_LIB)
# store path to {libgomp,libiomp,libomp}.dylib found at build time in a variable
get_target_property(
OpenMP_LIBRARY_LOCATION
OpenMP::OpenMP_CXX
INTERFACE_LINK_LIBRARIES
)
# get just the filename of that path
# (to deal with the possibility that it might be 'libomp.dylib' or 'libgomp.dylib' or 'libiomp.dylib')
get_filename_component(
OpenMP_LIBRARY_NAME
${OpenMP_LIBRARY_LOCATION}
NAME
)
# get directory of that path
get_filename_component(
OpenMP_LIBRARY_DIR
${OpenMP_LIBRARY_LOCATION}
DIRECTORY
)
# get exact name of the library in a variable
get_target_property(
__LIB_LIGHTGBM_OUTPUT_NAME
_lightgbm
OUTPUT_NAME
)
if(NOT __LIB_LIGHTGBM_OUTPUT_NAME)
set(__LIB_LIGHTGBM_OUTPUT_NAME "lib_lightgbm")
endif()
if(CMAKE_SHARED_LIBRARY_SUFFIX_CXX)
set(
__LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX_CXX}"
CACHE INTERNAL "lightgbm shared library filename"
)
else()
set(
__LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}.dylib"
CACHE INTERNAL "lightgbm shared library filename"
)
endif()
# Override the absolute path to OpenMP with a relative one using @rpath.
#
# This also ensures that if a {libgomp,libiomp,libomp}.dylib has already been loaded, it'll just use that.
add_custom_command(
TARGET _lightgbm
POST_BUILD
COMMAND
install_name_tool
-change
${OpenMP_LIBRARY_LOCATION}
"@rpath/${OpenMP_LIBRARY_NAME}"
"${__LIB_LIGHTGBM_FILENAME}"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "Replacing hard-coded OpenMP install_name with '@rpath/${OpenMP_LIBRARY_NAME}'..."
)
# add RPATH entries to ensure the loader looks in the following, in the following order:
#
# - ${OpenMP_LIBRARY_DIR} (wherever find_package(OpenMP) found OpenMP at build time)
# - /opt/homebrew/opt/libomp/lib (where 'brew install' / 'brew link' puts libomp.dylib)
# - /opt/local/lib/libomp (where 'port install' puts libomp.dylib)
#
# compute the RPATH list in a helper variable first; if()/else() cannot appear inside set_target_properties()
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# with clang, libomp doesn't ship with the compiler and might be supplied separately
set(OpenMP_INSTALL_RPATH "${OpenMP_LIBRARY_DIR};/opt/homebrew/opt/libomp/lib;/opt/local/lib/libomp;")
else()
# with other compilers, OpenMP ships with the compiler (e.g. libgomp with gcc)
set(OpenMP_INSTALL_RPATH "${OpenMP_LIBRARY_DIR}")
endif()
set_target_properties(
_lightgbm
PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH "${OpenMP_INSTALL_RPATH}"
INSTALL_RPATH_USE_LINK_PATH FALSE
)
endif()
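# Aside (illustrative): the result can be verified on macOS with otool:
#   otool -L lib_lightgbm.dylib                       # expect '@rpath/libomp.dylib', not an absolute path
#   otool -l lib_lightgbm.dylib | grep -A 2 LC_RPATH  # expect the RPATH entries listed above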
install(
TARGETS _lightgbm
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
@ -703,5 +855,17 @@ install(
)
if(INSTALL_HEADERS)
install(DIRECTORY ${LightGBM_HEADER_DIR}/LightGBM DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
install(
DIRECTORY ${LightGBM_HEADER_DIR}/LightGBM
DESTINATION ${CMAKE_INSTALL_PREFIX}/include
)
install(
FILES ${FAST_DOUBLE_PARSER_INCLUDE_DIR}/fast_double_parser.h
DESTINATION ${CMAKE_INSTALL_PREFIX}/include/LightGBM/utils
)
install(
DIRECTORY ${FMT_INCLUDE_DIR}/
DESTINATION ${CMAKE_INSTALL_PREFIX}/include/LightGBM/utils
FILES_MATCHING PATTERN "*.h"
)
endif()
CONTRIBUTING.md
@ -6,7 +6,7 @@ Your help is very valuable to make it better for everyone.
## How to Contribute
- Check for the [Roadmap](https://github.com/microsoft/LightGBM/projects/1) and the [Feature Requests Hub](https://github.com/microsoft/LightGBM/issues/2302), and submit pull requests to address chosen issue. If you need development guideline, you can check the [Development Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Development-Guide.rst) or directly ask us in Issues/Pull Requests.
- Check the [Feature Requests Hub](https://github.com/microsoft/LightGBM/issues/2302), and submit pull requests to address chosen issue. If you need development guideline, you can check the [Development Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Development-Guide.rst) or directly ask us in Issues/Pull Requests.
- Contribute to the [tests](https://github.com/microsoft/LightGBM/tree/master/tests) to make it more reliable.
- Contribute to the [documentation](https://github.com/microsoft/LightGBM/tree/master/docs) to make it clearer for everyone.
- Contribute to the [examples](https://github.com/microsoft/LightGBM/tree/master/examples) to share your experience with other users.
.Rbuildignore
@ -8,6 +8,7 @@ AUTOCONF_UBUNTU_VERSION
^docs$
^.*\.dll
\.drone\.yml
^.*\.dylib
\.git
\.gitkeep$
^.*\.history
R-package/DESCRIPTION
@ -63,4 +63,4 @@ Imports:
utils
SystemRequirements:
~~CXXSTD~~
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
R-package/LICENSE
@ -1,2 +1,2 @@
YEAR: 2016
COPYRIGHT HOLDER: Microsoft Corporation
COPYRIGHT HOLDER: Microsoft Corporation
R-package/NAMESPACE
@ -62,4 +62,4 @@ importFrom(parallel,detectCores)
importFrom(stats,quantile)
importFrom(utils,modifyList)
importFrom(utils,read.delim)
useDynLib(lib_lightgbm , .registration = TRUE)
useDynLib(lightgbm , .registration = TRUE)
R-package/R/lgb.Booster.R
@ -307,6 +307,46 @@ Booster <- R6::R6Class(
},
# Number of trees per iteration
num_trees_per_iter = function() {
self$restore_handle()
trees_per_iter <- 1L
.Call(
LGBM_BoosterNumModelPerIteration_R
, private$handle
, trees_per_iter
)
return(trees_per_iter)
},
# Total number of trees
num_trees = function() {
self$restore_handle()
ntrees <- 0L
.Call(
LGBM_BoosterNumberOfTotalModel_R
, private$handle
, ntrees
)
return(ntrees)
},
# Number of iterations (= rounds)
num_iter = function() {
ntrees <- self$num_trees()
trees_per_iter <- self$num_trees_per_iter()
return(ntrees / trees_per_iter)
},
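# e.g. for a 3-class multiclass model trained for 10 rounds, LightGBM grows one
# tree per class per round: num_trees_per_iter() is 3, num_trees() is 30, and
# num_iter() is 30 / 3 = 10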
# Get upper bound
upper_bound = function() {
@ -416,7 +456,12 @@ Booster <- R6::R6Class(
},
# Save model
save_model = function(filename, num_iteration = NULL, feature_importance_type = 0L) {
save_model = function(
filename
, num_iteration = NULL
, feature_importance_type = 0L
, start_iteration = 1L
) {
self$restore_handle()
@ -432,12 +477,18 @@ Booster <- R6::R6Class(
, as.integer(num_iteration)
, as.integer(feature_importance_type)
, filename
, as.integer(start_iteration) - 1L # Turn to 0-based
)
return(invisible(self))
},
save_model_to_string = function(num_iteration = NULL, feature_importance_type = 0L, as_char = TRUE) {
save_model_to_string = function(
num_iteration = NULL
, feature_importance_type = 0L
, as_char = TRUE
, start_iteration = 1L
) {
self$restore_handle()
@ -450,6 +501,7 @@ Booster <- R6::R6Class(
, private$handle
, as.integer(num_iteration)
, as.integer(feature_importance_type)
, as.integer(start_iteration) - 1L # Turn to 0-based
)
if (as_char) {
@ -461,7 +513,9 @@ Booster <- R6::R6Class(
},
# Dump model in memory
dump_model = function(num_iteration = NULL, feature_importance_type = 0L) {
dump_model = function(
num_iteration = NULL, feature_importance_type = 0L, start_iteration = 1L
) {
self$restore_handle()
@ -474,6 +528,7 @@ Booster <- R6::R6Class(
, private$handle
, as.integer(num_iteration)
, as.integer(feature_importance_type)
, as.integer(start_iteration) - 1L # Turn to 0-based
)
return(model_str)
@ -1288,8 +1343,13 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' @title Save LightGBM model
#' @description Save LightGBM model
#' @param booster Object of class \code{lgb.Booster}
#' @param filename saved filename
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
#' @param filename Saved filename
#' @param num_iteration Number of iterations to save, NULL or <= 0 means use best iteration
#' @param start_iteration Index (1-based) of the first boosting round to save.
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
#' means "save the fifth, sixth, and seventh tree"
#'
#' \emph{New in version 4.4.0}
#'
#' @return lgb.Booster
#'
@ -1322,7 +1382,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' lgb.save(model, tempfile(fileext = ".txt"))
#' }
#' @export
lgb.save <- function(booster, filename, num_iteration = NULL) {
lgb.save <- function(
booster, filename, num_iteration = NULL, start_iteration = 1L
) {
if (!.is_Booster(x = booster)) {
stop("lgb.save: booster should be an ", sQuote("lgb.Booster"))
@ -1338,6 +1400,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
invisible(booster$save_model(
filename = filename
, num_iteration = num_iteration
, start_iteration = start_iteration
))
)
@ -1347,7 +1410,12 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' @title Dump LightGBM model to json
#' @description Dump LightGBM model to json
#' @param booster Object of class \code{lgb.Booster}
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
#' @param num_iteration Number of iterations to be dumped. NULL or <= 0 means use best iteration
#' @param start_iteration Index (1-based) of the first boosting round to dump.
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
#' means "dump the fifth, sixth, and seventh tree"
#'
#' \emph{New in version 4.4.0}
#'
#' @return json format of model
#'
@ -1380,14 +1448,18 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' json_model <- lgb.dump(model)
#' }
#' @export
lgb.dump <- function(booster, num_iteration = NULL) {
lgb.dump <- function(booster, num_iteration = NULL, start_iteration = 1L) {
if (!.is_Booster(x = booster)) {
stop("lgb.dump: booster should be an ", sQuote("lgb.Booster"))
}
# Return booster at requested iteration
return(booster$dump_model(num_iteration = num_iteration))
return(
booster$dump_model(
num_iteration = num_iteration, start_iteration = start_iteration
)
)
}
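# A quick sketch of the new 'start_iteration' argument (assumes a fitted
# regression Booster 'model' with at least 7 boosting rounds):
#
#   lgb.save(model, tempfile(fileext = ".txt"), num_iteration = 3L, start_iteration = 5L)
#   json_model <- lgb.dump(model, num_iteration = 3L, start_iteration = 5L)
#
# both calls operate on the fifth, sixth, and seventh trees only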

View file

@ -170,7 +170,12 @@ Dataset <- R6::R6Class(
# Check that no categorical_feature index exceeds the number of columns
data_is_not_filename <- !is.character(private$raw_data)
if (data_is_not_filename && max(private$categorical_feature) > ncol(private$raw_data)) {
if (
data_is_not_filename
&& !is.null(private$raw_data)
&& is.null(private$used_indices)
&& max(private$categorical_feature) > ncol(private$raw_data)
) {
stop(
"lgb.Dataset.construct: supplied a too large value in categorical_feature: "
, max(private$categorical_feature)
@ -753,8 +758,13 @@ Dataset <- R6::R6Class(
)
#' @title Construct \code{lgb.Dataset} object
#' @description Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
#' or local file (that was created previously by saving an \code{lgb.Dataset}).
#' @description LightGBM does not train on raw data.
#' It discretizes continuous features into histogram bins, tries to
#' combine categorical features, and automatically handles missing and
#' infinite values.
#'
#' The \code{Dataset} class handles that preprocessing, and holds that
#' alternative representation of the input data.
#' @inheritParams lgb_shared_dataset_params
#' @param data a \code{matrix} object, a \code{dgCMatrix} object,
#' a character representing a path to a text file (CSV, TSV, or LibSVM),
@ -1049,6 +1059,9 @@ dimnames.lgb.Dataset <- function(x) {
#' @title Slice a dataset
#' @description Get a new \code{lgb.Dataset} containing the specified rows of
#' the original \code{lgb.Dataset} object
#'
#' \emph{Renamed from} \code{slice()} \emph{in 4.4.0}
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param idxset an integer vector of indices of rows needed
#' @return constructed sub dataset

View file

@ -25,8 +25,8 @@ CVBooster <- R6::R6Class(
#' @description Cross validation logic used by LightGBM
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
#' @param weight vector of response values. If not NULL, will set to dataset
#' @param label Deprecated. See "Deprecated Arguments" section below.
#' @param weight Deprecated. See "Deprecated Arguments" section below.
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
@ -36,10 +36,8 @@ CVBooster <- R6::R6Class(
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets
@ -70,6 +68,13 @@ CVBooster <- R6::R6Class(
#' , nfold = 3L
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
#' to argument \code{'data'}. It will also remove support for passing arguments
#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
#'
#' @importFrom data.table data.table setorderv
#' @export
lgb.cv <- function(params = list()
@ -102,12 +107,32 @@ lgb.cv <- function(params = list()
# If 'data' is not an lgb.Dataset, try to construct one using 'label'
if (!.is_Dataset(x = data)) {
warning(paste0(
"Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
, "Either pass an lgb.Dataset object, or use lightgbm()."
))
if (is.null(label)) {
stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
}
data <- lgb.Dataset(data = data, label = label)
}
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.cv")
}
if ("label" %in% args) {
.emit_dataset_kwarg_warning("label", "lgb.cv")
}
if ("weight" %in% args) {
.emit_dataset_kwarg_warning("weight", "lgb.cv")
}
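# (names(match.call()) contains only the arguments explicitly supplied in the
# call, so these warnings fire on explicit use and never on the defaults)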
# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
@ -270,7 +295,9 @@ lgb.cv <- function(params = list()
# Cannot use early stopping with 'dart' boosting
if (using_dart) {
warning("Early stopping is not available in 'dart' mode.")
if (using_early_stopping) {
warning("Early stopping is not available in 'dart' mode.")
}
using_early_stopping <- FALSE
# Remove the cb_early_stop() function if it was passed in to callbacks

View file

@ -1,9 +1,14 @@
#' @name lgb.model.dt.tree
#' @title Parse a LightGBM model json dump
#' @description Parse a LightGBM model json dump into a \code{data.table} structure.
#' @param model object of class \code{lgb.Booster}
#' @param num_iteration number of iterations you want to predict with. NULL or
#' <= 0 means use best iteration
#' @param model object of class \code{lgb.Booster}.
#' @param num_iteration Number of iterations to include. NULL or <= 0 means use best iteration.
#' @param start_iteration Index (1-based) of the first boosting round to include in the output.
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
#' means "return information about the fifth, sixth, and seventh trees".
#'
#' \emph{New in version 4.4.0}
#'
#' @return
#' A \code{data.table} with detailed information about model trees' nodes and leaves.
#'
@ -51,9 +56,15 @@
#' @importFrom data.table := rbindlist
#' @importFrom jsonlite fromJSON
#' @export
lgb.model.dt.tree <- function(model, num_iteration = NULL) {
lgb.model.dt.tree <- function(
model, num_iteration = NULL, start_iteration = 1L
) {
json_model <- lgb.dump(booster = model, num_iteration = num_iteration)
json_model <- lgb.dump(
booster = model
, num_iteration = num_iteration
, start_iteration = start_iteration
)
parsed_json_model <- jsonlite::fromJSON(
txt = json_model
@ -84,12 +95,21 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
tree_dt[, split_feature := feature_names]
return(tree_dt)
}
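# e.g. to inspect only the trees from rounds 5-7 of a fitted Booster 'model':
#   lgb.model.dt.tree(model, num_iteration = 3L, start_iteration = 5L)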
#' @importFrom data.table := data.table rbindlist
.single_tree_parse <- function(lgb_tree) {
tree_info_cols <- c(
"split_index"
, "split_feature"
, "split_gain"
, "threshold"
, "decision_type"
, "default_left"
, "internal_value"
, "internal_count"
)
# Traverse tree function
pre_order_traversal <- function(env = NULL, tree_node_leaf, current_depth = 0L, parent_index = NA_integer_) {
@ -97,7 +117,8 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
if (is.null(env)) {
# Setup initial default data.table with default types
env <- new.env(parent = emptyenv())
env$single_tree_dt <- data.table::data.table(
env$single_tree_dt <- list()
env$single_tree_dt[[1L]] <- data.table::data.table(
tree_index = integer(0L)
, depth = integer(0L)
, split_index = integer(0L)
@ -127,19 +148,10 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
if (!is.null(tree_node_leaf$split_index)) {
# update data.table
env$single_tree_dt <- data.table::rbindlist(l = list(env$single_tree_dt,
c(tree_node_leaf[c("split_index",
"split_feature",
"split_gain",
"threshold",
"decision_type",
"default_left",
"internal_value",
"internal_count")],
"depth" = current_depth,
"node_parent" = parent_index)),
use.names = TRUE,
fill = TRUE)
env$single_tree_dt[[length(env$single_tree_dt) + 1L]] <- c(
tree_node_leaf[tree_info_cols]
, list("depth" = current_depth, "node_parent" = parent_index)
)
# Traverse tree again both left and right
pre_order_traversal(
@ -154,31 +166,27 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
, current_depth = current_depth + 1L
, parent_index = tree_node_leaf$split_index
)
} else if (!is.null(tree_node_leaf$leaf_index)) {
# update data.table
env$single_tree_dt <- data.table::rbindlist(l = list(env$single_tree_dt,
c(tree_node_leaf[c("leaf_index",
"leaf_value",
"leaf_count")],
"depth" = current_depth,
"leaf_parent" = parent_index)),
use.names = TRUE,
fill = TRUE)
# update list
env$single_tree_dt[[length(env$single_tree_dt) + 1L]] <- c(
tree_node_leaf[c("leaf_index", "leaf_value", "leaf_count")]
, list("depth" = current_depth, "leaf_parent" = parent_index)
)
}
}
return(env$single_tree_dt)
}
# Traverse structure
single_tree_dt <- pre_order_traversal(tree_node_leaf = lgb_tree$tree_structure)
# Traverse structure and rowbind everything
single_tree_dt <- data.table::rbindlist(
pre_order_traversal(tree_node_leaf = lgb_tree$tree_structure)
, use.names = TRUE
, fill = TRUE
)
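# (accumulating rows in a list and calling rbindlist() once avoids the
# quadratic copying of growing a data.table by rbindlist()-ing at every node)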
# Store index
single_tree_dt[, tree_index := lgb_tree$tree_index]
return(single_tree_dt)
}

View file

@ -6,10 +6,8 @@
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the
@ -43,6 +41,13 @@
#' , early_stopping_rounds = 3L
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will remove support for passing arguments
#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to
#' \code{lgb.Dataset} instead.
#'
#' @export
lgb.train <- function(params = list(),
data,
@ -78,6 +83,16 @@ lgb.train <- function(params = list(),
}
}
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.train")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.train")
}
# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
@ -243,7 +258,9 @@ lgb.train <- function(params = list(),
# Cannot use early stopping with 'dart' boosting
if (using_dart) {
warning("Early stopping is not available in 'dart' mode.")
if (using_early_stopping) {
warning("Early stopping is not available in 'dart' mode.")
}
using_early_stopping <- FALSE
# Remove the cb_early_stop() function if it was passed in to callbacks

View file

@ -144,6 +144,12 @@ NULL
#'
#' \emph{New in version 4.0.0}
#'
#' @param colnames Character vector of feature names. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
#'
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
#' \itemize{
#' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@ -152,10 +158,6 @@ NULL
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
#' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
#' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
#' \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets}
#' }
@ -176,6 +178,8 @@ lightgbm <- function(data,
objective = "auto",
init_score = NULL,
num_threads = NULL,
colnames = NULL,
categorical_feature = NULL,
...) {
# validate inputs early to avoid unnecessary computation
@ -221,7 +225,14 @@ lightgbm <- function(data,
# Check whether data is lgb.Dataset, if not then create lgb.Dataset manually
if (!.is_Dataset(x = dtrain)) {
dtrain <- lgb.Dataset(data = data, label = label, weight = weights, init_score = init_score)
dtrain <- lgb.Dataset(
data = data
, label = label
, weight = weights
, init_score = init_score
, categorical_feature = categorical_feature
, colnames = colnames
)
}
train_args <- list(
@ -325,7 +336,7 @@ NULL
#' @import methods
#' @importFrom Matrix Matrix
#' @importFrom R6 R6Class
#' @useDynLib lib_lightgbm , .registration = TRUE
#' @useDynLib lightgbm , .registration = TRUE
NULL
# Suppress false positive warnings from R CMD CHECK about

View file

@ -59,68 +59,66 @@
}
# [description]
#
# Besides applying checks, this function
#
# 1. turns feature *names* into 1-based integer positions, then
# 2. adds an extra list element with skipped features, then
# 3. turns 1-based integer positions into 0-based positions, and finally
# 4. collapses the values of each list element into a string like "[0, 1]".
#
.check_interaction_constraints <- function(interaction_constraints, column_names) {
# Convert interaction constraints to feature numbers
string_constraints <- list()
if (!is.null(interaction_constraints)) {
if (!methods::is(interaction_constraints, "list")) {
stop("interaction_constraints must be a list")
}
constraint_is_character_or_numeric <- sapply(
X = interaction_constraints
, FUN = function(x) {
return(is.character(x) || is.numeric(x))
}
)
if (!all(constraint_is_character_or_numeric)) {
stop("every element in interaction_constraints must be a character vector or numeric vector")
}
for (constraint in interaction_constraints) {
# Check for character name
if (is.character(constraint)) {
constraint_indices <- as.integer(match(constraint, column_names) - 1L)
# Provided indices, but some indices are not existing?
if (sum(is.na(constraint_indices)) > 0L) {
stop(
"supplied an unknown feature in interaction_constraints "
, sQuote(constraint[is.na(constraint_indices)])
)
}
} else {
# Check that constraint indices are at most number of features
if (max(constraint) > length(column_names)) {
stop(
"supplied a too large value in interaction_constraints: "
, max(constraint)
, " but only "
, length(column_names)
, " features"
)
}
# Store indices as [0, n-1] indexed instead of [1, n] indexed
constraint_indices <- as.integer(constraint - 1L)
}
# Convert constraint to string
constraint_string <- paste0("[", paste0(constraint_indices, collapse = ","), "]")
string_constraints <- append(string_constraints, constraint_string)
}
if (is.null(interaction_constraints)) {
return(list())
}
if (!identical(class(interaction_constraints), "list")) {
stop("interaction_constraints must be a list")
}
return(string_constraints)
column_indices <- seq_along(column_names)
# Convert feature names to 1-based integer positions and apply checks
for (j in seq_along(interaction_constraints)) {
constraint <- interaction_constraints[[j]]
if (is.character(constraint)) {
constraint_indices <- match(constraint, column_names)
} else if (is.numeric(constraint)) {
constraint_indices <- as.integer(constraint)
} else {
stop("every element in interaction_constraints must be a character vector or numeric vector")
}
# Features outside range?
bad <- !(constraint_indices %in% column_indices)
if (any(bad)) {
stop(
"unknown feature(s) in interaction_constraints: "
, toString(sQuote(constraint[bad], q = "'"))
)
}
interaction_constraints[[j]] <- constraint_indices
}
# Add missing features as new interaction set
remaining_indices <- setdiff(
column_indices, sort(unique(unlist(interaction_constraints)))
)
if (length(remaining_indices) > 0L) {
interaction_constraints <- c(
interaction_constraints, list(remaining_indices)
)
}
# Turn indices 0-based and convert to string
for (j in seq_along(interaction_constraints)) {
interaction_constraints[[j]] <- paste0(
"[", paste0(interaction_constraints[[j]] - 1L, collapse = ","), "]"
)
}
return(interaction_constraints)
}
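# A worked example with hypothetical inputs, given
# column_names = c("a", "b", "c", "d"):
#   .check_interaction_constraints(list(c("a", "b"), 3L), column_names)
#   returns list("[0,1]", "[2]", "[3]")
# ("d" is unmentioned, so it becomes its own interaction set before the
# indices are converted to 0-based strings)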
@ -260,3 +258,19 @@
return(a == b)
}
}
# ref: https://github.com/microsoft/LightGBM/issues/6435
.emit_dataset_kwarg_warning <- function(argname, calling_function) {
msg <- sprintf(
paste0(
"Argument '%s' to %s() is deprecated and will be removed in a future release. "
, "Set '%s' with lgb.Dataset() instead. "
, "See https://github.com/microsoft/LightGBM/issues/6435."
)
, argname
, calling_function
, argname
)
warning(msg)
return(invisible(NULL))
}
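# e.g. .emit_dataset_kwarg_warning("weight", "lgb.cv") warns:
#   Argument 'weight' to lgb.cv() is deprecated and will be removed in a
#   future release. Set 'weight' with lgb.Dataset() instead.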

View file

@ -142,7 +142,7 @@ Rscript build_r.R --use-msys2
#### Mac OS Preparation
You can perform installation either with **Apple Clang** or **gcc**. In case you prefer **Apple Clang**, you should install **OpenMP** (details for installation can be found in [Installation Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#apple-clang)) first and **CMake** version 3.16 or higher is required. In case you prefer **gcc**, you need to install it (details for installation can be found in [Installation Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#gcc)) and set some environment variables to tell R to use `gcc` and `g++`. If you install these from Homebrew, your versions of `g++` and `gcc` are most likely in `/usr/local/bin`, as shown below.
You can perform installation either with **Apple Clang** or **gcc**. In case you prefer **Apple Clang**, you should install **OpenMP** (details for installation can be found in [Installation Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#apple-clang)) first. In case you prefer **gcc**, you need to install it (details for installation can be found in [Installation Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#gcc)) and set some environment variables to tell R to use `gcc` and `g++`. If you install these from Homebrew, your versions of `g++` and `gcc` are most likely in `/usr/local/bin`, as shown below.
```
# replace 8 with version of gcc installed on your machine
@ -221,7 +221,7 @@ CRAN does not prepare precompiled binaries for Linux, and as of this writing nei
### Installing from a Pre-compiled lib_lightgbm <a name="lib_lightgbm"></a>
Previous versions of LightGBM offered the ability to first compile the C++ library (`lib_lightgbm.so` or `lib_lightgbm.dll`) and then build an R package that wraps it.
Previous versions of LightGBM offered the ability to first compile the C++ library (`lib_lightgbm.{dll,dylib,so}`) and then build an R package that wraps it.
As of version 3.0.0, this is no longer supported. If building from source is difficult for you, please [open an issue](https://github.com/microsoft/LightGBM/issues).

18
R-package/configure vendored
View file

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for lightgbm 4.3.0.99.
# Generated by GNU Autoconf 2.71 for lightgbm 4.5.0.99.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='lightgbm'
PACKAGE_TARNAME='lightgbm'
PACKAGE_VERSION='4.3.0.99'
PACKAGE_STRING='lightgbm 4.3.0.99'
PACKAGE_VERSION='4.5.0.99'
PACKAGE_STRING='lightgbm 4.5.0.99'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@ -1211,7 +1211,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures lightgbm 4.3.0.99 to adapt to many kinds of systems.
\`configure' configures lightgbm 4.5.0.99 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1273,7 +1273,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of lightgbm 4.3.0.99:";;
short | recursive ) echo "Configuration of lightgbm 4.5.0.99:";;
esac
cat <<\_ACEOF
@ -1341,7 +1341,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
lightgbm configure 4.3.0.99
lightgbm configure 4.5.0.99
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@ -1378,7 +1378,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by lightgbm $as_me 4.3.0.99, which was
It was created by lightgbm $as_me 4.5.0.99, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@ -2454,7 +2454,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by lightgbm $as_me 4.3.0.99, which was
This file was extended by lightgbm $as_me 4.5.0.99, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -2509,7 +2509,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
lightgbm config.status 4.3.0.99
lightgbm config.status 4.5.0.99
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View file

@ -1,5 +1,38 @@
# CRAN Submission History
## v4.5.0 - Submission 1 - (July 25, 2024)
### CRAN response
Accepted to CRAN
### Maintainer Notes
This release was a response to a request from CRAN.
On July 4, 2024, CRAN notified us that the following compiler warnings raised by `gcc` 14 needed to be fixed by August 3, 2024.
```text
Result: WARN
Found the following significant warnings:
io/dense_bin.hpp:617:27: warning: template-id not allowed for constructor in C++20 [-Wtemplate-id-cdtor]
io/multi_val_dense_bin.hpp:346:26: warning: template-id not allowed for constructor in C++20 [-Wtemplate-id-cdtor]
io/multi_val_sparse_bin.hpp:433:36: warning: template-id not allowed for constructor in C++20 [-Wtemplate-id-cdtor]
io/sparse_bin.hpp:785:19: warning: template-id not allowed for constructor in C++20 [-Wtemplate-id-cdtor]
See /data/gannet/ripley/R/packages/tests-devel/lightgbm.Rcheck/00install.out for details.
```
This release contains fixes for those issues.
## v4.4.0 - Submission 1 - (June 14, 2024)
### CRAN response
Accepted to CRAN
### Maintainer Notes
This was a standard release of `{lightgbm}`, not intended to fix any particular R-specific issues.
## v4.3.0 - Submission 1 - (January 18, 2024)
### CRAN response
@ -14,7 +47,7 @@ warning was not fixed within 14 days.
```text
/usr/local/clang-trunk/bin/../include/c++/v1/__fwd/string_view.h:22:41:
warning: 'char_traits<fmt::detail::char8_type>' is deprecated:
char_traits<T> for T not equal to char, wchar_t, char8_t, char16_t or char32_t is non-standard and is provided for a temporary period.
char_traits<T> for T not equal to char, wchar_t, char8_t, char16_t or char32_t is non-standard and is provided for a temporary period.
It will be removed in LLVM 19, so please migrate off of it. [-Wdeprecated-declarations]
```

View file

@ -60,8 +60,12 @@ second group, etc.}
constructed dataset
}
\description{
Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
or local file (that was created previously by saving an \code{lgb.Dataset}).
LightGBM does not train on raw data.
It discretizes continuous features into histogram bins, tries to
combine categorical features, and automatically handles missing and
infinite values.

The \code{Dataset} class handles that preprocessing, and holds that
alternative representation of the input data.
}
\examples{
\donttest{

View file

@ -41,9 +41,9 @@ may allow you to pass other types of data like \code{matrix} and then separately
\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
\item{label}{Deprecated. See "Deprecated Arguments" section below.}
\item{weight}{vector of response values. If not NULL, will set to dataset}
\item{weight}{Deprecated. See "Deprecated Arguments" section below.}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
@ -103,11 +103,9 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
@ -133,6 +131,14 @@ a trained model \code{lgb.CVBooster}.
\description{
Cross validation logic used by LightGBM
}
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
to argument \code{'data'}. It will also remove support for passing arguments
\code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
}
\section{Early Stopping}{
@ -171,4 +177,5 @@ model <- lgb.cv(
, nfold = 3L
)
}
}

View file

@ -4,12 +4,18 @@
\alias{lgb.dump}
\title{Dump LightGBM model to json}
\usage{
lgb.dump(booster, num_iteration = NULL)
lgb.dump(booster, num_iteration = NULL, start_iteration = 1L)
}
\arguments{
\item{booster}{Object of class \code{lgb.Booster}}
\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}
\item{num_iteration}{Number of iterations to be dumped. NULL or <= 0 means use best iteration}
\item{start_iteration}{Index (1-based) of the first boosting round to dump.
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "dump the fifth, sixth, and seventh tree"
\emph{New in version 4.4.0}}
}
\value{
json format of model

View file

@ -4,13 +4,18 @@
\alias{lgb.model.dt.tree}
\title{Parse a LightGBM model json dump}
\usage{
lgb.model.dt.tree(model, num_iteration = NULL)
lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L)
}
\arguments{
\item{model}{object of class \code{lgb.Booster}}
\item{model}{object of class \code{lgb.Booster}.}
\item{num_iteration}{number of iterations you want to predict with. NULL or
<= 0 means use best iteration}
\item{num_iteration}{Number of iterations to include. NULL or <= 0 means use best iteration.}
\item{start_iteration}{Index (1-based) of the first boosting round to include in the output.
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "return information about the fifth, sixth, and seventh trees".
\emph{New in version 4.4.0}}
}
\value{
A \code{data.table} with detailed information about model trees' nodes and leaves.

View file

@ -4,14 +4,20 @@
\alias{lgb.save}
\title{Save LightGBM model}
\usage{
lgb.save(booster, filename, num_iteration = NULL)
lgb.save(booster, filename, num_iteration = NULL, start_iteration = 1L)
}
\arguments{
\item{booster}{Object of class \code{lgb.Booster}}
\item{filename}{saved filename}
\item{filename}{Saved filename}
\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}
\item{num_iteration}{Number of iterations to save, NULL or <= 0 means use best iteration}
\item{start_iteration}{Index (1-based) of the first boosting round to save.
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "save the fifth, sixth, and seventh tree"
\emph{New in version 4.4.0}}
}
\value{
lgb.Booster

View file

@ -17,6 +17,8 @@ constructed sub dataset
\description{
Get a new \code{lgb.Dataset} containing the specified rows of
the original \code{lgb.Dataset} object
\emph{Renamed from} \code{slice()} \emph{in 4.4.0}
}
\examples{
\donttest{

View file

@ -82,11 +82,9 @@ printing of evaluation during training}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
@ -111,6 +109,14 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}},
this function is focused on performance (e.g. speed, memory efficiency). It is also
less likely to have breaking API changes in new releases than \code{\link{lightgbm}}.
}
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will remove support for passing arguments
\code{'categorical_feature'} and \code{'colnames'}. Pass those things to
\code{lgb.Dataset} instead.
}
\section{Early Stopping}{
@ -154,4 +160,5 @@ model <- lgb.train(
, early_stopping_rounds = 3L
)
}
}

View file

@ -19,6 +19,8 @@ lightgbm(
objective = "auto",
init_score = NULL,
num_threads = NULL,
colnames = NULL,
categorical_feature = NULL,
...
)
}
@ -96,6 +98,13 @@ set to the iteration number of the best iteration.}
\emph{New in version 4.0.0}}
\item{colnames}{Character vector of feature names. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").
Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
\itemize{
\item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@ -104,10 +113,6 @@ set to the iteration number of the best iteration.}
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
\item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
\item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
say "the first and tenth columns").}
\item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
into a predictor model which frees up memory and the original datasets}
}}

View file

@ -63,7 +63,6 @@ reference:
- '`dimnames.lgb.Dataset`'
- '`get_field`'
- '`set_field`'
- '`slice`'
- '`lgb.Dataset`'
- '`lgb.Dataset.construct`'
- '`lgb.Dataset.create.valid`'
@ -71,6 +70,7 @@ reference:
- '`lgb.Dataset.set.categorical`'
- '`lgb.Dataset.set.reference`'
- '`lgb.convert_with_rules`'
- '`lgb.slice.Dataset`'
- title: Machine Learning
desc: Train models with LightGBM and then use them to make predictions on new data
contents:
@ -78,6 +78,7 @@ reference:
- '`lgb.train`'
- '`predict.lgb.Booster`'
- '`lgb.cv`'
- '`lgb.configure_fast_predict`'
- title: Saving / Loading Models
desc: Save and load LightGBM models
contents:
@ -85,6 +86,9 @@ reference:
- '`lgb.save`'
- '`lgb.load`'
- '`lgb.model.dt.tree`'
- '`lgb.drop_serialized`'
- '`lgb.make_serializable`'
- '`lgb.restore_handle`'
- title: Model Interpretation
desc: Analyze your models
contents:
@ -93,3 +97,10 @@ reference:
- '`lgb.interprete`'
- '`lgb.plot.importance`'
- '`lgb.plot.interpretation`'
- '`print.lgb.Booster`'
- '`summary.lgb.Booster`'
- title: Multithreading Control
desc: Manage degree of parallelism used by LightGBM
contents:
- '`getLGBMThreads`'
- '`setLGBMThreads`'

Просмотреть файл

@ -1,5 +1,7 @@
#!/bin/bash
set -e -E -u -o pipefail
# recreates 'configure' from 'configure.ac'
# this script should run on Ubuntu 22.04
AUTOCONF_VERSION=$(cat R-package/AUTOCONF_UBUNTU_VERSION)

View file

@ -15,14 +15,8 @@ if (.Machine$sizeof.pointer != 8L) {
stop("LightGBM only supports 64-bit R, please check the version of R and Rtools.")
}
R_int_UUID <- .Internal(internalsID())
R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10.0
if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262"
|| R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")) {
warning("Warning: unmatched R_INTERNALS_UUID, may not run normally.")
}
# Get some paths
source_dir <- file.path(R_PACKAGE_SOURCE, "src", fsep = "/")
build_dir <- file.path(source_dir, "build", fsep = "/")
@ -133,7 +127,13 @@ if (WINDOWS && use_visual_studio) {
}
# Prepare installation steps
cmake_args <- NULL
cmake_args <- c(
"-D__BUILD_FOR_R=ON"
# pass in R version, to help FindLibR find the R library
, sprintf("-DCMAKE_R_VERSION='%s.%s'", R.Version()[["major"]], R.Version()[["minor"]])
# ensure CMake build respects how R is configured (`R CMD config SHLIB_EXT`)
, sprintf("-DCMAKE_SHARED_LIBRARY_SUFFIX_CXX='%s'", SHLIB_EXT)
)
build_cmd <- "make"
build_args <- c("_lightgbm", make_args_from_build_script)
lib_folder <- file.path(source_dir, fsep = "/")
@ -174,16 +174,6 @@ windows_makefile_generator <- WINDOWS_BUILD_TOOLS[[windows_toolchain]][["makefil
if (use_gpu) {
cmake_args <- c(cmake_args, "-DUSE_GPU=ON")
}
cmake_args <- c(cmake_args, "-D__BUILD_FOR_R=ON")
# Pass in R version, used to help find R executable for linking
R_version_string <- paste(
R.Version()[["major"]]
, R.Version()[["minor"]]
, sep = "."
)
r_version_arg <- sprintf("-DCMAKE_R_VERSION='%s'", R_version_string)
cmake_args <- c(cmake_args, r_version_arg)
# the checks below might already run `cmake -G`. If they do, set this flag
# to TRUE to avoid re-running it later
@ -225,9 +215,9 @@ if (!makefiles_already_generated) {
}
# build the library
message("Building lib_lightgbm")
message(paste0("Building lightgbm", SHLIB_EXT))
.run_shell_command(build_cmd, build_args)
src <- file.path(lib_folder, paste0("lib_lightgbm", SHLIB_EXT), fsep = "/")
src <- file.path(lib_folder, paste0("lightgbm", SHLIB_EXT), fsep = "/")
# Packages with install.libs.R need to copy some artifacts into the
# expected places in the package structure.
@ -245,7 +235,7 @@ if (file.exists(src)) {
}
} else {
stop(paste0("Cannot find lib_lightgbm", SHLIB_EXT))
stop(paste0("Cannot find lightgbm", SHLIB_EXT))
}
# clean up the "build" directory

View file

@ -11,9 +11,16 @@
#include <LightGBM/utils/text_reader.h>
#include <R_ext/Rdynload.h>
#include <R_ext/Altrep.h>
#ifndef R_NO_REMAP
#define R_NO_REMAP
#endif
#ifndef R_USE_C99_IN_CXX
#define R_USE_C99_IN_CXX
#endif
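// defined ahead of the R headers that honor them: R_NO_REMAP forces use of the
// 'Rf_'-prefixed API names (no un-prefixed aliases like 'error'), and
// R_USE_C99_IN_CXX exposes the C99-style declarations when compiling as C++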
#include <R_ext/Error.h>
#include <string>
@ -24,6 +31,150 @@
#include <utility>
#include <vector>
#include <algorithm>
#include <type_traits>
R_altrep_class_t lgb_altrepped_char_vec;
R_altrep_class_t lgb_altrepped_int_arr;
R_altrep_class_t lgb_altrepped_dbl_arr;
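// these ALTREP classes let the package hand buffers allocated on the C++ side
// (model strings, prediction arrays) back to R as vectors without copying;
// R later frees them through the finalizers registered below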
template <class T>
void delete_cpp_array(SEXP R_ptr) {
T *ptr_to_cpp_obj = static_cast<T*>(R_ExternalPtrAddr(R_ptr));
delete[] ptr_to_cpp_obj;
R_ClearExternalPtr(R_ptr);
}
void delete_cpp_char_vec(SEXP R_ptr) {
std::vector<char> *ptr_to_cpp_obj = static_cast<std::vector<char>*>(R_ExternalPtrAddr(R_ptr));
delete ptr_to_cpp_obj;
R_ClearExternalPtr(R_ptr);
}
// Note: MSVC has issues with Altrep classes, so they are disabled for it.
// See: https://github.com/microsoft/LightGBM/pull/6213#issuecomment-2111025768
#ifdef _MSC_VER
# define LGB_NO_ALTREP
#endif
#ifndef LGB_NO_ALTREP
SEXP make_altrepped_raw_vec(void *void_ptr) {
std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP R_raw = Rf_protect(R_new_altrep(lgb_altrepped_char_vec, R_NilValue, R_NilValue));
R_SetExternalPtrAddr(R_ptr, ptr_to_cpp_vec->get());
R_RegisterCFinalizerEx(R_ptr, delete_cpp_char_vec, TRUE);
ptr_to_cpp_vec->release();
R_set_altrep_data1(R_raw, R_ptr);
Rf_unprotect(2);
return R_raw;
}
#else
SEXP make_r_raw_vec(void *void_ptr) {
std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
R_xlen_t len = ptr_to_cpp_vec->get()->size();
SEXP out = Rf_protect(Rf_allocVector(RAWSXP, len));
std::copy(ptr_to_cpp_vec->get()->begin(), ptr_to_cpp_vec->get()->end(), reinterpret_cast<char*>(RAW(out)));
Rf_unprotect(1);
return out;
}
#define make_altrepped_raw_vec make_r_raw_vec
#endif
std::vector<char>* get_ptr_from_altrepped_raw(SEXP R_raw) {
return static_cast<std::vector<char>*>(R_ExternalPtrAddr(R_altrep_data1(R_raw)));
}
R_xlen_t get_altrepped_raw_len(SEXP R_raw) {
return get_ptr_from_altrepped_raw(R_raw)->size();
}
const void* get_altrepped_raw_dataptr_or_null(SEXP R_raw) {
return get_ptr_from_altrepped_raw(R_raw)->data();
}
void* get_altrepped_raw_dataptr(SEXP R_raw, Rboolean writeable) {
return get_ptr_from_altrepped_raw(R_raw)->data();
}
#ifndef LGB_NO_ALTREP
template <class T>
R_altrep_class_t get_altrep_class_for_type() {
if (std::is_same<T, double>::value) {
return lgb_altrepped_dbl_arr;
} else {
return lgb_altrepped_int_arr;
}
}
#else
template <class T>
SEXPTYPE get_sexptype_class_for_type() {
if (std::is_same<T, double>::value) {
return REALSXP;
} else {
return INTSXP;
}
}
template <class T>
T* get_r_vec_ptr(SEXP x) {
if (std::is_same<T, double>::value) {
return static_cast<T*>(static_cast<void*>(REAL(x)));
} else {
return static_cast<T*>(static_cast<void*>(INTEGER(x)));
}
}
#endif
template <class T>
struct arr_and_len {
T *arr;
int64_t len;
};
#ifndef LGB_NO_ALTREP
template <class T>
SEXP make_altrepped_vec_from_arr(void *void_ptr) {
T *arr = static_cast<arr_and_len<T>*>(void_ptr)->arr;
uint64_t len = static_cast<arr_and_len<T>*>(void_ptr)->len;
SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP R_len = Rf_protect(Rf_allocVector(REALSXP, 1));
SEXP R_vec = Rf_protect(R_new_altrep(get_altrep_class_for_type<T>(), R_NilValue, R_NilValue));
REAL(R_len)[0] = static_cast<double>(len);
R_SetExternalPtrAddr(R_ptr, arr);
R_RegisterCFinalizerEx(R_ptr, delete_cpp_array<T>, TRUE);
R_set_altrep_data1(R_vec, R_ptr);
R_set_altrep_data2(R_vec, R_len);
Rf_unprotect(3);
return R_vec;
}
#else
template <class T>
SEXP make_R_vec_from_arr(void *void_ptr) {
T *arr = static_cast<arr_and_len<T>*>(void_ptr)->arr;
uint64_t len = static_cast<arr_and_len<T>*>(void_ptr)->len;
SEXP out = Rf_protect(Rf_allocVector(get_sexptype_class_for_type<T>(), len));
std::copy(arr, arr + len, get_r_vec_ptr<T>(out));
Rf_unprotect(1);
return out;
}
#define make_altrepped_vec_from_arr make_R_vec_from_arr
#endif
R_xlen_t get_altrepped_vec_len(SEXP R_vec) {
return static_cast<R_xlen_t>(Rf_asReal(R_altrep_data2(R_vec)));
}
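// (the length lives in altrep data2 as a double: array lengths can exceed
// INT_MAX, and R has no 64-bit integer vector type)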
const void* get_altrepped_vec_dataptr_or_null(SEXP R_vec) {
return R_ExternalPtrAddr(R_altrep_data1(R_vec));
}
void* get_altrepped_vec_dataptr(SEXP R_vec, Rboolean writeable) {
return R_ExternalPtrAddr(R_altrep_data1(R_vec));
}
#define COL_MAJOR (0)
@ -143,18 +294,18 @@ SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
SEXP parameters,
SEXP reference) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
DatasetHandle handle = nullptr;
DatasetHandle ref = nullptr;
if (!Rf_isNull(reference)) {
ref = R_ExternalPtrAddr(reference);
}
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
CHECK_CALL(LGBM_DatasetCreateFromFile(filename_ptr, parameters_ptr, ref, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(3);
Rf_unprotect(3);
return ret;
R_API_END();
}
@ -168,14 +319,14 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
SEXP parameters,
SEXP reference) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const int* p_indptr = INTEGER(indptr);
const int* p_indices = INTEGER(indices);
const double* p_data = REAL(data);
int64_t nindptr = static_cast<int64_t>(Rf_asInteger(num_indptr));
int64_t ndata = static_cast<int64_t>(Rf_asInteger(nelem));
int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
DatasetHandle handle = nullptr;
DatasetHandle ref = nullptr;
if (!Rf_isNull(reference)) {
@ -186,7 +337,7 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
nrow, parameters_ptr, ref, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -197,11 +348,11 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
SEXP parameters,
SEXP reference) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
int32_t nrow = static_cast<int32_t>(Rf_asInteger(num_row));
int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
double* p_mat = REAL(data);
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
DatasetHandle handle = nullptr;
DatasetHandle ref = nullptr;
if (!Rf_isNull(reference)) {
@ -211,7 +362,7 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
parameters_ptr, ref, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -222,7 +373,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
SEXP parameters) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
int32_t len = static_cast<int32_t>(Rf_asInteger(len_used_row_indices));
std::unique_ptr<int32_t[]> idxvec(new int32_t[len]);
// convert from one-based to zero-based index
@ -233,14 +384,14 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
for (int32_t i = 0; i < len; ++i) {
idxvec[i] = static_cast<int32_t>(used_row_indices_[i] - 1);
}
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
DatasetHandle res = nullptr;
CHECK_CALL(LGBM_DatasetGetSubset(R_ExternalPtrAddr(handle),
idxvec.get(), len, parameters_ptr,
&res));
R_SetExternalPtrAddr(ret, res);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -249,7 +400,7 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
SEXP feature_names) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
auto vec_names = Split(CHAR(PROTECT(Rf_asChar(feature_names))), '\t');
auto vec_names = Split(CHAR(Rf_protect(Rf_asChar(feature_names))), '\t');
int len = static_cast<int>(vec_names.size());
std::unique_ptr<const char*[]> vec_sptr(new const char*[len]);
for (int i = 0; i < len; ++i) {
@ -257,13 +408,13 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
}
CHECK_CALL(LGBM_DatasetSetFeatureNames(R_ExternalPtrAddr(handle),
vec_sptr.get(), len));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
SEXP feature_names;
@ -301,11 +452,11 @@ SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
ptr_names.data()));
}
CHECK_EQ(len, out_len);
feature_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
feature_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
for (int i = 0; i < len; ++i) {
SET_STRING_ELT(feature_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
}
UNPROTECT(2);
Rf_unprotect(2);
return feature_names;
R_API_END();
}
@ -314,10 +465,10 @@ SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
SEXP filename) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
CHECK_CALL(LGBM_DatasetSaveBinary(R_ExternalPtrAddr(handle),
filename_ptr));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -339,7 +490,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
int len = Rf_asInteger(num_element);
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
if (!strcmp("group", name) || !strcmp("query", name)) {
CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, INTEGER(field_data), len, C_API_DTYPE_INT32));
} else if (!strcmp("init_score", name)) {
@ -349,7 +500,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
std::copy(REAL(field_data), REAL(field_data) + len, vec.get());
CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.get(), len, C_API_DTYPE_FLOAT32));
}
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -359,7 +510,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
SEXP field_data) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
int out_len = 0;
int out_type = 0;
const void* res;
@ -381,7 +532,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
auto p_data = reinterpret_cast<const float*>(res);
std::copy(p_data, p_data + out_len, REAL(field_data));
}
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -391,7 +542,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
SEXP out) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
int out_len = 0;
int out_type = 0;
const void* res;
@ -400,7 +551,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
out_len -= 1;
}
INTEGER(out)[0] = out_len;
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -408,10 +559,10 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
SEXP LGBM_DatasetUpdateParamChecking_R(SEXP old_params,
SEXP new_params) {
R_API_BEGIN();
const char* old_params_ptr = CHAR(PROTECT(Rf_asChar(old_params)));
const char* new_params_ptr = CHAR(PROTECT(Rf_asChar(new_params)));
const char* old_params_ptr = CHAR(Rf_protect(Rf_asChar(old_params)));
const char* new_params_ptr = CHAR(Rf_protect(Rf_asChar(new_params)));
CHECK_CALL(LGBM_DatasetUpdateParamChecking(old_params_ptr, new_params_ptr));
UNPROTECT(2);
Rf_unprotect(2);
return R_NilValue;
R_API_END();
}
@ -468,34 +619,34 @@ SEXP LGBM_BoosterCreate_R(SEXP train_data,
SEXP parameters) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(train_data);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreate(R_ExternalPtrAddr(train_data), parameters_ptr, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
int out_num_iterations = 0;
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreateFromModelfile(filename_ptr, &out_num_iterations, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP temp = NULL;
int n_protected = 1;
int out_num_iterations = 0;
@ -510,7 +661,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
break;
}
case STRSXP: {
temp = PROTECT(STRING_ELT(model_str, 0));
temp = Rf_protect(STRING_ELT(model_str, 0));
n_protected++;
model_str_ptr = reinterpret_cast<const char*>(CHAR(temp));
}
@ -519,7 +670,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
CHECK_CALL(LGBM_BoosterLoadModelFromString(model_str_ptr, &out_num_iterations, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(n_protected);
Rf_unprotect(n_protected);
return ret;
R_API_END();
}
@ -558,9 +709,9 @@ SEXP LGBM_BoosterResetParameter_R(SEXP handle,
SEXP parameters) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
CHECK_CALL(LGBM_BoosterResetParameter(R_ExternalPtrAddr(handle), parameters_ptr));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -618,8 +769,7 @@ SEXP LGBM_BoosterRollbackOneIter_R(SEXP handle) {
R_API_END();
}
SEXP LGBM_BoosterGetCurrentIteration_R(SEXP handle,
SEXP out) {
SEXP LGBM_BoosterGetCurrentIteration_R(SEXP handle, SEXP out) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int out_iteration;
@ -629,6 +779,26 @@ SEXP LGBM_BoosterGetCurrentIteration_R(SEXP handle,
R_API_END();
}
SEXP LGBM_BoosterNumModelPerIteration_R(SEXP handle, SEXP out) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int models_per_iter;
CHECK_CALL(LGBM_BoosterNumModelPerIteration(R_ExternalPtrAddr(handle), &models_per_iter));
INTEGER(out)[0] = models_per_iter;
return R_NilValue;
R_API_END();
}
SEXP LGBM_BoosterNumberOfTotalModel_R(SEXP handle, SEXP out) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int total_models;
CHECK_CALL(LGBM_BoosterNumberOfTotalModel(R_ExternalPtrAddr(handle), &total_models));
INTEGER(out)[0] = total_models;
return R_NilValue;
R_API_END();
}
SEXP LGBM_BoosterGetUpperBoundValue_R(SEXP handle,
SEXP out_result) {
R_API_BEGIN();
@ -650,7 +820,7 @@ SEXP LGBM_BoosterGetLowerBoundValue_R(SEXP handle,
}
SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
SEXP eval_names;
@ -689,11 +859,11 @@ SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
ptr_names.data()));
}
CHECK_EQ(out_len, len);
eval_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
eval_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
for (int i = 0; i < len; ++i) {
SET_STRING_ELT(eval_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
}
UNPROTECT(2);
Rf_unprotect(2);
return eval_names;
R_API_END();
}
@ -763,14 +933,14 @@ SEXP LGBM_BoosterPredictForFile_R(SEXP handle,
SEXP result_filename) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* data_filename_ptr = CHAR(PROTECT(Rf_asChar(data_filename)));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* result_filename_ptr = CHAR(PROTECT(Rf_asChar(result_filename)));
const char* data_filename_ptr = CHAR(Rf_protect(Rf_asChar(data_filename)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
const char* result_filename_ptr = CHAR(Rf_protect(Rf_asChar(result_filename)));
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
CHECK_CALL(LGBM_BoosterPredictForFile(R_ExternalPtrAddr(handle), data_filename_ptr,
Rf_asInteger(data_has_header), pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr,
result_filename_ptr));
UNPROTECT(3);
Rf_unprotect(3);
return R_NilValue;
R_API_END();
}
@ -819,12 +989,12 @@ SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
double* ptr_ret = REAL(out_result);
int64_t out_len;
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
CHECK_CALL(LGBM_BoosterPredictForCSC(R_ExternalPtrAddr(handle),
p_indptr, C_API_DTYPE_INT32, p_indices,
p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
nrow, pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -844,7 +1014,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForCSR(R_ExternalPtrAddr(handle),
INTEGER(indptr), C_API_DTYPE_INT32, INTEGER(indices),
@ -852,7 +1022,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
Rf_xlength(indptr), Rf_xlength(data), Rf_asInteger(ncols),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, REAL(out_result)));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -871,7 +1041,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int nnz = static_cast<int>(Rf_xlength(data));
const int indptr[] = {0, nnz};
int64_t out_len;
@ -881,7 +1051,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
2, nnz, Rf_asInteger(ncols),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, REAL(out_result)));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -901,8 +1071,8 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
FastConfigHandle out_fastConfig;
CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFastInit(R_ExternalPtrAddr(handle),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
@ -910,7 +1080,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
parameter_ptr, &out_fastConfig));
R_SetExternalPtrAddr(ret, out_fastConfig);
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -950,12 +1120,12 @@ SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
const double* p_mat = REAL(data);
double* ptr_ret = REAL(out_result);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForMat(R_ExternalPtrAddr(handle),
p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -964,8 +1134,6 @@ struct SparseOutputPointers {
void* indptr;
int32_t* indices;
void* data;
int indptr_type;
int data_type;
SparseOutputPointers(void* indptr, int32_t* indices, void* data)
: indptr(indptr), indices(indices), data(data) {}
};
@ -985,12 +1153,12 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* out_names[] = {"indptr", "indices", "data", ""};
SEXP out = PROTECT(Rf_mkNamed(VECSXP, out_names));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int64_t out_len[2];
void *out_indptr;
@ -1015,17 +1183,28 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
&delete_SparseOutputPointers
};
SEXP out_indptr_R = safe_R_int(out_len[1], &cont_token);
SET_VECTOR_ELT(out, 0, out_indptr_R);
SEXP out_indices_R = safe_R_int(out_len[0], &cont_token);
SET_VECTOR_ELT(out, 1, out_indices_R);
SEXP out_data_R = safe_R_real(out_len[0], &cont_token);
SET_VECTOR_ELT(out, 2, out_data_R);
std::memcpy(INTEGER(out_indptr_R), out_indptr, out_len[1]*sizeof(int));
std::memcpy(INTEGER(out_indices_R), out_indices, out_len[0]*sizeof(int));
std::memcpy(REAL(out_data_R), out_data, out_len[0]*sizeof(double));
arr_and_len<int> indptr_str{static_cast<int*>(out_indptr), out_len[1]};
SET_VECTOR_ELT(
out, 0,
R_UnwindProtect(make_altrepped_vec_from_arr<int>,
static_cast<void*>(&indptr_str), throw_R_memerr, &cont_token, cont_token));
pointers_struct->indptr = nullptr;
UNPROTECT(3);
arr_and_len<int> indices_str{static_cast<int*>(out_indices), out_len[0]};
SET_VECTOR_ELT(
out, 1,
R_UnwindProtect(make_altrepped_vec_from_arr<int>,
static_cast<void*>(&indices_str), throw_R_memerr, &cont_token, cont_token));
pointers_struct->indices = nullptr;
arr_and_len<double> data_str{static_cast<double*>(out_data), out_len[0]};
SET_VECTOR_ELT(
out, 2,
R_UnwindProtect(make_altrepped_vec_from_arr<double>,
static_cast<void*>(&data_str), throw_R_memerr, &cont_token, cont_token));
pointers_struct->data = nullptr;
Rf_unprotect(3);
return out;
R_API_END();
}
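The rewrite above hands each C-allocated buffer to an ALTREP wrapper and then nulls the corresponding field in pointers_struct, so the custom deleter frees only buffers that were never handed off. A hedged sketch of that ownership-transfer pattern (names hypothetical; the real deleter calls back into the LightGBM C API):

#include <memory>

struct Buffers {
  int* indices = nullptr;
  double* data = nullptr;
};

static void delete_buffers(Buffers* b) {
  delete[] b->indices;  // deleting nullptr is a no-op
  delete[] b->data;
  delete b;
}

void demo() {
  std::unique_ptr<Buffers, decltype(&delete_buffers)> guard{new Buffers(), &delete_buffers};
  guard->indices = new int[3]{0, 1, 2};
  // ... transfer guard->indices to a new owner (e.g. an ALTREP vector) ...
  guard->indices = nullptr;  // the deleter now skips it
}  // ->data and the struct itself are still cleaned up here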
@ -1042,14 +1221,14 @@ SEXP LGBM_BoosterPredictForMatSingleRow_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
double* ptr_ret = REAL(out_result);
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForMatSingleRow(R_ExternalPtrAddr(handle),
REAL(data), C_API_DTYPE_FLOAT64, Rf_xlength(data), 1,
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, ptr_ret));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -1065,8 +1244,8 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
FastConfigHandle out_fastConfig;
CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFastInit(R_ExternalPtrAddr(handle),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
@ -1074,7 +1253,7 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
parameter_ptr, &out_fastConfig));
R_SetExternalPtrAddr(ret, out_fastConfig);
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -1093,67 +1272,101 @@ SEXP LGBM_BoosterPredictForMatSingleRowFast_R(SEXP handle_fastConfig,
SEXP LGBM_BoosterSaveModel_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type,
SEXP filename) {
SEXP filename,
SEXP start_iteration) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
CHECK_CALL(LGBM_BoosterSaveModel(R_ExternalPtrAddr(handle), 0, Rf_asInteger(num_iteration), Rf_asInteger(feature_importance_type), filename_ptr));
UNPROTECT(1);
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
CHECK_CALL(LGBM_BoosterSaveModel(R_ExternalPtrAddr(handle), Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), Rf_asInteger(feature_importance_type), filename_ptr));
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
// Note: for some reason, MSVC crashes when an error is thrown here
// if the buffer variable is defined as 'std::unique_ptr<std::vector<char>>',
// but not if it is defined as 'std::vector<char>'.
#ifndef _MSC_VER
SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP feature_importance_type,
SEXP start_iteration) {
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int64_t out_len = 0;
int64_t buf_len = 1024 * 1024;
int num_iter = Rf_asInteger(num_iteration);
int start_iter = Rf_asInteger(start_iteration);
int importance_type = Rf_asInteger(feature_importance_type);
std::unique_ptr<std::vector<char>> inner_char_buf(new std::vector<char>(buf_len));
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf->data()));
inner_char_buf->resize(out_len);
if (out_len > buf_len) {
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf->data()));
}
SEXP out = R_UnwindProtect(make_altrepped_raw_vec, &inner_char_buf, throw_R_memerr, &cont_token, cont_token);
Rf_unprotect(1);
return out;
R_API_END();
}
#else
SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type,
SEXP start_iteration) {
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int64_t out_len = 0;
int64_t buf_len = 1024 * 1024;
int num_iter = Rf_asInteger(num_iteration);
int start_iter = Rf_asInteger(start_iteration);
int importance_type = Rf_asInteger(feature_importance_type);
std::vector<char> inner_char_buf(buf_len);
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
SEXP model_str = PROTECT(safe_R_raw(out_len, &cont_token));
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
SEXP model_str = Rf_protect(safe_R_raw(out_len, &cont_token));
// if the model string was larger than the initial buffer, call the function again, writing directly to the R object
if (out_len > buf_len) {
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, out_len, &out_len, reinterpret_cast<char*>(RAW(model_str))));
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, reinterpret_cast<char*>(RAW(model_str))));
} else {
std::copy(inner_char_buf.begin(), inner_char_buf.begin() + out_len, reinterpret_cast<char*>(RAW(model_str)));
}
UNPROTECT(2);
Rf_unprotect(2);
return model_str;
R_API_END();
}
#endif
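Both branches above use the same two-pass serialization: write into a fixed-size first guess, and if the reported length exceeds the buffer, resize and write again. A hedged sketch, with get_model_string as a hypothetical stand-in for LGBM_BoosterSaveModelToString:

#include <cstdint>
#include <vector>

std::vector<char> save_with_retry(void (*get_model_string)(int64_t, int64_t*, char*)) {
  int64_t out_len = 0;
  std::vector<char> buf(1024 * 1024);  // 1 MiB first guess
  get_model_string(static_cast<int64_t>(buf.size()), &out_len, buf.data());
  if (out_len > static_cast<int64_t>(buf.size())) {
    buf.resize(out_len);  // grow to the reported size and write again
    get_model_string(static_cast<int64_t>(buf.size()), &out_len, buf.data());
  }
  buf.resize(out_len);
  return buf;
}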
SEXP LGBM_BoosterDumpModel_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP feature_importance_type,
SEXP start_iteration) {
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
SEXP model_str;
int64_t out_len = 0;
int64_t buf_len = 1024 * 1024;
int num_iter = Rf_asInteger(num_iteration);
int start_iter = Rf_asInteger(start_iteration);
int importance_type = Rf_asInteger(feature_importance_type);
std::vector<char> inner_char_buf(buf_len);
CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
// if the model string was larger than the initial buffer, allocate a bigger buffer and try again
if (out_len > buf_len) {
inner_char_buf.resize(out_len);
CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, out_len, &out_len, inner_char_buf.data()));
CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf.data()));
}
model_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
model_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
SET_STRING_ELT(model_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
UNPROTECT(2);
Rf_unprotect(2);
return model_str;
R_API_END();
}
SEXP LGBM_DumpParamAliases_R() {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
SEXP aliases_str;
int64_t out_len = 0;
@ -1165,15 +1378,15 @@ SEXP LGBM_DumpParamAliases_R() {
inner_char_buf.resize(out_len);
CHECK_CALL(LGBM_DumpParamAliases(out_len, &out_len, inner_char_buf.data()));
}
aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
aliases_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
UNPROTECT(2);
Rf_unprotect(2);
return aliases_str;
R_API_END();
}
SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
SEXP params_str;
@ -1186,9 +1399,9 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
inner_char_buf.resize(out_len);
CHECK_CALL(LGBM_BoosterGetLoadedParam(R_ExternalPtrAddr(handle), out_len, &out_len, inner_char_buf.data()));
}
params_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
params_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
SET_STRING_ELT(params_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
UNPROTECT(2);
Rf_unprotect(2);
return params_str;
R_API_END();
}
@ -1243,6 +1456,8 @@ static const R_CallMethodDef CallEntries[] = {
{"LGBM_BoosterUpdateOneIterCustom_R" , (DL_FUNC) &LGBM_BoosterUpdateOneIterCustom_R , 4},
{"LGBM_BoosterRollbackOneIter_R" , (DL_FUNC) &LGBM_BoosterRollbackOneIter_R , 1},
{"LGBM_BoosterGetCurrentIteration_R" , (DL_FUNC) &LGBM_BoosterGetCurrentIteration_R , 2},
{"LGBM_BoosterNumModelPerIteration_R" , (DL_FUNC) &LGBM_BoosterNumModelPerIteration_R , 2},
{"LGBM_BoosterNumberOfTotalModel_R" , (DL_FUNC) &LGBM_BoosterNumberOfTotalModel_R , 2},
{"LGBM_BoosterGetUpperBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetUpperBoundValue_R , 2},
{"LGBM_BoosterGetLowerBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetLowerBoundValue_R , 2},
{"LGBM_BoosterGetEvalNames_R" , (DL_FUNC) &LGBM_BoosterGetEvalNames_R , 1},
@ -1261,9 +1476,9 @@ static const R_CallMethodDef CallEntries[] = {
{"LGBM_BoosterPredictForMatSingleRow_R" , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRow_R , 9},
{"LGBM_BoosterPredictForMatSingleRowFastInit_R", (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFastInit_R, 8},
{"LGBM_BoosterPredictForMatSingleRowFast_R" , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFast_R , 3},
{"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4},
{"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3},
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3},
{"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 5},
{"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 4},
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 4},
{"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0},
{"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0},
{"LGBM_GetMaxThreads_R" , (DL_FUNC) &LGBM_GetMaxThreads_R , 1},
@ -1276,4 +1491,21 @@ LIGHTGBM_C_EXPORT void R_init_lightgbm(DllInfo *dll);
void R_init_lightgbm(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE);
#ifndef LGB_NO_ALTREP
lgb_altrepped_char_vec = R_make_altraw_class("lgb_altrepped_char_vec", "lightgbm", dll);
R_set_altrep_Length_method(lgb_altrepped_char_vec, get_altrepped_raw_len);
R_set_altvec_Dataptr_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr);
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr_or_null);
lgb_altrepped_int_arr = R_make_altinteger_class("lgb_altrepped_int_arr", "lightgbm", dll);
R_set_altrep_Length_method(lgb_altrepped_int_arr, get_altrepped_vec_len);
R_set_altvec_Dataptr_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr);
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr_or_null);
lgb_altrepped_dbl_arr = R_make_altreal_class("lgb_altrepped_dbl_arr", "lightgbm", dll);
R_set_altrep_Length_method(lgb_altrepped_dbl_arr, get_altrepped_vec_len);
R_set_altvec_Dataptr_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr);
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr_or_null);
#endif
}
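The registrations above give each ALTREP class the minimal method set R needs to treat a LightGBM-owned buffer as an ordinary vector without copying it: a Length hook and two Dataptr hooks. A hedged sketch of what such hooks typically look like (the payload layout here is hypothetical; the real classes store their state differently):

#include <Rinternals.h>
#include <R_ext/Altrep.h>

struct payload { void* ptr; R_xlen_t len; };  // hypothetical instance state

static R_xlen_t sketch_length(SEXP x) {
  // ALTREP instances carry their state in data1 (an external pointer here)
  return static_cast<payload*>(R_ExternalPtrAddr(R_altrep_data1(x)))->len;
}

static void* sketch_dataptr(SEXP x, Rboolean writeable) {
  return static_cast<payload*>(R_ExternalPtrAddr(R_altrep_data1(x)))->ptr;
}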

View file

@ -7,8 +7,14 @@
#include <LightGBM/c_api.h>
#ifndef R_NO_REMAP
#define R_NO_REMAP
#endif
#ifndef R_USE_C99_IN_CXX
#define R_USE_C99_IN_CXX
#endif
#include <Rinternals.h>
/*!
@ -384,6 +390,28 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterGetCurrentIteration_R(
SEXP out
);
/*!
* \brief Get number of trees per iteration
* \param handle Booster handle
* \param out Number of trees per iteration
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterNumModelPerIteration_R(
SEXP handle,
SEXP out
);
/*!
* \brief Get total number of trees
* \param handle Booster handle
* \param out Total number of trees in the Booster
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterNumberOfTotalModel_R(
SEXP handle,
SEXP out
);
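These two accessors are linked by a simple invariant: the total model count equals the per-iteration count times the number of completed iterations. A hedged C-side sketch using the c_api.h functions the wrappers call (return codes omitted for brevity):

#include <LightGBM/c_api.h>
#include <cassert>

void check_tree_counts(BoosterHandle handle) {
  int per_iter = 0, total = 0, iters = 0;
  LGBM_BoosterNumModelPerIteration(handle, &per_iter);
  LGBM_BoosterNumberOfTotalModel(handle, &total);
  LGBM_BoosterGetCurrentIteration(handle, &iters);
  assert(total == per_iter * iters);  // e.g. num_class trees per round in multiclass
}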
/*!
* \brief Get model upper bound value.
* \param handle Handle of Booster
@ -809,13 +837,15 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRowFast_R(
* \param num_iteration Number of iterations to save, <= 0 means save all
* \param feature_importance_type type of feature importance, 0: split, 1: gain
* \param filename file name
* \param start_iteration Starting iteration (0 based)
* \return R NULL value
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModel_R(
SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type,
SEXP filename
SEXP filename,
SEXP start_iteration
);
/*!
@ -823,12 +853,14 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModel_R(
* \param handle Booster handle
* \param num_iteration Number of iterations to save, <= 0 means save all
* \param feature_importance_type type of feature importance, 0: split, 1: gain
* \param start_iteration Starting iteration (0 based)
* \return R character vector (length=1) with model string
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModelToString_R(
SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type
SEXP feature_importance_type,
SEXP start_iteration
);
/*!
@ -836,12 +868,14 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterSaveModelToString_R(
* \param handle Booster handle
* \param num_iteration Number of iterations to dump, <= 0 means dump all
* \param feature_importance_type type of feature importance, 0: split, 1: gain
* \param start_iteration Index of starting iteration (0 based)
* \return R character vector (length=1) with model JSON
*/
LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type
SEXP feature_importance_type,
SEXP start_iteration
);
/*!

View file

@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
}
})
test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset and labels are not given", {
test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", {
bad_values <- list(
4L
, "hello"
@ -1788,11 +1788,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th
test_that("lgb.train() supports non-ASCII feature names", {
dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L)
, params = list(num_threads = .LGB_MAX_THREADS)
)
# content below is equivalent to
#
# feature_names <- c("F_零", "F_一", "F_二", "F_三")
@ -1805,6 +1800,12 @@ test_that("lgb.train() supports non-ASCII feature names", {
, rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c)))
, rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89)))
)
dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L)
, params = list(num_threads = .LGB_MAX_THREADS)
, colnames = feature_names
)
bst <- lgb.train(
data = dtrain
, nrounds = 5L
@ -1814,7 +1815,6 @@ test_that("lgb.train() supports non-ASCII feature names", {
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, colnames = feature_names
)
expect_true(.is_Booster(bst))
dumped_model <- jsonlite::fromJSON(bst$dump_model())
@ -2776,14 +2776,12 @@ test_that(paste0("lgb.train() throws an informative error if the members of inte
test_that("lgb.train() throws an informative error if interaction_constraints contains a too large index", {
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression",
interaction_constraints = list(c(1L, length(colnames(train$data)) + 1L), 3L))
expect_error({
bst <- lightgbm(
data = dtrain
, params = params
, nrounds = 2L
)
}, "supplied a too large value in interaction_constraints")
interaction_constraints = list(c(1L, ncol(train$data) + 1L:2L), 3L))
expect_error(
lightgbm(data = dtrain, params = params, nrounds = 2L)
, "unknown feature(s) in interaction_constraints: '127', '128'"
, fixed = TRUE
)
})
test_that(paste0("lgb.train() gives same result when interaction_constraints is specified as a list of ",
@ -2838,7 +2836,11 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is
test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), {
set.seed(1L)
dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))
dtrain <- lgb.Dataset(
train$data
, label = train$label
, params = list(num_threads = .LGB_MAX_THREADS)
)
params <- list(
objective = "regression"
@ -2854,6 +2856,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
pred1 <- bst$predict(test$data)
new_colnames <- paste0(colnames(train$data), "_x")
dtrain$set_colnames(new_colnames)
params <- list(
objective = "regression"
, interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L])
@ -2864,7 +2867,6 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
data = dtrain
, params = params
, nrounds = 2L
, colnames = new_colnames
)
pred2 <- bst$predict(test$data)
@ -2872,6 +2874,37 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
})
test_that("Interaction constraints add missing features correctly as new group", {
dtrain <- lgb.Dataset(
train$data[, 1L:6L] # Pick only some columns
, label = train$label
, params = list(num_threads = .LGB_MAX_THREADS)
)
list_of_constraints <- list(
list(3L, 1L:2L)
, list("cap-shape=convex", c("cap-shape=bell", "cap-shape=conical"))
)
for (constraints in list_of_constraints) {
params <- list(
objective = "regression"
, interaction_constraints = constraints
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
bst <- lightgbm(data = dtrain, params = params, nrounds = 10L)
expected_list <- list("[2]", "[0,1]", "[3,4,5]")
expect_equal(bst$params$interaction_constraints, expected_list)
expected_string <- "[interaction_constraints: [2],[0,1],[3,4,5]]"
expect_true(
grepl(expected_string, bst$save_model_to_string(), fixed = TRUE)
)
}
})
.generate_trainset_for_monotone_constraints_tests <- function(x3_to_categorical) {
n_samples <- 3000L
x1_positively_correlated_with_y <- runif(n = n_samples, min = 0.0, max = 1.0)

View file

@ -440,6 +440,35 @@ test_that("lgb.Dataset: should be able to run lgb.cv() immediately after using l
expect_true(methods::is(bst, "lgb.CVBooster"))
})
test_that("lgb.Dataset: should be able to be used in lgb.cv() when constructed with categorical feature indices", {
data("mtcars")
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1L])
categorical_feature <- which(names(mtcars) %in% c("cyl", "vs", "am", "gear", "carb")) - 1L
dtrain <- lgb.Dataset(
data = x
, label = y
, categorical_feature = categorical_feature
, free_raw_data = TRUE
, params = list(num_threads = .LGB_MAX_THREADS)
)
# constructing the Dataset frees the raw data
dtrain$construct()
params <- list(
objective = "regression"
, num_leaves = 2L
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
# cv should reuse the same categorical features without checking the indices
bst <- lgb.cv(params = params, data = dtrain, stratified = FALSE, nrounds = 1L)
expect_equal(
unlist(bst$boosters[[1L]]$booster$params$categorical_feature)
, categorical_feature - 1L # 0-based
)
})
test_that("lgb.Dataset: should be able to use and retrieve long feature names", {
# set one feature to a value longer than the default buffer size used
# in LGBM_DatasetGetFeatureNames_R
@ -621,3 +650,12 @@ test_that("lgb.Dataset can be constructed with categorical features and without
lgb.Dataset(raw_mat, categorical_feature = 2L)$construct()
}, regexp = "supplied a too large value in categorical_feature: 2 but only 1 features")
})
test_that("lgb.Dataset.slice fails with a categorical feature index greater than the number of features", {
data <- matrix(runif(100L), nrow = 50L, ncol = 2L)
ds <- lgb.Dataset(data = data, categorical_feature = 3L)
subset <- ds$slice(1L:20L)
expect_error({
subset$construct()
}, regexp = "supplied a too large value in categorical_feature: 3 but only 2 features")
})

View file

@ -174,7 +174,7 @@ test_that("Loading a Booster from a text file works", {
, bagging_freq = 1L
, boost_from_average = FALSE
, categorical_feature = c(1L, 2L)
, interaction_constraints = list(c(1L, 2L), 1L)
, interaction_constraints = list(1L:2L, 3L, 4L:ncol(train$data))
, feature_contri = rep(0.5, ncol(train$data))
, metric = c("mape", "average_precision")
, learning_rate = 1.0
@ -623,6 +623,174 @@ test_that("Booster$update() throws an informative error if you provide a non-Dat
}, regexp = "lgb.Booster.update: Only can use lgb.Dataset", fixed = TRUE)
})
test_that("Booster$num_trees_per_iter() works as expected", {
set.seed(708L)
X <- data.matrix(iris[2L:4L])
y_reg <- iris[, 1L]
y_binary <- as.integer(y_reg > median(y_reg))
y_class <- as.integer(iris[, 5L]) - 1L
num_class <- 3L
nrounds <- 10L
# Regression and binary probabilistic classification (1 iteration = 1 tree)
fit_reg <- lgb.train(
params = list(
objective = "mse"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, data = lgb.Dataset(X, label = y_reg)
, nrounds = nrounds
)
fit_binary <- lgb.train(
params = list(
objective = "binary"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, data = lgb.Dataset(X, label = y_binary)
, nrounds = nrounds
)
# Multiclass probabilistic classification (1 iteration = num_class trees)
fit_class <- lgb.train(
params = list(
objective = "multiclass"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
, num_class = num_class
)
, data = lgb.Dataset(X, label = y_class)
, nrounds = nrounds
)
expect_equal(fit_reg$num_trees_per_iter(), 1L)
expect_equal(fit_binary$num_trees_per_iter(), 1L)
expect_equal(fit_class$num_trees_per_iter(), num_class)
})
test_that("Booster$num_trees() and $num_iter() works (no early stopping)", {
set.seed(708L)
X <- data.matrix(iris[2L:4L])
y_reg <- iris[, 1L]
y_binary <- as.integer(y_reg > median(y_reg))
y_class <- as.integer(iris[, 5L]) - 1L
num_class <- 3L
nrounds <- 10L
# Regression and binary probabilistic classification (1 iteration = 1 tree)
fit_reg <- lgb.train(
params = list(
objective = "mse"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, data = lgb.Dataset(X, label = y_reg)
, nrounds = nrounds
)
fit_binary <- lgb.train(
params = list(
objective = "binary"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, data = lgb.Dataset(X, label = y_binary)
, nrounds = nrounds
)
# Multiclass probabilistic classification (1 iteration = num_class trees)
fit_class <- lgb.train(
params = list(
objective = "multiclass"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
, num_class = num_class
)
, data = lgb.Dataset(X, label = y_class)
, nrounds = nrounds
)
expect_equal(fit_reg$num_trees(), nrounds)
expect_equal(fit_binary$num_trees(), nrounds)
expect_equal(fit_class$num_trees(), num_class * nrounds)
expect_equal(fit_reg$num_iter(), nrounds)
expect_equal(fit_binary$num_iter(), nrounds)
expect_equal(fit_class$num_iter(), nrounds)
})
test_that("Booster$num_trees() and $num_iter() work (with early stopping)", {
set.seed(708L)
X <- data.matrix(iris[2L:4L])
y_reg <- iris[, 1L]
y_binary <- as.integer(y_reg > median(y_reg))
y_class <- as.integer(iris[, 5L]) - 1L
train_ix <- c(1L:40L, 51L:90L, 101L:140L)
X_train <- X[train_ix, ]
X_valid <- X[-train_ix, ]
num_class <- 3L
nrounds <- 1000L
early_stopping <- 2L
# Regression and binary probabilistic classification (1 iteration = 1 tree)
fit_reg <- lgb.train(
params = list(
objective = "mse"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, data = lgb.Dataset(X_train, label = y_reg[train_ix])
, valids = list(valid = lgb.Dataset(X_valid, label = y_reg[-train_ix]))
, nrounds = nrounds
, early_stopping_round = early_stopping
)
fit_binary <- lgb.train(
params = list(
objective = "binary"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, data = lgb.Dataset(X_train, label = y_binary[train_ix])
, valids = list(valid = lgb.Dataset(X_valid, label = y_binary[-train_ix]))
, nrounds = nrounds
, early_stopping_round = early_stopping
)
# Multiclass probabilistic classification (1 iteration = num_class trees)
fit_class <- lgb.train(
params = list(
objective = "multiclass"
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
, num_class = num_class
)
, data = lgb.Dataset(X_train, label = y_class[train_ix])
, valids = list(valid = lgb.Dataset(X_valid, label = y_class[-train_ix]))
, nrounds = nrounds
, early_stopping_round = early_stopping
)
expected_trees_reg <- fit_reg$best_iter + early_stopping
expected_trees_binary <- fit_binary$best_iter + early_stopping
expected_trees_class <- (fit_class$best_iter + early_stopping) * num_class
expect_equal(fit_reg$num_trees(), expected_trees_reg)
expect_equal(fit_binary$num_trees(), expected_trees_binary)
expect_equal(fit_class$num_trees(), expected_trees_class)
expect_equal(fit_reg$num_iter(), expected_trees_reg)
expect_equal(fit_binary$num_iter(), expected_trees_binary)
expect_equal(fit_class$num_iter(), expected_trees_class / num_class)
})
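The expected counts above encode two facts: training continues for early_stopping_round rounds past the best iteration before stopping, and multiclass boosting grows one tree per class per round. In symbols:

$$\text{num\_trees} = (\text{best\_iter} + \text{early\_stopping\_round}) \times \text{num\_trees\_per\_iter}, \qquad \text{num\_iter} = \text{best\_iter} + \text{early\_stopping\_round}$$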
test_that("Booster should store parameters and Booster$reset_parameter() should update them", {
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(
@ -850,6 +1018,7 @@ test_that("all parameters are stored correctly with save_model_to_string()", {
, "[extra_trees: 0]"
, "[extra_seed: 6642]"
, "[early_stopping_round: 0]"
, "[early_stopping_min_delta: 0]"
, "[first_metric_only: 0]"
, "[max_delta_step: 0]"
, "[lambda_l1: 0]"
@ -1518,3 +1687,95 @@ test_that("LGBM_BoosterGetNumFeature_R returns correct outputs", {
ncols <- .Call(LGBM_BoosterGetNumFeature_R, model$.__enclos_env__$private$handle)
expect_equal(ncols, ncol(iris) - 1L)
})
# Helper function that creates a fitted model with nrounds boosting rounds
.get_test_model <- function(nrounds) {
set.seed(1L)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, params = list(objective = "binary", num_threads = .LGB_MAX_THREADS)
, nrounds = nrounds
, verbose = .LGB_VERBOSITY
)
return(bst)
}
# Simplified version of lgb.model.dt.tree()
.get_trees_from_dump <- function(x) {
parsed <- jsonlite::fromJSON(
txt = x
, simplifyVector = TRUE
, simplifyDataFrame = FALSE
, simplifyMatrix = FALSE
, flatten = FALSE
)
return(lapply(parsed$tree_info, FUN = .single_tree_parse))
}
test_that("num_iteration and start_iteration work for lgb.dump()", {
bst <- .get_test_model(5L)
first2 <- .get_trees_from_dump(lgb.dump(bst, num_iteration = 2L))
last3 <- .get_trees_from_dump(
lgb.dump(bst, num_iteration = 3L, start_iteration = 3L)
)
all5 <- .get_trees_from_dump(lgb.dump(bst))
too_many <- .get_trees_from_dump(lgb.dump(bst, num_iteration = 10L))
expect_equal(
data.table::rbindlist(c(first2, last3)), data.table::rbindlist(all5)
)
expect_equal(too_many, all5)
})
test_that("num_iteration and start_iteration work for lgb.save()", {
.get_n_trees <- function(x) {
return(length(.get_trees_from_dump(lgb.dump(x))))
}
.save_and_load <- function(bst, ...) {
model_file <- tempfile(fileext = ".model")
lgb.save(bst, model_file, ...)
return(lgb.load(model_file))
}
bst <- .get_test_model(5L)
n_first2 <- .get_n_trees(.save_and_load(bst, num_iteration = 2L))
n_last3 <- .get_n_trees(
.save_and_load(bst, num_iteration = 3L, start_iteration = 3L)
)
n_all5 <- .get_n_trees(.save_and_load(bst))
n_too_many <- .get_n_trees(.save_and_load(bst, num_iteration = 10L))
expect_equal(n_first2, 2L)
expect_equal(n_last3, 3L)
expect_equal(n_all5, 5L)
expect_equal(n_too_many, 5L)
})
test_that("num_iteration and start_iteration work for save_model_to_string()", {
.get_n_trees_from_string <- function(x) {
return(sum(gregexpr("Tree=", x, fixed = TRUE)[[1L]] > 0L))
}
bst <- .get_test_model(5L)
n_first2 <- .get_n_trees_from_string(
bst$save_model_to_string(num_iteration = 2L)
)
n_last3 <- .get_n_trees_from_string(
bst$save_model_to_string(num_iteration = 3L, start_iteration = 3L)
)
n_all5 <- .get_n_trees_from_string(bst$save_model_to_string())
n_too_many <- .get_n_trees_from_string(
bst$save_model_to_string(num_iteration = 10L)
)
expect_equal(n_first2, 2L)
expect_equal(n_last3, 3L)
expect_equal(n_all5, 5L)
expect_equal(n_too_many, 5L)
})
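The same start_iteration/num_iteration semantics exist one level down in the C API these methods wrap. A hedged sketch that serializes only iterations 3 and 4 (0-based) of a fitted booster, mirroring the 'last3' case above:

#include <LightGBM/c_api.h>
#include <cstdint>
#include <vector>

std::vector<char> serialize_last_two(BoosterHandle handle) {
  int64_t out_len = 0;
  std::vector<char> buf(1 << 20);
  LGBM_BoosterSaveModelToString(handle, /*start_iteration=*/3, /*num_iteration=*/2,
                                /*feature_importance_type=*/0,
                                static_cast<int64_t>(buf.size()), &out_len, buf.data());
  buf.resize(out_len);  // shrink to the reported length
  return buf;
}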

View file

@ -0,0 +1,184 @@
NROUNDS <- 10L
MAX_DEPTH <- 3L
N <- nrow(iris)
X <- data.matrix(iris[2L:4L])
FEAT <- colnames(X)
NCLASS <- nlevels(iris[, 5L])
model_reg <- lgb.train(
params = list(
objective = "regression"
, num_threads = .LGB_MAX_THREADS
, max.depth = MAX_DEPTH
)
, data = lgb.Dataset(X, label = iris[, 1L])
, verbose = .LGB_VERBOSITY
, nrounds = NROUNDS
)
model_binary <- lgb.train(
params = list(
objective = "binary"
, num_threads = .LGB_MAX_THREADS
, max.depth = MAX_DEPTH
)
, data = lgb.Dataset(X, label = iris[, 5L] == "setosa")
, verbose = .LGB_VERBOSITY
, nrounds = NROUNDS
)
model_multiclass <- lgb.train(
params = list(
objective = "multiclass"
, num_threads = .LGB_MAX_THREADS
, max.depth = MAX_DEPTH
, num_classes = NCLASS
)
, data = lgb.Dataset(X, label = as.integer(iris[, 5L]) - 1L)
, verbose = .LGB_VERBOSITY
, nrounds = NROUNDS
)
model_rank <- lgb.train(
params = list(
objective = "lambdarank"
, num_threads = .LGB_MAX_THREADS
, max.depth = MAX_DEPTH
, lambdarank_truncation_level = 3L
)
, data = lgb.Dataset(
X
, label = as.integer(iris[, 1L] > 5.8)
, group = rep(10L, times = 15L)
)
, verbose = .LGB_VERBOSITY
, nrounds = NROUNDS
)
models <- list(
reg = model_reg
, bin = model_binary
, multi = model_multiclass
, rank = model_rank
)
for (model_name in names(models)) {
model <- models[[model_name]]
expected_n_trees <- NROUNDS
if (model_name == "multi") {
expected_n_trees <- NROUNDS * NCLASS
}
df <- as.data.frame(lgb.model.dt.tree(model))
df_list <- split(df, f = df$tree_index, drop = TRUE)
df_leaf <- df[!is.na(df$leaf_index), ]
df_internal <- df[is.na(df$leaf_index), ]
test_that("lgb.model.dt.tree() returns the right number of trees", {
expect_equal(length(unique(df$tree_index)), expected_n_trees)
})
test_that("num_iteration can return less trees", {
expect_equal(
length(unique(lgb.model.dt.tree(model, num_iteration = 2L)$tree_index))
, 2L * (if (model_name == "multi") NCLASS else 1L)
)
})
test_that("Tree index from lgb.model.dt.tree() is in 0:(NROUNS-1)", {
expect_equal(unique(df$tree_index), (0L:(expected_n_trees - 1L)))
})
test_that("Depth calculated from lgb.model.dt.tree() respects max.depth", {
expect_true(max(df$depth) <= MAX_DEPTH)
})
test_that("Each tree from lgb.model.dt.tree() has single root node", {
expect_equal(
unname(sapply(df_list, function(df) sum(df$depth == 0L)))
, rep(1L, expected_n_trees)
)
})
test_that("Each tree from lgb.model.dt.tree() has two depth 1 nodes", {
expect_equal(
unname(sapply(df_list, function(df) sum(df$depth == 1L)))
, rep(2L, expected_n_trees)
)
})
test_that("leaves from lgb.model.dt.tree() do not have split info", {
internal_node_cols <- c(
"split_index"
, "split_feature"
, "split_gain"
, "threshold"
, "decision_type"
, "default_left"
, "internal_value"
, "internal_count"
)
expect_true(all(is.na(df_leaf[internal_node_cols])))
})
test_that("leaves from lgb.model.dt.tree() have valid leaf info", {
expect_true(all(df_leaf$leaf_index %in% 0L:(2.0^MAX_DEPTH - 1.0)))
expect_true(all(is.finite(df_leaf$leaf_value)))
expect_true(all(df_leaf$leaf_count > 0L & df_leaf$leaf_count <= N))
})
test_that("non-leaves from lgb.model.dt.tree() do not have leaf info", {
leaf_node_cols <- c(
"leaf_index", "leaf_parent", "leaf_value", "leaf_count"
)
expect_true(all(is.na(df_internal[leaf_node_cols])))
})
test_that("non-leaves from lgb.model.dt.tree() have valid split info", {
expect_true(
all(
sapply(
split(df_internal, df_internal$tree_index),
function(x) all(x$split_index %in% 0L:(nrow(x) - 1L))
)
)
)
expect_true(all(df_internal$split_feature %in% FEAT))
num_cols <- c("split_gain", "threshold", "internal_value")
expect_true(all(is.finite(unlist(df_internal[, num_cols]))))
# NOTE: the valid range of decision_type values is not checked here
expect_true(all(df_internal$default_left %in% c(TRUE, FALSE)))
counts <- df_internal$internal_count
expect_true(all(counts > 1L & counts <= N))
})
}
test_that("num_iteration and start_iteration work as expected", {
set.seed(1L)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, params = list(objective = "binary", num_threads = .LGB_MAX_THREADS)
, nrounds = 5L
, verbose = .LGB_VERBOSITY
)
first2 <- lgb.model.dt.tree(bst, num_iteration = 2L)
last3 <- lgb.model.dt.tree(bst, num_iteration = 3L, start_iteration = 3L)
all5 <- lgb.model.dt.tree(bst)
too_many <- lgb.model.dt.tree(bst, num_iteration = 10L)
expect_equal(data.table::rbindlist(list(first2, last3)), all5)
expect_equal(too_many, all5)
# Check tree indices
expect_equal(unique(first2[["tree_index"]]), 0L:1L)
expect_equal(unique(last3[["tree_index"]]), 2L:4L)
expect_equal(unique(all5[["tree_index"]]), 0L:4L)
})

View file

@ -91,7 +91,7 @@ test_that(".PARAMETER_ALIASES() uses the internal session cache", {
expect_false(exists(cache_key, where = .lgb_session_cache_env))
})
test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
test_that("training should warn if you use 'dart' boosting with early stopping", {
for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
params <- list(
num_leaves = 5L
@ -101,14 +101,69 @@ test_that("training should warn if you use 'dart' boosting, specified with 'boos
, num_threads = .LGB_MAX_THREADS
)
params[[boosting_param]] <- "dart"
# warning: early stopping requested
expect_warning({
result <- lightgbm(
data = train$data
, label = train$label
, params = params
, nrounds = 5L
, verbose = -1L
, nrounds = 2L
, verbose = .LGB_VERBOSITY
, early_stopping_rounds = 1L
)
}, regexp = "Early stopping is not available in 'dart' mode")
# no warning: early stopping not requested
expect_silent({
result <- lightgbm(
data = train$data
, label = train$label
, params = params
, nrounds = 2L
, verbose = .LGB_VERBOSITY
, early_stopping_rounds = NULL
)
})
}
})
test_that("lgb.cv() should warn if you use 'dart' boosting with early stopping", {
for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
params <- list(
num_leaves = 5L
, objective = "binary"
, metric = "binary_error"
, num_threads = .LGB_MAX_THREADS
)
params[[boosting_param]] <- "dart"
# warning: early stopping requested
expect_warning({
result <- lgb.cv(
data = lgb.Dataset(
data = train$data
, label = train$label
)
, params = params
, nrounds = 2L
, verbose = .LGB_VERBOSITY
, early_stopping_rounds = 1L
)
}, regexp = "Early stopping is not available in 'dart' mode")
# no warning: early stopping not requested
expect_silent({
result <- lgb.cv(
data = lgb.Dataset(
data = train$data
, label = train$label
)
, params = params
, nrounds = 2L
, verbose = .LGB_VERBOSITY
, early_stopping_rounds = NULL
)
})
}
})

View file

@ -147,3 +147,21 @@ test_that(".equal_or_both_null produces expected results", {
expect_false(.equal_or_both_null(10.0, 1L))
expect_true(.equal_or_both_null(0L, 0L))
})
test_that(".check_interaction_constraints() adds skipped features", {
ref <- letters[1L:5L]
ic_num <- list(1L, c(2L, 3L))
ic_char <- list("a", c("b", "c"))
expected <- list("[0]", "[1,2]", "[3,4]")
ic_checked_num <- .check_interaction_constraints(
interaction_constraints = ic_num, column_names = ref
)
ic_checked_char <- .check_interaction_constraints(
interaction_constraints = ic_char, column_names = ref
)
expect_equal(ic_checked_num, expected)
expect_equal(ic_checked_char, expected)
})

View file

@ -3,18 +3,20 @@
Light Gradient Boosting Machine
===============================
[![Python-package GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/Python-package/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions)
[![R-package GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/R-package/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions)
[![CUDA Version GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/CUDA%20Version/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions)
[![Static Analysis GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/Static%20Analysis/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions)
[![Python-package GitHub Actions Build Status](https://github.com/microsoft/LightGBM/actions/workflows/python_package.yml/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions/workflows/python_package.yml)
[![R-package GitHub Actions Build Status](https://github.com/microsoft/LightGBM/actions/workflows/r_package.yml/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions/workflows/r_package.yml)
[![CUDA Version GitHub Actions Build Status](https://github.com/microsoft/LightGBM/actions/workflows/cuda.yml/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions/workflows/cuda.yml)
[![Static Analysis GitHub Actions Build Status](https://github.com/microsoft/LightGBM/actions/workflows/static_analysis.yml/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions/workflows/static_analysis.yml)
[![Azure Pipelines Build Status](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_apis/build/status/Microsoft.LightGBM?branchName=master)](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_build/latest?definitionId=1)
[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/1ys5ot401m0fep6l/branch/master?svg=true)](https://ci.appveyor.com/project/guolinke/lightgbm/branch/master)
[![Documentation Status](https://readthedocs.org/projects/lightgbm/badge/?version=latest)](https://lightgbm.readthedocs.io/)
[![Link checks](https://github.com/microsoft/LightGBM/workflows/Link%20checks/badge.svg)](https://github.com/microsoft/LightGBM/actions?query=workflow%3A%22Link+checks%22)
[![Link checks](https://github.com/microsoft/LightGBM/actions/workflows/linkchecker.yml/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions/workflows/linkchecker.yml)
[![License](https://img.shields.io/github/license/microsoft/lightgbm.svg)](https://github.com/microsoft/LightGBM/blob/master/LICENSE)
[![Python Versions](https://img.shields.io/pypi/pyversions/lightgbm.svg?logo=python&logoColor=white)](https://pypi.org/project/lightgbm)
[![PyPI Version](https://img.shields.io/pypi/v/lightgbm.svg?logo=pypi&logoColor=white)](https://pypi.org/project/lightgbm)
[![conda Version](https://img.shields.io/conda/vn/conda-forge/lightgbm?logo=conda-forge&logoColor=white&label=conda)](https://anaconda.org/conda-forge/lightgbm)
[![CRAN Version](https://www.r-pkg.org/badges/version/lightgbm)](https://cran.r-project.org/package=lightgbm)
[![NuGet Version](https://img.shields.io/nuget/v/lightgbm?logo=nuget&logoColor=white)](https://www.nuget.org/packages/LightGBM)
LightGBM is a gradient boosting framework that uses tree-based learning algorithms. It is designed to be distributed and efficient with the following advantages:
@ -55,8 +57,6 @@ News
Please refer to changelogs at [GitHub releases](https://github.com/microsoft/LightGBM/releases) page.
Some old update logs are available at [Key Events](https://github.com/microsoft/LightGBM/blob/master/docs/Key-Events.md) page.
External (Unofficial) Repositories
----------------------------------
@ -115,7 +115,7 @@ Ruby gem: https://github.com/ankane/lightgbm-ruby
LightGBM4j (Java high-level binding): https://github.com/metarank/lightgbm4j
lightgbm-rs (Rust binding): https://github.com/vaaaaanquish/lightgbm-rs
lightgbm3 (Rust binding): https://github.com/Mottl/lightgbm3-rs
MLflow (experiment tracking, model monitoring framework): https://github.com/mlflow/mlflow
@ -133,7 +133,7 @@ Support
-------
- Ask a question [on Stack Overflow with the `lightgbm` tag](https://stackoverflow.com/questions/ask?tags=lightgbm), we monitor this for new questions.
- Open **bug reports** and **feature requests** (not questions) on [GitHub issues](https://github.com/microsoft/LightGBM/issues).
- Open **bug reports** and **feature requests** on [GitHub issues](https://github.com/microsoft/LightGBM/issues).
How to Contribute
-----------------
@ -156,8 +156,6 @@ Qi Meng, Guolin Ke, Taifeng Wang, Wei Chen, Qiwei Ye, Zhi-Ming Ma, Tie-Yan Liu.
Huan Zhang, Si Si and Cho-Jui Hsieh. "[GPU Acceleration for Large-scale Tree Boosting](https://arxiv.org/abs/1706.08359)". SysML Conference, 2018.
**Note**: If you use LightGBM in your GitHub projects, please add `lightgbm` in the `requirements.txt`.
License
-------

View file

@ -14,7 +14,7 @@ Instead, please report them to the Microsoft Security Response Center (MSRC) at
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

View file

@ -1 +1 @@
4.3.0.99
4.5.0.99

View file

@ -4,7 +4,7 @@
# Prepare a source distribution of the R package
# to be submitted to CRAN.
#
# [arguments]
# [arguments]
#
# --r-executable Customize the R executable used by `R CMD build`.
# Useful if building the R package in an environment with
@ -27,7 +27,7 @@
# # skip vignette building
# sh build-cran-package.sh --no-build-vignettes
set -e
set -e -E -u
# Default values of arguments
BUILD_VIGNETTES=true
@ -76,12 +76,12 @@ fi
cp \
external_libs/fast_double_parser/include/fast_double_parser.h \
"${TEMP_R_DIR}/src/include/LightGBM"
"${TEMP_R_DIR}/src/include/LightGBM/utils"
mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt"
mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/utils/fmt"
cp \
external_libs/fmt/include/fmt/*.h \
"${TEMP_R_DIR}/src/include/LightGBM/fmt/"
"${TEMP_R_DIR}/src/include/LightGBM/utils/fmt"
# including only specific files from Eigen, to keep the R package
# small and avoid redistributing code with licenses incompatible with
@ -155,33 +155,6 @@ cd "${TEMP_R_DIR}"
done
find . -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' -exec rm {} \;
sed \
-i.bak \
-e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \
src/include/LightGBM/utils/common.h
sed \
-i.bak \
-e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \
src/include/LightGBM/utils/common.h
# When building an R package with 'configure', it seems
# you're guaranteed to get a shared library called
# <packagename>.so/dll. The package source code expects
# 'lib_lightgbm.so', not 'lightgbm.so', to comply with the way
# this project has historically handled installation
echo "Changing lib_lightgbm to lightgbm"
for file in R/*.R; do
sed \
-i.bak \
-e 's/lib_lightgbm/lightgbm/' \
"${file}"
done
sed \
-i.bak \
-e 's/lib_lightgbm/lightgbm/' \
NAMESPACE
# 'processx' is listed as a 'Suggests' dependency in DESCRIPTION
# because it is used in install.libs.R, a file that is not
# included in the CRAN distribution of the package
@ -191,8 +164,7 @@ cd "${TEMP_R_DIR}"
DESCRIPTION
echo "Cleaning sed backup files"
rm R/*.R.bak
rm NAMESPACE.bak
rm *.bak
cd "${ORIG_WD}"
@ -230,8 +202,12 @@ if ${BUILD_VIGNETTES} ; then
rm -f ./lightgbm/src/utils/*.o
echo "re-tarring ${TARBALL_NAME}"
# --no-xattrs is the default in GNU tar but not in some distributions of BSD tar.
# Enable it here to avoid errors on macOS.
# ref: https://stackoverflow.com/a/74373784/3986677
tar \
-cz \
--no-xattrs \
-f "${TARBALL_NAME}" \
lightgbm \
> /dev/null 2>&1

View file

@ -40,8 +40,6 @@
# Compile CUDA version.
# --gpu
# Compile GPU version.
# --hdfs
# Compile HDFS version.
# --integrated-opencl
# Compile integrated OpenCL version.
# --mingw
@ -62,7 +60,7 @@
# Install into user-specific instead of global site-packages directory.
# Only used with 'install' command.
set -e -u
set -e -E -u
echo "building lightgbm"
@ -148,9 +146,6 @@ while [ $# -gt 0 ]; do
--gpu)
BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.USE_GPU=ON"
;;
--hdfs)
BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.USE_HDFS=ON"
;;
--integrated-opencl)
BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.__INTEGRATE_OPENCL=ON"
;;
@ -317,12 +312,15 @@ if test "${INSTALL}" = true; then
echo 'requires = ["setuptools"]' >> ./pyproject.toml
echo 'build-backend = "setuptools.build_meta"' >> ./pyproject.toml
echo "" >> ./pyproject.toml
echo "recursive-include lightgbm *.dll *.so" > ./MANIFEST.in
echo "recursive-include lightgbm *.dll *.dylib *.so" > ./MANIFEST.in
echo "" >> ./MANIFEST.in
mkdir -p ./lightgbm/lib
if test -f ../lib_lightgbm.so; then
echo "found pre-compiled lib_lightgbm.so"
cp ../lib_lightgbm.so ./lightgbm/lib/lib_lightgbm.so
elif test -f ../lib_lightgbm.dylib; then
echo "found pre-compiled lib_lightgbm.dylib"
cp ../lib_lightgbm.dylib ./lightgbm/lib/lib_lightgbm.dylib
elif test -f ../Release/lib_lightgbm.dll; then
echo "found pre-compiled Release/lib_lightgbm.dll"
cp ../Release/lib_lightgbm.dll ./lightgbm/lib/lib_lightgbm.dll
@ -360,14 +358,20 @@ fi
if test "${INSTALL}" = true; then
echo "--- installing lightgbm ---"
# ref for use of '--find-links': https://stackoverflow.com/a/52481267/3986677
cd ../dist
if test "${BUILD_WHEEL}" = true; then
PACKAGE_NAME="lightgbm*.whl"
else
PACKAGE_NAME="lightgbm*.tar.gz"
fi
# ref for use of '--find-links': https://stackoverflow.com/a/52481267/3986677
pip install \
${PIP_INSTALL_ARGS} \
--force-reinstall \
--no-cache-dir \
--no-deps \
--find-links=. \
lightgbm
${PACKAGE_NAME}
cd ../
fi

View file

@ -398,42 +398,6 @@ description_contents <- gsub(
)
writeLines(description_contents, DESCRIPTION_FILE)
# CMake-based builds can't currently use R's builtin routine registration,
# so have to update NAMESPACE manually, with a statement like this:
#
# useDynLib(lib_lightgbm, LGBM_DatasetCreateFromFile_R, ...)
#
# See https://cran.r-project.org/doc/manuals/r-release/R-exts.html#useDynLib for
# documentation of this approach, where the NAMESPACE file uses a statement like
# useDynLib(foo, myRoutine, myOtherRoutine)
NAMESPACE_FILE <- file.path(TEMP_R_DIR, "NAMESPACE")
namespace_contents <- readLines(NAMESPACE_FILE)
dynlib_line <- grep(
pattern = "^useDynLib"
, x = namespace_contents
)
c_api_contents <- readLines(file.path(TEMP_SOURCE_DIR, "src", "lightgbm_R.h"))
c_api_contents <- c_api_contents[startsWith(c_api_contents, "LIGHTGBM_C_EXPORT")]
c_api_contents <- gsub(
pattern = "LIGHTGBM_C_EXPORT SEXP "
, replacement = ""
, x = c_api_contents
, fixed = TRUE
)
c_api_symbols <- gsub(
pattern = "\\(.*"
, replacement = ""
, x = c_api_contents
)
dynlib_statement <- paste0(
"useDynLib(lib_lightgbm, "
, toString(c_api_symbols)
, ")"
)
namespace_contents[dynlib_line] <- dynlib_statement
writeLines(namespace_contents, NAMESPACE_FILE)
# NOTE: --keep-empty-dirs is necessary to keep the deep paths expected
# by CMake while also meeting the CRAN req to create object files
# on demand

View file

@ -16,20 +16,26 @@ include(FetchContent)
FetchContent_Declare(OpenCL-Headers GIT_REPOSITORY ${OPENCL_HEADER_REPOSITORY} GIT_TAG ${OPENCL_HEADER_TAG})
FetchContent_GetProperties(OpenCL-Headers)
if(NOT OpenCL-Headers_POPULATED)
FetchContent_Populate(OpenCL-Headers)
FetchContent_MakeAvailable(OpenCL-Headers)
message(STATUS "Populated OpenCL Headers")
endif()
set(OPENCL_ICD_LOADER_HEADERS_DIR ${opencl-headers_SOURCE_DIR} CACHE PATH "") # for OpenCL ICD Loader
set(OpenCL_INCLUDE_DIR ${opencl-headers_SOURCE_DIR} CACHE PATH "") # for Boost::Compute
FetchContent_Declare(OpenCL-ICD-Loader GIT_REPOSITORY ${OPENCL_LOADER_REPOSITORY} GIT_TAG ${OPENCL_LOADER_TAG})
FetchContent_Declare(
OpenCL-ICD-Loader
GIT_REPOSITORY
${OPENCL_LOADER_REPOSITORY}
GIT_TAG
${OPENCL_LOADER_TAG}
EXCLUDE_FROM_ALL
)
FetchContent_GetProperties(OpenCL-ICD-Loader)
if(NOT OpenCL-ICD-Loader_POPULATED)
FetchContent_Populate(OpenCL-ICD-Loader)
FetchContent_MakeAvailable(OpenCL-ICD-Loader)
if(WIN32)
set(USE_DYNAMIC_VCXX_RUNTIME ON)
endif()
add_subdirectory(${opencl-icd-loader_SOURCE_DIR} ${opencl-icd-loader_BINARY_DIR} EXCLUDE_FROM_ALL)
message(STATUS "Populated OpenCL ICD Loader")
endif()
list(APPEND INTEGRATED_OPENCL_INCLUDES ${OPENCL_ICD_LOADER_HEADERS_DIR})

View file

@ -8,7 +8,7 @@ ENV \
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
ca-certificates \
cmake \
curl \
build-essential \
gcc \
g++ \
@ -16,16 +16,20 @@ RUN apt-get update -y && \
libomp-dev && \
rm -rf /var/lib/apt/lists/*
RUN curl -L -o cmake.sh https://github.com/Kitware/CMake/releases/download/v3.29.2/cmake-3.29.2-linux-x86_64.sh && \
chmod +x cmake.sh && \
sh ./cmake.sh --prefix=/usr/local --skip-license && \
rm cmake.sh
RUN git clone \
--recursive \
--branch stable \
--depth 1 \
https://github.com/Microsoft/LightGBM && \
cd ./LightGBM && \
cmake -B build -S . && \
cmake --build build -j4 && \
cmake --install build && \
cd "${HOME}" && \
rm -rf LightGBM

View file

@ -40,9 +40,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# LightGBM
WORKDIR /opt
RUN git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \
cd LightGBM && \
cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=${OPENCL_LIBRARIES}/libOpenCL.so.1 -DOpenCL_INCLUDE_DIR=$OPENCL_INCLUDE_DIR && \
OPENCL_HEADERS=$OPENCL_INCLUDE_DIR LIBOPENCL=$OPENCL_LIBRARIES cmake --build build
FROM gcr.io/distroless/cc-debian10
COPY --from=build \

View file

@ -40,9 +40,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# LightGBM
WORKDIR /opt
RUN git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \
cd LightGBM && \
cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=${OPENCL_LIBRARIES}/libOpenCL.so.1 -DOpenCL_INCLUDE_DIR=$OPENCL_INCLUDE_DIR && \
OPENCL_HEADERS=$OPENCL_INCLUDE_DIR LIBOPENCL=$OPENCL_LIBRARIES cmake --build build
FROM nvidia/opencl:runtime
RUN apt-get update && apt-get install -y --no-install-recommends \

View file

@ -82,9 +82,9 @@ RUN conda config --set always_yes yes --set changeps1 no && \
RUN cd /usr/local/src && mkdir lightgbm && cd lightgbm && \
git clone --recursive --branch stable --depth 1 https://github.com/microsoft/LightGBM && \
cd LightGBM && \
cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ && \
OPENCL_HEADERS=/usr/local/cuda-8.0/targets/x86_64-linux/include LIBOPENCL=/usr/local/cuda-8.0/targets/x86_64-linux/lib cmake --build build
ENV PATH /usr/local/src/lightgbm/LightGBM:${PATH}

View file

@ -73,11 +73,6 @@ GPU Support
- Refer to `GPU Tutorial <./GPU-Tutorial.rst>`__ and `GPU Targets <./GPU-Targets.rst>`__.
Recommendations for gcc Users (MinGW, \*nix)
--------------------------------------------
- Refer to `gcc Tips <./gcc-Tips.rst>`__.
Support for Position Bias Treatment
------------------------------------
@ -113,8 +108,8 @@ Unlike a categorical feature, however, ``positions`` are used to adjust the targ
The position file corresponds to the training data file line by line, with one position per line. If the training data file is named ``train.txt``, the position file should be named ``train.txt.position`` and placed in the same folder as the data file.
In this case, LightGBM will load the position file automatically if it exists. The positions can also be specified through the ``Dataset`` constructor when using the Python API. If the positions are specified in both ways, the ``.position`` file will be ignored.
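For example, a minimal sketch of supplying positions through the Python API (assuming a version of the package whose ``Dataset`` constructor accepts a ``position`` argument; the data below is synthetic and purely illustrative):

.. code-block:: python

    import numpy as np
    import lightgbm as lgb

    rng = np.random.default_rng(0)
    X = rng.normal(size=(1000, 10))          # query-document features
    y = rng.integers(0, 4, size=1000)        # ordinal relevance labels
    group = [100] * 10                       # 10 queries with 100 documents each
    positions = np.tile(np.arange(100), 10)  # position of each document in its result list

    train_set = lgb.Dataset(X, label=y, group=group, position=positions)
    ranker = lgb.train({"objective": "lambdarank"}, train_set, num_boost_round=10)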
Currently, LightGBM implements an approach to modeling position bias that uses the idea of Generalized Additive Models (`GAM <https://en.wikipedia.org/wiki/Generalized_additive_model>`_) to linearly decompose the document score ``s`` into the sum of a relevance component ``f`` and a positional component ``g``: ``s(x, pos) = f(x) + g(pos)`` where the former component depends on the original query-document features and the latter depends on the position of an item.
During training, the compound scoring function ``s(x, pos)`` is fit with a standard ranking algorithm (e.g., LambdaMART), which boils down to jointly learning the relevance component ``f(x)`` (it is later returned as an unbiased model) and the position factors ``g(pos)`` that help better explain the observed (biased) labels.
Similar score decomposition ideas have previously been applied for classification & pointwise ranking tasks with assumptions of binary labels and binary relevance (a.k.a. "two-tower" models, refer to the papers: `Towards Disentangling Relevance and Bias in Unbiased Learning to Rank <https://arxiv.org/abs/2212.13937>`_, `PAL: a position-bias aware learning framework for CTR prediction in live recommender systems <https://dl.acm.org/doi/10.1145/3298689.3347033>`_, `A General Framework for Debiasing in CTR Prediction <https://arxiv.org/abs/2112.02767>`_).
In LightGBM, we adapt this idea to general pairwise Learning-to-Rank with arbitrary ordinal relevance labels.
Besides, GAMs have been used in the context of explainable ML (`Accurate Intelligible Models with Pairwise Interactions <https://www.cs.cornell.edu/~yinlou/papers/lou-kdd13.pdf>`_) to linearly decompose the contribution of each feature (and possibly their pairwise interactions) to the overall score, for subsequent analysis and interpretation of their effects in the trained models.

View file

@ -149,7 +149,7 @@ and copy memory as required by creating new processes instead of forking (or, us
Cloud platform container services may cause LightGBM to hang, if they use Linux fork to run multiple containers on a
single instance. For example, LightGBM hangs in AWS Batch array jobs, which `use the ECS agent
<https://aws.amazon.com/batch/faqs>`__ to manage multiple running jobs. Setting ``nthreads=1`` mitigates the issue.
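For example, a minimal sketch with the Python package (``num_threads`` is the primary name of the ``nthreads`` alias):

.. code-block:: python

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(100, 5)
    y = np.random.rand(100)
    # keep LightGBM single-threaded to avoid fork-related hangs
    bst = lgb.train({"objective": "regression", "num_threads": 1}, lgb.Dataset(X, label=y))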
12. Why is early stopping not enabled by default in LightGBM?
-------------------------------------------------------------
@ -202,7 +202,7 @@ If you are using any Python package that depends on ``threadpoolctl``, you also
Detailed description of conflicts between multiple OpenMP instances is provided in the `following document <https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md>`__.
**Solution**: Assuming you are using LightGBM Python-package and conda as a package manager, we strongly recommend using ``conda-forge`` channel as the only source of all your Python package installations because it contains built-in patches to workaround OpenMP conflicts. Some other workarounds are listed `here <https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md>`__ under the "Workarounds for Intel OpenMP and LLVM OpenMP case" section.
If this is not your case, then you should find conflicting OpenMP library installations on your own and leave only one of them.
@ -231,11 +231,28 @@ In older versions, avoid printing the ``Dataset`` after calling ``setinfo()``.
As of LightGBM v4.0.0, ``setinfo()`` has been replaced by a new method, ``set_field()``.
3. ``error in data.table::data.table()...argument 2 is NULL``
-------------------------------------------------------------
If you are experiencing this error when running ``lightgbm``, you may be facing the same issue reported in `#2715 <https://github.com/microsoft/LightGBM/issues/2715>`_ and later in `#2989 <https://github.com/microsoft/LightGBM/pull/2989#issuecomment-614374151>`_. We have seen that in some situations, using ``data.table`` 1.11.x results in this error. To get around this, you can upgrade your version of ``data.table`` to at least version 1.12.0.
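For example, one way to upgrade is simply reinstalling the latest release from CRAN:

.. code-block:: R

    install.packages("data.table")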
4. ``package/dependency Matrix is not available ...``
-------------------------------------------------------
In April 2024, ``Matrix==1.7-0`` was published to CRAN.
That version had a floor of ``R (>=4.4.0)``.
``{Matrix}`` is a hard runtime dependency of ``{lightgbm}``, so on any version of R older than ``4.4.0``, running ``install.packages("lightgbm")`` results in something like the following.
.. code-block:: text
package Matrix is not available for this version of R
To fix that without upgrading to R 4.4.0 or greater, manually install an older version of ``{Matrix}``.
.. code-block:: R
install.packages('https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.6-5.tar.gz', repos = NULL)
------
Python-package
@ -305,13 +322,15 @@ We are doing our best to provide universal wheels which have high running speed
However, sometimes it's just impossible to guarantee that LightGBM will work in any specific environment (see `Microsoft/LightGBM#1743 <https://github.com/microsoft/LightGBM/issues/1743>`__).
Therefore, the first thing you should try in case of segfaults is **compiling from source** using ``pip install --no-binary lightgbm lightgbm``.
For the OS-specific prerequisites see https://github.com/microsoft/LightGBM/blob/master/python-package/README.rst.
Also, feel free to post a new issue in our GitHub repository. We always look at each case individually and try to find a root cause.
4. I would like to install LightGBM from conda. What channel should I choose?
-----------------------------------------------------------------------------
We strongly recommend installation from the ``conda-forge`` channel and not from the ``default`` one.
For some specific examples, see `this comment <https://github.com/microsoft/LightGBM/issues/4948#issuecomment-1013766397>`__.
In addition, as of ``lightgbm==4.4.0``, the ``conda-forge`` package automatically supports CUDA-based GPU acceleration.
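For example:

.. code-block:: sh

    conda install -c conda-forge lightgbm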

View file

@ -21,7 +21,7 @@ LightGBM uses histogram-based algorithms\ `[4, 5, 6] <#references>`__, which buc
- To get one leaf's histograms in a binary tree, use the histogram subtraction of its parent and its neighbor
- So it needs to construct histograms for only one leaf (with smaller ``#data`` than its neighbor). It then can get histograms of its neighbor by histogram subtraction with small cost (``O(#bins)``); see the sketch after this list
- **Reduce memory usage**
- Replaces continuous values with discrete bins. If ``#bins`` is small, can use small data type, e.g. uint8\_t, to store training data
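A minimal sketch of the histogram-subtraction trick described above (illustrative only; the arrays stand in for per-bin gradient statistics):

.. code-block:: python

    import numpy as np

    rng = np.random.default_rng(0)
    n_bins = 8
    # per-bin gradient sums accumulated when the parent node was built
    parent_hist = rng.random(n_bins) * 10.0
    # histogram built directly from the smaller child's data points
    small_leaf_hist = parent_hist * rng.uniform(0.0, 1.0, n_bins)
    # the larger sibling's histogram follows by subtraction in O(#bins),
    # without another pass over that child's (larger) share of the data
    large_leaf_hist = parent_hist - small_leaf_hist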

View file

@ -107,7 +107,7 @@ Example of using GPU (``gpu_platform_id = 0`` and ``gpu_device_id = 0`` in our s
[LightGBM] [Info] 40 dense feature groups (0.12 MB) transferred to GPU in 0.004211 secs. 76 sparse feature groups.
[LightGBM] [Info] No further splits with positive gain, best gain: -inf
[LightGBM] [Info] Trained a tree with leaves=16 and depth=8
[1]: test's rmse:1.10643e-17
[LightGBM] [Info] No further splits with positive gain, best gain: -inf
[LightGBM] [Info] Trained a tree with leaves=7 and depth=5
[2]: test's rmse:0
@ -145,11 +145,11 @@ Example of using CPU (``gpu_platform_id = 0``, ``gpu_device_id = 1``). The GPU d
[LightGBM] [Info] 40 dense feature groups (0.12 MB) transferred to GPU in 0.004540 secs. 76 sparse feature groups.
[LightGBM] [Info] No further splits with positive gain, best gain: -inf
[LightGBM] [Info] Trained a tree with leaves=16 and depth=8
[1]: test's rmse:1.10643e-17
[LightGBM] [Info] No further splits with positive gain, best gain: -inf
[LightGBM] [Info] Trained a tree with leaves=7 and depth=5
[2]: test's rmse:0
Known issues:

Просмотреть файл

@ -3,8 +3,6 @@ LightGBM GPU Tutorial
The purpose of this document is to give you a quick step-by-step tutorial on GPU training.
For Windows, please see `GPU Windows Tutorial <./GPU-Windows.rst>`__.
We will use the GPU instance on `Microsoft Azure cloud computing platform`_ for demonstration,
but you can use any machine with modern AMD or NVIDIA GPUs.
@ -59,13 +57,10 @@ Now we are ready to checkout LightGBM and compile it with GPU support:
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_GPU=1
# if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following:
# cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/
cmake --build build -j$(nproc)
Two binaries will be generated: ``lightgbm`` and ``lib_lightgbm.so``.
@ -179,8 +174,6 @@ Further Reading
- `GPU SDK Correspondence and Device Targeting Table <./GPU-Targets.rst>`__
- `GPU Windows Tutorial <./GPU-Windows.rst>`__
Reference
---------

View file

@ -1,617 +1,3 @@
GPU Windows Compilation
=======================
The content of this document was very outdated and has been removed to avoid misleading readers.
This guide is for the MinGW build.
For the MSVC (Visual Studio) build with GPU, please refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__.
(We recommend using it since it is much easier.)
Install LightGBM GPU version in Windows (CLI / R / Python), using MinGW/gcc
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This is for a vanilla installation of Boost, including full compilation steps from source without precompiled libraries.
Installation steps (depending on what you are going to do):
- Install the appropriate OpenCL SDK
- Install MinGW
- Install Boost
- Install Git
- Install CMake
- Create LightGBM binaries
- Debugging LightGBM in CLI (if the GPU version is crashing, or for any other crash)
If you wish to use another compiler like Visual Studio C++ compiler, you need to adapt the steps to your needs.
For this compilation tutorial, we are using the AMD SDK for our OpenCL steps.
However, you are free to use any OpenCL SDK you want; you just need to adjust the PATH correctly.
You will also need administrator rights. This will not work without them.
At the end, you can restore your original PATH.
--------------
Modifying PATH (for newbies)
----------------------------
To modify PATH, just follow the pictures after going to the ``Control Panel``:
.. image:: ./_static/images/screenshot-system.png
:align: center
:target: ./_static/images/screenshot-system.png
:alt: A screenshot of the System option under System and Security of the Control Panel.
Then, go to ``Advanced`` > ``Environment Variables...``:
.. image:: ./_static/images/screenshot-advanced-system-settings.png
:align: center
:target: ./_static/images/screenshot-advanced-system-settings.png
:alt: A screenshot of the System Properties window.
Under ``System variables``, select the variable ``Path``:
.. image:: ./_static/images/screenshot-environment-variables.png
:align: center
:target: ./_static/images/screenshot-environment-variables.png
:alt: A screenshot of the Environment variables window with variable path selected under the system variables.
--------------
Antivirus Performance Impact
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This does not apply to you if you do not use a third-party antivirus or the default preinstalled antivirus on Windows.
**Windows Defender or any other antivirus will have a significant impact on the speed you will be able to perform the steps.**
It is recommended to **turn them off temporarily** until you have finished building and setting up everything, then turn them back on if you are using them.
--------------
OpenCL SDK Installation
-----------------------
Installing the appropriate OpenCL SDK requires you to download the correct vendor source SDK.
You need to know what hardware you are going to use LightGBM on!
- For running on Intel, get `Intel SDK for OpenCL`_ (**NOT RECOMMENDED**).
- For running on AMD, get AMD APP SDK (downloads `for Linux`_ and `for Windows`_). You may want to replace the ``OpenCL.dll`` from the GPU driver package with the one from the SDK, if the one shipped with the driver lacks some functions.
- For running on NVIDIA, get `CUDA Toolkit`_.
- Or you can try to use the `Khronos official OpenCL headers`_; the CMake module will automatically find the OpenCL library used in your system, though the result may not be portable.
Further reading and correspondence table (especially if you intend to use cross-platform devices,
like Intel CPU with AMD APP SDK): `GPU SDK Correspondence and Device Targeting Table <./GPU-Targets.rst>`__.
**Warning**: using Intel OpenCL is not recommended and may crash your machine due to being non-compliant with OpenCL standards.
If your objective is to use LightGBM + OpenCL on CPU, please use the AMD APP SDK instead (it can also run on Intel CPUs without any issues).
--------------
MinGW Correct Compiler Selection
--------------------------------
If you are expecting to use LightGBM without R, you need to install MinGW.
Installing MinGW is straightforward: download `this`_.
Make sure you are using the x86\_64 architecture, and do not modify anything else.
You may choose a version other than the most recent one if you need a previous MinGW version.
.. image:: ./_static/images/screenshot-mingw-installation.png
:align: center
:target: ./_static/images/screenshot-mingw-installation.png
:alt: A screenshot of the Min G W installation setup settings window.
Then, add to your PATH the following (to adjust to your MinGW version):
.. code:: text
C:\Program Files\mingw-w64\x86_64-5.3.0-posix-seh-rt_v4-rev0\mingw64\bin
**Warning**: R users (even if you do not want LightGBM for R)
If you have RTools and MinGW installed, and wish to use LightGBM in R,
get rid of MinGW from PATH (to keep: ``c:\Rtools\bin;c:\Rtools\mingw_32\bin`` for 32-bit R installation,
``c:\Rtools\bin;c:\Rtools\mingw_64\bin`` for 64-bit R installation).
You can check which MinGW version you are using by running the following in a command prompt: ``gcc -v``:
.. image:: ./_static/images/screenshot-r-mingw-used.png
:align: center
:target: ./_static/images/screenshot-r-mingw-used.png
:alt: A screenshot of the administrator command prompt where G C C version is being checked.
To check whether you need 32-bit or 64-bit MinGW for R, install LightGBM as usual and check for the following:
.. code:: r
* installing *source* package 'lightgbm' ...
** libs
c:/Rtools/mingw_64/bin/g++
If it says ``mingw_64`` then you need the 64-bit version (PATH with ``c:\Rtools\bin;c:\Rtools\mingw_64\bin``),
otherwise you need the 32-bit version (``c:\Rtools\bin;c:\Rtools\mingw_32\bin``), the latter being a very rare and untested case.
NOTE: If you are using ``Rtools`` 4.0 or later, the path will have ``mingw64`` instead of ``mingw_64`` (PATH with ``C:\rtools40\mingw64\bin``), and ``mingw32`` instead of ``mingw_32`` (``C:\rtools40\mingw32\bin``). The 32-bit version remains an unsupported solution under Rtools 4.0.
Download the prebuilt Boost
---------------------------
Download `Prebuilt Boost x86_64`_ or `Prebuilt Boost i686`_ and unpack them with `7zip`_; alternatively, you can build Boost from source.
--------------
Boost Compilation
-----------------
Installing Boost requires downloading Boost and installing it.
It takes about 10 minutes to several hours depending on your CPU speed and network speed.
We will assume an installation in ``C:\boost`` and a general installation (like in Unix variants: without versioning and without type tags).
There is one mandatory step to check the compiler:
- **Warning**: if you want the R installation:
If you already have MinGW in your PATH variable, get rid of it (you will link to the wrong compiler otherwise).
- **Warning**: if you want the CLI installation:
If you already have Rtools in your PATH variable, get rid of it (you will link to the wrong compiler otherwise).
- R installation must have Rtools in PATH
- CLI / Python installation must have MinGW (not Rtools) in PATH
In addition, assuming you are going to use ``C:\boost`` for the folder path,
you should now add the following to PATH: ``C:\boost\boost-build\bin``, ``C:\boost\boost-build\include\boost``.
Adjust ``C:\boost`` if you install it elsewhere.
We can now start downloading and compiling the required Boost libraries:
- Download `Boost`_ (for example, the filename for 1.63.0 version is ``boost_1_63_0.zip``)
- Extract the archive to ``C:\boost``
- Open a command prompt, and run
.. code:: console
cd C:\boost\boost_1_63_0\tools\build
bootstrap.bat gcc
b2 install --prefix="C:\boost\boost-build" toolset=gcc
cd C:\boost\boost_1_63_0
To build the Boost libraries, you have two choices for command prompt:
- If you have only a single core, you can use the default
.. code:: console
b2 install --build_dir="C:\boost\boost-build" --prefix="C:\boost\boost-build" toolset=gcc --with=filesystem,system threading=multi --layout=system release
- If you want a multithreaded library build (faster), add ``-j N``, replacing N with the number of cores/threads you have.
For instance, for 2 cores, you would run
.. code:: console
b2 install --build_dir="C:\boost\boost-build" --prefix="C:\boost\boost-build" toolset=gcc --with=filesystem,system threading=multi --layout=system release -j 2
Ignore all the errors popping up (about Python, etc.); they do not matter here.
Your folder should look like this at the end (not fully detailed):
.. code:: text
- C
|--- boost
|------ boost_1_63_0
|--------- some folders and files
|------ boost-build
|--------- bin
|--------- include
|------------ boost
|--------- lib
|--------- share
This is what you should (approximately) get at the end of Boost compilation:
.. image:: ./_static/images/screenshot-boost-compiled.png
:align: center
:target: ./_static/images/screenshot-boost-compiled.png
:alt: A screenshot of the command prompt that ends with text that reads - updated 14621 targets.
If you are getting an error:
- Wipe your Boost directory
- Close the command prompt
- Make sure you added
``C:\boost\boost-build\bin``, ``C:\boost\boost-build\include\boost`` to
your PATH (adjust accordingly if you use another folder)
- Do the Boost compilation steps again (extract => command prompt => ``cd`` => ``bootstrap`` => ``b2`` => ``cd`` => ``b2``)
--------------
Git Installation
----------------
Installing Git for Windows is straightforward, use the following `link`_.
.. image:: ./_static/images/screenshot-git-for-windows.png
:align: center
:target: ./_static/images/screenshot-git-for-windows.png
:alt: A screenshot of the website to download git that shows various versions of git compatible with 32 bit and 64 bit Windows separately.
Now, we can fetch the LightGBM repository from GitHub. Open Git Bash and run the following commands:
.. code:: console
cd C:/
mkdir github_repos
cd github_repos
git clone --recursive https://github.com/microsoft/LightGBM
Your LightGBM repository copy should now be under ``C:\github_repos\LightGBM``.
You are free to use any folder you want, but you will have to adapt the later steps accordingly.
Keep Git Bash open.
--------------
CMake Installation, Configuration, Generation
---------------------------------------------
**CLI / Python users only**
Installing CMake requires one download first and then a lot of configuration for LightGBM:
.. image:: ./_static/images/screenshot-downloading-cmake.png
:align: center
:target: ./_static/images/screenshot-downloading-cmake.png
:alt: A screenshot of the binary distributions of C Make for downloading on 64 bit Windows.
- Download `CMake`_
- Install CMake
- Run cmake-gui
- Select the folder where you put LightGBM for ``Where is the source code``,
default using our steps would be ``C:/github_repos/LightGBM``
- Copy the folder name, and add ``/build`` for "Where to build the binaries",
default using our steps would be ``C:/github_repos/LightGBM/build``
- Click ``Configure``
.. image:: ./_static/images/screenshot-create-directory.png
:align: center
:target: ./_static/images/screenshot-create-directory.png
:alt: A screenshot with a pop-up window that reads - Build directory does not exist, should I create it?
.. image:: ./_static/images/screenshot-mingw-makefiles-to-use.png
:align: center
:target: ./_static/images/screenshot-mingw-makefiles-to-use.png
:alt: A screenshot that asks to specify the generator for the project which should be selected as Min G W makefiles and selected as the use default native compilers option.
- Look up ``USE_GPU`` and check the checkbox
.. image:: ./_static/images/screenshot-use-gpu.png
:align: center
:target: ./_static/images/screenshot-use-gpu.png
:alt: A screenshot of the C Make window where the checkbox with the test Use G P U is checked.
- Click ``Configure``
You should get (approximately) the following after clicking Configure:
.. image:: ./_static/images/screenshot-configured-lightgbm.png
:align: center
:target: ./_static/images/screenshot-configured-lightgbm.png
:alt: A screenshot of the C Make window after clicking on the configure button.
.. code:: text
Looking for CL_VERSION_2_0
Looking for CL_VERSION_2_0 - found
Found OpenCL: C:/Windows/System32/OpenCL.dll (found version "2.0")
OpenCL include directory:C:/Program Files (x86)/AMD APP SDK/3.0/include
Boost version: 1.63.0
Found the following Boost libraries:
filesystem
system
Configuring done
- Click ``Generate`` to get the following message:
.. code:: text
Generating done
This is straightforward, as CMake provides a lot of help in locating the correct elements.
--------------
LightGBM Compilation (CLI: final step)
--------------------------------------
Installation in CLI
~~~~~~~~~~~~~~~~~~~
**CLI / Python users**
Creating LightGBM libraries is very simple as all the important and hard steps were done before.
You can do everything in the Git Bash console you left open:
- If you closed Git Bash console previously, run this to get back to the build folder:
.. code:: console
cd C:/github_repos/LightGBM/build
- If you did not close the Git Bash console previously, run this to get to the build folder:
.. code:: console
cd LightGBM/build
- Setup MinGW as ``make`` using
.. code:: console
alias make='mingw32-make'
otherwise, beware of errors and name clashes!
- In Git Bash, run ``make`` and watch LightGBM being compiled!
.. image:: ./_static/images/screenshot-lightgbm-with-gpu-support-compiled.png
:align: center
:target: ./_static/images/screenshot-lightgbm-with-gpu-support-compiled.png
:alt: A screenshot of the git bash window with Light G B M successfully installed.
If everything was done correctly, you have now compiled the LightGBM CLI with GPU support!
Testing in CLI
~~~~~~~~~~~~~~
You can now test LightGBM directly in CLI in a **command prompt** (not Git Bash):
.. code:: console
cd C:/github_repos/LightGBM/examples/binary_classification
"../../lightgbm.exe" config=train.conf data=binary.train valid=binary.test objective=binary device=gpu
.. image:: ./_static/images/screenshot-lightgbm-in-cli-with-gpu.png
:align: center
:target: ./_static/images/screenshot-lightgbm-in-cli-with-gpu.png
:alt: A screenshot of the command prompt where a binary classification model is being trained using Light G B M.
Congratulations on reaching this stage!
To learn how to target a correct CPU or GPU for training, please see: `GPU SDK Correspondence and Device Targeting Table <./GPU-Targets.rst>`__.
--------------
Debugging LightGBM Crashes in CLI
---------------------------------
Now that you compiled LightGBM, you try it... and you always see a segmentation fault or an undocumented crash with GPU support:
.. image:: ./_static/images/screenshot-segmentation-fault.png
:align: center
:target: ./_static/images/screenshot-segmentation-fault.png
:alt: A screenshot of the command prompt where a segmentation fault has occurred while using Light G B M.
Please check if you are using the right device (``Using GPU device: ...``). You can find a list of your OpenCL devices using `GPUCapsViewer`_, and make sure you are using a discrete (AMD/NVIDIA) GPU if you have both integrated (Intel) and discrete GPUs installed.
Also, try to set ``gpu_device_id = 0`` and ``gpu_platform_id = 0`` or ``gpu_device_id = -1`` and ``gpu_platform_id = -1`` to use the first platform and device or the default platform and device.
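For example, these parameters can be set in the config file used for training (a minimal illustrative snippet):

.. code:: text

    device = gpu
    gpu_platform_id = 0
    gpu_device_id = 0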
If it still does not work, then you should follow all the steps below.
You will have to redo the compilation steps for LightGBM to add debugging mode. This involves:
- Deleting ``C:/github_repos/LightGBM/build`` folder
- Deleting ``lightgbm.exe``, ``lib_lightgbm.dll``, and ``lib_lightgbm.dll.a`` files
.. image:: ./_static/images/screenshot-files-to-remove.png
:align: center
:target: ./_static/images/screenshot-files-to-remove.png
:alt: A screenshot of the Light G B M folder with 1 folder and 3 files selected to be removed.
Once you have removed the folder and files, go into CMake, and follow the usual steps.
Before clicking "Generate", click on "Add Entry":
.. image:: ./_static/images/screenshot-added-manual-entry-in-cmake.png
:align: center
:target: ./_static/images/screenshot-added-manual-entry-in-cmake.png
:alt: A screenshot of the Cache Entry popup where the name is set to CMAKE_BUILD_TYPE in all caps, the type is set to STRING in all caps and the value is set to Debug.
In addition, click on Configure and Generate:
.. image:: ./_static/images/screenshot-configured-and-generated-cmake.png
:align: center
:target: ./_static/images/screenshot-configured-and-generated-cmake.png
:alt: A screenshot of the C Make window after clicking on configure and generate.
And then, follow the regular LightGBM CLI installation from there.
Once you have installed LightGBM CLI, assuming your LightGBM is in ``C:\github_repos\LightGBM``,
open a command prompt and run the following:
.. code:: console
gdb --args "../../lightgbm.exe" config=train.conf data=binary.train valid=binary.test objective=binary device=gpu
.. image:: ./_static/images/screenshot-debug-run.png
:align: center
:target: ./_static/images/screenshot-debug-run.png
:alt: A screenshot of the command prompt after the command above is run.
Type ``run`` and press the Enter key.
You will probably get something similar to this:
.. code:: text
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 6143
[LightGBM] [Info] Number of data: 7000, number of used features: 28
[New Thread 105220.0x1a62c]
[LightGBM] [Info] Using GPU Device: Oland, Vendor: Advanced Micro Devices, Inc.
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
Program received signal SIGSEGV, Segmentation fault.
0x00007ffbb37c11f1 in strlen () from C:\Windows\system32\msvcrt.dll
(gdb)
There, write ``backtrace`` and press the Enter key each time gdb offers the two choices (``<return>`` to continue, ``q <return>`` to quit):
.. code:: text
Program received signal SIGSEGV, Segmentation fault.
0x00007ffbb37c11f1 in strlen () from C:\Windows\system32\msvcrt.dll
(gdb) backtrace
#0 0x00007ffbb37c11f1 in strlen () from C:\Windows\system32\msvcrt.dll
#1 0x000000000048bbe5 in std::char_traits<char>::length (__s=0x0)
at C:/PROGRA~1/MINGW-~1/X86_64~1.0-P/mingw64/x86_64-w64-mingw32/include/c++/bits/char_traits.h:267
#2 std::operator+<char, std::char_traits<char>, std::allocator<char> > (__rhs="\\", __lhs=0x0)
at C:/PROGRA~1/MINGW-~1/X86_64~1.0-P/mingw64/x86_64-w64-mingw32/include/c++/bits/basic_string.tcc:1157
#3 boost::compute::detail::appdata_path[abi:cxx11]() () at C:/boost/boost-build/include/boost/compute/detail/path.hpp:38
#4 0x000000000048eec3 in boost::compute::detail::program_binary_path (hash="d27987d5bd61e2d28cd32b8d7a7916126354dc81", create=create@entry=false)
at C:/boost/boost-build/include/boost/compute/detail/path.hpp:46
#5 0x00000000004913de in boost::compute::program::load_program_binary (hash="d27987d5bd61e2d28cd32b8d7a7916126354dc81", ctx=...)
at C:/boost/boost-build/include/boost/compute/program.hpp:605
#6 0x0000000000490ece in boost::compute::program::build_with_source (
source="\n#ifndef _HISTOGRAM_256_KERNEL_\n#define _HISTOGRAM_256_KERNEL_\n\n#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n#pragma OPENC
L EXTENSION cl_khr_global_int32_base_atomics : enable\n\n//"..., context=...,
options=" -D POWER_FEATURE_WORKGROUPS=5 -D USE_CONSTANT_BUF=0 -D USE_DP_FLOAT=0 -D CONST_HESSIAN=0 -cl-strict-aliasing -cl-mad-enable -cl-no-signed-zeros -c
l-fast-relaxed-math") at C:/boost/boost-build/include/boost/compute/program.hpp:549
#7 0x0000000000454339 in LightGBM::GPUTreeLearner::BuildGPUKernels () at C:\LightGBM\src\treelearner\gpu_tree_learner.cpp:583
#8 0x00000000636044f2 in libgomp-1!GOMP_parallel () from C:\Program Files\mingw-w64\x86_64-5.3.0-posix-seh-rt_v4-rev0\mingw64\bin\libgomp-1.dll
#9 0x0000000000455e7e in LightGBM::GPUTreeLearner::BuildGPUKernels (this=this@entry=0x3b9cac0)
at C:\LightGBM\src\treelearner\gpu_tree_learner.cpp:569
#10 0x0000000000457b49 in LightGBM::GPUTreeLearner::InitGPU (this=0x3b9cac0, platform_id=<optimized out>, device_id=<optimized out>)
at C:\LightGBM\src\treelearner\gpu_tree_learner.cpp:720
#11 0x0000000000410395 in LightGBM::GBDT::ResetTrainingData (this=0x1f26c90, config=<optimized out>, train_data=0x1f28180, objective_function=0x1f280e0,
training_metrics=std::vector of length 2, capacity 2 = {...}) at C:\LightGBM\src\boosting\gbdt.cpp:98
#12 0x0000000000402e93 in LightGBM::Application::InitTrain (this=this@entry=0x23f9d0) at C:\LightGBM\src\application\application.cpp:213
---Type <return> to continue, or q <return> to quit---
#13 0x00000000004f0b55 in LightGBM::Application::Run (this=0x23f9d0) at C:/LightGBM/include/LightGBM/application.h:84
#14 main (argc=6, argv=0x1f21e90) at C:\LightGBM\src\main.cpp:7
Right-click the command prompt, click "Mark", and select all the text from the first line (with the command prompt containing gdb) to the last line printed, containing all the log, such as:
.. code:: text
C:\LightGBM\examples\binary_classification>gdb --args "../../lightgbm.exe" config=train.conf data=binary.train valid=binary.test objective=binary device=gpu
GNU gdb (GDB) 7.10.1
Copyright (C) 2015 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-w64-mingw32".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ../../lightgbm.exe...done.
(gdb) run
Starting program: C:\LightGBM\lightgbm.exe "config=train.conf" "data=binary.train" "valid=binary.test" "objective=binary" "device=gpu"
[New Thread 105220.0x199b8]
[New Thread 105220.0x783c]
[Thread 105220.0x783c exited with code 0]
[LightGBM] [Info] Finished loading parameters
[New Thread 105220.0x19490]
[New Thread 105220.0x1a71c]
[New Thread 105220.0x19a24]
[New Thread 105220.0x4fb0]
[Thread 105220.0x4fb0 exited with code 0]
[LightGBM] [Info] Loading weights...
[New Thread 105220.0x19988]
[Thread 105220.0x19988 exited with code 0]
[New Thread 105220.0x1a8fc]
[Thread 105220.0x1a8fc exited with code 0]
[LightGBM] [Info] Loading weights...
[New Thread 105220.0x1a90c]
[Thread 105220.0x1a90c exited with code 0]
[LightGBM] [Info] Finished loading data in 1.011408 seconds
[LightGBM] [Info] Number of positive: 3716, number of negative: 3284
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 6143
[LightGBM] [Info] Number of data: 7000, number of used features: 28
[New Thread 105220.0x1a62c]
[LightGBM] [Info] Using GPU Device: Oland, Vendor: Advanced Micro Devices, Inc.
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
Program received signal SIGSEGV, Segmentation fault.
0x00007ffbb37c11f1 in strlen () from C:\Windows\system32\msvcrt.dll
(gdb) backtrace
#0 0x00007ffbb37c11f1 in strlen () from C:\Windows\system32\msvcrt.dll
#1 0x000000000048bbe5 in std::char_traits<char>::length (__s=0x0)
at C:/PROGRA~1/MINGW-~1/X86_64~1.0-P/mingw64/x86_64-w64-mingw32/include/c++/bits/char_traits.h:267
#2 std::operator+<char, std::char_traits<char>, std::allocator<char> > (__rhs="\\", __lhs=0x0)
at C:/PROGRA~1/MINGW-~1/X86_64~1.0-P/mingw64/x86_64-w64-mingw32/include/c++/bits/basic_string.tcc:1157
#3 boost::compute::detail::appdata_path[abi:cxx11]() () at C:/boost/boost-build/include/boost/compute/detail/path.hpp:38
#4 0x000000000048eec3 in boost::compute::detail::program_binary_path (hash="d27987d5bd61e2d28cd32b8d7a7916126354dc81", create=create@entry=false)
at C:/boost/boost-build/include/boost/compute/detail/path.hpp:46
#5 0x00000000004913de in boost::compute::program::load_program_binary (hash="d27987d5bd61e2d28cd32b8d7a7916126354dc81", ctx=...)
at C:/boost/boost-build/include/boost/compute/program.hpp:605
#6 0x0000000000490ece in boost::compute::program::build_with_source (
source="\n#ifndef _HISTOGRAM_256_KERNEL_\n#define _HISTOGRAM_256_KERNEL_\n\n#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n\n//"..., context=...,
options=" -D POWER_FEATURE_WORKGROUPS=5 -D USE_CONSTANT_BUF=0 -D USE_DP_FLOAT=0 -D CONST_HESSIAN=0 -cl-strict-aliasing -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math") at C:/boost/boost-build/include/boost/compute/program.hpp:549
#7 0x0000000000454339 in LightGBM::GPUTreeLearner::BuildGPUKernels () at C:\LightGBM\src\treelearner\gpu_tree_learner.cpp:583
#8 0x00000000636044f2 in libgomp-1!GOMP_parallel () from C:\Program Files\mingw-w64\x86_64-5.3.0-posix-seh-rt_v4-rev0\mingw64\bin\libgomp-1.dll
#9 0x0000000000455e7e in LightGBM::GPUTreeLearner::BuildGPUKernels (this=this@entry=0x3b9cac0)
at C:\LightGBM\src\treelearner\gpu_tree_learner.cpp:569
#10 0x0000000000457b49 in LightGBM::GPUTreeLearner::InitGPU (this=0x3b9cac0, platform_id=<optimized out>, device_id=<optimized out>)
at C:\LightGBM\src\treelearner\gpu_tree_learner.cpp:720
#11 0x0000000000410395 in LightGBM::GBDT::ResetTrainingData (this=0x1f26c90, config=<optimized out>, train_data=0x1f28180, objective_function=0x1f280e0,
training_metrics=std::vector of length 2, capacity 2 = {...}) at C:\LightGBM\src\boosting\gbdt.cpp:98
#12 0x0000000000402e93 in LightGBM::Application::InitTrain (this=this@entry=0x23f9d0) at C:\LightGBM\src\application\application.cpp:213
---Type <return> to continue, or q <return> to quit---
#13 0x00000000004f0b55 in LightGBM::Application::Run (this=0x23f9d0) at C:/LightGBM/include/LightGBM/application.h:84
#14 main (argc=6, argv=0x1f21e90) at C:\LightGBM\src\main.cpp:7
And open an issue in GitHub `here`_ with that log.
.. _Intel SDK for OpenCL: https://software.intel.com/en-us/articles/opencl-drivers
.. _CUDA Toolkit: https://developer.nvidia.com/cuda-downloads
.. _for Linux: https://github.com/microsoft/LightGBM/releases/download/v2.0.12/AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.tar.bz2
.. _for Windows: https://github.com/microsoft/LightGBM/releases/download/v2.0.12/AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe
.. _Khronos official OpenCL headers: https://github.com/KhronosGroup/OpenCL-Headers
.. _this: https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/installer/mingw-w64-install.exe/download
.. _Boost: https://www.boost.org/users/history/
.. _Prebuilt Boost x86_64: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw64-boost-static-1.78.0-4.fc38.noarch.rpm
.. _Prebuilt Boost i686: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw32-boost-static-1.78.0-4.fc38.noarch.rpm
.. _7zip: https://www.7-zip.org/download.html
.. _link: https://git-scm.com/download/win
.. _CMake: https://cmake.org/download/
.. _here: https://github.com/microsoft/LightGBM/issues
.. _GPUCapsViewer: http://www.ozone3d.net/gpu_caps_viewer/
Starting from version ``3.2.0``, LightGBM Python packages have built-in support for training on GPU devices.

View file

@ -77,10 +77,8 @@ From Command Line
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -A x64
cmake --build build --target ALL_BUILD --config Release
The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder.
@ -95,20 +93,16 @@ MinGW-w64
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -G "MinGW Makefiles"
cmake --build build -j4
The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder.
**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles"`` one more time if you encounter the ``sh.exe was found in your PATH`` error.
It is recommended that you use **Visual Studio** since it has better multithreading efficiency in **Windows** for many-core systems
(see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__).
Also, you may want to read `gcc Tips <./gcc-Tips.rst>`__.
Linux
~~~~~
@ -122,17 +116,11 @@ On Linux LightGBM can be built using **CMake** and **gcc** or **Clang**.
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S .
cmake --build build -j4
**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
Also, you may want to read `gcc Tips <./gcc-Tips.rst>`__.
Using ``Ninja``
^^^^^^^^^^^^^^^
@ -142,10 +130,8 @@ On Linux, LightGBM can also be built with `Ninja <https://ninja-build.org/>`__ i
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -G 'Ninja'
cmake --build build -j2
macOS
~~~~~
@ -185,10 +171,8 @@ Build from GitHub
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S .
cmake --build build -j4
gcc
^^^
@ -212,12 +196,8 @@ gcc
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine
cmake -B build -S .
cmake --build build -j4
Docker
~~~~~~
@ -274,10 +254,8 @@ From Command Line
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -A x64 -DUSE_OPENMP=OFF
cmake --build build --target ALL_BUILD --config Release
The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder.
@ -292,14 +270,12 @@ MinGW-w64
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF
cmake --build build -j4
The ``.exe`` and ``.dll`` files will be in ``LightGBM/`` folder.
**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_OPENMP=OFF`` one more time if you encounter the ``sh.exe was found in your PATH`` error.
Linux
^^^^^
@ -314,12 +290,8 @@ On Linux a version of LightGBM without OpenMP support can be built using **CMake
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_OPENMP=OFF
cmake --build build -j4
macOS
^^^^^
@ -343,10 +315,8 @@ Only **Apple Clang** version 8.1 or higher is supported.
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_OPENMP=OFF
cmake --build build -j4
gcc
***
@ -370,10 +340,8 @@ gcc
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine
cmake -B build -S . -DUSE_OPENMP=OFF
cmake --build build -j4
Build MPI Version
~~~~~~~~~~~~~~~~~
@ -422,10 +390,8 @@ From Command Line
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -A x64 -DUSE_MPI=ON
cmake --build build --target ALL_BUILD --config Release
The ``.exe`` and ``.dll`` files will be in ``LightGBM/Release`` folder.
@ -446,12 +412,8 @@ On Linux an MPI version of LightGBM can be built using **Open MPI**, **CMake** a
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_MPI=ON
cmake --build build -j4
**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
@ -489,10 +451,8 @@ Only **Apple Clang** version 8.1 or higher is supported.
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_MPI=ON
cmake --build build -j4
gcc
***
@ -522,10 +482,8 @@ gcc
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine
cmake -B build -S . -DUSE_MPI=ON
cmake --build build -j4
Build GPU Version
~~~~~~~~~~~~~~~~~
@ -555,14 +513,10 @@ To build LightGBM GPU version, run the following commands:
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_GPU=1
# if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following:
# cmake -B build -S . -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/
cmake --build build
**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
@ -571,7 +525,7 @@ Windows
On Windows a GPU version of LightGBM (``device_type=gpu``) can be built using **OpenCL**, **Boost**, **CMake** and **VS Build Tools** or **MinGW**.
If you use **MinGW**, the build procedure is similar to the build on Linux.
The following procedure is for the **MSVC** (Microsoft Visual C++) build.
@ -605,12 +559,10 @@ Following procedure is for the **MSVC** (Microsoft Visual C++) build.
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0
# if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following:
# cmake -B build -S . -A x64 -DUSE_GPU=1 -DBOOST_ROOT=C:/local/boost_1_63_0 -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0 -DOpenCL_LIBRARY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/x64/OpenCL.lib" -DOpenCL_INCLUDE_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include"
cmake --build build --target ALL_BUILD --config Release
**Note**: ``C:/local/boost_1_63_0`` and ``C:/local/boost_1_63_0/lib64-msvc-14.0`` are locations of your **Boost** binaries (assuming you've downloaded 1.63.0 version for Visual Studio 2015).
@ -624,9 +576,8 @@ Build CUDA Version
The `original GPU build <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL.
The CUDA-based build (``device_type=cuda``) is a separate implementation.
Use this version in Linux environments with an NVIDIA GPU with compute capability 6.0 or higher.
Linux
^^^^^
@ -645,47 +596,21 @@ To build LightGBM CUDA version, run the following commands:
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_CUDA=1
cmake --build build -j4
**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
macOS
^^^^^

The CUDA version is not supported on macOS.

Windows
^^^^^^^

The CUDA version is not supported on Windows.
Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows.

Build HDFS Version
~~~~~~~~~~~~~~~~~~

The HDFS version of LightGBM was tested on CDH-5.14.4 cluster.

Linux
^^^^^

On Linux a HDFS version of LightGBM can be built using **CMake** and **gcc**.

1. Install `CMake`_.

2. Run the following commands:

.. code:: sh

    git clone --recursive https://github.com/microsoft/LightGBM
    cd LightGBM
    mkdir build
    cd build
    cmake -DUSE_HDFS=ON ..
    # if you have installed HDFS to a customized location, you should specify paths to HDFS headers (hdfs.h) and library (libhdfs.so) like the following:
    # cmake \
    #   -DUSE_HDFS=ON \
    #   -DHDFS_LIB="/opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/lib64/libhdfs.so" \
    #   -DHDFS_INCLUDE_DIR="/opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/include/" \
    #   ..
    make -j4

**Note**: glibc >= 2.14 is required.

**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
Build Java Wrapper
~~~~~~~~~~~~~~~~~~
@ -710,10 +635,8 @@ VS Build Tools
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -A x64 -DUSE_SWIG=ON
cmake --build build --target ALL_BUILD --config Release
The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/Release`` folder.
@ -730,20 +653,16 @@ MinGW-w64
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON
cmake --build build -j4
The ``.jar`` file will be in ``LightGBM/build`` folder and the ``.dll`` files will be in ``LightGBM/`` folder.
**Note**: You may need to run the ``cmake -B build -S . -G "MinGW Makefiles" -DUSE_SWIG=ON`` one more time if you encounter the ``sh.exe was found in your PATH`` error.
It is recommended to use **VS Build Tools (Visual Studio)** since it has better multithreading efficiency in **Windows** for many-core systems
(see `Question 4 <./FAQ.rst#i-am-using-windows-should-i-use-visual-studio-or-mingw-for-compiling-lightgbm>`__ and `Question 8 <./FAQ.rst#cpu-usage-is-low-like-10-in-windows-when-using-lightgbm-on-very-large-datasets-with-many-core-systems>`__).
Also, you may want to read `gcc Tips <./gcc-Tips.rst>`__.
Linux
^^^^^
@ -757,12 +676,8 @@ On Linux a Java wrapper of LightGBM can be built using **Java**, **SWIG**, **CMa
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
mkdir build
cd build
cmake -DUSE_SWIG=ON ..
make -j4
**Note**: glibc >= 2.14 is required.
cmake -B build -S . -DUSE_SWIG=ON
cmake --build build -j4
**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
@ -797,10 +712,8 @@ Only **Apple Clang** version 8.1 or higher is supported.
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
cmake -B build -S . -DUSE_SWIG=ON
cmake --build build -j4
gcc
***
@ -824,12 +737,8 @@ gcc
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine
mkdir build
cd build
cmake -DUSE_SWIG=ON -DAPPLE_OUTPUT_DYLIB=ON ..
make -j4
Also, you may want to read `gcc Tips <./gcc-Tips.rst>`__.
cmake -B build -S . -DUSE_SWIG=ON
cmake --build build -j4
Build C++ Unit Tests
~~~~~~~~~~~~~~~~~~~~
@ -847,10 +756,8 @@ On Windows, C++ unit tests of LightGBM can be built using **CMake** and **VS Bui
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
mkdir build
cd build
cmake -A x64 -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF ..
cmake --build . --target testlightgbm --config Debug
cmake -B build -S . -A x64 -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
cmake --build build --target testlightgbm --config Debug
The ``.exe`` file will be in ``LightGBM/Debug`` folder.
@ -867,12 +774,8 @@ On Linux C++ unit tests of LightGBM can be built using **CMake** and **gcc** o
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
mkdir build
cd build
cmake -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF ..
make testlightgbm -j4
**Note**: glibc >= 2.14 is required.
cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
cmake --build build --target testlightgbm -j4
macOS
^^^^^
@ -896,10 +799,8 @@ Only **Apple Clang** version 8.1 or higher is supported.
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
mkdir build
cd build
cmake -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF ..
make testlightgbm -j4
cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
cmake --build build --target testlightgbm -j4
gcc
***
@ -923,10 +824,8 @@ gcc
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
export CXX=g++-7 CC=gcc-7 # replace "7" with version of gcc installed on your machine
mkdir build
cd build
cmake -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF ..
make testlightgbm -j4
cmake -B build -S . -DBUILD_CPP_TEST=ON -DUSE_OPENMP=OFF
cmake --build build --target testlightgbm -j4
.. |download artifacts| image:: ./_static/images/artifacts-not-available.svg

Просмотреть файл

@ -1,125 +1 @@
# Table/List of Key Modifications of LightGBM
The list includes the commits where each major feature added is considered to be working with the fewest flaws. This is useful if you are trying to find a specific commit, such as the first properly working commit for categorical support.
## Modification Table
| Date | Commit | Type | Description |
| --- | --- | --- | --- |
| 02/09/2017 | [574d780](https://github.com/microsoft/LightGBM/pull/878) | Docs | Readme Badges (Pull Request 878) |
| 14/08/2017 | [1b8910b](https://github.com/microsoft/LightGBM/pull/825) | Feature | SHAP Feature Importance (Pull Request 825) |
| 02/08/2017 | [f2bec2b](https://github.com/microsoft/LightGBM/pull/762) | Feature | Improved categorical feature support (Pull Request 762) |
| 30/07/2017 | [00cb04a](https://github.com/microsoft/LightGBM/pull/747) | Feature | Better missing value handling (Pull Request 747) |
| 13/07/2017 | [3421bc6](https://github.com/microsoft/LightGBM/pull/664) | Feature | Automatic Compiler for R-package compilation (Pull Request 664) |
| 10/07/2017 | [ce999b7](https://github.com/microsoft/LightGBM/pull/678) | Feature | Random Forest mode (Pull Request 678) |
| 22/06/2017 | [d862b3e](https://github.com/microsoft/LightGBM/pull/642) | CIntegration | Travis OSX Support (Pull Request 642) |
| 20/06/2017 | [80c641c](https://github.com/microsoft/LightGBM/pull/635) | Release | Python pip package (Pull Request 635) |
| 18/06/2017 | [4d2aa84](https://github.com/microsoft/LightGBM/pull/634) | CIntegration | AppVeyor Support (Pull Request 634) |
| 06/06/2017 | [2c9ce59](https://github.com/microsoft/LightGBM/pull/592) | Release | R-package version 0.2 (Pull Request 592) |
| 05/06/2017 | [f98d75f](https://github.com/microsoft/LightGBM/pull/584) | Feature | Use custom compiler for R-package (Pull Request 584) |
| 29/05/2017 | [993bbd5](https://github.com/microsoft/LightGBM/pull/559) | Parameter | Early Stopping for predictions (Pull Request 559) |
| 26/05/2017 | [3abff37](https://github.com/microsoft/LightGBM/commit/3abff370bb353293e4a03e516111dd02785fbd97) | Feature | Parameter to disable missing values (Commit) |
| 21/05/2017 | [4cf9376](https://github.com/microsoft/LightGBM/commit/4cf9376d6652d3d7afa82e98dfb363af9275969d) | Feature | Limitation of threads for dataset construction (Commit) |
| 15/05/2017 | [e984b0d](https://github.com/microsoft/LightGBM/pull/516) | Feature | Support for missing values (Pull Request 516) |
| 14/05/2017 | [358553d](https://github.com/microsoft/LightGBM/pull/511) | Docs | Interactive External Website (Pull Request 511) |
| 04/05/2017 | [ca30b8c](https://github.com/microsoft/LightGBM/pull/489) | Docs | Read The Docs (Pull Request 489) |
| 28/04/2017 | [8a19834](https://github.com/microsoft/LightGBM/pull/469) | Feature | If-Then-Else C++ Trees Generation (Pull Request 469) |
| 18/04/2017 | [7339ed6](https://github.com/microsoft/LightGBM/pull/426) | Feature | Whitespaces Removed from Features support (Pull Request 426) |
| 14/04/2017 | [9224a9d](https://github.com/microsoft/LightGBM/pull/415) | Release | GPU support (Pull Request 415)<br>With original ([0bb4a82](https://github.com/microsoft/LightGBM/pull/368) on 09/04/2017, Pull Request 368) |
| 13/04/2017 | [ab55910](https://github.com/microsoft/LightGBM/pull/404) | Feature | Speed Improvements for Prediction (Pull Request 404) |
| 06/04/2017 | [bfb0217](https://github.com/microsoft/LightGBM/pull/383) | Feature | Objective Transformations to the Objective (Pull Request 383) |
| 05/04/2017 | [d4c4d9a](https://github.com/microsoft/LightGBM/pull/381) | Feature | Regression Speed Improvements (Pull Request 381) |
| 03/04/2017 | [b6c973a](https://github.com/microsoft/LightGBM/pull/378) | Feature | Unloading Memory Fix support for R-package (Pull Request 378) |
| 17/03/2017 | [06a915a](https://github.com/microsoft/LightGBM/pull/347) | Feature | RDS support for R package (Pull Request 347) |
| 10/03/2017 | [b7e5f07](https://github.com/microsoft/LightGBM/pull/340) | Feature | Support Self-Contained R-package (Pull Request 340) |
| 02/03/2017 | [4d6ff28](https://github.com/microsoft/LightGBM/pull/330) | Feature | Feature Contribution Plot for R-package (Pull Request 330) |
| 01/03/2017 | [166421e](https://github.com/microsoft/LightGBM/pull/328) | Feature | Feature Importance Plot for R-package (Pull Request 328) |
| 27/02/2017 | [fddb52f](https://github.com/microsoft/LightGBM/pull/323) | Feature | Feature Contribution for R-package (Pull Request 323) |
| 20/02/2017 | [10212b5](https://github.com/microsoft/LightGBM/pull/300) | Release | v2 (Pull Request 300) |
| 12/02/2017 | [ea6bc0a](https://github.com/microsoft/LightGBM/pull/290) | Release | v1 (Pull Request 290) |
| 02/02/2017 | [8c8ed6c](https://github.com/microsoft/LightGBM/pull/275) | Feature | Docker support (Pull Request 275) |
| 30/01/2017 | [4f3e9d8](https://github.com/microsoft/LightGBM/pull/270) | Feature | Poisson Objective support (Pull Request 270) |
| 28/01/2017 | [5856554](https://github.com/microsoft/LightGBM/pull/266) | Feature | Plot Metric support for Python-package (Pull Request 266) |
| 25/01/2017 | [8980fc7](https://github.com/microsoft/LightGBM/pull/262) | Feature | Plot Tree support for Python-package (Pull Request 262)<br>With original ([fafbcb3](https://github.com/microsoft/LightGBM/pull/258) on 25/01/2017, Pull Request 258) |
| 20/01/2017 | [abaefb5](https://github.com/microsoft/LightGBM/pull/237) | Feature | Feature Importance Plot for Python-package (Pull Request 237) |
| 16/01/2017 | [a2ae838](https://github.com/microsoft/LightGBM/pull/229) | Feature | Categorical Feature support for R-package (Pull Request 229) |
| 16/01/2017 | [57d5527](https://github.com/microsoft/LightGBM/pull/218) | Feature | Pandas Categorical support for Python-package (Pull Request 218)<br>With original ([a2ae838](https://github.com/microsoft/LightGBM/pull/193) on 12/01/2017, Pull Request 193) |
| 10/01/2017 | [fb732c3](https://github.com/microsoft/LightGBM/pull/180) | Feature | Fair Loss Objective support (Pull Request 180) |
| 09/01/2017 | [27d3eb3](https://github.com/microsoft/LightGBM/pull/178) | Feature | Huber Loss Objective support (Pull Request 178)<br>With original ([a87af87](https://github.com/microsoft/LightGBM/pull/174) on 09/01/2017, Pull Request 174) |
| 09/01/2017 | [9b2558d](https://github.com/microsoft/LightGBM/pull/177) | Feature | devtools R Installation support (Pull Request 177) |
| 09/01/2017 | [6219df7](https://github.com/microsoft/LightGBM/pull/175) | Feature | L1 Loss Objective support (Pull Request 175) |
| 08/01/2017 | [551d59c](https://github.com/microsoft/LightGBM/pull/168) | Release | R support (Pull Request 168) |
| 05/01/2017 | [96d08f4](https://github.com/microsoft/LightGBM/pull/153) | Feature | PMML support (Pull Request 153) |
| 01/01/2017 | [a034cee](https://github.com/microsoft/LightGBM/pull/151) | Feature | Pickle support for Python-package (Pull Request 151) |
| 26/12/2016 | [96cba41](https://github.com/microsoft/LightGBM/pull/139) | Parameter | DART xgboost support (Pull Request 139) |
| 19/12/2016 | [99b483d](https://github.com/microsoft/LightGBM/pull/133) | Parameter | Learning Rate in DART support (Pull Request 133) |
| 01/12/2016 | [16d1853](https://github.com/microsoft/LightGBM/pull/94) | Release | Python support (Pull Request 94) |
| 11/11/2016 | [98be7e3](https://github.com/microsoft/LightGBM/pull/67) | Feature | DART booster support (Pull Request 67) |
| 08/11/2016 | [785398a](https://github.com/microsoft/LightGBM/pull/69) | Parameter | L1 Regularization, L2 Regularization, Minimum Gain to Split support (Pull Request 69) |
| 05/11/2016 | [1466f90](https://github.com/microsoft/LightGBM/pull/108) | Release | Categorical Feature support (Pull Request 108) |
| 01/11/2016 | [aa796a8](https://github.com/microsoft/LightGBM/pull/53) | Feature | Multiclass classification support (Pull Request 53) |
| 28/10/2016 | [c45d1d9](https://github.com/microsoft/LightGBM/pull/42) | Feature | Feature Importance support (Pull Request 42) |
| 25/10/2016 | [a6a75fe](https://github.com/microsoft/LightGBM/pull/35) | Parameter | Maximum Depth support (Pull Request 35) |
| 24/10/2016 | [9fe0dea](https://github.com/microsoft/LightGBM/pull/30) | Parameter | Leaf Index Prediction support (Pull Request 30) |
| 21/10/2016 | [7aaba32](https://github.com/microsoft/LightGBM/pull/27) | Parameter | Early Stopping support (Pull Request 27)<br>With original ([7d4b6d4](https://github.com/microsoft/LightGBM/pull/21) on 20/10/2016, Pull Request 21) |
| 17/10/2016 | [65ddd85](https://github.com/guFalcon/LightGBM/commit/65ddd852d8d160d86080c45512bd435d15837927) | Release | LightGBM compilable (Commit) |
## Modification List
* 02/09/2017 [Microsoft/LightGBM@574d780](https://github.com/microsoft/LightGBM/pull/878): Docs: Readme Badges (Pull Request 878)
* 14/08/2017 [Microsoft/LightGBM@1b8910b](https://github.com/microsoft/LightGBM/pull/825): Feature: SHAP Feature Importance (Pull Request 825)
* 02/08/2017 [Microsoft/LightGBM@f2bec2b](https://github.com/microsoft/LightGBM/pull/762): Feature: Improved categorical feature support (Pull Request 762)
* 30/07/2017 [Microsoft/LightGBM@00cb04a](https://github.com/microsoft/LightGBM/pull/747): Feature: Better missing value handling (Pull Request 747)
* 13/07/2017 [Microsoft/LightGBM@3421bc6](https://github.com/microsoft/LightGBM/pull/664): Feature: Automatic Compiler for R-package compilation (Pull Request 664)
* 10/07/2017 [Microsoft/LightGBM@ce999b7](https://github.com/microsoft/LightGBM/pull/678): Feature: Random Forest mode (Pull Request 678)
* 22/06/2017 [Microsoft/LightGBM@d862b3e](https://github.com/microsoft/LightGBM/pull/642): CIntegration: Travis OSX Support (Pull Request 642)
* 20/06/2017 [Microsoft/LightGBM@80c641c](https://github.com/microsoft/LightGBM/pull/635): Release: Python pip package (Pull Request 635)
* 18/06/2017 [Microsoft/LightGBM@4d2aa84](https://github.com/microsoft/LightGBM/pull/634): CIntegration: AppVeyor Support (Pull Request 634)
* 06/06/2017 [Microsoft/LightGBM@2c9ce59](https://github.com/microsoft/LightGBM/pull/592): Release: R-package version 0.2 (Pull Request 592)
* 05/06/2017 [Microsoft/LightGBM@f98d75f](https://github.com/microsoft/LightGBM/pull/584): Feature: Use custom compiler for R-package (Pull Request 584)
* 29/05/2017 [Microsoft/LightGBM@993bbd5](https://github.com/microsoft/LightGBM/pull/559): Parameter: Early Stopping for predictions (Pull Request 559)
* 26/05/2017 [Microsoft/LightGBM@3abff37](https://github.com/microsoft/LightGBM/commit/3abff370bb353293e4a03e516111dd02785fbd97): Feature: Parameter to disable missing values (Commit)
* 21/05/2017 [Microsoft/LightGBM@4cf9376](https://github.com/microsoft/LightGBM/commit/4cf9376d6652d3d7afa82e98dfb363af9275969d): Feature: Limitation of threads for dataset construction (Commit)
* 15/05/2017 [Microsoft/LightGBM@e984b0d](https://github.com/microsoft/LightGBM/pull/516): Feature: Support for missing values (Pull Request 516)
* 14/05/2017 [Microsoft/LightGBM@358553d](https://github.com/microsoft/LightGBM/pull/511): Docs: Interactive External Website (Pull Request 511)
* 04/05/2017 [Microsoft/LightGBM@ca30b8c](https://github.com/microsoft/LightGBM/pull/489): Docs: Read The Docs (Pull Request 489)
* 28/04/2017 [Microsoft/LightGBM@8a19834](https://github.com/microsoft/LightGBM/pull/469): Feature: If-Then-Else C++ Trees Generation (Pull Request 469)
* 18/04/2017 ([Microsoft/LightGBM@7339ed6](https://github.com/microsoft/LightGBM/pull/426)): Feature: Whitespaces Removed from Features support (Pull Request 426)
* 14/04/2017 ([Microsoft/LightGBM@9224a9d](https://github.com/microsoft/LightGBM/pull/415)): Release: GPU support (Pull Request 415) with original ([Microsoft/LightGBM@0bb4a82](https://github.com/microsoft/LightGBM/pull/368) on 09/04/2017, Pull Request 368)
* 13/04/2017 ([Microsoft/LightGBM@ab55910](https://github.com/microsoft/LightGBM/pull/404)): Feature: Speed Improvements for Prediction (Pull Request 404)
* 06/04/2017 ([Microsoft/LightGBM@bfb0217](https://github.com/microsoft/LightGBM/pull/383)): Feature: Objective Transformations to the Objective (Pull Request 383)
* 05/04/2017 ([Microsoft/LightGBM@d4c4d9a](https://github.com/microsoft/LightGBM/pull/381)): Feature: Regression Speed Improvements (Pull Request 381)
* 03/04/2017 ([Microsoft/LightGBM@b6c973a](https://github.com/microsoft/LightGBM/pull/378)): Feature: Unloading Memory Fix support for R-package (Pull Request 378)
* 17/03/2017 ([Microsoft/LightGBM@06a915a](https://github.com/microsoft/LightGBM/pull/347)): Feature: RDS support for R package (Pull Request 347)
* 10/03/2017 ([Microsoft/LightGBM@b7e5f07](https://github.com/microsoft/LightGBM/pull/340)): Feature: Support Self-Contained R-package (Pull Request 340)
* 02/03/2017 ([Microsoft/LightGBM@4d6ff28](https://github.com/microsoft/LightGBM/pull/330)): Feature: Feature Contribution Plot for R-package (Pull Request 330)
* 01/03/2017 ([Microsoft/LightGBM@166421e](https://github.com/microsoft/LightGBM/pull/328)): Feature: Feature Importance Plot for R-package (Pull Request 328)
* 27/02/2017 ([Microsoft/LightGBM@fddb52f](https://github.com/microsoft/LightGBM/pull/323)): Feature: Feature Contribution for R-package (Pull Request 323)
* 20/02/2017 ([Microsoft/LightGBM@10212b5](https://github.com/microsoft/LightGBM/pull/300)): Release: v2 (Pull Request 300)
* 12/02/2017 ([Microsoft/LightGBM@ea6bc0a](https://github.com/microsoft/LightGBM/pull/290)): Release: v1 (Pull Request 290)
* 02/02/2017 ([Microsoft/LightGBM@8c8ed6c](https://github.com/microsoft/LightGBM/pull/275)): Feature: Docker support (Pull Request 275)
* 30/01/2017 ([Microsoft/LightGBM@4f3e9d8](https://github.com/microsoft/LightGBM/pull/270)): Feature: Poisson Objective support (Pull Request 270)
* 28/01/2017 ([Microsoft/LightGBM@5856554](https://github.com/microsoft/LightGBM/pull/266)): Feature: Plot Metric support for Python-package (Pull Request 266)
* 25/01/2017 ([Microsoft/LightGBM@8980fc7](https://github.com/microsoft/LightGBM/pull/262)): Feature: Plot Tree support for Python-package (Pull Request 262) with original ([Microsoft/LightGBM@fafbcb3](https://github.com/microsoft/LightGBM/pull/258) on 25/01/2017, Pull Request 258)
* 20/01/2017 ([Microsoft/LightGBM@abaefb5](https://github.com/microsoft/LightGBM/pull/237)): Feature: Feature Importance Plot for Python-package (Pull Request 237)
* 16/01/2017 ([Microsoft/LightGBM@a2ae838](https://github.com/microsoft/LightGBM/pull/229)): Feature: Categorical Feature support for R-package (Pull Request 229)
* 16/01/2017 ([Microsoft/LightGBM@57d5527](https://github.com/microsoft/LightGBM/pull/218)): Feature: Pandas Categorical support for Python-package (Pull Request 218) with original ([Microsoft/LightGBM@a2ae838](https://github.com/microsoft/LightGBM/pull/193) on 12/01/2017, Pull Request 193)
* 10/01/2017 ([Microsoft/LightGBM@fb732c3](https://github.com/microsoft/LightGBM/pull/180)): Feature: Fair Loss Objective support (Pull Request 180)
* 09/01/2017 ([Microsoft/LightGBM@27d3eb3](https://github.com/microsoft/LightGBM/pull/178)): Feature: Huber Loss Objective support (Pull Request 178) with original ([Microsoft/LightGBM@a87af87](https://github.com/microsoft/LightGBM/pull/174) on 09/01/2017, Pull Request 174)
* 09/01/2017 ([Microsoft/LightGBM@9b2558d](https://github.com/microsoft/LightGBM/pull/177)): Feature: devtools R Installation support (Pull Request 177)
* 09/01/2017 ([Microsoft/LightGBM@6219df7](https://github.com/microsoft/LightGBM/pull/175)): Feature: L1 Loss Objective support (Pull Request 175)
* 08/01/2017 ([Microsoft/LightGBM@551d59c](https://github.com/microsoft/LightGBM/pull/168)): Release: R support (Pull Request 168)
* 05/01/2017 ([Microsoft/LightGBM@96d08f4](https://github.com/microsoft/LightGBM/pull/153)): Feature: PMML support (Pull Request 153)
* 01/01/2017 ([Microsoft/LightGBM@a034cee](https://github.com/microsoft/LightGBM/pull/151)): Feature: Pickle support for Python-package (Pull Request 151)
* 26/12/2016 ([Microsoft/LightGBM@96cba41](https://github.com/microsoft/LightGBM/pull/139)): Parameter: DART xgboost support (Pull Request 139)
* 19/12/2016 ([Microsoft/LightGBM@99b483d](https://github.com/microsoft/LightGBM/pull/133)): Parameter: Learning Rate in DART support (Pull Request 133)
* 01/12/2016 ([Microsoft/LightGBM@16d1853](https://github.com/microsoft/LightGBM/pull/94)): Release: Python support (Pull Request 94)
* 11/11/2016 ([Microsoft/LightGBM@98be7e3](https://github.com/microsoft/LightGBM/pull/67)): Feature: DART booster support (Pull Request 67)
* 08/11/2016 ([Microsoft/LightGBM@785398a](https://github.com/microsoft/LightGBM/pull/69)): Parameter: L1 Regularization, L2 Regularization, Minimum Gain to Split support (Pull Request 69)
* 05/11/2016 ([Microsoft/LightGBM@1466f90](https://github.com/microsoft/LightGBM/pull/108)): Release: Categorical Feature support (Pull Request 108)
* 01/11/2016 ([Microsoft/LightGBM@aa796a8](https://github.com/microsoft/LightGBM/pull/53)): Feature: Multiclass classification support (Pull Request 53)
* 28/10/2016 ([Microsoft/LightGBM@c45d1d9](https://github.com/microsoft/LightGBM/pull/42)): Feature: Feature Importance support (Pull Request 42)
* 25/10/2016 ([Microsoft/LightGBM@a6a75fe](https://github.com/microsoft/LightGBM/pull/35)): Parameter: Maximum Depth support (Pull Request 35)
* 24/10/2016 ([Microsoft/LightGBM@9fe0dea](https://github.com/microsoft/LightGBM/pull/30)): Parameter: Leaf Index Prediction support (Pull Request 30)
* 21/10/2016 ([Microsoft/LightGBM@7aaba32](https://github.com/microsoft/LightGBM/pull/27)): Parameter: Early Stopping support (Pull Request 27) with original ([Microsoft/LightGBM@7d4b6d4](https://github.com/microsoft/LightGBM/pull/21) on 20/10/2016, Pull Request 21)
* 17/10/2016 ([Microsoft/LightGBM@65ddd85](https://github.com/guFalcon/LightGBM/commit/65ddd852d8d160d86080c45512bd435d15837927)): Release: LightGBM compilable (Commit)
The content of this document was very outdated and has been removed to avoid misleading readers.

Просмотреть файл

@ -17,4 +17,4 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

Просмотреть файл

@ -22,7 +22,7 @@ To get good results using a leaf-wise tree, these are some important parameters:
1. ``num_leaves``. This is the main parameter to control the complexity of the tree model.
Theoretically, we can set ``num_leaves = 2^(max_depth)`` to obtain the same number of leaves as a depth-wise tree.
However, this simple conversion is not good in practice.
The reason is that a leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting.
A leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting.
Thus, when trying to tune the ``num_leaves``, we should let it be smaller than ``2^(max_depth)``.
For example, when ``max_depth=7`` a depth-wise tree can get good accuracy,
but setting ``num_leaves`` to ``127`` may cause over-fitting, while setting it to ``70`` or ``80`` may give better accuracy than the depth-wise approach.
@ -33,6 +33,7 @@ To get good results using a leaf-wise tree, these are some important parameters:
In practice, setting it to hundreds or thousands is enough for a large dataset.
3. ``max_depth``. You also can use ``max_depth`` to limit the tree depth explicitly.
If you set ``max_depth``, also explicitly set ``num_leaves`` to some value ``<= 2^max_depth``.
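A minimal sketch combining the three parameters above (illustrative values only, not tuned for any particular dataset):

.. code:: python

    params = {
        "objective": "regression",
        "max_depth": 7,
        "num_leaves": 70,         # well below 2**7 = 128
        "min_data_in_leaf": 100,  # increase to hundreds/thousands for large datasets
    }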
For Faster Speed
----------------

Просмотреть файл

@ -1,4 +1,4 @@
.. List of parameters is auto generated by LightGBM\helpers\parameter_generator.py from LightGBM\include\LightGBM\config.h file.
.. List of parameters is auto generated by LightGBM\.ci\parameter-generator.py from LightGBM\include\LightGBM\config.h file.
.. role:: raw-html(raw)
:format: html
@ -125,6 +125,14 @@ Core Parameters
- label should be of ``int`` type, and a larger number represents higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect)
- custom objective function (gradients and hessians not computed directly by LightGBM)
- ``custom``
- must be passed through parameters explicitly in the C API
- **Note**: cannot be used in CLI version
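For illustration, a hedged sketch of supplying a callable objective through the Python package (assuming LightGBM >= 4.0, where a callable may be passed as the ``objective`` parameter; the function name and the simple L2 gradients are this document's own example):

.. code:: python

    import numpy as np

    def l2_objective(preds, train_data):
        # gradient and hessian of 0.5 * (preds - label)**2
        grad = preds - train_data.get_label()
        hess = np.ones_like(preds)
        return grad, hess

    params = {"objective": l2_objective}  # gradients/hessians now come from this callable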
- ``boosting`` :raw-html:`<a id="boosting" title="Permalink to this parameter" href="#boosting">&#x1F517;&#xFE0E;</a>`, default = ``gbdt``, type = enum, options: ``gbdt``, ``rf``, ``dart``, aliases: ``boosting_type``, ``boost``
- ``gbdt``, traditional Gradient Boosting Decision Tree, aliases: ``gbrt``
@ -143,7 +151,7 @@ Core Parameters
- ``goss``, Gradient-based One-Side Sampling
- *New in 4.0.0*
- *New in version 4.0.0*
- ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
@ -219,7 +227,7 @@ Core Parameters
- **Note**: for the faster speed, GPU uses 32-bit float point to sum up by default, so this may affect the accuracy for some tasks. You can set ``gpu_use_dp=true`` to enable 64-bit float point, but it will slow down the training
- **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
- **Note**: refer to `Installation Guide <./Installation-Guide.rst>`__ to build LightGBM with GPU or CUDA support
- ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = int, aliases: ``random_seed``, ``random_state``
@ -352,7 +360,7 @@ Learning Control Parameters
- frequency for bagging
- ``0`` means disable bagging; ``k`` means perform bagging at every ``k`` iteration. Every ``k``-th iteration, LightGBM will randomly select ``bagging_fraction * 100 %`` of the data to use for the next ``k`` iterations
- ``0`` means disable bagging; ``k`` means perform bagging at every ``k`` iteration. Every ``k``-th iteration, LightGBM will randomly select ``bagging_fraction * 100%`` of the data to use for the next ``k`` iterations
- **Note**: bagging is only effective when ``0.0 < bagging_fraction < 1.0``
@ -360,6 +368,10 @@ Learning Control Parameters
- random seed for bagging
- ``bagging_by_query`` :raw-html:`<a id="bagging_by_query" title="Permalink to this parameter" href="#bagging_by_query">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
- whether to do bagging sampling by query
- ``feature_fraction`` :raw-html:`<a id="feature_fraction" title="Permalink to this parameter" href="#feature_fraction">&#x1F517;&#xFE0E;</a>`, default = ``1.0``, type = double, aliases: ``sub_feature``, ``colsample_bytree``, constraints: ``0.0 < feature_fraction <= 1.0``
- LightGBM will randomly select a subset of features on each iteration (tree) if ``feature_fraction`` is smaller than ``1.0``. For example, if you set it to ``0.8``, LightGBM will select 80% of features before training each tree
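A small sketch (illustrative values, not recommendations) combining the bagging parameters above with per-tree feature subsampling:

.. code:: python

    params = {
        "bagging_fraction": 0.8,  # use 80% of rows
        "bagging_freq": 5,        # re-sample rows every 5 iterations
        "feature_fraction": 0.8,  # use 80% of features for each tree
    }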
@ -404,6 +416,12 @@ Learning Control Parameters
- can be used to speed up training
- ``early_stopping_min_delta`` :raw-html:`<a id="early_stopping_min_delta" title="Permalink to this parameter" href="#early_stopping_min_delta">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, constraints: ``early_stopping_min_delta >= 0.0``
- when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement
- *New in version 4.4.0*
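A hedged sketch of these early-stopping parameters (assumes LightGBM >= 4.4 for ``early_stopping_min_delta``; the metric and threshold are placeholders):

.. code:: python

    params = {
        "objective": "regression",
        "metric": "l2",
        "early_stopping_round": 50,
        "early_stopping_min_delta": 1e-4,  # smaller improvements do not count
    }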
- ``first_metric_only`` :raw-html:`<a id="first_metric_only" title="Permalink to this parameter" href="#first_metric_only">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
- LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
@ -458,7 +476,7 @@ Learning Control Parameters
- used only in ``dart``
- set this to ``true``, if you want to use xgboost dart mode
- set this to ``true``, if you want to use XGBoost DART mode
- ``uniform_drop`` :raw-html:`<a id="uniform_drop" title="Permalink to this parameter" href="#uniform_drop">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
@ -486,6 +504,8 @@ Learning Control Parameters
- ``min_data_per_group`` :raw-html:`<a id="min_data_per_group" title="Permalink to this parameter" href="#min_data_per_group">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, constraints: ``min_data_per_group > 0``
- used for the categorical features
- minimal number of data per categorical group
- ``max_cat_threshold`` :raw-html:`<a id="max_cat_threshold" title="Permalink to this parameter" href="#max_cat_threshold">&#x1F517;&#xFE0E;</a>`, default = ``32``, type = int, constraints: ``max_cat_threshold > 0``
@ -510,6 +530,8 @@ Learning Control Parameters
- ``max_cat_to_onehot`` :raw-html:`<a id="max_cat_to_onehot" title="Permalink to this parameter" href="#max_cat_to_onehot">&#x1F517;&#xFE0E;</a>`, default = ``4``, type = int, constraints: ``max_cat_to_onehot > 0``
- used for the categorical features
- when number of categories of one feature smaller than or equal to ``max_cat_to_onehot``, one-vs-other split algorithm will be used
- ``top_k`` :raw-html:`<a id="top_k" title="Permalink to this parameter" href="#top_k">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, aliases: ``topk``, constraints: ``top_k > 0``
@ -524,7 +546,7 @@ Learning Control Parameters
- ``1`` means increasing, ``-1`` means decreasing, ``0`` means non-constraint
- you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for 1st feature, non-constraint for 2nd feature and increasing for the 3rd feature
- you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for the 1st feature, non-constraint for the 2nd feature and increasing for the 3rd feature
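For a hypothetical three-feature dataset, the same constraint written as a Python parameter dict:

.. code:: python

    # decreasing for the 1st feature, unconstrained for the 2nd, increasing for the 3rd
    params = {"monotone_constraints": [-1, 0, 1]}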
- ``monotone_constraints_method`` :raw-html:`<a id="monotone_constraints_method" title="Permalink to this parameter" href="#monotone_constraints_method">&#x1F517;&#xFE0E;</a>`, default = ``basic``, type = enum, options: ``basic``, ``intermediate``, ``advanced``, aliases: ``monotone_constraining_method``, ``mc_method``
@ -532,11 +554,11 @@ Learning Control Parameters
- monotone constraints method
- ``basic``, the most basic monotone constraints method. It does not slow the library at all, but over-constrains the predictions
- ``basic``, the most basic monotone constraints method. It does not slow down the training speed at all, but over-constrains the predictions
- ``intermediate``, a `more advanced method <https://hal.science/hal-02862802/document>`__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results
- ``intermediate``, a `more advanced method <https://hal.science/hal-02862802/document>`__, which may slow down the training speed very slightly. However, this method is much less constraining than the basic method and should significantly improve the results
- ``advanced``, an `even more advanced method <https://hal.science/hal-02862802/document>`__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results
- ``advanced``, an `even more advanced method <https://hal.science/hal-02862802/document>`__, which may slow down the training speed. However, this method is even less constraining than the intermediate method and should again significantly improve the results
- ``monotone_penalty`` :raw-html:`<a id="monotone_penalty" title="Permalink to this parameter" href="#monotone_penalty">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``monotone_splits_penalty``, ``ms_penalty``, ``mc_penalty``, constraints: ``monotone_penalty >= 0.0``
@ -596,7 +618,7 @@ Learning Control Parameters
- helps prevent overfitting on leaves with few samples
- if set to zero, no smoothing is applied
- if ``0.0`` (the default), no smoothing is applied
- if ``path_smooth > 0`` then ``min_data_in_leaf`` must be at least ``2``
@ -616,7 +638,7 @@ Learning Control Parameters
- for Python-package, list of lists, e.g. ``[[0, 1, 2], [2, 3]]``
- for R-package, list of character or numeric vectors, e.g. ``list(c("var1", "var2", "var3"), c("var3", "var4"))`` or ``list(c(1L, 2L, 3L), c(3L, 4L))``. Numeric vectors should use 1-based indexing, where ``1L`` is the first feature, ``2L`` is the second feature, etc
- for R-package, list of character or numeric vectors, e.g. ``list(c("var1", "var2", "var3"), c("var3", "var4"))`` or ``list(c(1L, 2L, 3L), c(3L, 4L))``. Numeric vectors should use 1-based indexing, where ``1L`` is the first feature, ``2L`` is the second feature, etc.
- any two features can appear in the same branch only if there exists a constraint containing both features
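The example constraint sets above, written out for the Python package:

.. code:: python

    # features 0, 1, 2 may interact with one another; features 2 and 3 may interact
    params = {"interaction_constraints": [[0, 1, 2], [2, 3]]}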
@ -668,35 +690,43 @@ Learning Control Parameters
- gradient quantization can accelerate training, with little accuracy drop in most cases
- **Note**: can be used only with ``device_type = cpu``
- **Note**: works only with ``cpu`` and ``cuda`` device type
- *New in version 4.0.0*
- ``num_grad_quant_bins`` :raw-html:`<a id="num_grad_quant_bins" title="Permalink to this parameter" href="#num_grad_quant_bins">&#x1F517;&#xFE0E;</a>`, default = ``4``, type = int
- used only if ``use_quantized_grad=true``
- number of bins used to quantize gradients and hessians
- with more bins, the quantized training will be closer to full precision training
- **Note**: can be used only with ``device_type = cpu``
- **Note**: works only with ``cpu`` and ``cuda`` device type
- *New in 4.0.0*
- *New in version 4.0.0*
- ``quant_train_renew_leaf`` :raw-html:`<a id="quant_train_renew_leaf" title="Permalink to this parameter" href="#quant_train_renew_leaf">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
- used only if ``use_quantized_grad=true``
- whether to renew the leaf values with the original gradients when using quantized training
- renewing is very helpful for the accuracy of quantized training with ranking objectives
- **Note**: can be used only with ``device_type = cpu``
- **Note**: works only with ``cpu`` and ``cuda`` device type
- *New in 4.0.0*
- *New in version 4.0.0*
- ``stochastic_rounding`` :raw-html:`<a id="stochastic_rounding" title="Permalink to this parameter" href="#stochastic_rounding">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool
- used only if ``use_quantized_grad=true``
- whether to use stochastic rounding in gradient quantization
- *New in 4.0.0*
- **Note**: works only with ``cpu`` and ``cuda`` device type
- *New in version 4.0.0*
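Putting the quantized-gradient options together, a hedged sketch (assumes LightGBM >= 4.0 and a ``cpu`` or ``cuda`` device; values are illustrative):

.. code:: python

    params = {
        "device_type": "cpu",
        "use_quantized_grad": True,
        "num_grad_quant_bins": 4,        # more bins -> closer to full-precision training
        "quant_train_renew_leaf": True,  # helps accuracy for ranking objectives
        "stochastic_rounding": True,
    }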
IO Parameters
-------------
@ -708,25 +738,25 @@ Dataset Parameters
- fit piecewise linear gradient boosting tree
- tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant
- tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant
- the linear model at each leaf includes all the numerical features in that leaf's branch
- the linear model at each leaf includes all the numerical features in that leaf's branch
- the first tree has constant leaf values
- the first tree has constant leaf values
- categorical features are used for splits as normal but are not used in the linear models
- categorical features are used for splits as normal but are not used in the linear models
- missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
- missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
- it is recommended to rescale data before training so that features have similar mean and standard deviation
- it is recommended to rescale data before training so that features have similar mean and standard deviation
- **Note**: only works with CPU and ``serial`` tree learner
- **Note**: works only with ``cpu`` device type and ``serial`` tree learner
- **Note**: ``regression_l1`` objective is not supported with linear tree boosting
- **Note**: ``regression_l1`` objective is not supported with linear tree boosting
- **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
- **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
- **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
- **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
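A minimal sketch of the linear-tree setup described above (``linear_lambda`` is an illustrative regularization setting not discussed here):

.. code:: python

    params = {
        "linear_tree": True,
        "device_type": "cpu",    # linear trees work only on CPU
        "tree_learner": "serial",
        "linear_lambda": 0.01,   # illustrative value
    }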
- ``max_bin`` :raw-html:`<a id="max_bin" title="Permalink to this parameter" href="#max_bin">&#x1F517;&#xFE0E;</a>`, default = ``255``, type = int, aliases: ``max_bins``, constraints: ``max_bin > 1``
@ -916,7 +946,7 @@ Dataset Parameters
- **Note**: ``lightgbm-transform`` is not maintained by LightGBM's maintainers. Bug reports or feature requests should go to `issues page <https://github.com/microsoft/lightgbm-transform/issues>`__
- *New in 4.0.0*
- *New in version 4.0.0*
Predict Parameters
~~~~~~~~~~~~~~~~~~
@ -991,13 +1021,13 @@ Predict Parameters
- ``pred_early_stop_freq`` :raw-html:`<a id="pred_early_stop_freq" title="Permalink to this parameter" href="#pred_early_stop_freq">&#x1F517;&#xFE0E;</a>`, default = ``10``, type = int
- used only in ``prediction`` task
- used only in ``prediction`` task and if ``pred_early_stop=true``
- the frequency of checking early-stopping prediction
- ``pred_early_stop_margin`` :raw-html:`<a id="pred_early_stop_margin" title="Permalink to this parameter" href="#pred_early_stop_margin">&#x1F517;&#xFE0E;</a>`, default = ``10.0``, type = double
- used only in ``prediction`` task
- used only in ``prediction`` task and if ``pred_early_stop=true``
- the threshold of margin in early-stopping prediction
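A hedged sketch of early-stopped prediction from the Python package (``bst`` is assumed to be a trained classification ``Booster`` and ``data`` a feature matrix; extra keyword arguments are forwarded as prediction parameters):

.. code:: python

    ypred = bst.predict(
        data,
        pred_early_stop=True,
        pred_early_stop_freq=10,
        pred_early_stop_margin=10.0,
    )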
@ -1137,7 +1167,9 @@ Objective Parameters
- ``lambdarank_position_bias_regularization`` :raw-html:`<a id="lambdarank_position_bias_regularization" title="Permalink to this parameter" href="#lambdarank_position_bias_regularization">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, constraints: ``lambdarank_position_bias_regularization >= 0.0``
- used only in ``lambdarank`` application when positional information is provided and position bias is modeled. Larger values reduce the inferred position bias factors.
- used only in ``lambdarank`` application when positional information is provided and position bias is modeled
- larger values reduce the inferred position bias factors
- *New in version 4.1.0*
@ -1255,7 +1287,7 @@ Network Parameters
- the number of machines for distributed learning application
- this parameter is needed to be set in both **socket** and **mpi** versions
- this parameter is needed to be set in both **socket** and **MPI** versions
- ``local_listen_port`` :raw-html:`<a id="local_listen_port" title="Permalink to this parameter" href="#local_listen_port">&#x1F517;&#xFE0E;</a>`, default = ``12400 (random for Dask-package)``, type = int, aliases: ``local_port``, ``port``, constraints: ``local_listen_port > 0``
@ -1284,6 +1316,8 @@ GPU Parameters
- ``gpu_platform_id`` :raw-html:`<a id="gpu_platform_id" title="Permalink to this parameter" href="#gpu_platform_id">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int
- used only with ``gpu`` device type
- OpenCL platform ID. Usually each GPU vendor exposes one OpenCL platform
- ``-1`` means the system-wide default platform
@ -1292,7 +1326,7 @@ GPU Parameters
- ``gpu_device_id`` :raw-html:`<a id="gpu_device_id" title="Permalink to this parameter" href="#gpu_device_id">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int
- OpenCL device ID in the specified platform. Each GPU in the selected platform has a unique device ID
- OpenCL device ID in the specified platform or CUDA device ID. Each GPU in the selected platform has a unique device ID
- ``-1`` means the default device in the selected platform
@ -1302,13 +1336,13 @@ GPU Parameters
- set this to ``true`` to use double precision math on GPU (by default single precision is used)
- **Note**: can be used only in OpenCL implementation, in CUDA implementation only double precision is currently supported
- **Note**: can be used only in OpenCL implementation (``device_type="gpu"``), in CUDA implementation only double precision is currently supported
- ``num_gpu`` :raw-html:`<a id="num_gpu" title="Permalink to this parameter" href="#num_gpu">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int, constraints: ``num_gpu > 0``
- number of GPUs
- **Note**: can be used only in CUDA implementation
- **Note**: can be used only in CUDA implementation (``device_type="cuda"``)
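A sketch that pins the OpenCL device selection parameters above explicitly (platform/device IDs are machine-specific; ``0``/``0`` is just an assumption):

.. code:: python

    params = {
        "device_type": "gpu",
        "gpu_platform_id": 0,  # machine-specific
        "gpu_device_id": 0,    # machine-specific
        "gpu_use_dp": False,   # single precision, the OpenCL default
    }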
.. end params list

Просмотреть файл

@ -59,8 +59,9 @@ Many of the examples in this page use functionality from ``numpy``. To run the e
.. code:: python
data = np.random.rand(500, 10) # 500 entities, each contains 10 features
label = np.random.randint(2, size=500) # binary target
rng = np.random.default_rng()
data = rng.uniform(size=(500, 10)) # 500 entities, each contains 10 features
label = rng.integers(low=0, high=2, size=(500, )) # binary target
train_data = lgb.Dataset(data, label=label)
**To load a scipy.sparse.csr\_matrix array into Dataset:**
@ -139,7 +140,8 @@ It doesn't need to convert to one-hot encoding, and is much faster than one-hot
.. code:: python
w = np.random.rand(500, )
rng = np.random.default_rng()
w = rng.uniform(size=(500, ))
train_data = lgb.Dataset(data, label=label, weight=w)
or
@ -147,7 +149,8 @@ or
.. code:: python
train_data = lgb.Dataset(data, label=label)
w = np.random.rand(500, )
rng = np.random.default_rng()
w = rng.uniform(size=(500, ))
train_data.set_weight(w)
You can also use ``Dataset.set_init_score()`` to set the initial score, and ``Dataset.set_group()`` to set group/query data for ranking tasks.
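For example, a hedged sketch of a ranking setup continuing the 500-row dataset above (the query sizes are hypothetical and must sum to the number of rows):

.. code:: python

    init_score = np.zeros((500, ))
    train_data.set_init_score(init_score)
    train_data.set_group([100, 200, 200])  # three queries of hypothetical sizes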
@ -249,7 +252,8 @@ A model that has been trained or loaded can perform predictions on datasets:
.. code:: python
# 7 entities, each contains 10 features
data = np.random.rand(7, 10)
rng = np.random.default_rng()
data = rng.uniform(size=(7, 10))
ypred = bst.predict(data)
If early stopping is enabled during training, you can get predictions from the best iteration with ``bst.best_iteration``:
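.. code:: python

    ypred = bst.predict(data, num_iteration=bst.best_iteration)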
