Merge branch 'master' into nccl-dev

This commit is contained in:
shiyu1994 2024-06-30 22:14:04 +08:00 committed by GitHub
Parents 75afe5e010 e9a6c79807
Commit 1e6e4a1cca
No known key found for this signature
GPG key ID: B5690EEEBB952194
73 changed files: 1420 additions and 765 deletions

View file

@ -1,16 +1,14 @@
version: 4.3.0.99.{build}
version: 4.4.0.99.{build}
image: Visual Studio 2015
platform: x64
configuration: # a trick to construct a build matrix with multiple Python versions
configuration:
- '3.8'
# only build pull requests and
# commits to 'master' or any branch starting with 'release'
# only build on 'master' and pull requests targeting it
branches:
only:
- master
- /^release/
environment:
matrix:
@ -25,12 +23,13 @@ install:
- git submodule update --init --recursive # get `external_libs` folder
- set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH%
- set PYTHON_VERSION=%CONFIGURATION%
- set CONDA_ENV="test-env"
- ps: |
$env:ALLOW_SKIP_ARROW_TESTS = "1"
$env:APPVEYOR = "true"
$env:CMAKE_BUILD_PARALLEL_LEVEL = 4
$env:MINICONDA = "C:\Miniconda3-x64"
$env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH"
$env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER"
$env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim()
build: false

View file

@ -26,11 +26,12 @@ fi
PY_MINOR_VER=$(python -c "import sys; print(sys.version_info.minor)")
if [ $PY_MINOR_VER -gt 7 ]; then
echo "pydistcheck..."
pip install pydistcheck
pip install 'pydistcheck>=0.7.0'
if { test "${TASK}" = "cuda" || test "${METHOD}" = "wheel"; }; then
pydistcheck \
--inspect \
--ignore 'compiled-objects-have-debug-symbols,distro-too-large-compressed' \
--ignore 'compiled-objects-have-debug-symbols'\
--ignore 'distro-too-large-compressed' \
--max-allowed-size-uncompressed '500M' \
--max-allowed-files 800 \
${DIST_DIR}/* || exit 1

View file

@ -0,0 +1,51 @@
# [description]
#
# Similar to ci-core.txt, but specific to Python 3.8.
#
# Unlike ci-core.txt, this includes a Python version and uses
# `=` and `<=` pins to make solves faster and prevent against
# issues like https://github.com/microsoft/LightGBM/pull/6370.
#
# [usage]
#
# conda create \
# --name test-env \
# --file ./.ci/conda-envs/ci-core-py38.txt
#
# python
python=3.8.*
# direct imports
cffi=1.15.*
dask=2023.5.*
distributed=2023.5.*
joblib=1.4.*
matplotlib-base=3.7.*
numpy=1.24.*
pandas=1.5.*
pyarrow-core=16.1.*
python-graphviz=0.20.*
scikit-learn=1.3.*
scipy=1.10.*
# testing-only dependencies
cloudpickle=3.0.*
pluggy=1.5.*
psutil=5.9.8
pytest=8.2.*
# other recursive dependencies, just
# pinned here to help speed up solves
bokeh=3.1.*
fsspec=2024.5.*
msgpack-python=1.0.*
pluggy=1.5.*
pytz=2024.1
setuptools=69.5.*
snappy=1.2.*
tomli=2.0.*
tornado=6.4.*
wheel=0.43.*
zict=3.0.*
zipp=3.17.*

View file

@ -6,15 +6,12 @@
TRIGGER_PHRASE: Code phrase that triggers workflow.
"""
import json
from os import environ
from sys import argv, exit
from time import sleep
try:
from urllib import request
except ImportError:
import urllib2 as request
from urllib import request
def get_runs(trigger_phrase):

View file

@ -52,6 +52,8 @@ LINTERS_TO_USE <- list(
, "inner_combine" = lintr::inner_combine_linter()
, "is_numeric" = lintr::is_numeric_linter()
, "lengths" = lintr::lengths_linter()
, "length_levels" = lintr::length_levels_linter()
, "length_test" = lintr::length_test_linter()
, "line_length" = lintr::line_length_linter(length = 120L)
, "literal_coercion" = lintr::literal_coercion_linter()
, "matrix" = lintr::matrix_apply_linter()
@ -66,6 +68,7 @@ LINTERS_TO_USE <- list(
, "redundant_equals" = lintr::redundant_equals_linter()
, "regex_subset" = lintr::regex_subset_linter()
, "routine_registration" = lintr::routine_registration_linter()
, "scalar_in" = lintr::scalar_in_linter()
, "semicolon" = lintr::semicolon_linter()
, "seq" = lintr::seq_linter()
, "spaces_inside" = lintr::spaces_inside_linter()

View file

@ -14,15 +14,13 @@ if [[ $OS_NAME == "macos" ]]; then
if [[ $COMPILER == "clang" ]]; then
brew install libomp
if [[ $AZURE == "true" ]]; then
sudo xcode-select -s /Applications/Xcode_11.7.app/Contents/Developer || exit 1
sudo xcode-select -s /Applications/Xcode_13.1.0.app/Contents/Developer || exit 1
fi
else # gcc
# Check https://github.com/actions/runner-images/tree/main/images/macos for available
# versions of Xcode
sudo xcode-select -s /Applications/Xcode_14.3.1.app/Contents/Developer || exit 1
if [[ $TASK != "mpi" ]]; then
brew install gcc
fi
brew install gcc
fi
if [[ $TASK == "mpi" ]]; then
brew install open-mpi
@ -30,10 +28,6 @@ if [[ $OS_NAME == "macos" ]]; then
if [[ $TASK == "swig" ]]; then
brew install swig
fi
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-${ARCH}.sh
else # Linux
if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then
# fixes error "unable to initialize frontend: Dialog"
@ -45,35 +39,30 @@ else # Linux
software-properties-common
sudo apt-get install --no-install-recommends -y \
apt-utils \
build-essential \
ca-certificates \
cmake \
curl \
git \
iputils-ping \
jq \
libcurl4 \
libicu-dev \
libssl-dev \
libunwind8 \
locales \
locales-all \
netcat \
unzip \
zip || exit 1
locales-all || exit 1
if [[ $COMPILER == "clang" ]]; then
sudo apt-get install --no-install-recommends -y \
clang \
libomp-dev
elif [[ $COMPILER == "clang-17" ]]; then
sudo apt-get install wget
sudo apt-get install --no-install-recommends -y \
wget
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
sudo apt-add-repository deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main
sudo apt-add-repository deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main
sudo apt-get update
sudo apt-get install -y clang-17
sudo apt-get install --no-install-recommends -y libomp-17-dev
sudo apt-get install -y \
clang-17 \
libomp-17-dev
fi
export LANG="en_US.UTF-8"
@ -144,16 +133,14 @@ else # Linux
apt-get install --no-install-recommends -y \
cmake
fi
if [[ $SETUP_CONDA != "false" ]]; then
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${ARCH}.sh
fi
fi
if [[ "${TASK}" != "r-package" ]] && [[ "${TASK}" != "r-rchk" ]]; then
if [[ $SETUP_CONDA != "false" ]]; then
curl \
-sL \
-o miniforge.sh \
https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-${ARCH}.sh
sh miniforge.sh -b -p $CONDA
fi
conda config --set always_yes yes --set changeps1 no

50
.ci/test-python-latest.sh Executable file
View file

@ -0,0 +1,50 @@
#!/bin/bash
set -e -E -u -o pipefail
# latest versions of lightgbm's dependencies,
# including pre-releases and nightlies
#
# ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml
echo "installing testing dependencies"
python -m pip install \
cloudpickle \
psutil \
pytest
echo "done installing testing dependencies"
echo "installing lightgbm's dependencies"
python -m pip install \
--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
--prefer-binary \
--pre \
--upgrade \
'numpy>=2.0.0.dev0' \
'matplotlib>=3.10.0.dev0' \
'pandas>=3.0.0.dev0' \
'scikit-learn>=1.6.dev0' \
'scipy>=1.15.0.dev0'
python -m pip install \
--extra-index-url https://pypi.fury.io/arrow-nightlies/ \
--prefer-binary \
--pre \
--upgrade \
'pyarrow>=17.0.0.dev0'
python -m pip install \
'cffi>=1.15.1'
echo "done installing lightgbm's dependencies"
echo "installing lightgbm"
pip install --no-deps dist/*.whl
echo "done installing lightgbm"
echo "installed package versions:"
pip freeze
echo ""
echo "running tests"
pytest tests/c_api_test/
pytest tests/python_package_test/

View file

@ -3,19 +3,20 @@
set -e -E -u -o pipefail
# oldest versions of dependencies published after
# minimum supported Python version's first release
# minimum supported Python version's first release,
# for which there are wheels compatible with the
# python:{version} image
#
# see https://devguide.python.org/versions/
#
echo "installing lightgbm's dependencies"
pip install \
'cffi==1.15.1' \
'dataclasses' \
'numpy==1.16.6' \
'pandas==0.24.0' \
'numpy==1.19.0' \
'pandas==1.1.3' \
'pyarrow==6.0.1' \
'scikit-learn==0.18.2' \
'scipy==0.19.0' \
'scikit-learn==0.24.0' \
'scipy==1.6.0' \
|| exit 1
echo "done installing lightgbm's dependencies"

View file

@ -3,6 +3,7 @@
set -e -E -o -u pipefail
# defaults
CONDA_ENV="test-env"
IN_UBUNTU_BASE_CONTAINER=${IN_UBUNTU_BASE_CONTAINER:-"false"}
METHOD=${METHOD:-""}
PRODUCES_ARTIFACTS=${PRODUCES_ARTIFACTS:-"false"}
@ -10,6 +11,8 @@ SANITIZERS=${SANITIZERS:-""}
ARCH=$(uname -m)
LGB_VER=$(head -n 1 "${BUILD_DIRECTORY}/VERSION.txt")
if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "gcc" ]]; then
export CXX=g++-11
export CC=gcc-11
@ -26,8 +29,21 @@ if [[ $IN_UBUNTU_BASE_CONTAINER == "true" ]]; then
export LC_ALL="en_US.UTF-8"
fi
# Setting MACOSX_DEPLOYMENT_TARGET prevents CMake from building against too-new
# macOS features, and helps tools like Python build tools determine the appropriate
# wheel compatibility tags.
#
# ref:
# * https://cmake.org/cmake/help/latest/envvar/MACOSX_DEPLOYMENT_TARGET.html
# * https://github.com/scikit-build/scikit-build-core/blob/acb7d0346e4a05bcb47a4ea3939c705ab71e3145/src/scikit_build_core/builder/macos.py#L36
if [[ $ARCH == "x86_64" ]]; then
export MACOSX_DEPLOYMENT_TARGET=10.15
else
export MACOSX_DEPLOYMENT_TARGET=12.0
fi
if [[ "${TASK}" == "r-package" ]] || [[ "${TASK}" == "r-rchk" ]]; then
bash ${BUILD_DIRECTORY}/.ci/test_r_package.sh || exit 1
bash "${BUILD_DIRECTORY}/.ci/test_r_package.sh" || exit 1
exit 0
fi
@ -54,27 +70,31 @@ if [[ $TASK == "if-else" ]]; then
source activate $CONDA_ENV
cmake -B build -S . || exit 1
cmake --build build --target lightgbm -j4 || exit 1
cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp && ../../lightgbm config=predict.conf output_result=origin.pred || exit 1
cd $BUILD_DIRECTORY/tests/cpp_tests && ../../lightgbm config=predict.conf output_result=ifelse.pred && python test.py || exit 1
cd "$BUILD_DIRECTORY/tests/cpp_tests"
../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp
../../lightgbm config=predict.conf output_result=origin.pred
../../lightgbm config=predict.conf output_result=ifelse.pred
python test.py
exit 0
fi
cd "${BUILD_DIRECTORY}"
if [[ $TASK == "swig" ]]; then
cmake -B build -S . -DUSE_SWIG=ON
cmake --build build -j4 || exit 1
if [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "gcc" ]]; then
objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T $BUILD_DIRECTORY/lib_lightgbm_swig.so >> $BUILD_DIRECTORY/objdump.log || exit 1
python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1
objdump -T ./lib_lightgbm_swig.so >> ./objdump.log || exit 1
python ./helpers/check_dynamic_dependencies.py ./objdump.log || exit 1
fi
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp $BUILD_DIRECTORY/build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar
cp ./build/lightgbmlib.jar $BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_$OS_NAME.jar
fi
exit 0
fi
if [[ $TASK == "lint" ]]; then
cd ${BUILD_DIRECTORY}
mamba create -q -y -n $CONDA_ENV \
${CONDA_PYTHON_REQUIREMENT} \
'cmakelint>=1.4.2' \
@ -83,19 +103,19 @@ if [[ $TASK == "lint" ]]; then
'mypy>=1.8.0' \
'pre-commit>=3.6.0' \
'pyarrow>=6.0' \
'r-lintr>=3.1'
'r-lintr>=3.1.2'
source activate $CONDA_ENV
echo "Linting Python code"
bash ${BUILD_DIRECTORY}/.ci/lint-python.sh || exit 1
bash ./.ci/lint-python.sh || exit 1
echo "Linting R code"
Rscript ${BUILD_DIRECTORY}/.ci/lint_r_code.R ${BUILD_DIRECTORY} || exit 1
Rscript ./.ci/lint_r_code.R "${BUILD_DIRECTORY}" || exit 1
echo "Linting C++ code"
bash ${BUILD_DIRECTORY}/.ci/lint-cpp.sh || exit 1
bash ./.ci/lint-cpp.sh || exit 1
exit 0
fi
if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
cd $BUILD_DIRECTORY/docs
cd "${BUILD_DIRECTORY}/docs"
mamba env create \
-n $CONDA_ENV \
--file ./env.yml || exit 1
@ -107,29 +127,32 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
'rstcheck>=6.2.0' || exit 1
source activate $CONDA_ENV
# check reStructuredText formatting
cd $BUILD_DIRECTORY/python-package
cd "${BUILD_DIRECTORY}/python-package"
rstcheck --report-level warning $(find . -type f -name "*.rst") || exit 1
cd $BUILD_DIRECTORY/docs
cd "${BUILD_DIRECTORY}/docs"
rstcheck --report-level warning --ignore-directives=autoclass,autofunction,autosummary,doxygenfile $(find . -type f -name "*.rst") || exit 1
# build docs
make html || exit 1
if [[ $TASK == "check-links" ]]; then
# check docs for broken links
pip install --user linkchecker
pip install linkchecker
linkchecker --config=.linkcheckerrc ./_build/html/*.html || exit 1
exit 0
fi
# check the consistency of parameters' descriptions and other stuff
cp $BUILD_DIRECTORY/docs/Parameters.rst $BUILD_DIRECTORY/docs/Parameters-backup.rst
cp $BUILD_DIRECTORY/src/io/config_auto.cpp $BUILD_DIRECTORY/src/io/config_auto-backup.cpp
python $BUILD_DIRECTORY/helpers/parameter_generator.py || exit 1
diff $BUILD_DIRECTORY/docs/Parameters-backup.rst $BUILD_DIRECTORY/docs/Parameters.rst || exit 1
diff $BUILD_DIRECTORY/src/io/config_auto-backup.cpp $BUILD_DIRECTORY/src/io/config_auto.cpp || exit 1
cd "${BUILD_DIRECTORY}"
cp ./docs/Parameters.rst ./docs/Parameters-backup.rst
cp ./src/io/config_auto.cpp ./src/io/config_auto-backup.cpp
python ./helpers/parameter_generator.py || exit 1
diff ./docs/Parameters-backup.rst ./docs/Parameters.rst || exit 1
diff ./src/io/config_auto-backup.cpp ./src/io/config_auto.cpp || exit 1
exit 0
fi
if [[ $PYTHON_VERSION == "3.7" ]]; then
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py37.txt"
elif [[ $PYTHON_VERSION == "3.8" ]]; then
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core-py38.txt"
else
CONDA_REQUIREMENT_FILES="--file ${BUILD_DIRECTORY}/.ci/conda-envs/ci-core.txt"
fi
@ -143,38 +166,21 @@ mamba create \
source activate $CONDA_ENV
cd $BUILD_DIRECTORY
if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then
# fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL)
for LIBOMP_ALIAS in libgomp.dylib libiomp5.dylib libomp.dylib; do sudo ln -sf "$(brew --cellar libomp)"/*/lib/libomp.dylib $CONDA_PREFIX/lib/$LIBOMP_ALIAS || exit 1; done
fi
cd "${BUILD_DIRECTORY}"
if [[ $TASK == "sdist" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz -v || exit 1
sh ./build-python.sh sdist || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER.tar.gz -v || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
cp ./dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $TASK == "bdist" ]]; then
if [[ $OS_NAME == "macos" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
mv \
./dist/*.whl \
./dist/tmp.whl || exit 1
if [[ $ARCH == "x86_64" ]]; then
PLATFORM="macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64"
else
echo "ERROR: macos wheels not supported yet on architecture '${ARCH}'"
exit 1
fi
mv \
./dist/tmp.whl \
dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl || exit 1
sh ./build-python.sh bdist_wheel || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
@ -184,91 +190,88 @@ elif [[ $TASK == "bdist" ]]; then
else
PLATFORM="manylinux2014_$ARCH"
fi
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1
sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1
mv \
./dist/*.whl \
./dist/tmp.whl || exit 1
mv \
./dist/tmp.whl \
./dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl $BUILD_ARTIFACTSTAGINGDIRECTORY || exit 1
fi
# Make sure we can do both CPU and GPU; see tests/python_package_test/test_dual.py
export LIGHTGBM_TEST_DUAL_CPU_GPU=1
fi
pip install --user $BUILD_DIRECTORY/dist/*.whl || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
pip install -v ./dist/*.whl || exit 1
pytest ./tests || exit 1
exit 0
fi
if [[ $TASK == "gpu" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "gpu"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ./include/LightGBM/config.h
grep -q 'std::string device_type = "gpu"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh .ci/check_python_dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_GPU=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --gpu || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --gpu || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
cmake -B build -S . -DUSE_GPU=ON
fi
elif [[ $TASK == "cuda" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' ./include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
# by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit 1 # make sure that changes were really done
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' ./include/LightGBM/config.h
grep -q 'gpu_use_dp = true' ./include/LightGBM/config.h || exit 1 # make sure that changes were really done
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_CUDA=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --cuda || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --cuda || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
cmake -B build -S . -DUSE_CUDA=ON
fi
elif [[ $TASK == "mpi" ]]; then
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh sdist || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
sh ./build-python.sh sdist || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install \
--user \
-v \
--config-settings=cmake.define.USE_MPI=ON \
$BUILD_DIRECTORY/dist/lightgbm-$LGB_VER.tar.gz \
./dist/lightgbm-$LGB_VER.tar.gz \
|| exit 1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit 1
pytest ./tests/python_package_test || exit 1
exit 0
elif [[ $METHOD == "wheel" ]]; then
cd $BUILD_DIRECTORY && sh ./build-python.sh bdist_wheel --mpi || exit 1
sh $BUILD_DIRECTORY/.ci/check_python_dists.sh $BUILD_DIRECTORY/dist || exit 1
pip install --user $BUILD_DIRECTORY/dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh bdist_wheel --mpi || exit 1
sh ./.ci/check_python_dists.sh ./dist || exit 1
pip install ./dist/lightgbm-$LGB_VER*.whl -v || exit 1
pytest ./tests || exit 1
exit 0
elif [[ $METHOD == "source" ]]; then
cmake -B build -S . -DUSE_MPI=ON -DUSE_DEBUG=ON
@ -279,22 +282,22 @@ fi
cmake --build build --target _lightgbm -j4 || exit 1
cd $BUILD_DIRECTORY && sh ./build-python.sh install --precompile --user || exit 1
pytest $BUILD_DIRECTORY/tests || exit 1
sh ./build-python.sh install --precompile || exit 1
pytest ./tests || exit 1
if [[ $TASK == "regular" ]]; then
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
if [[ $OS_NAME == "macos" ]]; then
cp $BUILD_DIRECTORY/lib_lightgbm.dylib $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib
cp ./lib_lightgbm.dylib $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib
else
if [[ $COMPILER == "gcc" ]]; then
objdump -T $BUILD_DIRECTORY/lib_lightgbm.so > $BUILD_DIRECTORY/objdump.log || exit 1
python $BUILD_DIRECTORY/helpers/check_dynamic_dependencies.py $BUILD_DIRECTORY/objdump.log || exit 1
objdump -T ./lib_lightgbm.so > ./objdump.log || exit 1
python ./helpers/check_dynamic_dependencies.py ./objdump.log || exit 1
fi
cp $BUILD_DIRECTORY/lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so
cp ./lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.so
fi
fi
cd $BUILD_DIRECTORY/examples/python-guide
cd "$BUILD_DIRECTORY/examples/python-guide"
sed -i'.bak' '/import lightgbm as lgb/a\
import matplotlib\
matplotlib.use\(\"Agg\"\)\
@ -306,7 +309,7 @@ matplotlib.use\(\"Agg\"\)\
'ipywidgets>=8.1.2' \
'notebook>=7.1.2'
for f in *.py **/*.py; do python $f || exit 1; done # run all examples
cd $BUILD_DIRECTORY/examples/python-guide/notebooks
cd "$BUILD_DIRECTORY/examples/python-guide/notebooks"
sed -i'.bak' 's/INTERACTIVE = False/assert False, \\"Interactive mode disabled\\"/' interactive_plot_example.ipynb
jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb || exit 1 # run all notebooks

View file

@ -106,10 +106,10 @@ if [[ $OS_NAME == "macos" ]]; then
-target / || exit 1
fi
# fix for issue where CRAN was not returning {lattice} when using R 3.6
# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6
# "Warning: dependency lattice is not available"
if [[ "${R_MAJOR_VERSION}" == "3" ]]; then
Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', repos = NULL, lib = '${R_LIB_PATH}')"
Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')"
else
# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}.
# This should be unnecessary on R >=4.4.0
@ -136,7 +136,7 @@ if [[ $OS_NAME == "macos" ]]; then
fi
Rscript --vanilla -e "options(install.packages.compile.from.source = '${compile_from_source}'); install.packages(${packages}, repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), Ncpus = parallel::detectCores())" || exit 1
cd ${BUILD_DIRECTORY}
cd "${BUILD_DIRECTORY}"
PKG_TARBALL="lightgbm_*.tar.gz"
LOG_FILE_NAME="lightgbm.Rcheck/00check.log"
@ -147,7 +147,7 @@ elif [[ $R_BUILD_TYPE == "cran" ]]; then
# on Linux, we recreate configure in CI to test if
# a change in a PR has changed configure.ac
if [[ $OS_NAME == "linux" ]]; then
${BUILD_DIRECTORY}/R-package/recreate-configure.sh
./R-package/recreate-configure.sh
num_files_changed=$(
git diff --name-only | wc -l

View file

@ -72,10 +72,14 @@ bytes_possibly_lost=$(
| tr -d ","
)
echo "valgrind found ${bytes_possibly_lost} bytes possibly lost"
if [[ ${bytes_possibly_lost} -gt 1056 ]]; then
if [[ ${bytes_possibly_lost} -gt 1104 ]]; then
exit 1
fi
# ensure 'grep --count' doesn't cause failures
set +e
echo "checking for invalid reads"
invalid_reads=$(
cat ${VALGRIND_LOGS_FILE} \
| grep --count -i "Invalid read"
@ -85,6 +89,7 @@ if [[ ${invalid_reads} -gt 0 ]]; then
exit 1
fi
echo "checking for invalid writes"
invalid_writes=$(
cat ${VALGRIND_LOGS_FILE} \
| grep --count -i "Invalid write"
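For context (a sketch, not part of this diff): the 'set +e' guard added above exists because 'grep --count' exits with status 1 when it finds zero matches, which would abort a script running under 'set -e'. A minimal illustration of the pattern, using a hypothetical empty log file:
# not part of the diff: demonstrates why 'set +e' wraps the grep counts above
set -e
touch empty.log                                      # hypothetical log file with no matches
set +e                                               # grep --count exits 1 when it finds 0 matches
matches=$(grep --count -i "Invalid read" empty.log)
set -e
echo "found ${matches} matches"                      # prints 'found 0 matches' instead of aborting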

View file

@ -6,14 +6,11 @@ function Check-Output {
}
}
# unify environment variable for Azure DevOps and AppVeyor
if (Test-Path env:APPVEYOR) {
$env:APPVEYOR = "true"
$env:ALLOW_SKIP_ARROW_TESTS = "1"
}
$env:CONDA_ENV = "test-env"
$env:LGB_VER = (Get-Content $env:BUILD_SOURCESDIRECTORY\VERSION.txt).trim()
if ($env:TASK -eq "r-package") {
& $env:BUILD_SOURCESDIRECTORY\.ci\test_r_package_windows.ps1 ; Check-Output $?
& .\.ci\test_r_package_windows.ps1 ; Check-Output $?
Exit 0
}
@ -34,7 +31,7 @@ if ($env:TASK -eq "swig") {
cmake -B build -S . -A x64 -DUSE_SWIG=ON ; Check-Output $?
cmake --build build --target ALL_BUILD --config Release ; Check-Output $?
if ($env:AZURE -eq "true") {
cp $env:BUILD_SOURCESDIRECTORY/build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $?
cp ./build/lightgbmlib.jar $env:BUILD_ARTIFACTSTAGINGDIRECTORY/lightgbmlib_win.jar ; Check-Output $?
}
Exit 0
}
@ -43,16 +40,12 @@ if ($env:TASK -eq "swig") {
conda init powershell
conda activate
conda config --set always_yes yes --set changeps1 no
# ref:
# * https://stackoverflow.com/a/62897729/3986677
# * https://github.com/microsoft/LightGBM/issues/5899
conda install "brotlipy>=0.7"
conda update -q -y conda
conda update -q -y conda "python=$env:PYTHON_VERSION[build=*cpython]"
if ($env:PYTHON_VERSION -eq "3.7") {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py37.txt"
} elseif ($env:PYTHON_VERSION -eq "3.8") {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core-py38.txt"
} else {
$env:CONDA_REQUIREMENT_FILE = "$env:BUILD_SOURCESDIRECTORY/.ci/conda-envs/ci-core.txt"
}
@ -67,18 +60,17 @@ if ($env:TASK -ne "bdist") {
conda activate $env:CONDA_ENV
}
cd $env:BUILD_SOURCESDIRECTORY
if ($env:TASK -eq "regular") {
cmake -B build -S . -A x64 ; Check-Output $?
cmake --build build --target ALL_BUILD --config Release ; Check-Output $?
cd $env:BUILD_SOURCESDIRECTORY
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --precompile ; Check-Output $?
cp $env:BUILD_SOURCESDIRECTORY/Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY
cp $env:BUILD_SOURCESDIRECTORY/Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY
sh ./build-python.sh install --precompile ; Check-Output $?
cp ./Release/lib_lightgbm.dll $env:BUILD_ARTIFACTSTAGINGDIRECTORY
cp ./Release/lightgbm.exe $env:BUILD_ARTIFACTSTAGINGDIRECTORY
}
elseif ($env:TASK -eq "sdist") {
cd $env:BUILD_SOURCESDIRECTORY
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh sdist ; Check-Output $?
sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $?
sh ./build-python.sh sdist ; Check-Output $?
sh ./.ci/check_python_dists.sh ./dist ; Check-Output $?
cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $?
}
elseif ($env:TASK -eq "bdist") {
@ -92,17 +84,15 @@ elseif ($env:TASK -eq "bdist") {
Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors
conda activate $env:CONDA_ENV
cd $env:BUILD_SOURCESDIRECTORY
sh "build-python.sh" bdist_wheel --integrated-opencl ; Check-Output $?
sh $env:BUILD_SOURCESDIRECTORY/.ci/check_python_dists.sh $env:BUILD_SOURCESDIRECTORY/dist ; Check-Output $?
cd dist; pip install --user @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $?
sh ./.ci/check_python_dists.sh ./dist ; Check-Output $?
cd dist; pip install @(Get-ChildItem *py3-none-win_amd64.whl) ; Check-Output $?
cp @(Get-ChildItem *py3-none-win_amd64.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY
} elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) {
cd $env:BUILD_SOURCESDIRECTORY
if ($env:COMPILER -eq "MINGW") {
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user --mingw ; Check-Output $?
sh ./build-python.sh install --mingw ; Check-Output $?
} else {
sh $env:BUILD_SOURCESDIRECTORY/build-python.sh install --user; Check-Output $?
sh ./build-python.sh install; Check-Output $?
}
}

140
.github/workflows/cuda.yml vendored
View file

@ -7,54 +7,41 @@ on:
pull_request:
branches:
- master
- release/*
# Run manually by clicking a button in the UI
workflow_dispatch:
inputs:
restart_docker:
description: 'Restart nvidia-docker on the runner before building?'
required: true
type: boolean
default: false
# automatically cancel in-progress builds if another commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
github_actions: 'true'
os_name: linux
conda_env: test-env
jobs:
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
# Optionally reinstall + restart docker on the runner before building.
# This is safe as long as only 1 of these jobs runs at a time.
restart-docker:
name: set up docker
runs-on: [self-hosted, linux]
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.11"
cuda_version: "11.8.0"
task: cuda
- method: source
compiler: gcc
python_version: "3.9"
cuda_version: "12.2.0"
task: cuda
- method: pip
compiler: clang
python_version: "3.10"
cuda_version: "11.8.0"
task: cuda
timeout-minutes: 30
steps:
- name: Setup or update software on host machine
if: ${{ inputs.restart_docker }}
run: |
# install core packages
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
apt-transport-https \
ca-certificates \
curl \
git \
gnupg-agent \
lsb-release \
software-properties-common
# set up nvidia-docker
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
@ -67,43 +54,76 @@ jobs:
nvidia-docker2
sudo chmod a+rw /var/run/docker.sock
sudo systemctl restart docker
- name: Remove old folder with repository
run: sudo rm -rf $GITHUB_WORKSPACE
- name: mark job successful
run: |
exit 0
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (${{ matrix.linux_version }}, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
runs-on: [self-hosted, linux]
needs: [restart-docker]
container:
image: nvcr.io/nvidia/cuda:${{ matrix.cuda_version }}-devel-${{ matrix.linux_version }}
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
COMPILER: ${{ matrix.compiler }}
CONDA: /tmp/miniforge
DEBIAN_FRONTEND: noninteractive
METHOD: ${{ matrix.method }}
OS_NAME: linux
PYTHON_VERSION: ${{ matrix.python_version }}
TASK: ${{ matrix.task }}
SKBUILD_STRICT_CONFIG: true
options: --gpus all
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.10"
cuda_version: "11.8.0"
linux_version: "ubuntu20.04"
task: cuda
- method: source
compiler: gcc
python_version: "3.12"
cuda_version: "12.2.0"
linux_version: "ubuntu22.04"
task: cuda
- method: pip
compiler: clang
python_version: "3.11"
cuda_version: "11.8.0"
linux_version: "ubuntu20.04"
task: cuda
steps:
- name: Install latest git
run: |
apt-get update
apt-get install --no-install-recommends -y \
ca-certificates \
software-properties-common
add-apt-repository ppa:git-core/ppa -y
apt-get update
apt-get install --no-install-recommends -y \
git
- name: Checkout repository
uses: actions/checkout@v1
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Setup and run tests
run: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
GITHUB_ACTIONS=${{ env.github_actions }}
OS_NAME=${{ env.os_name }}
COMPILER=${{ matrix.compiler }}
TASK=${{ matrix.task }}
METHOD=${{ matrix.method }}
CONDA_ENV=${{ env.conda_env }}
PYTHON_VERSION=${{ matrix.python_version }}
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:\$PATH
nvidia-smi
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
EOF
cuda_version="${{ matrix.cuda_version }}"
cuda_major=${cuda_version%%.*}
docker_img="nvcr.io/nvidia/cuda:${cuda_version}-devel"
if [[ ${cuda_major} -eq 11 ]]; then
docker_img="${docker_img}-ubuntu18.04"
elif [[ ${cuda_major} -ge 12 ]]; then
docker_img="${docker_img}-ubuntu20.04"
fi
docker run --env-file docker.env -v "$GITHUB_WORKSPACE":"$ROOT_DOCKER_FOLDER" --rm --gpus all "$docker_img" /bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export PATH=$CONDA/bin:$PATH
# check GPU usage
nvidia-smi
# build and test
$GITHUB_WORKSPACE/.ci/setup.sh
$GITHUB_WORKSPACE/.ci/test.sh
all-cuda-jobs-successful:
if: always()
runs-on: ubuntu-latest

7
.github/workflows/linkchecker.yml vendored
View file

@ -8,10 +8,9 @@ on:
- cron: '0 8 * * *'
env:
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
COMPILER: gcc
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'
TASK: 'check-links'
jobs:
@ -20,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false

3
.github/workflows/optional_checks.yml vendored
View file

@ -4,7 +4,6 @@ on:
pull_request:
branches:
- master
- release/*
jobs:
all-optional-checks-successful:
@ -12,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false

80
.github/workflows/python_package.yml vendored
View file

@ -7,7 +7,6 @@ on:
pull_request:
branches:
- master
- release/*
# automatically cancel in-progress builds if another commit is pushed
concurrency:
@ -15,8 +14,8 @@ concurrency:
cancel-in-progress: true
env:
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
CMAKE_BUILD_PARALLEL_LEVEL: 4
SKBUILD_STRICT_CONFIG: true
jobs:
test:
@ -29,33 +28,37 @@ jobs:
include:
- os: macos-13
task: regular
python_version: '3.9'
- os: macos-13
task: sdist
python_version: '3.10'
- os: macos-13
task: sdist
python_version: '3.11'
- os: macos-13
task: bdist
python_version: '3.7'
python_version: '3.8'
- os: macos-13
task: if-else
python_version: '3.9'
- os: macos-14
task: bdist
method: wheel
python_version: '3.10'
# We're currently skipping MPI jobs on macOS, see https://github.com/microsoft/LightGBM/pull/6425
# for further details.
# - os: macos-13
# task: mpi
# method: source
# python_version: '3.10'
# - os: macos-13
# task: mpi
# method: pip
# python_version: '3.11'
# - os: macos-13
# task: mpi
# method: pip
# python_version: '3.12'
# - os: macos-13
# task: mpi
# method: wheel
# python_version: '3.8'
# python_version: '3.9'
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
@ -65,7 +68,11 @@ jobs:
export TASK="${{ matrix.task }}"
export METHOD="${{ matrix.method }}"
export PYTHON_VERSION="${{ matrix.python_version }}"
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
if [[ "${{ matrix.os }}" == "macos-14" ]]; then
# use clang when creating macOS release artifacts
export COMPILER="clang"
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "macos-13" ]]; then
export COMPILER="gcc"
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
@ -73,18 +80,23 @@ jobs:
export OS_NAME="linux"
fi
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export LGB_VER=$(head -n 1 VERSION.txt)
export CONDA=${HOME}/miniforge
export PATH=${CONDA}/bin:${PATH}
$GITHUB_WORKSPACE/.ci/setup.sh || exit 1
$GITHUB_WORKSPACE/.ci/test.sh || exit 1
test-oldest-versions:
name: Python - oldest supported versions (ubuntu-latest)
- name: upload wheels
if: ${{ matrix.method == 'wheel' && matrix.os == 'macos-14' }}
uses: actions/upload-artifact@v4
with:
name: macosx-arm64-wheel
path: dist/*.whl
test-latest-versions:
name: Python - latest versions (ubuntu-latest)
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
@ -92,6 +104,7 @@ jobs:
run: |
docker run \
--rm \
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
@ -102,12 +115,39 @@ jobs:
--rm \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
python:3.6 \
python:3.11 \
/bin/bash ./.ci/test-python-latest.sh
test-oldest-versions:
name: Python - oldest supported versions (ubuntu-latest)
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Create wheel
run: |
docker run \
--rm \
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
/bin/bash -c 'PATH=/opt/miniforge/bin:$PATH sh ./build-python.sh bdist_wheel --nomp'
- name: Test compatibility
run: |
docker run \
--rm \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
python:3.7 \
/bin/bash ./.ci/test-python-oldest.sh
all-python-package-jobs-successful:
if: always()
runs-on: ubuntu-latest
needs: [test, test-oldest-versions]
needs: [test, test-latest-versions, test-oldest-versions]
steps:
- name: Note that all tests succeeded
uses: re-actors/alls-green@v1.2.2

2
.github/workflows/r_configure.yml vendored
View file

@ -21,7 +21,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

8
.github/workflows/r_package.yml vendored
View file

@ -7,7 +7,6 @@ on:
pull_request:
branches:
- master
- release/*
# automatically cancel in-progress builds if another commit is pushed
concurrency:
@ -15,6 +14,7 @@ concurrency:
cancel-in-progress: true
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
# hack to get around this:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
_R_CHECK_SYSTEM_CLOCK_: 0
@ -189,7 +189,6 @@ jobs:
run: |
export TASK="${{ matrix.task }}"
export COMPILER="${{ matrix.compiler }}"
export GITHUB_ACTIONS="true"
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
export OS_NAME="macos"
elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
@ -216,7 +215,6 @@ jobs:
$env:R_VERSION = "${{ matrix.r_version }}"
$env:R_BUILD_TYPE = "${{ matrix.build_type }}"
$env:COMPILER = "${{ matrix.compiler }}"
$env:GITHUB_ACTIONS = "true"
$env:TASK = "${{ matrix.task }}"
& "$env:GITHUB_WORKSPACE/.ci/test_windows.ps1"
test-r-sanitizers:
@ -237,7 +235,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
@ -280,7 +278,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

2
.github/workflows/r_valgrind.yml vendored
View file

@ -24,7 +24,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

9
.github/workflows/static_analysis.yml vendored
View file

@ -9,7 +9,6 @@ on:
pull_request:
branches:
- master
- release/*
# automatically cancel in-progress builds if another commit is pushed
concurrency:
@ -18,10 +17,8 @@ concurrency:
env:
COMPILER: 'gcc'
CONDA_ENV: test-env
GITHUB_ACTIONS: 'true'
OS_NAME: 'linux'
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'
jobs:
test:
@ -36,7 +33,7 @@ jobs:
- task: check-docs
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false
@ -59,7 +56,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true

2
.github/workflows/triggering_comments.yml vendored
View file

@ -12,7 +12,7 @@ jobs:
SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: false

2
.gitignore vendored
View file

@ -405,7 +405,7 @@ python-package/lightgbm/VERSION.txt
# R build artefacts
**/autom4te.cache/
conftest*
R-package/conftest*
R-package/config.status
!R-package/data/agaricus.test.rda
!R-package/data/agaricus.train.rda

View file

@ -1,5 +1,6 @@
# coding: utf-8
"""Script for generating files with NuGet package metadata."""
import datetime
import sys
from pathlib import Path

View file

@ -13,7 +13,7 @@ exclude: |
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
@ -25,7 +25,7 @@ repos:
args: ["--settings-path", "python-package/pyproject.toml"]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.4.7
hooks:
# Run the linter.
- id: ruff

View file

@ -7,15 +7,15 @@ trigger:
- v*
pr:
- master
- release/*
variables:
AZURE: 'true'
PYTHON_VERSION: '3.11'
CONDA_ENV: test-env
CMAKE_BUILD_PARALLEL_LEVEL: 4
PYTHON_VERSION: '3.12'
runCodesignValidationInjection: false
skipComponentGovernanceDetection: true
DOTNET_CLI_TELEMETRY_OPTOUT: true
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
SKBUILD_STRICT_CONFIG: true
resources:
# The __work/ directory, where Azure DevOps writes the source files, needs to be read-write because
# LightGBM's CI jobs write files in the source directory.
@ -61,19 +61,19 @@ jobs:
matrix:
regular:
TASK: regular
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
sdist:
TASK: sdist
PYTHON_VERSION: '3.7'
PYTHON_VERSION: '3.8'
bdist:
TASK: bdist
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
inference:
TASK: if-else
mpi_source:
TASK: mpi
METHOD: source
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
gpu_source:
TASK: gpu
METHOD: source
@ -82,7 +82,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
echo "##vso[task.prependpath]/usr/lib64/openmpi/bin"
echo "##vso[task.prependpath]$CONDA/bin"
displayName: 'Set variables'
@ -127,7 +126,7 @@ jobs:
TASK: sdist
bdist:
TASK: bdist
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
inference:
TASK: if-else
mpi_source:
@ -136,30 +135,29 @@ jobs:
mpi_pip:
TASK: mpi
METHOD: pip
PYTHON_VERSION: '3.10'
PYTHON_VERSION: '3.11'
mpi_wheel:
TASK: mpi
METHOD: wheel
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
gpu_source:
TASK: gpu
METHOD: source
PYTHON_VERSION: '3.10'
PYTHON_VERSION: '3.11'
gpu_pip:
TASK: gpu
METHOD: pip
PYTHON_VERSION: '3.9'
PYTHON_VERSION: '3.10'
gpu_wheel:
TASK: gpu
METHOD: wheel
PYTHON_VERSION: '3.8'
PYTHON_VERSION: '3.9'
cpp_tests:
TASK: cpp-tests
METHOD: with-sanitizers
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$HOME/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"
@ -188,8 +186,8 @@ jobs:
- job: QEMU_multiarch
###########################################
variables:
BUILD_DIRECTORY: /LightGBM
COMPILER: gcc
OS_NAME: 'linux'
PRODUCES_ARTIFACTS: 'true'
pool:
vmImage: ubuntu-22.04
@ -215,26 +213,12 @@ jobs:
git clean -d -f -x
displayName: 'Clean source directory'
- script: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
AZURE=$AZURE
OS_NAME=$OS_NAME
COMPILER=$COMPILER
TASK=$TASK
METHOD=$METHOD
CONDA_ENV=$CONDA_ENV
PYTHON_VERSION=$PYTHON_VERSION
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS
BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:/opt/rh/llvm-toolset-7.0/root/usr/bin:\$PATH
export LD_LIBRARY_PATH=/opt/rh/llvm-toolset-7.0/root/usr/lib64:\$LD_LIBRARY_PATH
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit 1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit 1
\$BUILD_DIRECTORY/.ci/setup.sh || exit 1
\$BUILD_DIRECTORY/.ci/test.sh || exit 1
EOF
IMAGE_URI="lightgbm/vsts-agent:manylinux2014_aarch64"
docker pull "${IMAGE_URI}" || exit 1
@ -243,11 +227,19 @@ jobs:
docker run \
--platform "${PLATFORM}" \
--rm \
--env-file docker.env \
-v "$(Build.SourcesDirectory)":"$ROOT_DOCKER_FOLDER" \
--env AZURE=true \
--env BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY \
--env BUILD_DIRECTORY=$BUILD_DIRECTORY \
--env COMPILER=$COMPILER \
--env METHOD=$METHOD \
--env OS_NAME=linux \
--env PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS \
--env PYTHON_VERSION=$PYTHON_VERSION \
--env TASK=$TASK \
-v "$(Build.SourcesDirectory)":"$BUILD_DIRECTORY" \
-v "$(Build.ArtifactStagingDirectory)":"$(Build.ArtifactStagingDirectory)" \
"${IMAGE_URI}" \
/bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
/bin/bash $BUILD_DIRECTORY/docker-script.sh
displayName: 'Setup and run tests'
- task: PublishBuildArtifacts@1
condition: and(succeeded(), in(variables['TASK'], 'bdist'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
@ -263,7 +255,7 @@ jobs:
OS_NAME: 'macos'
PRODUCES_ARTIFACTS: 'true'
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-12'
strategy:
matrix:
regular:
@ -283,7 +275,6 @@ jobs:
steps:
- script: |
echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY"
echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)"
CONDA=$AGENT_HOMEDIRECTORY/miniforge
echo "##vso[task.setvariable variable=CONDA]$CONDA"
echo "##vso[task.prependpath]$CONDA/bin"

View file

@ -25,6 +25,14 @@ option(__INTEGRATE_OPENCL "Set to ON if building LightGBM with the OpenCL ICD Lo
cmake_minimum_required(VERSION 3.18)
# If using Visual Studio generators, always target v10.x of the Windows SDK.
# Doing this avoids lookups that could fall back to very old versions, e.g. by finding
# outdated registry entries.
# ref: https://cmake.org/cmake/help/latest/variable/CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION.html
if(CMAKE_GENERATOR MATCHES "Visual Studio")
set(CMAKE_SYSTEM_VERSION 10.0 CACHE INTERNAL "target Windows SDK version" FORCE)
endif()
project(lightgbm LANGUAGES C CXX)
if(BUILD_CPP_TEST)
@ -704,6 +712,83 @@ if(__BUILD_FOR_PYTHON)
set(CMAKE_INSTALL_PREFIX "lightgbm")
endif()
# The macOS linker puts an absolute path to linked libraries in lib_lightgbm.dylib.
# This block overrides that information for LightGBM's OpenMP dependency, to allow
# finding that library in more places.
#
# This reduces the risk of runtime issues resulting from multiple libomp.dylib being loaded.
#
if(APPLE AND USE_OPENMP)
# store path to libomp found at build time in a variable
get_target_property(
OpenMP_LIBRARY_LOCATION
OpenMP::OpenMP_CXX
INTERFACE_LINK_LIBRARIES
)
# get just the filename of that path
# (to deal with the possibility that it might be 'libomp.dylib' or 'libgomp.dylib' or 'libiomp.dylib')
get_filename_component(
OpenMP_LIBRARY_NAME
${OpenMP_LIBRARY_LOCATION}
NAME
)
# get directory of that path
get_filename_component(
OpenMP_LIBRARY_DIR
${OpenMP_LIBRARY_LOCATION}
DIRECTORY
)
# get exact name of the library in a variable
get_target_property(
__LIB_LIGHTGBM_OUTPUT_NAME
_lightgbm
OUTPUT_NAME
)
if(NOT __LIB_LIGHTGBM_OUTPUT_NAME)
set(__LIB_LIGHTGBM_OUTPUT_NAME "lib_lightgbm")
endif()
if(CMAKE_SHARED_LIBRARY_SUFFIX_CXX)
set(
__LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX_CXX}"
CACHE INTERNAL "lightgbm shared library filename"
)
else()
set(
__LIB_LIGHTGBM_FILENAME "${__LIB_LIGHTGBM_OUTPUT_NAME}.dylib"
CACHE INTERNAL "lightgbm shared library filename"
)
endif()
# Override the absolute path to OpenMP with a relative one using @rpath.
#
# This also ensures that if a libomp.dylib has already been loaded, it'll just use that.
add_custom_command(
TARGET _lightgbm
POST_BUILD
COMMAND
install_name_tool
-change
${OpenMP_LIBRARY_LOCATION}
"@rpath/${OpenMP_LIBRARY_NAME}"
"${__LIB_LIGHTGBM_FILENAME}"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "Replacing hard-coded OpenMP install_name with '@rpath/${OpenMP_LIBRARY_NAME}'..."
)
# add RPATH entries to ensure the loader looks in the following, in the following order:
#
# - /opt/homebrew/opt/libomp/lib (where 'brew install' / 'brew link' puts libomp.dylib)
# - ${OpenMP_LIBRARY_DIR} (wherever find_package(OpenMP) found OpenMP at build time)
#
set_target_properties(
_lightgbm
PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH "/opt/homebrew/opt/libomp/lib;${OpenMP_LIBRARY_DIR}"
INSTALL_RPATH_USE_LINK_PATH FALSE
)
endif()
install(
TARGETS _lightgbm
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
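For context (a sketch, not part of this diff): one plausible way to check the effect of the install_name_tool/RPATH block above on a locally built lib_lightgbm.dylib, assuming macOS with the standard developer tools and the library in the current directory:
# not part of the diff: inspect the built library's load commands
otool -L lib_lightgbm.dylib | grep -i omp          # expect '@rpath/libomp.dylib ...' rather than an absolute path
otool -l lib_lightgbm.dylib | grep -A 2 LC_RPATH   # expect '/opt/homebrew/opt/libomp/lib' among the rpath entries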

View file

@ -1309,6 +1309,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
#' means "save the fifth, sixth, and seventh tree"
#'
#' \emph{New in version 4.4.0}
#'
#' @return lgb.Booster
#'
#' @examples
@ -1373,6 +1375,8 @@ lgb.save <- function(
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
#' means "dump the fifth, sixth, and seventh tree"
#'
#' \emph{New in version 4.4.0}
#'
#' @return json format of model
#'
#' @examples

View file

@ -170,7 +170,12 @@ Dataset <- R6::R6Class(
# Check if more categorical features were output over the feature space
data_is_not_filename <- !is.character(private$raw_data)
if (data_is_not_filename && max(private$categorical_feature) > ncol(private$raw_data)) {
if (
data_is_not_filename
&& !is.null(private$raw_data)
&& is.null(private$used_indices)
&& max(private$categorical_feature) > ncol(private$raw_data)
) {
stop(
"lgb.Dataset.construct: supplied a too large value in categorical_feature: "
, max(private$categorical_feature)
@ -1049,6 +1054,9 @@ dimnames.lgb.Dataset <- function(x) {
#' @title Slice a dataset
#' @description Get a new \code{lgb.Dataset} containing the specified rows of
#' original \code{lgb.Dataset} object
#'
#' \emph{Renamed from} \code{slice()} \emph{in 4.4.0}
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param idxset an integer vector of indices of rows needed
#' @return constructed sub dataset

View file

@ -6,6 +6,9 @@
#' @param start_iteration Index (1-based) of the first boosting round to include in the output.
#' For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
#' means "return information about the fifth, sixth, and seventh trees".
#'
#' \emph{New in version 4.4.0}
#'
#' @return
#' A \code{data.table} with detailed information about model trees' nodes and leafs.
#'

View file

@ -59,68 +59,66 @@
}
# [description]
#
# Besides applying checks, this function
#
# 1. turns feature *names* into 1-based integer positions, then
# 2. adds an extra list element with skipped features, then
# 3. turns 1-based integer positions into 0-based positions, and finally
# 4. collapses the values of each list element into a string like "[0, 1]".
#
.check_interaction_constraints <- function(interaction_constraints, column_names) {
# Convert interaction constraints to feature numbers
string_constraints <- list()
if (!is.null(interaction_constraints)) {
if (!methods::is(interaction_constraints, "list")) {
stop("interaction_constraints must be a list")
}
constraint_is_character_or_numeric <- sapply(
X = interaction_constraints
, FUN = function(x) {
return(is.character(x) || is.numeric(x))
}
)
if (!all(constraint_is_character_or_numeric)) {
stop("every element in interaction_constraints must be a character vector or numeric vector")
}
for (constraint in interaction_constraints) {
# Check for character name
if (is.character(constraint)) {
constraint_indices <- as.integer(match(constraint, column_names) - 1L)
# Provided indices, but some indices are not existing?
if (sum(is.na(constraint_indices)) > 0L) {
stop(
"supplied an unknown feature in interaction_constraints "
, sQuote(constraint[is.na(constraint_indices)])
)
}
} else {
# Check that constraint indices are at most number of features
if (max(constraint) > length(column_names)) {
stop(
"supplied a too large value in interaction_constraints: "
, max(constraint)
, " but only "
, length(column_names)
, " features"
)
}
# Store indices as [0, n-1] indexed instead of [1, n] indexed
constraint_indices <- as.integer(constraint - 1L)
}
# Convert constraint to string
constraint_string <- paste0("[", paste0(constraint_indices, collapse = ","), "]")
string_constraints <- append(string_constraints, constraint_string)
}
if (is.null(interaction_constraints)) {
return(list())
}
if (!identical(class(interaction_constraints), "list")) {
stop("interaction_constraints must be a list")
}
return(string_constraints)
column_indices <- seq_along(column_names)
# Convert feature names to 1-based integer positions and apply checks
for (j in seq_along(interaction_constraints)) {
constraint <- interaction_constraints[[j]]
if (is.character(constraint)) {
constraint_indices <- match(constraint, column_names)
} else if (is.numeric(constraint)) {
constraint_indices <- as.integer(constraint)
} else {
stop("every element in interaction_constraints must be a character vector or numeric vector")
}
# Features outside range?
bad <- !(constraint_indices %in% column_indices)
if (any(bad)) {
stop(
"unknown feature(s) in interaction_constraints: "
, toString(sQuote(constraint[bad], q = "'"))
)
}
interaction_constraints[[j]] <- constraint_indices
}
# Add missing features as new interaction set
remaining_indices <- setdiff(
column_indices, sort(unique(unlist(interaction_constraints)))
)
if (length(remaining_indices) > 0L) {
interaction_constraints <- c(
interaction_constraints, list(remaining_indices)
)
}
# Turn indices 0-based and convert to string
for (j in seq_along(interaction_constraints)) {
interaction_constraints[[j]] <- paste0(
"[", paste0(interaction_constraints[[j]] - 1L, collapse = ","), "]"
)
}
return(interaction_constraints)
}

18
R-package/configure vendored
View file

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for lightgbm 4.3.0.99.
# Generated by GNU Autoconf 2.71 for lightgbm 4.4.0.99.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='lightgbm'
PACKAGE_TARNAME='lightgbm'
PACKAGE_VERSION='4.3.0.99'
PACKAGE_STRING='lightgbm 4.3.0.99'
PACKAGE_VERSION='4.4.0.99'
PACKAGE_STRING='lightgbm 4.4.0.99'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@ -1211,7 +1211,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures lightgbm 4.3.0.99 to adapt to many kinds of systems.
\`configure' configures lightgbm 4.4.0.99 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1273,7 +1273,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of lightgbm 4.3.0.99:";;
short | recursive ) echo "Configuration of lightgbm 4.4.0.99:";;
esac
cat <<\_ACEOF
@ -1341,7 +1341,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
lightgbm configure 4.3.0.99
lightgbm configure 4.4.0.99
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@ -1378,7 +1378,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by lightgbm $as_me 4.3.0.99, which was
It was created by lightgbm $as_me 4.4.0.99, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@ -2454,7 +2454,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by lightgbm $as_me 4.3.0.99, which was
This file was extended by lightgbm $as_me 4.4.0.99, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -2509,7 +2509,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
lightgbm config.status 4.3.0.99
lightgbm config.status 4.4.0.99
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

Просмотреть файл

@ -1,5 +1,15 @@
# CRAN Submission History
## v4.4.0 - Submission 1 - (June 14, 2024)
### CRAN response
Accepted to CRAN
### Maintainer Notes
This was a standard release of `{lightgbm}`, not intended to fix any particular R-specific issues.
## v4.3.0 - Submission 1 - (January 18, 2024)
### CRAN response

Просмотреть файл

@ -12,8 +12,10 @@ lgb.dump(booster, num_iteration = NULL, start_iteration = 1L)
\item{num_iteration}{Number of iterations to be dumped. NULL or <= 0 means use best iteration}
\item{start_iteration}{Index (1-based) of the first boosting round to dump.
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "dump the fifth, sixth, and seventh tree"}
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "dump the fifth, sixth, and seventh tree"
\emph{New in version 4.4.0}}
}
\value{
json format of model

Просмотреть файл

@ -12,8 +12,10 @@ lgb.model.dt.tree(model, num_iteration = NULL, start_iteration = 1L)
\item{num_iteration}{Number of iterations to include. NULL or <= 0 means use best iteration.}
\item{start_iteration}{Index (1-based) of the first boosting round to include in the output.
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "return information about the fifth, sixth, and seventh trees".}
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "return information about the fifth, sixth, and seventh trees".
\emph{New in version 4.4.0}}
}
\value{
A \code{data.table} with detailed information about model trees' nodes and leafs.

Просмотреть файл

@ -14,8 +14,10 @@ lgb.save(booster, filename, num_iteration = NULL, start_iteration = 1L)
\item{num_iteration}{Number of iterations to save, NULL or <= 0 means use best iteration}
\item{start_iteration}{Index (1-based) of the first boosting round to save.
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "save the fifth, sixth, and seventh tree"}
For example, passing \code{start_iteration=5, num_iteration=3} for a regression model
means "save the fifth, sixth, and seventh tree"
\emph{New in version 4.4.0}}
}
\value{
lgb.Booster

Просмотреть файл

@ -17,6 +17,8 @@ constructed sub dataset
\description{
Get a new \code{lgb.Dataset} containing the specified rows of
original \code{lgb.Dataset} object
\emph{Renamed from} \code{slice()} \emph{in 4.4.0}
}
\examples{
\donttest{

Просмотреть файл

@ -11,6 +11,7 @@
#include <LightGBM/utils/text_reader.h>
#include <R_ext/Rdynload.h>
#include <R_ext/Altrep.h>
#define R_NO_REMAP
#define R_USE_C99_IN_CXX
@ -24,6 +25,150 @@
#include <utility>
#include <vector>
#include <algorithm>
#include <type_traits>
R_altrep_class_t lgb_altrepped_char_vec;
R_altrep_class_t lgb_altrepped_int_arr;
R_altrep_class_t lgb_altrepped_dbl_arr;
template <class T>
void delete_cpp_array(SEXP R_ptr) {
T *ptr_to_cpp_obj = static_cast<T*>(R_ExternalPtrAddr(R_ptr));
delete[] ptr_to_cpp_obj;
R_ClearExternalPtr(R_ptr);
}
void delete_cpp_char_vec(SEXP R_ptr) {
std::vector<char> *ptr_to_cpp_obj = static_cast<std::vector<char>*>(R_ExternalPtrAddr(R_ptr));
delete ptr_to_cpp_obj;
R_ClearExternalPtr(R_ptr);
}
// Note: MSVC has issues with Altrep classes, so they are disabled for it.
// See: https://github.com/microsoft/LightGBM/pull/6213#issuecomment-2111025768
#ifdef _MSC_VER
# define LGB_NO_ALTREP
#endif
#ifndef LGB_NO_ALTREP
SEXP make_altrepped_raw_vec(void *void_ptr) {
std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP R_raw = Rf_protect(R_new_altrep(lgb_altrepped_char_vec, R_NilValue, R_NilValue));
R_SetExternalPtrAddr(R_ptr, ptr_to_cpp_vec->get());
R_RegisterCFinalizerEx(R_ptr, delete_cpp_char_vec, TRUE);
ptr_to_cpp_vec->release();
R_set_altrep_data1(R_raw, R_ptr);
Rf_unprotect(2);
return R_raw;
}
#else
SEXP make_r_raw_vec(void *void_ptr) {
std::unique_ptr<std::vector<char>> *ptr_to_cpp_vec = static_cast<std::unique_ptr<std::vector<char>>*>(void_ptr);
R_xlen_t len = ptr_to_cpp_vec->get()->size();
SEXP out = Rf_protect(Rf_allocVector(RAWSXP, len));
std::copy(ptr_to_cpp_vec->get()->begin(), ptr_to_cpp_vec->get()->end(), reinterpret_cast<char*>(RAW(out)));
Rf_unprotect(1);
return out;
}
#define make_altrepped_raw_vec make_r_raw_vec
#endif
std::vector<char>* get_ptr_from_altrepped_raw(SEXP R_raw) {
return static_cast<std::vector<char>*>(R_ExternalPtrAddr(R_altrep_data1(R_raw)));
}
R_xlen_t get_altrepped_raw_len(SEXP R_raw) {
return get_ptr_from_altrepped_raw(R_raw)->size();
}
const void* get_altrepped_raw_dataptr_or_null(SEXP R_raw) {
return get_ptr_from_altrepped_raw(R_raw)->data();
}
void* get_altrepped_raw_dataptr(SEXP R_raw, Rboolean writeable) {
return get_ptr_from_altrepped_raw(R_raw)->data();
}
#ifndef LGB_NO_ALTREP
template <class T>
R_altrep_class_t get_altrep_class_for_type() {
if (std::is_same<T, double>::value) {
return lgb_altrepped_dbl_arr;
} else {
return lgb_altrepped_int_arr;
}
}
#else
template <class T>
SEXPTYPE get_sexptype_class_for_type() {
if (std::is_same<T, double>::value) {
return REALSXP;
} else {
return INTSXP;
}
}
template <class T>
T* get_r_vec_ptr(SEXP x) {
if (std::is_same<T, double>::value) {
return static_cast<T*>(static_cast<void*>(REAL(x)));
} else {
return static_cast<T*>(static_cast<void*>(INTEGER(x)));
}
}
#endif
template <class T>
struct arr_and_len {
T *arr;
int64_t len;
};
#ifndef LGB_NO_ALTREP
template <class T>
SEXP make_altrepped_vec_from_arr(void *void_ptr) {
T *arr = static_cast<arr_and_len<T>*>(void_ptr)->arr;
uint64_t len = static_cast<arr_and_len<T>*>(void_ptr)->len;
SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP R_len = Rf_protect(Rf_allocVector(REALSXP, 1));
SEXP R_vec = Rf_protect(R_new_altrep(get_altrep_class_for_type<T>(), R_NilValue, R_NilValue));
REAL(R_len)[0] = static_cast<double>(len);
R_SetExternalPtrAddr(R_ptr, arr);
R_RegisterCFinalizerEx(R_ptr, delete_cpp_array<T>, TRUE);
R_set_altrep_data1(R_vec, R_ptr);
R_set_altrep_data2(R_vec, R_len);
Rf_unprotect(3);
return R_vec;
}
#else
template <class T>
SEXP make_R_vec_from_arr(void *void_ptr) {
T *arr = static_cast<arr_and_len<T>*>(void_ptr)->arr;
uint64_t len = static_cast<arr_and_len<T>*>(void_ptr)->len;
SEXP out = Rf_protect(Rf_allocVector(get_sexptype_class_for_type<T>(), len));
std::copy(arr, arr + len, get_r_vec_ptr<T>(out));
Rf_unprotect(1);
return out;
}
#define make_altrepped_vec_from_arr make_R_vec_from_arr
#endif
R_xlen_t get_altrepped_vec_len(SEXP R_vec) {
return static_cast<R_xlen_t>(Rf_asReal(R_altrep_data2(R_vec)));
}
const void* get_altrepped_vec_dataptr_or_null(SEXP R_vec) {
return R_ExternalPtrAddr(R_altrep_data1(R_vec));
}
void* get_altrepped_vec_dataptr(SEXP R_vec, Rboolean writeable) {
return R_ExternalPtrAddr(R_altrep_data1(R_vec));
}
#define COL_MAJOR (0)
@ -143,18 +288,18 @@ SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
SEXP parameters,
SEXP reference) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
DatasetHandle handle = nullptr;
DatasetHandle ref = nullptr;
if (!Rf_isNull(reference)) {
ref = R_ExternalPtrAddr(reference);
}
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
CHECK_CALL(LGBM_DatasetCreateFromFile(filename_ptr, parameters_ptr, ref, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(3);
Rf_unprotect(3);
return ret;
R_API_END();
}
@ -168,14 +313,14 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
SEXP parameters,
SEXP reference) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const int* p_indptr = INTEGER(indptr);
const int* p_indices = INTEGER(indices);
const double* p_data = REAL(data);
int64_t nindptr = static_cast<int64_t>(Rf_asInteger(num_indptr));
int64_t ndata = static_cast<int64_t>(Rf_asInteger(nelem));
int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
DatasetHandle handle = nullptr;
DatasetHandle ref = nullptr;
if (!Rf_isNull(reference)) {
@ -186,7 +331,7 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
nrow, parameters_ptr, ref, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -197,11 +342,11 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
SEXP parameters,
SEXP reference) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
int32_t nrow = static_cast<int32_t>(Rf_asInteger(num_row));
int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
double* p_mat = REAL(data);
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
DatasetHandle handle = nullptr;
DatasetHandle ref = nullptr;
if (!Rf_isNull(reference)) {
@ -211,7 +356,7 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
parameters_ptr, ref, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -222,7 +367,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
SEXP parameters) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
int32_t len = static_cast<int32_t>(Rf_asInteger(len_used_row_indices));
std::unique_ptr<int32_t[]> idxvec(new int32_t[len]);
// convert from one-based to zero-based index
@ -233,14 +378,14 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
for (int32_t i = 0; i < len; ++i) {
idxvec[i] = static_cast<int32_t>(used_row_indices_[i] - 1);
}
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
DatasetHandle res = nullptr;
CHECK_CALL(LGBM_DatasetGetSubset(R_ExternalPtrAddr(handle),
idxvec.get(), len, parameters_ptr,
&res));
R_SetExternalPtrAddr(ret, res);
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -249,7 +394,7 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
SEXP feature_names) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
auto vec_names = Split(CHAR(PROTECT(Rf_asChar(feature_names))), '\t');
auto vec_names = Split(CHAR(Rf_protect(Rf_asChar(feature_names))), '\t');
int len = static_cast<int>(vec_names.size());
std::unique_ptr<const char*[]> vec_sptr(new const char*[len]);
for (int i = 0; i < len; ++i) {
@ -257,13 +402,13 @@ SEXP LGBM_DatasetSetFeatureNames_R(SEXP handle,
}
CHECK_CALL(LGBM_DatasetSetFeatureNames(R_ExternalPtrAddr(handle),
vec_sptr.get(), len));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
SEXP feature_names;
@ -301,11 +446,11 @@ SEXP LGBM_DatasetGetFeatureNames_R(SEXP handle) {
ptr_names.data()));
}
CHECK_EQ(len, out_len);
feature_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
feature_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
for (int i = 0; i < len; ++i) {
SET_STRING_ELT(feature_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
}
UNPROTECT(2);
Rf_unprotect(2);
return feature_names;
R_API_END();
}
@ -314,10 +459,10 @@ SEXP LGBM_DatasetSaveBinary_R(SEXP handle,
SEXP filename) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
CHECK_CALL(LGBM_DatasetSaveBinary(R_ExternalPtrAddr(handle),
filename_ptr));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -339,7 +484,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
int len = Rf_asInteger(num_element);
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
if (!strcmp("group", name) || !strcmp("query", name)) {
CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, INTEGER(field_data), len, C_API_DTYPE_INT32));
} else if (!strcmp("init_score", name)) {
@ -349,7 +494,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
std::copy(REAL(field_data), REAL(field_data) + len, vec.get());
CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.get(), len, C_API_DTYPE_FLOAT32));
}
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -359,7 +504,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
SEXP field_data) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
int out_len = 0;
int out_type = 0;
const void* res;
@ -381,7 +526,7 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
auto p_data = reinterpret_cast<const float*>(res);
std::copy(p_data, p_data + out_len, REAL(field_data));
}
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -391,7 +536,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
SEXP out) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(handle);
const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
const char* name = CHAR(Rf_protect(Rf_asChar(field_name)));
int out_len = 0;
int out_type = 0;
const void* res;
@ -400,7 +545,7 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
out_len -= 1;
}
INTEGER(out)[0] = out_len;
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -408,10 +553,10 @@ SEXP LGBM_DatasetGetFieldSize_R(SEXP handle,
SEXP LGBM_DatasetUpdateParamChecking_R(SEXP old_params,
SEXP new_params) {
R_API_BEGIN();
const char* old_params_ptr = CHAR(PROTECT(Rf_asChar(old_params)));
const char* new_params_ptr = CHAR(PROTECT(Rf_asChar(new_params)));
const char* old_params_ptr = CHAR(Rf_protect(Rf_asChar(old_params)));
const char* new_params_ptr = CHAR(Rf_protect(Rf_asChar(new_params)));
CHECK_CALL(LGBM_DatasetUpdateParamChecking(old_params_ptr, new_params_ptr));
UNPROTECT(2);
Rf_unprotect(2);
return R_NilValue;
R_API_END();
}
@ -468,34 +613,34 @@ SEXP LGBM_BoosterCreate_R(SEXP train_data,
SEXP parameters) {
R_API_BEGIN();
_AssertDatasetHandleNotNull(train_data);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreate(R_ExternalPtrAddr(train_data), parameters_ptr, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
int out_num_iterations = 0;
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreateFromModelfile(filename_ptr, &out_num_iterations, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
R_API_BEGIN();
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
SEXP temp = NULL;
int n_protected = 1;
int out_num_iterations = 0;
@ -510,7 +655,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
break;
}
case STRSXP: {
temp = PROTECT(STRING_ELT(model_str, 0));
temp = Rf_protect(STRING_ELT(model_str, 0));
n_protected++;
model_str_ptr = reinterpret_cast<const char*>(CHAR(temp));
}
@ -519,7 +664,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
CHECK_CALL(LGBM_BoosterLoadModelFromString(model_str_ptr, &out_num_iterations, &handle));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(n_protected);
Rf_unprotect(n_protected);
return ret;
R_API_END();
}
@ -558,9 +703,9 @@ SEXP LGBM_BoosterResetParameter_R(SEXP handle,
SEXP parameters) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
const char* parameters_ptr = CHAR(Rf_protect(Rf_asChar(parameters)));
CHECK_CALL(LGBM_BoosterResetParameter(R_ExternalPtrAddr(handle), parameters_ptr));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -650,7 +795,7 @@ SEXP LGBM_BoosterGetLowerBoundValue_R(SEXP handle,
}
SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
SEXP eval_names;
@ -689,11 +834,11 @@ SEXP LGBM_BoosterGetEvalNames_R(SEXP handle) {
ptr_names.data()));
}
CHECK_EQ(out_len, len);
eval_names = PROTECT(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
eval_names = Rf_protect(safe_R_string(static_cast<R_xlen_t>(len), &cont_token));
for (int i = 0; i < len; ++i) {
SET_STRING_ELT(eval_names, i, safe_R_mkChar(ptr_names[i], &cont_token));
}
UNPROTECT(2);
Rf_unprotect(2);
return eval_names;
R_API_END();
}
@ -763,14 +908,14 @@ SEXP LGBM_BoosterPredictForFile_R(SEXP handle,
SEXP result_filename) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* data_filename_ptr = CHAR(PROTECT(Rf_asChar(data_filename)));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* result_filename_ptr = CHAR(PROTECT(Rf_asChar(result_filename)));
const char* data_filename_ptr = CHAR(Rf_protect(Rf_asChar(data_filename)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
const char* result_filename_ptr = CHAR(Rf_protect(Rf_asChar(result_filename)));
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
CHECK_CALL(LGBM_BoosterPredictForFile(R_ExternalPtrAddr(handle), data_filename_ptr,
Rf_asInteger(data_has_header), pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr,
result_filename_ptr));
UNPROTECT(3);
Rf_unprotect(3);
return R_NilValue;
R_API_END();
}
@ -819,12 +964,12 @@ SEXP LGBM_BoosterPredictForCSC_R(SEXP handle,
int64_t nrow = static_cast<int64_t>(Rf_asInteger(num_row));
double* ptr_ret = REAL(out_result);
int64_t out_len;
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
CHECK_CALL(LGBM_BoosterPredictForCSC(R_ExternalPtrAddr(handle),
p_indptr, C_API_DTYPE_INT32, p_indices,
p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
nrow, pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -844,7 +989,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForCSR(R_ExternalPtrAddr(handle),
INTEGER(indptr), C_API_DTYPE_INT32, INTEGER(indices),
@ -852,7 +997,7 @@ SEXP LGBM_BoosterPredictForCSR_R(SEXP handle,
Rf_xlength(indptr), Rf_xlength(data), Rf_asInteger(ncols),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, REAL(out_result)));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -871,7 +1016,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int nnz = static_cast<int>(Rf_xlength(data));
const int indptr[] = {0, nnz};
int64_t out_len;
@ -881,7 +1026,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle,
2, nnz, Rf_asInteger(ncols),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, REAL(out_result)));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -901,8 +1046,8 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
FastConfigHandle out_fastConfig;
CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFastInit(R_ExternalPtrAddr(handle),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
@ -910,7 +1055,7 @@ SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle,
parameter_ptr, &out_fastConfig));
R_SetExternalPtrAddr(ret, out_fastConfig);
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -950,12 +1095,12 @@ SEXP LGBM_BoosterPredictForMat_R(SEXP handle,
int32_t ncol = static_cast<int32_t>(Rf_asInteger(num_col));
const double* p_mat = REAL(data);
double* ptr_ret = REAL(out_result);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForMat(R_ExternalPtrAddr(handle),
p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), parameter_ptr, &out_len, ptr_ret));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -964,8 +1109,6 @@ struct SparseOutputPointers {
void* indptr;
int32_t* indices;
void* data;
int indptr_type;
int data_type;
SparseOutputPointers(void* indptr, int32_t* indices, void* data)
: indptr(indptr), indices(indices), data(data) {}
};
@ -985,12 +1128,12 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
SEXP start_iteration,
SEXP num_iteration,
SEXP parameter) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* out_names[] = {"indptr", "indices", "data", ""};
SEXP out = PROTECT(Rf_mkNamed(VECSXP, out_names));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
int64_t out_len[2];
void *out_indptr;
@ -1015,17 +1158,28 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle,
&delete_SparseOutputPointers
};
SEXP out_indptr_R = safe_R_int(out_len[1], &cont_token);
SET_VECTOR_ELT(out, 0, out_indptr_R);
SEXP out_indices_R = safe_R_int(out_len[0], &cont_token);
SET_VECTOR_ELT(out, 1, out_indices_R);
SEXP out_data_R = safe_R_real(out_len[0], &cont_token);
SET_VECTOR_ELT(out, 2, out_data_R);
std::memcpy(INTEGER(out_indptr_R), out_indptr, out_len[1]*sizeof(int));
std::memcpy(INTEGER(out_indices_R), out_indices, out_len[0]*sizeof(int));
std::memcpy(REAL(out_data_R), out_data, out_len[0]*sizeof(double));
arr_and_len<int> indptr_str{static_cast<int*>(out_indptr), out_len[1]};
SET_VECTOR_ELT(
out, 0,
R_UnwindProtect(make_altrepped_vec_from_arr<int>,
static_cast<void*>(&indptr_str), throw_R_memerr, &cont_token, cont_token));
pointers_struct->indptr = nullptr;
UNPROTECT(3);
arr_and_len<int> indices_str{static_cast<int*>(out_indices), out_len[0]};
SET_VECTOR_ELT(
out, 1,
R_UnwindProtect(make_altrepped_vec_from_arr<int>,
static_cast<void*>(&indices_str), throw_R_memerr, &cont_token, cont_token));
pointers_struct->indices = nullptr;
arr_and_len<double> data_str{static_cast<double*>(out_data), out_len[0]};
SET_VECTOR_ELT(
out, 2,
R_UnwindProtect(make_altrepped_vec_from_arr<double>,
static_cast<void*>(&data_str), throw_R_memerr, &cont_token, cont_token));
pointers_struct->data = nullptr;
Rf_unprotect(3);
return out;
R_API_END();
}
@ -1042,14 +1196,14 @@ SEXP LGBM_BoosterPredictForMatSingleRow_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
double* ptr_ret = REAL(out_result);
int64_t out_len;
CHECK_CALL(LGBM_BoosterPredictForMatSingleRow(R_ExternalPtrAddr(handle),
REAL(data), C_API_DTYPE_FLOAT64, Rf_xlength(data), 1,
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
parameter_ptr, &out_len, ptr_ret));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
@ -1065,8 +1219,8 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib);
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter)));
SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
const char* parameter_ptr = CHAR(Rf_protect(Rf_asChar(parameter)));
FastConfigHandle out_fastConfig;
CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFastInit(R_ExternalPtrAddr(handle),
pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration),
@ -1074,7 +1228,7 @@ SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle,
parameter_ptr, &out_fastConfig));
R_SetExternalPtrAddr(ret, out_fastConfig);
R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE);
UNPROTECT(2);
Rf_unprotect(2);
return ret;
R_API_END();
}
@ -1097,18 +1251,46 @@ SEXP LGBM_BoosterSaveModel_R(SEXP handle,
SEXP start_iteration) {
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
const char* filename_ptr = CHAR(PROTECT(Rf_asChar(filename)));
const char* filename_ptr = CHAR(Rf_protect(Rf_asChar(filename)));
CHECK_CALL(LGBM_BoosterSaveModel(R_ExternalPtrAddr(handle), Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), Rf_asInteger(feature_importance_type), filename_ptr));
UNPROTECT(1);
Rf_unprotect(1);
return R_NilValue;
R_API_END();
}
// Note: for some reason, MSVC crashes when an error is thrown here
// if the buffer variable is defined as 'std::unique_ptr<std::vector<char>>',
// but not if it is defined as 'std::vector<char>'.
#ifndef _MSC_VER
SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type,
SEXP start_iteration) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int64_t out_len = 0;
int64_t buf_len = 1024 * 1024;
int num_iter = Rf_asInteger(num_iteration);
int start_iter = Rf_asInteger(start_iteration);
int importance_type = Rf_asInteger(feature_importance_type);
std::unique_ptr<std::vector<char>> inner_char_buf(new std::vector<char>(buf_len));
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf->data()));
inner_char_buf->resize(out_len);
if (out_len > buf_len) {
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf->data()));
}
SEXP out = R_UnwindProtect(make_altrepped_raw_vec, &inner_char_buf, throw_R_memerr, &cont_token, cont_token);
Rf_unprotect(1);
return out;
R_API_END();
}
#else
SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type,
SEXP start_iteration) {
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
int64_t out_len = 0;
@ -1118,23 +1300,24 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle,
int importance_type = Rf_asInteger(feature_importance_type);
std::vector<char> inner_char_buf(buf_len);
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data()));
SEXP model_str = PROTECT(safe_R_raw(out_len, &cont_token));
SEXP model_str = Rf_protect(safe_R_raw(out_len, &cont_token));
// if the model string was larger than the initial buffer, call the function again, writing directly to the R object
if (out_len > buf_len) {
CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, reinterpret_cast<char*>(RAW(model_str))));
} else {
std::copy(inner_char_buf.begin(), inner_char_buf.begin() + out_len, reinterpret_cast<char*>(RAW(model_str)));
}
UNPROTECT(2);
Rf_unprotect(2);
return model_str;
R_API_END();
}
#endif
SEXP LGBM_BoosterDumpModel_R(SEXP handle,
SEXP num_iteration,
SEXP feature_importance_type,
SEXP start_iteration) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
SEXP model_str;
@ -1150,15 +1333,15 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
inner_char_buf.resize(out_len);
CHECK_CALL(LGBM_BoosterDumpModel(R_ExternalPtrAddr(handle), start_iter, num_iter, importance_type, out_len, &out_len, inner_char_buf.data()));
}
model_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
model_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
SET_STRING_ELT(model_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
UNPROTECT(2);
Rf_unprotect(2);
return model_str;
R_API_END();
}
SEXP LGBM_DumpParamAliases_R() {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
SEXP aliases_str;
int64_t out_len = 0;
@ -1170,15 +1353,15 @@ SEXP LGBM_DumpParamAliases_R() {
inner_char_buf.resize(out_len);
CHECK_CALL(LGBM_DumpParamAliases(out_len, &out_len, inner_char_buf.data()));
}
aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
aliases_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
UNPROTECT(2);
Rf_unprotect(2);
return aliases_str;
R_API_END();
}
SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
SEXP cont_token = PROTECT(R_MakeUnwindCont());
SEXP cont_token = Rf_protect(R_MakeUnwindCont());
R_API_BEGIN();
_AssertBoosterHandleNotNull(handle);
SEXP params_str;
@ -1191,9 +1374,9 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
inner_char_buf.resize(out_len);
CHECK_CALL(LGBM_BoosterGetLoadedParam(R_ExternalPtrAddr(handle), out_len, &out_len, inner_char_buf.data()));
}
params_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
params_str = Rf_protect(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
SET_STRING_ELT(params_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
UNPROTECT(2);
Rf_unprotect(2);
return params_str;
R_API_END();
}
@ -1281,4 +1464,21 @@ LIGHTGBM_C_EXPORT void R_init_lightgbm(DllInfo *dll);
void R_init_lightgbm(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE);
#ifndef LGB_NO_ALTREP
lgb_altrepped_char_vec = R_make_altraw_class("lgb_altrepped_char_vec", "lightgbm", dll);
R_set_altrep_Length_method(lgb_altrepped_char_vec, get_altrepped_raw_len);
R_set_altvec_Dataptr_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr);
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_char_vec, get_altrepped_raw_dataptr_or_null);
lgb_altrepped_int_arr = R_make_altinteger_class("lgb_altrepped_int_arr", "lightgbm", dll);
R_set_altrep_Length_method(lgb_altrepped_int_arr, get_altrepped_vec_len);
R_set_altvec_Dataptr_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr);
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_int_arr, get_altrepped_vec_dataptr_or_null);
lgb_altrepped_dbl_arr = R_make_altreal_class("lgb_altrepped_dbl_arr", "lightgbm", dll);
R_set_altrep_Length_method(lgb_altrepped_dbl_arr, get_altrepped_vec_len);
R_set_altvec_Dataptr_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr);
R_set_altvec_Dataptr_or_null_method(lgb_altrepped_dbl_arr, get_altrepped_vec_dataptr_or_null);
#endif
}

Просмотреть файл

@ -2776,14 +2776,12 @@ test_that(paste0("lgb.train() throws an informative error if the members of inte
test_that("lgb.train() throws an informative error if interaction_constraints contains a too large index", {
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression",
interaction_constraints = list(c(1L, length(colnames(train$data)) + 1L), 3L))
expect_error({
bst <- lightgbm(
data = dtrain
, params = params
, nrounds = 2L
)
}, "supplied a too large value in interaction_constraints")
interaction_constraints = list(c(1L, ncol(train$data) + 1L:2L), 3L))
expect_error(
lightgbm(data = dtrain, params = params, nrounds = 2L)
, "unknown feature(s) in interaction_constraints: '127', '128'"
, fixed = TRUE
)
})
test_that(paste0("lgb.train() gives same result when interaction_constraints is specified as a list of ",
@ -2876,6 +2874,37 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
})
test_that("Interaction constraints add missing features correctly as new group", {
dtrain <- lgb.Dataset(
train$data[, 1L:6L] # Pick only some columns
, label = train$label
, params = list(num_threads = .LGB_MAX_THREADS)
)
list_of_constraints <- list(
list(3L, 1L:2L)
, list("cap-shape=convex", c("cap-shape=bell", "cap-shape=conical"))
)
for (constraints in list_of_constraints) {
params <- list(
objective = "regression"
, interaction_constraints = constraints
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
bst <- lightgbm(data = dtrain, params = params, nrounds = 10L)
expected_list <- list("[2]", "[0,1]", "[3,4,5]")
expect_equal(bst$params$interaction_constraints, expected_list)
expected_string <- "[interaction_constraints: [2],[0,1],[3,4,5]]"
expect_true(
grepl(expected_string, bst$save_model_to_string(), fixed = TRUE)
)
}
})
.generate_trainset_for_monotone_constraints_tests <- function(x3_to_categorical) {
n_samples <- 3000L
x1_positively_correlated_with_y <- runif(n = n_samples, min = 0.0, max = 1.0)

Просмотреть файл

@ -440,6 +440,35 @@ test_that("lgb.Dataset: should be able to run lgb.cv() immediately after using l
expect_true(methods::is(bst, "lgb.CVBooster"))
})
test_that("lgb.Dataset: should be able to be used in lgb.cv() when constructed with categorical feature indices", {
data("mtcars")
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1L])
categorical_feature <- which(names(mtcars) %in% c("cyl", "vs", "am", "gear", "carb")) - 1L
dtrain <- lgb.Dataset(
data = x
, label = y
, categorical_feature = categorical_feature
, free_raw_data = TRUE
, params = list(num_threads = .LGB_MAX_THREADS)
)
# constructing the Dataset frees the raw data
dtrain$construct()
params <- list(
objective = "regression"
, num_leaves = 2L
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
# cv should reuse the same categorical features without checking the indices
bst <- lgb.cv(params = params, data = dtrain, stratified = FALSE, nrounds = 1L)
expect_equal(
unlist(bst$boosters[[1L]]$booster$params$categorical_feature)
, categorical_feature - 1L # 0-based
)
})
test_that("lgb.Dataset: should be able to use and retrieve long feature names", {
# set one feature to a value longer than the default buffer size used
# in LGBM_DatasetGetFeatureNames_R
@ -621,3 +650,12 @@ test_that("lgb.Dataset can be constructed with categorical features and without
lgb.Dataset(raw_mat, categorical_feature = 2L)$construct()
}, regexp = "supplied a too large value in categorical_feature: 2 but only 1 features")
})
test_that("lgb.Dataset.slice fails with a categorical feature index greater than the number of features", {
data <- matrix(runif(100L), nrow = 50L, ncol = 2L)
ds <- lgb.Dataset(data = data, categorical_feature = 3L)
subset <- ds$slice(1L:20L)
expect_error({
subset$construct()
}, regexp = "supplied a too large value in categorical_feature: 3 but only 2 features")
})

Просмотреть файл

@ -174,7 +174,7 @@ test_that("Loading a Booster from a text file works", {
, bagging_freq = 1L
, boost_from_average = FALSE
, categorical_feature = c(1L, 2L)
, interaction_constraints = list(c(1L, 2L), 1L)
, interaction_constraints = list(1L:2L, 3L, 4L:ncol(train$data))
, feature_contri = rep(0.5, ncol(train$data))
, metric = c("mape", "average_precision")
, learning_rate = 1.0

Просмотреть файл

@ -147,3 +147,21 @@ test_that(".equal_or_both_null produces expected results", {
expect_false(.equal_or_both_null(10.0, 1L))
expect_true(.equal_or_both_null(0L, 0L))
})
test_that(".check_interaction_constraints() adds skipped features", {
ref <- letters[1L:5L]
ic_num <- list(1L, c(2L, 3L))
ic_char <- list("a", c("b", "c"))
expected <- list("[0]", "[1,2]", "[3,4]")
ic_checked_num <- .check_interaction_constraints(
interaction_constraints = ic_num, column_names = ref
)
ic_checked_char <- .check_interaction_constraints(
interaction_constraints = ic_char, column_names = ref
)
expect_equal(ic_checked_num, expected)
expect_equal(ic_checked_char, expected)
})

Просмотреть файл

@ -133,7 +133,7 @@ Support
-------
- Ask a question [on Stack Overflow with the `lightgbm` tag](https://stackoverflow.com/questions/ask?tags=lightgbm), we monitor this for new questions.
- Open **bug reports** and **feature requests** (not questions) on [GitHub issues](https://github.com/microsoft/LightGBM/issues).
- Open **bug reports** and **feature requests** on [GitHub issues](https://github.com/microsoft/LightGBM/issues).
How to Contribute
-----------------
@ -156,8 +156,6 @@ Qi Meng, Guolin Ke, Taifeng Wang, Wei Chen, Qiwei Ye, Zhi-Ming Ma, Tie-Yan Liu.
Huan Zhang, Si Si and Cho-Jui Hsieh. "[GPU Acceleration for Large-scale Tree Boosting](https://arxiv.org/abs/1706.08359)". SysML Conference, 2018.
**Note**: If you use LightGBM in your GitHub projects, please add `lightgbm` in the `requirements.txt`.
License
-------

Просмотреть файл

@ -1 +1 @@
4.3.0.99
4.4.0.99

Просмотреть файл

@ -149,7 +149,7 @@ and copy memory as required by creating new processes instead of forking (or, us
Cloud platform container services may cause LightGBM to hang, if they use Linux fork to run multiple containers on a
single instance. For example, LightGBM hangs in AWS Batch array jobs, which `use the ECS agent
<https://aws.amazon.com/batch/faqs/#Features>`__ to manage multiple running jobs. Setting ``nthreads=1`` mitigates the issue.
<https://aws.amazon.com/batch/faqs>`__ to manage multiple running jobs. Setting ``nthreads=1`` mitigates the issue.
12. Why is early stopping not enabled by default in LightGBM?
-------------------------------------------------------------
@ -321,7 +321,7 @@ We are doing our best to provide universal wheels which have high running speed
However, sometimes it's just impossible to guarantee the possibility of usage of LightGBM in any specific environment (see `Microsoft/LightGBM#1743 <https://github.com/microsoft/LightGBM/issues/1743>`__).
Therefore, the first thing you should try in case of segfaults is **compiling from the source** using ``pip install --no-binary lightgbm lightgbm``.
For the OS-specific prerequisites see `this guide <https://github.com/microsoft/LightGBM/blob/master/python-package/README.rst#user-content-build-from-sources>`__.
For the OS-specific prerequisites see https://github.com/microsoft/LightGBM/blob/master/python-package/README.rst.
Also, feel free to post a new issue in our GitHub repository. We always look at each case individually and try to find a root cause.

Просмотреть файл

@ -602,9 +602,9 @@ And open an issue in GitHub `here`_ with that log.
.. _Boost: https://www.boost.org/users/history/
.. _Prebuilt Boost x86_64: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw64-boost-static-1.78.0-4.fc38.noarch.rpm
.. _Prebuilt Boost x86_64: https://www.rpmfind.net/linux/fedora/linux/releases/40/Everything/x86_64/os/Packages/m/mingw64-boost-static-1.78.0-9.fc40.noarch.rpm
.. _Prebuilt Boost i686: https://www.rpmfind.net/linux/fedora/linux/releases/38/Everything/x86_64/os/Packages/m/mingw32-boost-static-1.78.0-4.fc38.noarch.rpm
.. _Prebuilt Boost i686: https://www.rpmfind.net/linux/fedora/linux/releases/40/Everything/x86_64/os/Packages/m/mingw32-boost-static-1.78.0-9.fc40.noarch.rpm
.. _7zip: https://www.7-zip.org/download.html

Просмотреть файл

@ -22,7 +22,7 @@ To get good results using a leaf-wise tree, these are some important parameters:
1. ``num_leaves``. This is the main parameter to control the complexity of the tree model.
Theoretically, we can set ``num_leaves = 2^(max_depth)`` to obtain the same number of leaves as depth-wise tree.
However, this simple conversion is not good in practice.
The reason is that a leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting.
A leaf-wise tree is typically much deeper than a depth-wise tree for a fixed number of leaves. Unconstrained depth can induce over-fitting.
Thus, when trying to tune the ``num_leaves``, we should let it be smaller than ``2^(max_depth)``.
For example, when the ``max_depth=7`` the depth-wise tree can get good accuracy,
but setting ``num_leaves`` to ``127`` may cause over-fitting, and setting it to ``70`` or ``80`` may get better accuracy than depth-wise.
@ -33,6 +33,7 @@ To get good results using a leaf-wise tree, these are some important parameters:
In practice, setting it to hundreds or thousands is enough for a large dataset.
3. ``max_depth``. You also can use ``max_depth`` to limit the tree depth explicitly.
If you set ``max_depth``, also explicitly set ``num_leaves`` to some value ``<= 2^max_depth``.
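For example, a minimal sketch on synthetic data (the concrete values are illustrative, not tuned recommendations):

.. code:: python

    import numpy as np
    import lightgbm as lgb

    rng = np.random.default_rng(seed=0)
    X = rng.uniform(size=(10_000, 20))
    y = rng.uniform(size=(10_000,))
    train_data = lgb.Dataset(X, label=y)

    # max_depth=7 allows up to 2^7 = 128 leaves; keeping num_leaves well below
    # that bound (here 70) limits model complexity and the risk of over-fitting
    params = {
        "objective": "regression",
        "max_depth": 7,
        "num_leaves": 70,
        "min_data_in_leaf": 100,
    }
    bst = lgb.train(params, train_data, num_boost_round=50)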
For Faster Speed
----------------

Просмотреть файл

@ -414,6 +414,8 @@ Learning Control Parameters
- when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement
- *New in 4.4.0*
- ``first_metric_only`` :raw-html:`<a id="first_metric_only" title="Permalink to this parameter" href="#first_metric_only">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
- LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
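A minimal sketch of how these two parameters can be combined (the values and the synthetic data are illustrative only):

.. code:: python

    import numpy as np
    import lightgbm as lgb

    rng = np.random.default_rng(seed=0)
    X = rng.uniform(size=(1_000, 10))
    y = rng.uniform(size=(1_000,))
    dtrain = lgb.Dataset(X[:800], label=y[:800])
    dvalid = lgb.Dataset(X[800:], label=y[800:], reference=dtrain)

    # stop if the first metric (l2) has not improved by at least 1e-3
    # during the last 20 rounds
    params = {
        "objective": "regression",
        "metric": ["l2", "l1"],
        "early_stopping_round": 20,
        "early_stopping_min_delta": 1e-3,
        "first_metric_only": True,
    }
    bst = lgb.train(params, dtrain, num_boost_round=200, valid_sets=[dvalid])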

Просмотреть файл

@ -59,8 +59,9 @@ Many of the examples in this page use functionality from ``numpy``. To run the e
.. code:: python
data = np.random.rand(500, 10) # 500 entities, each contains 10 features
label = np.random.randint(2, size=500) # binary target
rng = np.random.default_rng()
data = rng.uniform(size=(500, 10)) # 500 entities, each contains 10 features
label = rng.integers(low=0, high=2, size=(500, )) # binary target
train_data = lgb.Dataset(data, label=label)
**To load a scipy.sparse.csr\_matrix array into Dataset:**
@ -139,7 +140,8 @@ It doesn't need to convert to one-hot encoding, and is much faster than one-hot
.. code:: python
w = np.random.rand(500, )
rng = np.random.default_rng()
w = rng.uniform(size=(500, ))
train_data = lgb.Dataset(data, label=label, weight=w)
or
@ -147,7 +149,8 @@ or
.. code:: python
train_data = lgb.Dataset(data, label=label)
w = np.random.rand(500, )
rng = np.random.default_rng()
w = rng.uniform(size=(500, ))
train_data.set_weight(w)
And you can use ``Dataset.set_init_score()`` to set initial score, and ``Dataset.set_group()`` to set group/query data for ranking tasks.
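For example (a minimal sketch reusing ``train_data`` from the snippets above; the init scores and group sizes are arbitrary):

.. code:: python

    rng = np.random.default_rng()
    train_data.set_init_score(rng.uniform(size=(500, )))

    # 5 query groups of 100 rows each; group sizes must sum to the number of rows
    train_data.set_group([100, 100, 100, 100, 100])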
@ -249,7 +252,8 @@ A model that has been trained or loaded can perform predictions on datasets:
.. code:: python
# 7 entities, each contains 10 features
data = np.random.rand(7, 10)
rng = np.random.default_rng()
data = rng.uniform(size=(7, 10))
ypred = bst.predict(data)
If early stopping is enabled during training, you can get predictions from the best iteration with ``bst.best_iteration``:

Просмотреть файл

@ -17,6 +17,7 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute.
"""Sphinx configuration file."""
import datetime
import os
import sys

Просмотреть файл

@ -22,15 +22,15 @@ import lightgbm as lgb
#################
# Simulate some binary data with a single categorical and
# single continuous predictor
np.random.seed(0)
rng = np.random.default_rng(seed=0)
N = 1000
X = pd.DataFrame({"continuous": range(N), "categorical": np.repeat([0, 1, 2, 3, 4], N / 5)})
CATEGORICAL_EFFECTS = [-1, -1, -2, -2, 2]
LINEAR_TERM = np.array(
[-0.5 + 0.01 * X["continuous"][k] + CATEGORICAL_EFFECTS[X["categorical"][k]] for k in range(X.shape[0])]
) + np.random.normal(0, 1, X.shape[0])
) + rng.normal(loc=0, scale=1, size=X.shape[0])
TRUE_PROB = expit(LINEAR_TERM)
Y = np.random.binomial(1, TRUE_PROB, size=N)
Y = rng.binomial(n=1, p=TRUE_PROB, size=N)
DATA = {
"X": X,
"probability_labels": TRUE_PROB,
@ -65,10 +65,9 @@ def experiment(objective, label_type, data):
result : dict
Experiment summary stats.
"""
np.random.seed(0)
nrounds = 5
lgb_data = data[f"lgb_with_{label_type}_labels"]
params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1}
params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1, "seed": 123}
time_zero = time.time()
gbm = lgb.train(params, lgb_data, num_boost_round=nrounds)
y_fitted = gbm.predict(data["X"])

Просмотреть файл

@ -12,6 +12,7 @@ Version history for these symbols can be found at the following:
* GLIBCXX: https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
* OMP/GOMP: https://github.com/gcc-mirror/gcc/blob/master/libgomp/libgomp.map
"""
import re
import sys
from pathlib import Path

Просмотреть файл

@ -6,6 +6,7 @@ with list of all parameters, aliases table and other routines
along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file.
"""
import re
from collections import defaultdict
from pathlib import Path

Просмотреть файл

@ -396,6 +396,7 @@ struct Config {
// check = >=0.0
// desc = when early stopping is used (i.e. ``early_stopping_round > 0``), require the early stopping metric to improve by at least this delta to be considered an improvement
// desc = *New in 4.4.0*
double early_stopping_min_delta = 0.0;
// desc = LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
@ -1146,7 +1147,7 @@ struct Config {
static const std::string DumpAliases();
private:
void CheckParamConflict();
void CheckParamConflict(const std::unordered_map<std::string, std::string>& params);
void GetMembersFromString(const std::unordered_map<std::string, std::string>& params);
std::string SaveMembersToString() const;
void GetAucMuWeights();

Просмотреть файл

@ -1,6 +0,0 @@
PMML Generator
==============
The old Python convert script was removed because it cannot support the new format of categorical features.
Please refer to https://github.com/jpmml/jpmml-lightgbm.

Просмотреть файл

@ -11,8 +11,6 @@ Preparation
32-bit Python is not supported. Please install 64-bit version. If you have a strong need to install with 32-bit Python, refer to `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__.
`setuptools <https://pypi.org/project/setuptools>`_ is needed.
Install from `PyPI <https://pypi.org/project/lightgbm>`_
''''''''''''''''''''''''''''''''''''''''''''''''''''''''
@ -299,10 +297,6 @@ Refer to the walk through examples in `Python guide folder <https://github.com/m
Development Guide
-----------------
The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_.
The package's documentation strings (docstrings) are written in the `numpydoc style <https://numpydoc.readthedocs.io/en/latest/format.html>`_.
To check that a contribution to the package matches its style expectations, run the following from the root of the repo.
.. code:: sh

Просмотреть файл

@ -3,6 +3,7 @@
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
"""
from pathlib import Path
from .basic import Booster, Dataset, Sequence, register_logger

Просмотреть файл

@ -1,5 +1,6 @@
# coding: utf-8
"""Wrapper for C API of LightGBM."""
import abc
import ctypes
import inspect
@ -355,10 +356,10 @@ def _list_to_1d_numpy(
array = data.ravel()
return _cast_numpy_array_to_dtype(array, dtype)
elif _is_1d_list(data):
return np.array(data, dtype=dtype, copy=False)
return np.asarray(data, dtype=dtype)
elif isinstance(data, pd_Series):
_check_for_bad_pandas_dtypes(data.to_frame().dtypes)
return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well
return np.asarray(data, dtype=dtype) # SparseArray should be supported as well
else:
raise TypeError(
f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
@ -556,7 +557,8 @@ class LightGBMError(Exception):
# DeprecationWarning is not shown by default, so let's create our own with higher level
class LGBMDeprecationWarning(UserWarning):
# ref: https://peps.python.org/pep-0565/#additional-use-case-for-futurewarning
class LGBMDeprecationWarning(FutureWarning):
"""Custom deprecation warning."""
pass
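# Illustrative only (not part of this module): because the class now derives
# from FutureWarning, deprecation messages emitted like the hypothetical call
# below are shown to end users by default (unlike DeprecationWarning):
#
#   warnings.warn("'foo_param' is deprecated, use 'bar_param'", LGBMDeprecationWarning, stacklevel=2)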
@ -726,7 +728,7 @@ def _convert_from_sliced_object(data: np.ndarray) -> np.ndarray:
def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]:
"""Get pointer of float numpy array / list."""
if _is_1d_list(data):
data = np.array(data, copy=False)
data = np.asarray(data)
if _is_numpy_1d_array(data):
data = _convert_from_sliced_object(data)
assert data.flags.c_contiguous
@ -747,7 +749,7 @@ def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray
def _c_int_array(data: np.ndarray) -> Tuple[_ctypes_int_ptr, int, np.ndarray]:
"""Get pointer of int numpy array / list."""
if _is_1d_list(data):
data = np.array(data, copy=False)
data = np.asarray(data)
if _is_numpy_1d_array(data):
data = _convert_from_sliced_object(data)
assert data.flags.c_contiguous
@ -1268,7 +1270,7 @@ class _InnerPredictor:
preds: Optional[np.ndarray],
) -> Tuple[np.ndarray, int]:
if mat.dtype == np.float32 or mat.dtype == np.float64:
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
else: # change non-float data to float data, need to copy
data = np.array(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data, _ = _c_float_array(data)
@ -2283,9 +2285,9 @@ class Dataset:
self._handle = ctypes.c_void_p()
if mat.dtype == np.float32 or mat.dtype == np.float64:
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
else: # change non-float data to float data, need to copy
data = np.array(mat.reshape(mat.size), dtype=np.float32)
data = np.asarray(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data, _ = _c_float_array(data)
_safe_call(
@ -2330,7 +2332,7 @@ class Dataset:
nrow[i] = mat.shape[0]
if mat.dtype == np.float32 or mat.dtype == np.float64:
mats[i] = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
mats[i] = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
else: # change non-float data to float data, need to copy
mats[i] = np.array(mat.reshape(mat.size), dtype=np.float32)

Просмотреть файл

@ -1,5 +1,6 @@
# coding: utf-8
"""Callbacks library."""
from collections import OrderedDict
from dataclasses import dataclass
from functools import partial

Просмотреть файл

@ -37,18 +37,6 @@ except ImportError:
concat = None
"""numpy"""
try:
from numpy.random import Generator as np_random_Generator
except ImportError:
class np_random_Generator: # type: ignore
"""Dummy class for np.random.Generator."""
def __init__(self, *args: Any, **kwargs: Any):
pass
"""matplotlib"""
try:
import matplotlib # noqa: F401

Просмотреть файл

@ -6,6 +6,7 @@ dask.Array and dask.DataFrame collections.
It is based on dask-lightgbm, which was based on dask-xgboost.
"""
import operator
import socket
from collections import defaultdict

Просмотреть файл

@ -1,5 +1,6 @@
# coding: utf-8
"""Library with training routines of LightGBM."""
import copy
import json
import warnings
@ -511,7 +512,7 @@ def _make_n_folds(
if hasattr(folds, "split"):
group_info = full_data.get_group()
if group_info is not None:
group_info = np.array(group_info, dtype=np.int32, copy=False)
group_info = np.asarray(group_info, dtype=np.int32)
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
else:
flatted_group = np.zeros(num_data, dtype=np.int32)
@ -525,7 +526,7 @@ def _make_n_folds(
if not SKLEARN_INSTALLED:
raise LightGBMError("scikit-learn is required for ranking cv")
# ranking task, split according to groups
group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
group_info = np.asarray(full_data.get_group(), dtype=np.int32)
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
group_kfold = _LGBMGroupKFold(n_splits=nfold)
folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)

Просмотреть файл

@ -1,5 +1,6 @@
# coding: utf-8
"""Find the path to LightGBM dynamic library files."""
from pathlib import Path
from platform import system
from typing import List

Просмотреть файл

@ -1,5 +1,6 @@
# coding: utf-8
"""Plotting library."""
import math
from copy import deepcopy
from io import BytesIO

Просмотреть файл

@ -1,5 +1,6 @@
# coding: utf-8
"""Scikit-learn wrapper interface for LightGBM."""
import copy
from inspect import signature
from pathlib import Path
@ -40,7 +41,6 @@ from .compat import (
_LGBMModelBase,
_LGBMRegressorBase,
dt_DataTable,
np_random_Generator,
pd_DataFrame,
)
from .engine import train
@ -454,6 +454,30 @@ _lgbmmodel_doc_predict = """
"""
def _extract_evaluation_meta_data(
*,
collection: Optional[Union[Dict[Any, Any], List[Any]]],
name: str,
i: int,
) -> Optional[Any]:
"""Try to extract the ith element of one of the ``eval_*`` inputs."""
if collection is None:
return None
elif isinstance(collection, list):
# It's possible, for example, to pass 3 eval sets through `eval_set`,
# but only 1 init_score through `eval_init_score`.
#
# This if-else accounts for that possibility.
if len(collection) > i:
return collection[i]
else:
return None
elif isinstance(collection, dict):
return collection.get(i, None)
else:
raise TypeError(f"{name} should be dict or list")
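A small illustration, with hypothetical inputs, of how this helper tolerates eval_* collections that are shorter than eval_set or keyed by eval-set index:

import numpy as np

weights = [np.ones(100)]  # weights supplied for only the first eval set
_extract_evaluation_meta_data(collection=weights, name="eval_sample_weight", i=0)  # -> weights[0]
_extract_evaluation_meta_data(collection=weights, name="eval_sample_weight", i=1)  # -> None
_extract_evaluation_meta_data(collection={1: np.zeros(50)}, name="eval_init_score", i=1)  # -> the zeros array
_extract_evaluation_meta_data(collection=None, name="eval_group", i=0)  # -> None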
class LGBMModel(_LGBMModelBase):
"""Implementation of the scikit-learn API for LightGBM."""
@ -475,7 +499,7 @@ class LGBMModel(_LGBMModelBase):
colsample_bytree: float = 1.0,
reg_alpha: float = 0.0,
reg_lambda: float = 0.0,
random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
n_jobs: Optional[int] = None,
importance_type: str = "split",
**kwargs: Any,
@ -492,6 +516,7 @@ class LGBMModel(_LGBMModelBase):
Maximum tree leaves for base learners.
max_depth : int, optional (default=-1)
Maximum tree depth for base learners, <=0 means no limit.
If setting this to a positive value, consider also changing ``num_leaves`` to ``<= 2^max_depth``.
learning_rate : float, optional (default=0.1)
Boosting learning rate.
You can use ``callbacks`` parameter of ``fit`` method to shrink/adapt learning rate
@ -738,7 +763,7 @@ class LGBMModel(_LGBMModelBase):
if isinstance(params["random_state"], np.random.RandomState):
params["random_state"] = params["random_state"].randint(np.iinfo(np.int32).max)
elif isinstance(params["random_state"], np_random_Generator):
elif isinstance(params["random_state"], np.random.Generator):
params["random_state"] = int(params["random_state"].integers(np.iinfo(np.int32).max))
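For reference, a short sketch (illustrative values only) of the three random_state forms this branch handles; the Generator path is the one that previously needed the compat shim:

import numpy as np
import lightgbm as lgb

X = np.random.default_rng(0).uniform(size=(100, 3))
y = X[:, 0] + X[:, 1]

lgb.LGBMRegressor(n_estimators=5, random_state=42).fit(X, y)                         # plain int seed
lgb.LGBMRegressor(n_estimators=5, random_state=np.random.RandomState(42)).fit(X, y)  # legacy RandomState
lgb.LGBMRegressor(n_estimators=5, random_state=np.random.default_rng(42)).fit(X, y)  # np.random.Generator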
if self._n_classes > 2:
for alias in _ConfigAliases.get("num_class"):
@ -868,17 +893,6 @@ class LGBMModel(_LGBMModelBase):
valid_sets: List[Dataset] = []
if eval_set is not None:
def _get_meta_data(collection, name, i):
if collection is None:
return None
elif isinstance(collection, list):
return collection[i] if len(collection) > i else None
elif isinstance(collection, dict):
return collection.get(i, None)
else:
raise TypeError(f"{name} should be dict or list")
if isinstance(eval_set, tuple):
eval_set = [eval_set]
for i, valid_data in enumerate(eval_set):
@ -886,8 +900,16 @@ class LGBMModel(_LGBMModelBase):
if valid_data[0] is X and valid_data[1] is y:
valid_set = train_set
else:
valid_weight = _get_meta_data(eval_sample_weight, "eval_sample_weight", i)
valid_class_weight = _get_meta_data(eval_class_weight, "eval_class_weight", i)
valid_weight = _extract_evaluation_meta_data(
collection=eval_sample_weight,
name="eval_sample_weight",
i=i,
)
valid_class_weight = _extract_evaluation_meta_data(
collection=eval_class_weight,
name="eval_class_weight",
i=i,
)
if valid_class_weight is not None:
if isinstance(valid_class_weight, dict) and self._class_map is not None:
valid_class_weight = {self._class_map[k]: v for k, v in valid_class_weight.items()}
@ -896,8 +918,16 @@ class LGBMModel(_LGBMModelBase):
valid_weight = valid_class_sample_weight
else:
valid_weight = np.multiply(valid_weight, valid_class_sample_weight)
valid_init_score = _get_meta_data(eval_init_score, "eval_init_score", i)
valid_group = _get_meta_data(eval_group, "eval_group", i)
valid_init_score = _extract_evaluation_meta_data(
collection=eval_init_score,
name="eval_init_score",
i=i,
)
valid_group = _extract_evaluation_meta_data(
collection=eval_group,
name="eval_group",
i=i,
)
valid_set = Dataset(
data=valid_data[0],
label=valid_data[1],

View file

@ -15,11 +15,11 @@ classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence"
]
dependencies = [
"dataclasses ; python_version < '3.7'",
"numpy",
"numpy>=1.17.0",
"scipy"
]
description = "LightGBM Python Package"
@ -29,8 +29,8 @@ maintainers = [
]
name = "lightgbm"
readme = "README.rst"
requires-python = ">=3.6"
version = "4.3.0.99"
requires-python = ">=3.7"
version = "4.4.0.99"
[project.optional-dependencies]
arrow = [
@ -79,7 +79,7 @@ logging.level = "INFO"
sdist.reproducible = true
wheel.py-api = "py3"
experimental = false
strict-config = true
strict-config = false
minimum-version = "0.9.3"
# end:build-system
@ -156,6 +156,8 @@ select = [
"E",
# pyflakes
"F",
# NumPy-specific rules
"NPY",
# pylint
"PL",
# flake8-return: unnecessary assignment before return

View file

@ -289,14 +289,14 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) {
}
// check for conflicts
CheckParamConflict();
CheckParamConflict(params);
}
bool CheckMultiClassObjective(const std::string& objective) {
return (objective == std::string("multiclass") || objective == std::string("multiclassova"));
}
void Config::CheckParamConflict() {
void Config::CheckParamConflict(const std::unordered_map<std::string, std::string>& params) {
// check if objective, metric, and num_class match
int num_class_check = num_class;
bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective == std::string("custom") && num_class_check > 1);
@ -356,14 +356,24 @@ void Config::CheckParamConflict() {
tree_learner.c_str());
}
}
// Check max_depth and num_leaves
if (max_depth > 0) {
// max_depth defaults to -1, so max_depth>0 implies "you explicitly overrode the default"
//
// Changing max_depth while leaving num_leaves at its default (31) can lead to 2 undesirable situations:
//
// * (0 <= max_depth <= 4) it's not possible to produce a tree with 31 leaves
// - this block reduces num_leaves to 2^max_depth
// * (max_depth > 4) 31 leaves is less than a full depth-wise tree, which might lead to underfitting
// - this block warns about that
// ref: https://github.com/microsoft/LightGBM/issues/2898#issuecomment-1002860601
if (max_depth > 0 && (params.count("num_leaves") == 0 || params.at("num_leaves").empty())) {
double full_num_leaves = std::pow(2, max_depth);
if (full_num_leaves > num_leaves
&& num_leaves == kDefaultNumLeaves) {
Log::Warning("Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves."
" (num_leaves=%d).",
num_leaves);
if (full_num_leaves > num_leaves) {
Log::Warning("Provided parameters constrain tree depth (max_depth=%d) without explicitly setting 'num_leaves'. "
"This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<=%.0f) in params. "
"Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity.",
max_depth,
full_num_leaves);
}
if (full_num_leaves < num_leaves) {

View file

@ -274,7 +274,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), rank, num_machines, &num_global_data, &used_data_indices));
// checks whether there's a initial score file when loaded from binary data files
// the intial score file should with suffix ".bin.init"
// the initial score file should have the suffix ".bin.init"
dataset->metadata_.LoadInitialScore(bin_filename);
dataset->device_type_ = config_.device_type;
@ -344,7 +344,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
// load data from binary file
dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), 0, 1, &num_global_data, &used_data_indices));
// checks whether there's a initial score file when loaded from binary data files
// the intial score file should with suffix ".bin.init"
// the initial score file should have the suffix ".bin.init"
dataset->metadata_.LoadInitialScore(bin_filename);
}
// not need to check validation data

View file

@ -125,7 +125,7 @@ def load_from_mat(filename, reference):
mat = np.loadtxt(str(filename), dtype=np.float64)
label = mat[:, 0].astype(np.float32)
mat = mat[:, 1:]
data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
handle = ctypes.c_void_p()
ref = None
if reference is not None:
@ -203,7 +203,7 @@ def test_booster():
mat = data[:, 1:]
preb = np.empty(mat.shape[0], dtype=np.float64)
num_preb = ctypes.c_int64(0)
data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
LIB.LGBM_BoosterPredictForMat(
booster2,
data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),

View file

@ -0,0 +1,12 @@
import numpy as np
import pytest
@pytest.fixture(scope="function")
def rng():
return np.random.default_rng()
@pytest.fixture(scope="function")
def rng_fixed_seed():
return np.random.default_rng(seed=42)
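A minimal sketch (hypothetical test names) of how the test modules below consume these fixtures instead of seeding the global np.random state:

def test_uses_fresh_generator(rng):  # pytest injects a new np.random.Generator per test
    X = rng.uniform(size=(50, 2))
    assert X.shape == (50, 2)

def test_uses_fixed_seed(rng_fixed_seed):  # reproducible draws (seed=42) on every run
    draws = rng_fixed_seed.integers(low=0, high=10, size=3)
    assert draws.shape == (3,)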

View file

@ -20,6 +20,10 @@ if os.getenv("ALLOW_SKIP_ARROW_TESTS") == "1":
else:
import pyarrow as pa # type: ignore
assert (
lgb.compat.PYARROW_INSTALLED is True
), "'pyarrow' and its dependencies must be installed to run the arrow tests"
# ----------------------------------------------------------------------------------------------- #
# UTILITIES #
# ----------------------------------------------------------------------------------------------- #

View file

@ -9,7 +9,7 @@ from pathlib import Path
import numpy as np
import pytest
from scipy import sparse
from sklearn.datasets import dump_svmlight_file, load_svmlight_file
from sklearn.datasets import dump_svmlight_file, load_svmlight_file, make_blobs
from sklearn.model_selection import train_test_split
import lightgbm as lgb
@ -136,7 +136,7 @@ def _create_sequence_from_ndarray(data, num_seq, batch_size):
@pytest.mark.parametrize("batch_size", [3, None])
@pytest.mark.parametrize("include_0_and_nan", [False, True])
@pytest.mark.parametrize("num_seq", [1, 3])
def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq, rng):
params = {"bin_construct_sample_cnt": sample_count}
nrow = 50
@ -175,7 +175,6 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
# Test for validation set.
# Select some random rows as valid data.
rng = np.random.default_rng() # Pass integer to set seed when needed.
valid_idx = (rng.random(10) * nrow).astype(np.int32)
valid_data = data[valid_idx, :]
valid_X = valid_data[:, :-1]
@ -201,7 +200,7 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
@pytest.mark.parametrize("num_seq", [1, 2])
def test_sequence_get_data(num_seq):
def test_sequence_get_data(num_seq, rng):
nrow = 20
ncol = 11
data = np.arange(nrow * ncol, dtype=np.float64).reshape((nrow, ncol))
@ -212,7 +211,7 @@ def test_sequence_get_data(num_seq):
seq_ds = lgb.Dataset(seqs, label=Y, params=None, free_raw_data=False).construct()
assert seq_ds.get_data() == seqs
used_indices = np.random.choice(np.arange(nrow), nrow // 3, replace=False)
used_indices = rng.choice(a=np.arange(nrow), size=nrow // 3, replace=False)
subset_data = seq_ds.subset(used_indices).construct()
np.testing.assert_array_equal(subset_data.get_data(), X[sorted(used_indices)])
@ -246,8 +245,8 @@ def test_chunked_dataset_linear():
valid_data.construct()
def test_save_dataset_subset_and_load_from_file(tmp_path):
data = np.random.rand(100, 2)
def test_save_dataset_subset_and_load_from_file(tmp_path, rng):
data = rng.standard_normal(size=(100, 2))
params = {"max_bin": 50, "min_data_in_bin": 10}
ds = lgb.Dataset(data, params=params)
ds.subset([1, 2, 3, 5, 8]).save_binary(tmp_path / "subset.bin")
@ -267,18 +266,18 @@ def test_subset_group():
assert subset_group[1] == 9
def test_add_features_throws_if_num_data_unequal():
X1 = np.random.random((100, 1))
X2 = np.random.random((10, 1))
def test_add_features_throws_if_num_data_unequal(rng):
X1 = rng.uniform(size=(100, 1))
X2 = rng.uniform(size=(10, 1))
d1 = lgb.Dataset(X1).construct()
d2 = lgb.Dataset(X2).construct()
with pytest.raises(lgb.basic.LightGBMError):
d1.add_features_from(d2)
def test_add_features_throws_if_datasets_unconstructed():
X1 = np.random.random((100, 1))
X2 = np.random.random((100, 1))
def test_add_features_throws_if_datasets_unconstructed(rng):
X1 = rng.uniform(size=(100, 1))
X2 = rng.uniform(size=(100, 1))
with pytest.raises(ValueError):
d1 = lgb.Dataset(X1)
d2 = lgb.Dataset(X2)
@ -293,8 +292,8 @@ def test_add_features_throws_if_datasets_unconstructed():
d1.add_features_from(d2)
def test_add_features_equal_data_on_alternating_used_unused(tmp_path):
X = np.random.random((100, 5))
def test_add_features_equal_data_on_alternating_used_unused(tmp_path, rng):
X = rng.uniform(size=(100, 5))
X[:, [1, 3]] = 0
names = [f"col_{i}" for i in range(5)]
for j in range(1, 5):
@ -313,8 +312,8 @@ def test_add_features_equal_data_on_alternating_used_unused(tmp_path):
assert dtxt == d1txt
def test_add_features_same_booster_behaviour(tmp_path):
X = np.random.random((100, 5))
def test_add_features_same_booster_behaviour(tmp_path, rng):
X = rng.uniform(size=(100, 5))
X[:, [1, 3]] = 0
names = [f"col_{i}" for i in range(5)]
for j in range(1, 5):
@ -322,7 +321,7 @@ def test_add_features_same_booster_behaviour(tmp_path):
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
d1.add_features_from(d2)
d = lgb.Dataset(X, feature_name=names).construct()
y = np.random.random(100)
y = rng.uniform(size=(100,))
d1.set_label(y)
d.set_label(y)
b1 = lgb.Booster(train_set=d1)
@ -341,11 +340,11 @@ def test_add_features_same_booster_behaviour(tmp_path):
assert dtxt == d1txt
def test_add_features_from_different_sources():
def test_add_features_from_different_sources(rng):
pd = pytest.importorskip("pandas")
n_row = 100
n_col = 5
X = np.random.random((n_row, n_col))
X = rng.uniform(size=(n_row, n_col))
xxs = [X, sparse.csr_matrix(X), pd.DataFrame(X)]
names = [f"col_{i}" for i in range(n_col)]
seq = _create_sequence_from_ndarray(X, 1, 30)
@ -380,9 +379,9 @@ def test_add_features_from_different_sources():
assert d1.feature_name == res_feature_names
def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_features(capsys):
def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_features(capsys, rng):
arr_a = np.zeros((100, 1), dtype=np.float32)
arr_b = np.random.normal(size=(100, 5))
arr_b = rng.uniform(size=(100, 5))
dataset_a = lgb.Dataset(arr_a).construct()
expected_msg = (
@ -402,10 +401,10 @@ def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_feat
assert dataset_a._handle.value == original_handle
def test_cegb_affects_behavior(tmp_path):
X = np.random.random((100, 5))
def test_cegb_affects_behavior(tmp_path, rng):
X = rng.uniform(size=(100, 5))
X[:, [1, 3]] = 0
y = np.random.random(100)
y = rng.uniform(size=(100,))
names = [f"col_{i}" for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y)
@ -433,10 +432,10 @@ def test_cegb_affects_behavior(tmp_path):
assert basetxt != casetxt
def test_cegb_scaling_equalities(tmp_path):
X = np.random.random((100, 5))
def test_cegb_scaling_equalities(tmp_path, rng):
X = rng.uniform(size=(100, 5))
X[:, [1, 3]] = 0
y = np.random.random(100)
y = rng.uniform(size=(100,))
names = [f"col_{i}" for i in range(5)]
ds = lgb.Dataset(X, feature_name=names).construct()
ds.set_label(y)
@ -573,10 +572,10 @@ def test_dataset_construction_overwrites_user_provided_metadata_fields():
np_assert_array_equal(dtrain.get_field("weight"), expected_weight, strict=True)
def test_dataset_construction_with_high_cardinality_categorical_succeeds():
def test_dataset_construction_with_high_cardinality_categorical_succeeds(rng):
pd = pytest.importorskip("pandas")
X = pd.DataFrame({"x1": np.random.randint(0, 5_000, 10_000)})
y = np.random.rand(10_000)
X = pd.DataFrame({"x1": rng.integers(low=0, high=5_000, size=(10_000,))})
y = rng.uniform(size=(10_000,))
ds = lgb.Dataset(X, y, categorical_feature=["x1"])
ds.construct()
assert ds.num_data() == 10_000
@ -663,11 +662,11 @@ def test_choose_param_value_objective(objective_alias):
@pytest.mark.parametrize("collection", ["1d_np", "2d_np", "pd_float", "pd_str", "1d_list", "2d_list"])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_list_to_1d_numpy(collection, dtype):
def test_list_to_1d_numpy(collection, dtype, rng):
collection2y = {
"1d_np": np.random.rand(10),
"2d_np": np.random.rand(10, 1),
"pd_float": np.random.rand(10),
"1d_np": rng.uniform(size=(10,)),
"2d_np": rng.uniform(size=(10, 1)),
"pd_float": rng.uniform(size=(10,)),
"pd_str": ["a", "b"],
"1d_list": [1] * 10,
"2d_list": [[1], [2]],
@ -696,7 +695,7 @@ def test_list_to_1d_numpy(collection, dtype):
@pytest.mark.parametrize("init_score_type", ["array", "dataframe", "list"])
def test_init_score_for_multiclass_classification(init_score_type):
def test_init_score_for_multiclass_classification(init_score_type, rng):
init_score = [[i * 10 + j for j in range(3)] for i in range(10)]
if init_score_type == "array":
init_score = np.array(init_score)
@ -704,7 +703,7 @@ def test_init_score_for_multiclass_classification(init_score_type):
if not PANDAS_INSTALLED:
pytest.skip("Pandas is not installed.")
init_score = pd_DataFrame(init_score)
data = np.random.rand(10, 2)
data = rng.uniform(size=(10, 2))
ds = lgb.Dataset(data, init_score=init_score).construct()
np.testing.assert_equal(ds.get_field("init_score"), init_score)
np.testing.assert_equal(ds.init_score, init_score)
@ -741,16 +740,20 @@ def test_param_aliases():
def _bad_gradients(preds, _):
return np.random.randn(len(preds) + 1), np.random.rand(len(preds) + 1)
rng = np.random.default_rng()
# "bad" = 1 element too many
size = (len(preds) + 1,)
return rng.standard_normal(size=size), rng.uniform(size=size)
def _good_gradients(preds, _):
return np.random.randn(*preds.shape), np.random.rand(*preds.shape)
rng = np.random.default_rng()
return rng.standard_normal(size=preds.shape), rng.uniform(size=preds.shape)
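For context, a hedged sketch of a well-formed custom objective satisfying the shape contract these helpers exercise (gradient and hessian must each have exactly preds.shape); numpy is already imported as np in this module:

def custom_l2(preds, train_data):
    # gradient / hessian of 0.5 * (preds - label)^2, one value per prediction
    label = train_data.get_label()
    grad = preds - label
    hess = np.ones_like(preds)
    return grad, hess

# usage sketch, assuming `train_ds` is an lgb.Dataset:
# lgb.train({"objective": custom_l2, "verbose": -1}, train_ds, num_boost_round=5)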
def test_custom_objective_safety():
def test_custom_objective_safety(rng):
nrows = 100
X = np.random.randn(nrows, 5)
X = rng.standard_normal(size=(nrows, 5))
y_binary = np.arange(nrows) % 2
classes = [0, 1, 2]
nclass = len(classes)
@ -771,10 +774,13 @@ def test_custom_objective_safety():
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("feature_name", [["x1", "x2"], "auto"])
def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name):
def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name, rng):
pd = pytest.importorskip("pandas")
X = np.random.rand(10, 2).astype(dtype)
df = pd.DataFrame(X)
X = rng.uniform(size=(10, 2)).astype(dtype)
# copy=False is necessary because starting with pandas 3.0, pd.DataFrame() creates
# a copy of the input numpy array by default
# ref: https://github.com/pandas-dev/pandas/issues/58913
df = pd.DataFrame(X, copy=False)
built_data = lgb.basic._data_from_pandas(
data=df, feature_name=feature_name, categorical_feature="auto", pandas_categorical=None
)[0]
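A rough check (assuming a single float dtype and a numpy-backed frame) of the copy=False behaviour the comment above describes:

import numpy as np
import pandas as pd

X = np.ones((4, 2), dtype=np.float64)
df = pd.DataFrame(X, copy=False)  # explicitly request the no-copy path

# typically True: the frame still aliases X, so it can be handed to LightGBM without a copy
print(np.shares_memory(X, df.to_numpy()))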
@ -784,9 +790,9 @@ def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name):
@pytest.mark.parametrize("feature_name", [["x1"], [42], "auto"])
@pytest.mark.parametrize("categories", ["seen", "unseen"])
def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, categories):
def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, categories, rng):
pd = pytest.importorskip("pandas")
X = np.random.choice(["a", "b"], 100).reshape(-1, 1)
X = rng.choice(a=["a", "b"], size=(100, 1))
column_name = "a" if feature_name == "auto" else feature_name[0]
df = pd.DataFrame(X.copy(), columns=[column_name], dtype="category")
if categories == "seen":
@ -814,15 +820,15 @@ def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, c
@pytest.mark.parametrize("min_data_in_bin", [2, 10])
def test_feature_num_bin(min_data_in_bin):
def test_feature_num_bin(min_data_in_bin, rng):
X = np.vstack(
[
np.random.rand(100),
rng.uniform(size=(100,)),
np.array([1, 2] * 50),
np.array([0, 1, 2] * 33 + [0]),
np.array([1, 2] * 49 + 2 * [np.nan]),
np.zeros(100),
np.random.choice([0, 1], 100),
rng.choice(a=[0, 1], size=(100,)),
]
).T
n_continuous = X.shape[1] - 1
@ -862,9 +868,9 @@ def test_feature_num_bin(min_data_in_bin):
ds.feature_num_bin(num_features)
def test_feature_num_bin_with_max_bin_by_feature():
X = np.random.rand(100, 3)
max_bin_by_feature = np.random.randint(3, 30, size=X.shape[1])
def test_feature_num_bin_with_max_bin_by_feature(rng):
X = rng.uniform(size=(100, 3))
max_bin_by_feature = rng.integers(low=3, high=30, size=X.shape[1])
ds = lgb.Dataset(X, params={"max_bin_by_feature": max_bin_by_feature}).construct()
actual_num_bins = [ds.feature_num_bin(i) for i in range(X.shape[1])]
np.testing.assert_equal(actual_num_bins, max_bin_by_feature)
@ -882,8 +888,62 @@ def test_set_leaf_output():
np.testing.assert_allclose(bst.predict(X), y_pred + 1)
def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Dataset():
def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Dataset(rng):
ds = lgb.Dataset(
data=np.random.randn(100, 3),
data=rng.standard_normal(size=(100, 3)),
)
assert ds.construct().feature_name == ["Column_0", "Column_1", "Column_2"]
# NOTE: this intentionally contains values where num_leaves <, ==, and > (2^max_depth)
@pytest.mark.parametrize(("max_depth", "num_leaves"), [(-1, 3), (-1, 50), (5, 3), (5, 31), (5, 32), (8, 3), (8, 31)])
def test_max_depth_warning_is_not_raised_if_num_leaves_is_also_provided(capsys, num_leaves, max_depth):
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
lgb.Booster(
params={
"objective": "binary",
"max_depth": max_depth,
"num_leaves": num_leaves,
"num_iterations": 1,
"verbose": 0,
},
train_set=lgb.Dataset(X, label=y),
)
assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
# NOTE: max_depth < 5 is significant here because the default for num_leaves=31. With max_depth=5,
# a full depth-wise tree would have 2^5 = 32 leaves.
@pytest.mark.parametrize("max_depth", [1, 2, 3, 4])
def test_max_depth_warning_is_not_raised_if_max_depth_gt_1_and_lt_5_and_num_leaves_omitted(capsys, max_depth):
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
lgb.Booster(
params={
"objective": "binary",
"max_depth": max_depth,
"num_iterations": 1,
"verbose": 0,
},
train_set=lgb.Dataset(X, label=y),
)
assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
@pytest.mark.parametrize("max_depth", [5, 6, 7, 8, 9])
def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(capsys, max_depth):
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
lgb.Booster(
params={
"objective": "binary",
"max_depth": max_depth,
"num_iterations": 1,
"verbose": 0,
},
train_set=lgb.Dataset(X, label=y),
)
expected_warning = (
f"[LightGBM] [Warning] Provided parameters constrain tree depth (max_depth={max_depth}) without explicitly "
f"setting 'num_leaves'. This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<={2**max_depth}) "
"in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity."
)
assert expected_warning in capsys.readouterr().out
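In user code, either of the following parameter combinations keeps that check quiet (values are only illustrative):

# would warn: max_depth=6 with the default num_leaves=31, and 31 < 2^6 = 64
params_warns = {"objective": "binary", "max_depth": 6}

# option 1: constrain depth and size the leaf budget to match it
params_depth = {"objective": "binary", "max_depth": 6, "num_leaves": 2**6}

# option 2: leave depth unconstrained and control complexity with num_leaves alone
params_leaves = {"objective": "binary", "max_depth": -1, "num_leaves": 31}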

View file

@ -550,7 +550,7 @@ def test_multi_class_error():
@pytest.mark.skipif(
getenv("TASK", "") == "cuda", reason="Skip due to differences in implementation details of CUDA version"
)
def test_auc_mu():
def test_auc_mu(rng):
# should give same result as binary auc for 2 classes
X, y = load_digits(n_class=10, return_X_y=True)
y_new = np.zeros((len(y)))
@ -578,7 +578,7 @@ def test_auc_mu():
assert results_auc_mu["training"]["auc_mu"][-1] == pytest.approx(0.5)
# test that weighted data gives different auc_mu
lgb_X = lgb.Dataset(X, label=y)
lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.abs(np.random.normal(size=y.shape)))
lgb_X_weighted = lgb.Dataset(X, label=y, weight=np.abs(rng.standard_normal(size=y.shape)))
results_unweighted = {}
results_weighted = {}
params = dict(params, num_classes=10, num_leaves=5)
@ -1432,9 +1432,9 @@ def test_feature_name():
assert feature_names == gbm.feature_name()
def test_feature_name_with_non_ascii():
X_train = np.random.normal(size=(100, 4))
y_train = np.random.random(100)
def test_feature_name_with_non_ascii(rng):
X_train = rng.normal(size=(100, 4))
y_train = rng.normal(size=(100,))
# This has non-ascii strings.
feature_names = ["F_零", "F_一", "F_二", "F_三"]
params = {"verbose": -1}
@ -1448,9 +1448,14 @@ def test_feature_name_with_non_ascii():
assert feature_names == gbm2.feature_name()
def test_parameters_are_loaded_from_model_file(tmp_path, capsys):
X = np.hstack([np.random.rand(100, 1), np.random.randint(0, 5, (100, 2))])
y = np.random.rand(100)
def test_parameters_are_loaded_from_model_file(tmp_path, capsys, rng):
X = np.hstack(
[
rng.uniform(size=(100, 1)),
rng.integers(low=0, high=5, size=(100, 2)),
]
)
y = rng.uniform(size=(100,))
ds = lgb.Dataset(X, y)
params = {
"bagging_fraction": 0.8,
@ -1702,29 +1707,29 @@ def test_all_expected_params_are_written_out_to_model_text(tmp_path):
assert param_str in model_txt_from_memory
def test_pandas_categorical():
# why fixed seed?
# sometimes there is no difference how cols are treated (cat or not cat)
def test_pandas_categorical(rng_fixed_seed):
pd = pytest.importorskip("pandas")
np.random.seed(42) # sometimes there is no difference how cols are treated (cat or not cat)
X = pd.DataFrame(
{
"A": np.random.permutation(["a", "b", "c", "d"] * 75), # str
"B": np.random.permutation([1, 2, 3] * 100), # int
"C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float
"D": np.random.permutation([True, False] * 150), # bool
"E": pd.Categorical(np.random.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
"A": rng_fixed_seed.permutation(["a", "b", "c", "d"] * 75), # str
"B": rng_fixed_seed.permutation([1, 2, 3] * 100), # int
"C": rng_fixed_seed.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float
"D": rng_fixed_seed.permutation([True, False] * 150), # bool
"E": pd.Categorical(rng_fixed_seed.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
}
) # str and ordered categorical
y = np.random.permutation([0, 1] * 150)
y = rng_fixed_seed.permutation([0, 1] * 150)
X_test = pd.DataFrame(
{
"A": np.random.permutation(["a", "b", "e"] * 20), # unseen category
"B": np.random.permutation([1, 3] * 30),
"C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15),
"D": np.random.permutation([True, False] * 30),
"E": pd.Categorical(np.random.permutation(["z", "y"] * 30), ordered=True),
"A": rng_fixed_seed.permutation(["a", "b", "e"] * 20), # unseen category
"B": rng_fixed_seed.permutation([1, 3] * 30),
"C": rng_fixed_seed.permutation([0.1, -0.1, 0.2, 0.2] * 15),
"D": rng_fixed_seed.permutation([True, False] * 30),
"E": pd.Categorical(rng_fixed_seed.permutation(["z", "y"] * 30), ordered=True),
}
)
np.random.seed() # reset seed
cat_cols_actual = ["A", "B", "C", "D"]
cat_cols_to_store = cat_cols_actual + ["E"]
X[cat_cols_actual] = X[cat_cols_actual].astype("category")
@ -1786,21 +1791,21 @@ def test_pandas_categorical():
assert gbm7.pandas_categorical == cat_values
def test_pandas_sparse():
def test_pandas_sparse(rng):
pd = pytest.importorskip("pandas")
X = pd.DataFrame(
{
"A": pd.arrays.SparseArray(np.random.permutation([0, 1, 2] * 100)),
"B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
"C": pd.arrays.SparseArray(np.random.permutation([True, False] * 150)),
"A": pd.arrays.SparseArray(rng.permutation([0, 1, 2] * 100)),
"B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
"C": pd.arrays.SparseArray(rng.permutation([True, False] * 150)),
}
)
y = pd.Series(pd.arrays.SparseArray(np.random.permutation([0, 1] * 150)))
y = pd.Series(pd.arrays.SparseArray(rng.permutation([0, 1] * 150)))
X_test = pd.DataFrame(
{
"A": pd.arrays.SparseArray(np.random.permutation([0, 2] * 30)),
"B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
"C": pd.arrays.SparseArray(np.random.permutation([True, False] * 30)),
"A": pd.arrays.SparseArray(rng.permutation([0, 2] * 30)),
"B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
"C": pd.arrays.SparseArray(rng.permutation([True, False] * 30)),
}
)
for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]):
@ -1816,9 +1821,9 @@ def test_pandas_sparse():
np.testing.assert_allclose(pred_sparse, pred_dense)
def test_reference_chain():
X = np.random.normal(size=(100, 2))
y = np.random.normal(size=100)
def test_reference_chain(rng):
X = rng.normal(size=(100, 2))
y = rng.normal(size=(100,))
tmp_dat = lgb.Dataset(X, y)
# take subsets and train
tmp_dat_train = tmp_dat.subset(np.arange(80))
@ -1940,28 +1945,28 @@ def test_contribs_sparse_multiclass():
np.testing.assert_allclose(contribs_csc_array, contribs_dense)
@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason="not enough RAM")
def test_int32_max_sparse_contribs():
params = {"objective": "binary"}
train_features = np.random.rand(100, 1000)
train_targets = [0] * 50 + [1] * 50
lgb_train = lgb.Dataset(train_features, train_targets)
gbm = lgb.train(params, lgb_train, num_boost_round=2)
csr_input_shape = (3000000, 1000)
test_features = csr_matrix(csr_input_shape)
for i in range(0, csr_input_shape[0], csr_input_shape[0] // 6):
for j in range(0, 1000, 100):
test_features[i, j] = random.random()
y_pred_csr = gbm.predict(test_features, pred_contrib=True)
# Note there is an extra column added to the output for the expected value
csr_output_shape = (csr_input_shape[0], csr_input_shape[1] + 1)
assert y_pred_csr.shape == csr_output_shape
y_pred_csc = gbm.predict(test_features.tocsc(), pred_contrib=True)
# Note output CSC shape should be same as CSR output shape
assert y_pred_csc.shape == csr_output_shape
# @pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason="not enough RAM")
# def test_int32_max_sparse_contribs(rng):
# params = {"objective": "binary"}
# train_features = rng.uniform(size=(100, 1000))
# train_targets = [0] * 50 + [1] * 50
# lgb_train = lgb.Dataset(train_features, train_targets)
# gbm = lgb.train(params, lgb_train, num_boost_round=2)
# csr_input_shape = (3000000, 1000)
# test_features = csr_matrix(csr_input_shape)
# for i in range(0, csr_input_shape[0], csr_input_shape[0] // 6):
# for j in range(0, 1000, 100):
# test_features[i, j] = random.random()
# y_pred_csr = gbm.predict(test_features, pred_contrib=True)
# # Note there is an extra column added to the output for the expected value
# csr_output_shape = (csr_input_shape[0], csr_input_shape[1] + 1)
# assert y_pred_csr.shape == csr_output_shape
# y_pred_csc = gbm.predict(test_features.tocsc(), pred_contrib=True)
# # Note output CSC shape should be same as CSR output shape
# assert y_pred_csc.shape == csr_output_shape
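As the commented-out test above notes, predict(..., pred_contrib=True) returns one column per feature plus one extra column for the expected value; a small dense-input sketch of that shape contract:

import numpy as np
import lightgbm as lgb

X = np.random.default_rng(0).uniform(size=(200, 10))
y = (X[:, 0] > 0.5).astype(int)
bst = lgb.train({"objective": "binary", "verbose": -1}, lgb.Dataset(X, label=y), num_boost_round=3)

contribs = bst.predict(X, pred_contrib=True)
assert contribs.shape == (200, 10 + 1)  # n_features + 1, last column is the expected value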
def test_sliced_data():
def test_sliced_data(rng):
def train_and_get_predictions(features, labels):
dataset = lgb.Dataset(features, label=labels)
lgb_params = {
@ -1977,7 +1982,7 @@ def test_sliced_data():
return gbm.predict(features)
num_samples = 100
features = np.random.rand(num_samples, 5)
features = rng.uniform(size=(num_samples, 5))
positive_samples = int(num_samples * 0.25)
labels = np.append(
np.ones(positive_samples, dtype=np.float32), np.zeros(num_samples - positive_samples, dtype=np.float32)
@ -2011,13 +2016,13 @@ def test_sliced_data():
np.testing.assert_allclose(origin_pred, sliced_pred)
def test_init_with_subset():
data = np.random.random((50, 2))
def test_init_with_subset(rng):
data = rng.uniform(size=(50, 2))
y = [1] * 25 + [0] * 25
lgb_train = lgb.Dataset(data, y, free_raw_data=False)
subset_index_1 = np.random.choice(np.arange(50), 30, replace=False)
subset_index_1 = rng.choice(a=np.arange(50), size=30, replace=False)
subset_data_1 = lgb_train.subset(subset_index_1)
subset_index_2 = np.random.choice(np.arange(50), 20, replace=False)
subset_index_2 = rng.choice(a=np.arange(50), size=20, replace=False)
subset_data_2 = lgb_train.subset(subset_index_2)
params = {"objective": "binary", "verbose": -1}
init_gbm = lgb.train(params=params, train_set=subset_data_1, num_boost_round=10, keep_training_booster=True)
@ -2037,9 +2042,9 @@ def test_init_with_subset():
assert subset_data_4.get_data() == "lgb_train_data.bin"
def test_training_on_constructed_subset_without_params():
X = np.random.random((100, 10))
y = np.random.random(100)
def test_training_on_constructed_subset_without_params(rng):
X = rng.uniform(size=(100, 10))
y = rng.uniform(size=(100,))
lgb_data = lgb.Dataset(X, y)
subset_indices = [1, 2, 3, 4]
subset = lgb_data.subset(subset_indices).construct()
@ -2051,9 +2056,10 @@ def test_training_on_constructed_subset_without_params():
def generate_trainset_for_monotone_constraints_tests(x3_to_category=True):
number_of_dpoints = 3000
x1_positively_correlated_with_y = np.random.random(size=number_of_dpoints)
x2_negatively_correlated_with_y = np.random.random(size=number_of_dpoints)
x3_negatively_correlated_with_y = np.random.random(size=number_of_dpoints)
rng = np.random.default_rng()
x1_positively_correlated_with_y = rng.uniform(size=number_of_dpoints)
x2_negatively_correlated_with_y = rng.uniform(size=number_of_dpoints)
x3_negatively_correlated_with_y = rng.uniform(size=number_of_dpoints)
x = np.column_stack(
(
x1_positively_correlated_with_y,
@ -2062,8 +2068,8 @@ def generate_trainset_for_monotone_constraints_tests(x3_to_category=True):
)
)
zs = np.random.normal(loc=0.0, scale=0.01, size=number_of_dpoints)
scales = 10.0 * (np.random.random(6) + 0.5)
zs = rng.normal(loc=0.0, scale=0.01, size=number_of_dpoints)
scales = 10.0 * (rng.uniform(size=6) + 0.5)
y = (
scales[0] * x1_positively_correlated_with_y
+ np.sin(scales[1] * np.pi * x1_positively_correlated_with_y)
@ -2265,9 +2271,8 @@ def test_max_bin_by_feature():
assert len(np.unique(est.predict(X))) == 3
def test_small_max_bin():
np.random.seed(0)
y = np.random.choice([0, 1], 100)
def test_small_max_bin(rng_fixed_seed):
y = rng_fixed_seed.choice([0, 1], 100)
x = np.ones((100, 1))
x[:30, 0] = -1
x[60:, 0] = 2
@ -2278,7 +2283,6 @@ def test_small_max_bin():
params["max_bin"] = 3
lgb_x = lgb.Dataset(x, label=y)
lgb.train(params, lgb_x, num_boost_round=5)
np.random.seed() # reset seed
def test_refit():
@ -2293,14 +2297,14 @@ def test_refit():
assert err_pred > new_err_pred
def test_refit_dataset_params():
def test_refit_dataset_params(rng):
# check refit accepts dataset_params
X, y = load_breast_cancer(return_X_y=True)
lgb_train = lgb.Dataset(X, y, init_score=np.zeros(y.size))
train_params = {"objective": "binary", "verbose": -1, "seed": 123}
gbm = lgb.train(train_params, lgb_train, num_boost_round=10)
non_weight_err_pred = log_loss(y, gbm.predict(X))
refit_weight = np.random.rand(y.shape[0])
refit_weight = rng.uniform(size=(y.shape[0],))
dataset_params = {
"max_bin": 260,
"min_data_in_bin": 5,
@ -3011,7 +3015,7 @@ def test_model_size():
@pytest.mark.skipif(
getenv("TASK", "") == "cuda", reason="Skip due to differences in implementation details of CUDA version"
)
def test_get_split_value_histogram():
def test_get_split_value_histogram(rng_fixed_seed):
X, y = make_synthetic_regression()
X = np.repeat(X, 3, axis=0)
y = np.repeat(y, 3, axis=0)
@ -3351,7 +3355,7 @@ def test_binning_same_sign():
assert predicted[1] == pytest.approx(predicted[2])
def test_dataset_update_params():
def test_dataset_update_params(rng):
default_params = {
"max_bin": 100,
"max_bin_by_feature": [20, 10],
@ -3400,8 +3404,8 @@ def test_dataset_update_params():
"linear_tree": True,
"precise_float_parser": False,
}
X = np.random.random((100, 2))
y = np.random.random(100)
X = rng.uniform(size=(100, 2))
y = rng.uniform(size=(100,))
# decreasing without freeing raw data is allowed
lgb_data = lgb.Dataset(X, y, params=default_params, free_raw_data=False).construct()
@ -3443,12 +3447,12 @@ def test_dataset_update_params():
lgb.train(new_params, lgb_data, num_boost_round=3)
def test_dataset_params_with_reference():
def test_dataset_params_with_reference(rng):
default_params = {"max_bin": 100}
X = np.random.random((100, 2))
y = np.random.random(100)
X_val = np.random.random((100, 2))
y_val = np.random.random(100)
X = rng.uniform(size=(100, 2))
y = rng.uniform(size=(100,))
X_val = rng.uniform(size=(100, 2))
y_val = rng.uniform(size=(100,))
lgb_train = lgb.Dataset(X, y, params=default_params, free_raw_data=False).construct()
lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train, free_raw_data=False).construct()
assert lgb_train.get_params() == default_params
@ -3486,7 +3490,7 @@ def test_path_smoothing():
assert err < err_new
def test_trees_to_dataframe():
def test_trees_to_dataframe(rng):
pytest.importorskip("pandas")
def _imptcs_to_numpy(X, impcts_dict):
@ -3516,7 +3520,7 @@ def test_trees_to_dataframe():
# test edge case with one leaf
X = np.ones((10, 2))
y = np.random.rand(10)
y = rng.uniform(size=(10,))
data = lgb.Dataset(X, label=y)
bst = lgb.train({"objective": "binary", "verbose": -1}, data, num_trees)
tree_df = bst.trees_to_dataframe()
@ -3574,11 +3578,10 @@ def test_interaction_constraints():
)
def test_linear_trees_num_threads():
def test_linear_trees_num_threads(rng_fixed_seed):
# check that number of threads does not affect result
np.random.seed(0)
x = np.arange(0, 1000, 0.1)
y = 2 * x + np.random.normal(0, 0.1, len(x))
y = 2 * x + rng_fixed_seed.normal(loc=0, scale=0.1, size=(len(x),))
x = x[:, np.newaxis]
lgb_train = lgb.Dataset(x, label=y)
params = {"verbose": -1, "objective": "regression", "seed": 0, "linear_tree": True, "num_threads": 2}
@ -3590,11 +3593,10 @@ def test_linear_trees_num_threads():
np.testing.assert_allclose(pred1, pred2)
def test_linear_trees(tmp_path):
def test_linear_trees(tmp_path, rng_fixed_seed):
# check that setting linear_tree=True fits better than ordinary trees when data has linear relationship
np.random.seed(0)
x = np.arange(0, 100, 0.1)
y = 2 * x + np.random.normal(0, 0.1, len(x))
y = 2 * x + rng_fixed_seed.normal(0, 0.1, len(x))
x = x[:, np.newaxis]
lgb_train = lgb.Dataset(x, label=y)
params = {"verbose": -1, "metric": "mse", "seed": 0, "num_leaves": 2}
@ -4099,21 +4101,20 @@ def test_record_evaluation_with_cv(train_metric):
np.testing.assert_allclose(cv_hist[key], eval_result[dataset][f"{metric}-{agg}"])
def test_pandas_with_numpy_regular_dtypes():
def test_pandas_with_numpy_regular_dtypes(rng_fixed_seed):
pd = pytest.importorskip("pandas")
uints = ["uint8", "uint16", "uint32", "uint64"]
ints = ["int8", "int16", "int32", "int64"]
bool_and_floats = ["bool", "float16", "float32", "float64"]
rng = np.random.RandomState(42)
n_samples = 100
# data as float64
df = pd.DataFrame(
{
"x1": rng.randint(0, 2, n_samples),
"x2": rng.randint(1, 3, n_samples),
"x3": 10 * rng.randint(1, 3, n_samples),
"x4": 100 * rng.randint(1, 3, n_samples),
"x1": rng_fixed_seed.integers(low=0, high=2, size=n_samples),
"x2": rng_fixed_seed.integers(low=1, high=3, size=n_samples),
"x3": 10 * rng_fixed_seed.integers(low=1, high=3, size=n_samples),
"x4": 100 * rng_fixed_seed.integers(low=1, high=3, size=n_samples),
}
)
df = df.astype(np.float64)
@ -4139,15 +4140,14 @@ def test_pandas_with_numpy_regular_dtypes():
np.testing.assert_allclose(preds, preds2)
def test_pandas_nullable_dtypes():
def test_pandas_nullable_dtypes(rng_fixed_seed):
pd = pytest.importorskip("pandas")
rng = np.random.RandomState(0)
df = pd.DataFrame(
{
"x1": rng.randint(1, 3, size=100),
"x1": rng_fixed_seed.integers(low=1, high=3, size=100),
"x2": np.linspace(-1, 1, 100),
"x3": pd.arrays.SparseArray(rng.randint(0, 11, size=100)),
"x4": rng.rand(100) < 0.5,
"x3": pd.arrays.SparseArray(rng_fixed_seed.integers(low=0, high=11, size=100)),
"x4": rng_fixed_seed.uniform(size=(100,)) < 0.5,
}
)
# introduce some missing values
@ -4219,7 +4219,7 @@ def test_boost_from_average_with_single_leaf_trees():
assert y.min() <= mean_preds <= y.max()
def test_cegb_split_buffer_clean():
def test_cegb_split_buffer_clean(rng_fixed_seed):
# modified from https://github.com/microsoft/LightGBM/issues/3679#issuecomment-938652811
# and https://github.com/microsoft/LightGBM/pull/5087
# test that the ``splits_per_leaf_`` of CEGB is cleaned before training a new tree
@ -4228,11 +4228,9 @@ def test_cegb_split_buffer_clean():
# Check failed: (best_split_info.left_count) > (0)
R, C = 1000, 100
seed = 29
np.random.seed(seed)
data = np.random.randn(R, C)
data = rng_fixed_seed.standard_normal(size=(R, C))
for i in range(1, C):
data[i] += data[0] * np.random.randn()
data[i] += data[0] * rng_fixed_seed.standard_normal()
N = int(0.8 * len(data))
train_data = data[:N]

View file

@ -340,7 +340,7 @@ def test_grid_search():
assert evals_result == grid.best_estimator_.evals_result_
def test_random_search():
def test_random_search(rng):
X, y = load_iris(return_X_y=True)
y = y.astype(str)  # utilize label encoder at its max power
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
@ -349,8 +349,8 @@ def test_random_search():
params = {"subsample": 0.8, "subsample_freq": 1}
param_dist = {
"boosting_type": ["rf", "gbdt"],
"n_estimators": [np.random.randint(low=3, high=10) for i in range(n_iter)],
"reg_alpha": [np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)],
"n_estimators": rng.integers(low=3, high=10, size=(n_iter,)).tolist(),
"reg_alpha": rng.uniform(low=0.01, high=0.06, size=(n_iter,)).tolist(),
}
fit_params = {"eval_set": [(X_val, y_val)], "eval_metric": constant_metric, "callbacks": [lgb.early_stopping(2)]}
rand = RandomizedSearchCV(
@ -556,29 +556,29 @@ def test_feature_importances_type():
assert importance_split_top1 != importance_gain_top1
def test_pandas_categorical():
# why fixed seed?
# sometimes there is no difference how cols are treated (cat or not cat)
def test_pandas_categorical(rng_fixed_seed):
pd = pytest.importorskip("pandas")
np.random.seed(42) # sometimes there is no difference how cols are treated (cat or not cat)
X = pd.DataFrame(
{
"A": np.random.permutation(["a", "b", "c", "d"] * 75), # str
"B": np.random.permutation([1, 2, 3] * 100), # int
"C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float
"D": np.random.permutation([True, False] * 150), # bool
"E": pd.Categorical(np.random.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
"A": rng_fixed_seed.permutation(["a", "b", "c", "d"] * 75), # str
"B": rng_fixed_seed.permutation([1, 2, 3] * 100), # int
"C": rng_fixed_seed.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60), # float
"D": rng_fixed_seed.permutation([True, False] * 150), # bool
"E": pd.Categorical(rng_fixed_seed.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True),
}
) # str and ordered categorical
y = np.random.permutation([0, 1] * 150)
y = rng_fixed_seed.permutation([0, 1] * 150)
X_test = pd.DataFrame(
{
"A": np.random.permutation(["a", "b", "e"] * 20), # unseen category
"B": np.random.permutation([1, 3] * 30),
"C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15),
"D": np.random.permutation([True, False] * 30),
"E": pd.Categorical(np.random.permutation(["z", "y"] * 30), ordered=True),
"A": rng_fixed_seed.permutation(["a", "b", "e"] * 20), # unseen category
"B": rng_fixed_seed.permutation([1, 3] * 30),
"C": rng_fixed_seed.permutation([0.1, -0.1, 0.2, 0.2] * 15),
"D": rng_fixed_seed.permutation([True, False] * 30),
"E": pd.Categorical(rng_fixed_seed.permutation(["z", "y"] * 30), ordered=True),
}
)
np.random.seed() # reset seed
cat_cols_actual = ["A", "B", "C", "D"]
cat_cols_to_store = cat_cols_actual + ["E"]
X[cat_cols_actual] = X[cat_cols_actual].astype("category")
@ -620,21 +620,21 @@ def test_pandas_categorical():
assert gbm6.booster_.pandas_categorical == cat_values
def test_pandas_sparse():
def test_pandas_sparse(rng):
pd = pytest.importorskip("pandas")
X = pd.DataFrame(
{
"A": pd.arrays.SparseArray(np.random.permutation([0, 1, 2] * 100)),
"B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
"C": pd.arrays.SparseArray(np.random.permutation([True, False] * 150)),
"A": pd.arrays.SparseArray(rng.permutation([0, 1, 2] * 100)),
"B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
"C": pd.arrays.SparseArray(rng.permutation([True, False] * 150)),
}
)
y = pd.Series(pd.arrays.SparseArray(np.random.permutation([0, 1] * 150)))
y = pd.Series(pd.arrays.SparseArray(rng.permutation([0, 1] * 150)))
X_test = pd.DataFrame(
{
"A": pd.arrays.SparseArray(np.random.permutation([0, 2] * 30)),
"B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
"C": pd.arrays.SparseArray(np.random.permutation([True, False] * 30)),
"A": pd.arrays.SparseArray(rng.permutation([0, 2] * 30)),
"B": pd.arrays.SparseArray(rng.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
"C": pd.arrays.SparseArray(rng.permutation([True, False] * 30)),
}
)
for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]):
@ -1073,11 +1073,11 @@ def test_multiple_eval_metrics():
assert "binary_logloss" in gbm.evals_result_["training"]
def test_nan_handle():
def test_nan_handle(rng):
nrows = 100
ncols = 10
X = np.random.randn(nrows, ncols)
y = np.random.randn(nrows) + np.full(nrows, 1e30)
X = rng.standard_normal(size=(nrows, ncols))
y = rng.standard_normal(size=(nrows,)) + np.full(nrows, 1e30)
weight = np.zeros(nrows)
params = {"n_estimators": 20, "verbose": -1}
params_fit = {"X": X, "y": y, "sample_weight": weight, "eval_set": (X, y), "callbacks": [lgb.early_stopping(5)]}
@ -1276,6 +1276,20 @@ def test_check_is_fitted():
check_is_fitted(model)
@pytest.mark.parametrize("estimator_class", [lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker])
@pytest.mark.parametrize("max_depth", [3, 4, 5, 8])
def test_max_depth_warning_is_never_raised(capsys, estimator_class, max_depth):
X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
params = {"n_estimators": 1, "max_depth": max_depth, "verbose": 0}
if estimator_class is lgb.LGBMModel:
estimator_class(**{**params, "objective": "binary"}).fit(X, y)
elif estimator_class is lgb.LGBMRanker:
estimator_class(**params).fit(X, y, group=np.ones(X.shape[0]))
else:
estimator_class(**params).fit(X, y)
assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
def test_sklearn_integration(estimator, check):
estimator.set_params(min_child_samples=1, min_data_in_bin=1)
@ -1410,13 +1424,13 @@ def test_validate_features(task):
@pytest.mark.parametrize("X_type", ["dt_DataTable", "list2d", "numpy", "scipy_csc", "scipy_csr", "pd_DataFrame"])
@pytest.mark.parametrize("y_type", ["list1d", "numpy", "pd_Series", "pd_DataFrame"])
@pytest.mark.parametrize("task", ["binary-classification", "multiclass-classification", "regression"])
def test_classification_and_regression_minimally_work_with_all_all_accepted_data_types(X_type, y_type, task):
def test_classification_and_regression_minimally_work_with_all_all_accepted_data_types(X_type, y_type, task, rng):
if any(t.startswith("pd_") for t in [X_type, y_type]) and not PANDAS_INSTALLED:
pytest.skip("pandas is not installed")
if any(t.startswith("dt_") for t in [X_type, y_type]) and not DATATABLE_INSTALLED:
pytest.skip("datatable is not installed")
X, y, g = _create_data(task, n_samples=2_000)
weights = np.abs(np.random.randn(y.shape[0]))
weights = np.abs(rng.standard_normal(size=(y.shape[0],)))
if task == "binary-classification" or task == "regression":
init_score = np.full_like(y, np.mean(y))
@ -1487,13 +1501,13 @@ def test_classification_and_regression_minimally_work_with_all_all_accepted_data
@pytest.mark.parametrize("X_type", ["dt_DataTable", "list2d", "numpy", "scipy_csc", "scipy_csr", "pd_DataFrame"])
@pytest.mark.parametrize("y_type", ["list1d", "numpy", "pd_DataFrame", "pd_Series"])
@pytest.mark.parametrize("g_type", ["list1d_float", "list1d_int", "numpy", "pd_Series"])
def test_ranking_minimally_works_with_all_all_accepted_data_types(X_type, y_type, g_type):
def test_ranking_minimally_works_with_all_all_accepted_data_types(X_type, y_type, g_type, rng):
if any(t.startswith("pd_") for t in [X_type, y_type, g_type]) and not PANDAS_INSTALLED:
pytest.skip("pandas is not installed")
if any(t.startswith("dt_") for t in [X_type, y_type, g_type]) and not DATATABLE_INSTALLED:
pytest.skip("datatable is not installed")
X, y, g = _create_data(task="ranking", n_samples=1_000)
weights = np.abs(np.random.randn(y.shape[0]))
weights = np.abs(rng.standard_normal(size=(y.shape[0],)))
init_score = np.full_like(y, np.mean(y))
X_valid = X * 2