зеркало из https://github.com/microsoft/LightGBM.git
[CUDA] consolidate CUDA versions (#5677)
* [ci] speed up if-else, swig, and lint conda setup * add 'source activate' * python constraint * start removing cuda v1 * comment out CI * remove more references * revert some unnecessaary changes * revert a few more mistakes * revert another change that ignored params * sigh * remove CUDATreeLearner * fix tests, docs * fix quoting in setup.py * restore all CI * Apply suggestions from code review Co-authored-by: shiyu1994 <shiyu_k1994@qq.com> * Apply suggestions from code review * completely remove cuda_exp, update docs --------- Co-authored-by: shiyu1994 <shiyu_k1994@qq.com>
This commit is contained in:
Родитель
5ffd757119
Коммит
4f47547c88
|
@ -106,7 +106,7 @@ else # Linux
|
|||
|| exit -1
|
||||
fi
|
||||
fi
|
||||
if [[ $TASK == "cuda" || $TASK == "cuda_exp" ]]; then
|
||||
if [[ $TASK == "cuda" ]]; then
|
||||
echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
|
||||
apt-get update
|
||||
apt-get install --no-install-recommends -y \
|
||||
|
|
35
.ci/test.sh
35
.ci/test.sh
|
@ -201,41 +201,24 @@ if [[ $TASK == "gpu" ]]; then
|
|||
elif [[ $METHOD == "source" ]]; then
|
||||
cmake -DUSE_GPU=ON ..
|
||||
fi
|
||||
elif [[ $TASK == "cuda" || $TASK == "cuda_exp" ]]; then
|
||||
if [[ $TASK == "cuda" ]]; then
|
||||
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
|
||||
grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
|
||||
else
|
||||
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda_exp";/' $BUILD_DIRECTORY/include/LightGBM/config.h
|
||||
grep -q 'std::string device_type = "cuda_exp"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
|
||||
# by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
|
||||
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
|
||||
grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
|
||||
fi
|
||||
elif [[ $TASK == "cuda" ]]; then
|
||||
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
|
||||
grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
|
||||
# by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
|
||||
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
|
||||
grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
|
||||
if [[ $METHOD == "pip" ]]; then
|
||||
cd $BUILD_DIRECTORY/python-package && python setup.py sdist || exit -1
|
||||
if [[ $TASK == "cuda" ]]; then
|
||||
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda || exit -1
|
||||
else
|
||||
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda-exp || exit -1
|
||||
fi
|
||||
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda || exit -1
|
||||
pytest $BUILD_DIRECTORY/tests/python_package_test || exit -1
|
||||
exit 0
|
||||
elif [[ $METHOD == "wheel" ]]; then
|
||||
if [[ $TASK == "cuda" ]]; then
|
||||
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda || exit -1
|
||||
else
|
||||
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda-exp || exit -1
|
||||
fi
|
||||
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda || exit -1
|
||||
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER*.whl -v || exit -1
|
||||
pytest $BUILD_DIRECTORY/tests || exit -1
|
||||
exit 0
|
||||
elif [[ $METHOD == "source" ]]; then
|
||||
if [[ $TASK == "cuda" ]]; then
|
||||
cmake -DUSE_CUDA=ON ..
|
||||
else
|
||||
cmake -DUSE_CUDA_EXP=ON ..
|
||||
fi
|
||||
cmake -DUSE_CUDA=ON ..
|
||||
fi
|
||||
elif [[ $TASK == "mpi" ]]; then
|
||||
if [[ $METHOD == "pip" ]]; then
|
||||
|
|
|
@ -28,31 +28,21 @@ jobs:
|
|||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- method: source
|
||||
compiler: gcc
|
||||
python_version: "3.8"
|
||||
cuda_version: "11.7.1"
|
||||
task: cuda
|
||||
- method: pip
|
||||
compiler: clang
|
||||
python_version: "3.9"
|
||||
cuda_version: "10.0"
|
||||
task: cuda
|
||||
- method: wheel
|
||||
compiler: gcc
|
||||
python_version: "3.10"
|
||||
cuda_version: "9.0"
|
||||
cuda_version: "11.7.1"
|
||||
task: cuda
|
||||
- method: source
|
||||
compiler: gcc
|
||||
python_version: "3.8"
|
||||
cuda_version: "11.7.1"
|
||||
task: cuda_exp
|
||||
cuda_version: "10.0"
|
||||
task: cuda
|
||||
- method: pip
|
||||
compiler: clang
|
||||
python_version: "3.9"
|
||||
cuda_version: "10.0"
|
||||
task: cuda_exp
|
||||
cuda_version: "11.7.1"
|
||||
task: cuda
|
||||
steps:
|
||||
- name: Setup or update software on host machine
|
||||
run: |
|
||||
|
|
|
@ -4,8 +4,7 @@ option(USE_GPU "Enable GPU-accelerated training" OFF)
|
|||
option(USE_SWIG "Enable SWIG to generate Java API" OFF)
|
||||
option(USE_HDFS "Enable HDFS support (EXPERIMENTAL)" OFF)
|
||||
option(USE_TIMETAG "Set to ON to output time costs" OFF)
|
||||
option(USE_CUDA "Enable CUDA-accelerated training (EXPERIMENTAL)" OFF)
|
||||
option(USE_CUDA_EXP "Enable CUDA-accelerated training with more acceleration (EXPERIMENTAL)" OFF)
|
||||
option(USE_CUDA "Enable CUDA-accelerated training " OFF)
|
||||
option(USE_DEBUG "Set to ON for Debug mode" OFF)
|
||||
option(USE_SANITIZER "Use santizer flags" OFF)
|
||||
set(
|
||||
|
@ -31,7 +30,7 @@ elseif(USE_SWIG)
|
|||
cmake_minimum_required(VERSION 3.8)
|
||||
elseif(USE_GPU OR APPLE)
|
||||
cmake_minimum_required(VERSION 3.2)
|
||||
elseif(USE_CUDA OR USE_CUDA_EXP)
|
||||
elseif(USE_CUDA)
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
|
@ -137,7 +136,7 @@ else()
|
|||
add_definitions(-DUSE_SOCKET)
|
||||
endif()
|
||||
|
||||
if(USE_CUDA OR USE_CUDA_EXP)
|
||||
if(USE_CUDA)
|
||||
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
|
||||
enable_language(CUDA)
|
||||
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
|
||||
|
@ -192,12 +191,8 @@ if(__INTEGRATE_OPENCL)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_CUDA OR USE_CUDA_EXP)
|
||||
if(USE_CUDA)
|
||||
find_package(CUDA 9.0 REQUIRED)
|
||||
else()
|
||||
find_package(CUDA 10.0 REQUIRED)
|
||||
endif()
|
||||
if(USE_CUDA)
|
||||
find_package(CUDA 10.0 REQUIRED)
|
||||
include_directories(${CUDA_INCLUDE_DIRS})
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS} -Xcompiler=-fPIC -Xcompiler=-Wall")
|
||||
|
||||
|
@ -224,11 +219,7 @@ if(USE_CUDA OR USE_CUDA_EXP)
|
|||
endif()
|
||||
message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
|
||||
|
||||
if(USE_CUDA)
|
||||
add_definitions(-DUSE_CUDA)
|
||||
elseif(USE_CUDA_EXP)
|
||||
add_definitions(-DUSE_CUDA_EXP)
|
||||
endif()
|
||||
add_definitions(-DUSE_CUDA)
|
||||
|
||||
if(NOT DEFINED CMAKE_CUDA_STANDARD)
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
|
@ -411,10 +402,8 @@ file(
|
|||
src/objective/*.cpp
|
||||
src/network/*.cpp
|
||||
src/treelearner/*.cpp
|
||||
if(USE_CUDA OR USE_CUDA_EXP)
|
||||
if(USE_CUDA)
|
||||
src/treelearner/*.cu
|
||||
endif()
|
||||
if(USE_CUDA_EXP)
|
||||
src/boosting/cuda/*.cpp
|
||||
src/boosting/cuda/*.cu
|
||||
src/metric/cuda/*.cpp
|
||||
|
@ -549,7 +538,7 @@ if(__INTEGRATE_OPENCL)
|
|||
target_link_libraries(lightgbm_objs PUBLIC ${INTEGRATED_OPENCL_LIBRARIES} ${CMAKE_DL_LIBS})
|
||||
endif()
|
||||
|
||||
if(USE_CUDA OR USE_CUDA_EXP)
|
||||
if(USE_CUDA)
|
||||
# Disable cmake warning about policy CMP0104. Refer to issue #3754 and PR #4268.
|
||||
# Custom target properties does not propagate, thus we need to specify for
|
||||
# each target that contains or depends on cuda source.
|
||||
|
|
|
@ -605,8 +605,8 @@ Docker
|
|||
|
||||
Refer to `GPU Docker folder <https://github.com/microsoft/LightGBM/tree/master/docker/gpu>`__.
|
||||
|
||||
Build CUDA Version (Experimental)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Build CUDA Version
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The `original GPU build <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL.
|
||||
|
||||
|
@ -621,7 +621,7 @@ On Linux a CUDA version of LightGBM can be built using **CUDA**, **CMake** and *
|
|||
|
||||
The following dependencies should be installed before compilation:
|
||||
|
||||
- **CUDA** 9.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers.
|
||||
- **CUDA** 10.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers.
|
||||
|
||||
- **CMake** 3.16 or later.
|
||||
|
||||
|
@ -636,8 +636,6 @@ To build LightGBM CUDA version, run the following commands:
|
|||
cmake -DUSE_CUDA=1 ..
|
||||
make -j4
|
||||
|
||||
Recently, a new CUDA version with better efficiency is implemented as an experimental feature. To build the new CUDA version, replace ``-DUSE_CUDA`` with ``-DUSE_CUDA_EXP`` in the above commands. Please note that new version requires **CUDA** 10.0 or later libraries.
|
||||
|
||||
**Note**: glibc >= 2.14 is required.
|
||||
|
||||
**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
|
||||
|
|
|
@ -205,9 +205,15 @@ Core Parameters
|
|||
|
||||
- **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors
|
||||
|
||||
- ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">🔗︎</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, ``cuda``, ``cuda_exp``, aliases: ``device``
|
||||
- ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">🔗︎</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, ``cuda``, aliases: ``device``
|
||||
|
||||
- device for the tree learning, you can use GPU to achieve the faster learning
|
||||
- device for the tree learning
|
||||
|
||||
- ``cpu`` supports all LightGBM functionality and is portable across the widest range of operating systems and hardware
|
||||
|
||||
- ``cuda`` offers faster training than ``gpu`` or ``cpu``, but only works on GPUs supporting CUDA
|
||||
|
||||
- ``gpu`` can be faster than ``cpu`` and works on a wider range of GPUs than CUDA
|
||||
|
||||
- **Note**: it is recommended to use the smaller ``max_bin`` (e.g. 63) to get the better speed up
|
||||
|
||||
|
@ -215,10 +221,6 @@ Core Parameters
|
|||
|
||||
- **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
|
||||
|
||||
- **Note**: ``cuda_exp`` is an experimental CUDA version, the installation guide for ``cuda_exp`` is identical with ``cuda``
|
||||
|
||||
- **Note**: ``cuda_exp`` is faster than ``cuda`` and will replace ``cuda`` in the future
|
||||
|
||||
- ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">🔗︎</a>`, default = ``None``, type = int, aliases: ``random_seed``, ``random_state``
|
||||
|
||||
- this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``, etc.
|
||||
|
|
|
@ -480,13 +480,13 @@ class MultiValBin {
|
|||
|
||||
virtual MultiValBin* Clone() = 0;
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
virtual const void* GetRowWiseData(uint8_t* bit_type,
|
||||
size_t* total_size,
|
||||
bool* is_sparse,
|
||||
const void** out_data_ptr,
|
||||
uint8_t* data_ptr_bit_type) const = 0;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
};
|
||||
|
||||
inline uint32_t BinMapper::ValueToBin(double value) const {
|
||||
|
|
|
@ -223,14 +223,15 @@ struct Config {
|
|||
|
||||
// [doc-only]
|
||||
// type = enum
|
||||
// options = cpu, gpu, cuda, cuda_exp
|
||||
// options = cpu, gpu, cuda
|
||||
// alias = device
|
||||
// desc = device for the tree learning, you can use GPU to achieve the faster learning
|
||||
// desc = device for the tree learning
|
||||
// desc = ``cpu`` supports all LightGBM functionality and is portable across the widest range of operating systems and hardware
|
||||
// desc = ``cuda`` offers faster training than ``gpu`` or ``cpu``, but only works on GPUs supporting CUDA
|
||||
// desc = ``gpu`` can be faster than ``cpu`` and works on a wider range of GPUs than CUDA
|
||||
// desc = **Note**: it is recommended to use the smaller ``max_bin`` (e.g. 63) to get the better speed up
|
||||
// desc = **Note**: for the faster speed, GPU uses 32-bit float point to sum up by default, so this may affect the accuracy for some tasks. You can set ``gpu_use_dp=true`` to enable 64-bit float point, but it will slow down the training
|
||||
// desc = **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
|
||||
// desc = **Note**: ``cuda_exp`` is an experimental CUDA version, the installation guide for ``cuda_exp`` is identical with ``cuda``
|
||||
// desc = **Note**: ``cuda_exp`` is faster than ``cuda`` and will replace ``cuda`` in the future
|
||||
std::string device_type = "cpu";
|
||||
|
||||
// [doc-only]
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
@ -577,5 +577,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values,
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
|
||||
|
@ -137,4 +137,4 @@ class CUDAColumnData {
|
|||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_METADATA_HPP_
|
||||
|
@ -55,4 +55,4 @@ class CUDAMetadata {
|
|||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_METRIC_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/metric.h>
|
||||
|
||||
|
@ -36,6 +36,6 @@ class CUDAMetricInterface: public HOST_METRIC {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_utils.h>
|
||||
#include <LightGBM/objective_function.h>
|
||||
|
@ -73,6 +73,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
@ -69,6 +69,6 @@ class CUDARandom {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
|
||||
|
@ -176,4 +176,4 @@ class CUDARowData {
|
|||
} // namespace LightGBM
|
||||
#endif // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
|
||||
|
@ -102,4 +102,4 @@ class CUDASplitInfo {
|
|||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_
|
||||
#define LIGHTGBM_CUDA_CUDA_TREE_HPP_
|
||||
|
@ -170,4 +170,4 @@ class CUDATree : public Tree {
|
|||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_TREE_HPP_
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -6,20 +6,15 @@
|
|||
#ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_
|
||||
#define LIGHTGBM_CUDA_CUDA_UTILS_H_
|
||||
|
||||
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdio.h>
|
||||
#include <LightGBM/utils/log.h>
|
||||
#endif // USE_CUDA || USE_CUDA_EXP
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#include <vector>
|
||||
#endif // USE_CUDA_EXP
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#define CUDASUCCESS_OR_FATAL(ans) { gpuAssert((ans), __FILE__, __LINE__); }
|
||||
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) {
|
||||
if (code != cudaSuccess) {
|
||||
|
@ -27,9 +22,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort =
|
|||
if (abort) exit(code);
|
||||
}
|
||||
}
|
||||
#endif // USE_CUDA || USE_CUDA_EXP
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#define CUDASUCCESS_OR_FATAL_OUTER(ans) { gpuAssert((ans), file, line); }
|
||||
|
||||
void SetCUDADevice(int gpu_device_id, const char* file, int line);
|
||||
|
@ -184,8 +177,8 @@ class CUDAVector {
|
|||
size_t size_;
|
||||
};
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_CUDA_CUDA_UTILS_H_
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
#include <LightGBM/utils/common.h>
|
||||
|
||||
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
@ -43,7 +43,7 @@ struct CHAllocator {
|
|||
T* ptr;
|
||||
if (n == 0) return NULL;
|
||||
n = SIZE_ALIGNED(n);
|
||||
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#ifdef USE_CUDA
|
||||
if (LGBM_config_::current_device == lgbm_device_cuda) {
|
||||
cudaError_t ret = cudaHostAlloc(&ptr, n*sizeof(T), cudaHostAllocPortable);
|
||||
if (ret != cudaSuccess) {
|
||||
|
@ -62,7 +62,7 @@ struct CHAllocator {
|
|||
void deallocate(T* p, std::size_t n) {
|
||||
(void)n; // UNUSED
|
||||
if (p == NULL) return;
|
||||
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#ifdef USE_CUDA
|
||||
if (LGBM_config_::current_device == lgbm_device_cuda) {
|
||||
cudaPointerAttributes attributes;
|
||||
cudaPointerGetAttributes(&attributes, p);
|
||||
|
|
|
@ -277,13 +277,13 @@ class Metadata {
|
|||
/*! \brief Disable copy */
|
||||
Metadata(const Metadata&) = delete;
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
CUDAMetadata* cuda_metadata() const { return cuda_metadata_.get(); }
|
||||
|
||||
void CreateCUDAMetadata(const int gpu_device_id);
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
private:
|
||||
/*! \brief Load weights from file */
|
||||
|
@ -329,9 +329,9 @@ class Metadata {
|
|||
bool weight_load_from_file_;
|
||||
bool query_load_from_file_;
|
||||
bool init_score_load_from_file_;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
std::unique_ptr<CUDAMetadata> cuda_metadata_;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
};
|
||||
|
||||
|
||||
|
@ -910,13 +910,13 @@ class Dataset {
|
|||
return feature_groups_[feature_group_index]->feature_min_bin(sub_feature_index);
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
const CUDAColumnData* cuda_column_data() const {
|
||||
return cuda_column_data_.get();
|
||||
}
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
private:
|
||||
void CreateCUDAColumnData();
|
||||
|
@ -968,9 +968,9 @@ class Dataset {
|
|||
/*! \brief mutex for threading safe call */
|
||||
std::mutex mutex_;
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
std::unique_ptr<CUDAColumnData> cuda_column_data_;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
std::string parser_config_str_;
|
||||
};
|
||||
|
|
|
@ -97,7 +97,7 @@ class ObjectiveFunction {
|
|||
*/
|
||||
virtual bool IsCUDAObjective() const { return false; }
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
/*!
|
||||
* \brief Convert output for CUDA version
|
||||
*/
|
||||
|
@ -107,7 +107,7 @@ class ObjectiveFunction {
|
|||
|
||||
virtual bool NeedConvertOutputCUDA () const { return false; }
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
};
|
||||
|
||||
} // namespace LightGBM
|
||||
|
|
|
@ -38,9 +38,9 @@ class SampleStrategy {
|
|||
|
||||
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; }
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
|
||||
objective_function_ = objective_function;
|
||||
|
@ -72,10 +72,10 @@ class SampleStrategy {
|
|||
/*! \brief whether need to resize the gradient vectors */
|
||||
bool need_resize_gradients_;
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda_exp */
|
||||
#ifdef USE_CUDA
|
||||
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */
|
||||
CUDAVector<data_size_t> cuda_bag_data_indices_;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
};
|
||||
|
||||
} // namespace LightGBM
|
||||
|
|
|
@ -126,7 +126,7 @@ class MultiValBinWrapper {
|
|||
}
|
||||
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
const void* GetRowWiseData(
|
||||
uint8_t* bit_type,
|
||||
size_t* total_size,
|
||||
|
@ -142,7 +142,7 @@ class MultiValBinWrapper {
|
|||
return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type);
|
||||
}
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
private:
|
||||
bool is_use_subcol_ = false;
|
||||
|
@ -183,9 +183,9 @@ struct TrainingShareStates {
|
|||
|
||||
const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; }
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; }
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
bool IsSparseRowwise() {
|
||||
return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
|
||||
|
@ -235,7 +235,7 @@ struct TrainingShareStates {
|
|||
}
|
||||
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
const void* GetRowWiseData(uint8_t* bit_type,
|
||||
size_t* total_size,
|
||||
bool* is_sparse,
|
||||
|
@ -250,13 +250,13 @@ struct TrainingShareStates {
|
|||
return nullptr;
|
||||
}
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
private:
|
||||
std::vector<uint32_t> feature_hist_offsets_;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
std::vector<uint32_t> column_hist_offsets_;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
int num_hist_total_bin_ = 0;
|
||||
std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
|
||||
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
|
||||
|
|
|
@ -319,9 +319,9 @@ class Tree {
|
|||
|
||||
inline bool is_linear() const { return is_linear_; }
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
inline bool is_cuda_tree() const { return is_cuda_tree_; }
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
inline void SetIsLinear(bool is_linear) {
|
||||
is_linear_ = is_linear;
|
||||
|
@ -532,10 +532,10 @@ class Tree {
|
|||
std::vector<std::vector<int>> leaf_features_;
|
||||
/* \brief features used in leaf linear models; indexing is relative to used_features_ */
|
||||
std::vector<std::vector<int>> leaf_features_inner_;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
/*! \brief Marks whether this tree is a CUDATree */
|
||||
bool is_cuda_tree_;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
};
|
||||
|
||||
inline void Tree::Split(int leaf, int feature, int real_feature,
|
||||
|
|
|
@ -121,11 +121,9 @@ Build CUDA Version
|
|||
|
||||
All requirements from `Build from Sources section <#build-from-sources>`__ apply for this installation option as well, and `CMake`_ (version 3.16 or higher) is strongly required.
|
||||
|
||||
**CUDA** library (version 9.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version-experimental>`__.
|
||||
**CUDA** library (version 10.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version>`__.
|
||||
|
||||
Recently, a new CUDA version with better efficiency is implemented as an experimental feature. To build the new CUDA version, replace ``--cuda`` with ``--cuda-exp`` in the above commands. Please note that new version requires **CUDA** 10.0 or later libraries. Note that this new version uses twice the memory, since it stores data row-wise as well as column-wise in memory to improve performance (see this `issue <https://github.com/microsoft/LightGBM/issues/5318>`__ for discussion).
|
||||
|
||||
To use the regular or experimental CUDA versions within Python, pass ``{"device": "cuda"}`` or ``{"device": "cuda_exp"}`` respectively as parameters.
|
||||
To use the CUDA version within Python, pass ``{"device": "cuda"}`` in parameters.
|
||||
|
||||
Build HDFS Version
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
@ -211,8 +209,6 @@ Run ``python setup.py install --gpu`` to enable GPU support. All requirements fr
|
|||
|
||||
Run ``python setup.py install --cuda`` to enable CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
|
||||
|
||||
Run ``python setup.py install --cuda-exp`` to enable the new experimental version of CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
|
||||
|
||||
Run ``python setup.py install --hdfs`` to enable HDFS support. All requirements from `Build HDFS Version section <#build-hdfs-version>`__ apply for this installation option as well.
|
||||
|
||||
Run ``python setup.py install --bit32``, if you want to use 32-bit version. All requirements from `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__ apply for this installation option as well.
|
||||
|
|
|
@ -21,7 +21,6 @@ LIGHTGBM_OPTIONS = [
|
|||
('integrated-opencl', None, 'Compile integrated OpenCL version'),
|
||||
('gpu', 'g', 'Compile GPU version'),
|
||||
('cuda', None, 'Compile CUDA version'),
|
||||
('cuda-exp', None, 'Compile CUDA Experimental version'),
|
||||
('mpi', None, 'Compile MPI version'),
|
||||
('nomp', None, 'Compile version without OpenMP support'),
|
||||
('hdfs', 'h', 'Compile HDFS version'),
|
||||
|
@ -106,7 +105,6 @@ def compile_cpp(
|
|||
use_mingw: bool = False,
|
||||
use_gpu: bool = False,
|
||||
use_cuda: bool = False,
|
||||
use_cuda_exp: bool = False,
|
||||
use_mpi: bool = False,
|
||||
use_hdfs: bool = False,
|
||||
boost_root: Optional[str] = None,
|
||||
|
@ -148,8 +146,6 @@ def compile_cpp(
|
|||
cmake_cmd.append(f"-DOpenCL_LIBRARY={opencl_library}")
|
||||
elif use_cuda:
|
||||
cmake_cmd.append("-DUSE_CUDA=ON")
|
||||
elif use_cuda_exp:
|
||||
cmake_cmd.append("-DUSE_CUDA_EXP=ON")
|
||||
if use_mpi:
|
||||
cmake_cmd.append("-DUSE_MPI=ON")
|
||||
if nomp:
|
||||
|
@ -171,7 +167,7 @@ def compile_cpp(
|
|||
else:
|
||||
status = 1
|
||||
lib_path = CURRENT_DIR / "compile" / "windows" / "x64" / "DLL" / "lib_lightgbm.dll"
|
||||
if not any((use_gpu, use_cuda, use_cuda_exp, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)):
|
||||
if not any((use_gpu, use_cuda, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)):
|
||||
logger.info("Starting to compile with MSBuild from existing solution file.")
|
||||
platform_toolsets = ("v143", "v142", "v141", "v140")
|
||||
for pt in platform_toolsets:
|
||||
|
@ -235,7 +231,6 @@ class CustomInstall(install):
|
|||
self.integrated_opencl = False
|
||||
self.gpu = False
|
||||
self.cuda = False
|
||||
self.cuda_exp = False
|
||||
self.boost_root = None
|
||||
self.boost_dir = None
|
||||
self.boost_include_dir = None
|
||||
|
@ -260,7 +255,7 @@ class CustomInstall(install):
|
|||
LOG_PATH.touch()
|
||||
if not self.precompile:
|
||||
copy_files(integrated_opencl=self.integrated_opencl, use_gpu=self.gpu)
|
||||
compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_cuda_exp=self.cuda_exp, use_mpi=self.mpi,
|
||||
compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_mpi=self.mpi,
|
||||
use_hdfs=self.hdfs, boost_root=self.boost_root, boost_dir=self.boost_dir,
|
||||
boost_include_dir=self.boost_include_dir, boost_librarydir=self.boost_librarydir,
|
||||
opencl_include_dir=self.opencl_include_dir, opencl_library=self.opencl_library,
|
||||
|
@ -281,7 +276,6 @@ class CustomBdistWheel(bdist_wheel):
|
|||
self.integrated_opencl = False
|
||||
self.gpu = False
|
||||
self.cuda = False
|
||||
self.cuda_exp = False
|
||||
self.boost_root = None
|
||||
self.boost_dir = None
|
||||
self.boost_include_dir = None
|
||||
|
@ -304,7 +298,6 @@ class CustomBdistWheel(bdist_wheel):
|
|||
install.integrated_opencl = self.integrated_opencl
|
||||
install.gpu = self.gpu
|
||||
install.cuda = self.cuda
|
||||
install.cuda_exp = self.cuda_exp
|
||||
install.boost_root = self.boost_root
|
||||
install.boost_dir = self.boost_dir
|
||||
install.boost_include_dir = self.boost_include_dir
|
||||
|
|
|
@ -36,7 +36,7 @@ Application::Application(int argc, char** argv) {
|
|||
Log::Fatal("No training/prediction data, application quit");
|
||||
}
|
||||
|
||||
if (config_.device_type == std::string("cuda") || config_.device_type == std::string("cuda_exp")) {
|
||||
if (config_.device_type == std::string("cuda")) {
|
||||
LGBM_config_::current_device = lgbm_device_cuda;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,33 +47,33 @@ class BaggingSampleStrategy : public SampleStrategy {
|
|||
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
|
||||
// set bagging data to tree learner
|
||||
if (!is_use_subset_) {
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
|
||||
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
} else {
|
||||
// get subset
|
||||
tmp_subset_->ReSize(bag_data_cnt_);
|
||||
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
|
||||
bag_data_cnt_, false);
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
|
||||
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
|
||||
bag_data_cnt_);
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
|
||||
bag_data_cnt_);
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -103,11 +103,11 @@ class BaggingSampleStrategy : public SampleStrategy {
|
|||
bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
|
||||
}
|
||||
bag_data_indices_.resize(num_data_);
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
cuda_bag_data_indices_.Resize(num_data_);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
bagging_runner_.ReSize(num_data_);
|
||||
bagging_rands_.clear();
|
||||
for (int i = 0;
|
||||
|
@ -118,7 +118,7 @@ class BaggingSampleStrategy : public SampleStrategy {
|
|||
double average_bag_rate =
|
||||
(static_cast<double>(bag_data_cnt_) / num_data_) / config_->bagging_freq;
|
||||
is_use_subset_ = false;
|
||||
if (config_->device_type != std::string("cuda_exp")) {
|
||||
if (config_->device_type != std::string("cuda")) {
|
||||
const int group_threshold_usesubset = 100;
|
||||
const double average_bag_rate_threshold = 0.5;
|
||||
if (average_bag_rate <= average_bag_rate_threshold
|
||||
|
@ -141,9 +141,9 @@ class BaggingSampleStrategy : public SampleStrategy {
|
|||
} else {
|
||||
bag_data_cnt_ = num_data_;
|
||||
bag_data_indices_.clear();
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
cuda_bag_data_indices_.Clear();
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
bagging_runner_.ReSize(0);
|
||||
is_use_subset_ = false;
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include "cuda_score_updater.hpp"
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
|
@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include "cuda_score_updater.hpp"
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
|
@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
|
||||
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_utils.h>
|
||||
|
||||
|
@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
|
||||
|
|
|
@ -68,14 +68,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
|
|||
es_first_metric_only_ = config_->first_metric_only;
|
||||
shrinkage_rate_ = config_->learning_rate;
|
||||
|
||||
if (config_->device_type == std::string("cuda") || config_->device_type == std::string("cuda_exp")) {
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
LGBM_config_::current_learner = use_cuda_learner;
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0;
|
||||
CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id));
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
// load forced_splits file
|
||||
|
@ -116,15 +116,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
|
|||
}
|
||||
training_metrics_.shrink_to_fit();
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
num_data_ = train_data_->num_data();
|
||||
|
||||
|
@ -186,11 +186,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
|
|||
}
|
||||
// for a validation dataset, we need its score and metric
|
||||
auto new_score_updater =
|
||||
#ifdef USE_CUDA_EXP
|
||||
config_->device_type == std::string("cuda_exp") ?
|
||||
#ifdef USE_CUDA
|
||||
config_->device_type == std::string("cuda") ?
|
||||
std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_,
|
||||
objective_function_ != nullptr && objective_function_->IsCUDAObjective())) :
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
|
||||
// update score
|
||||
for (int i = 0; i < iter_; ++i) {
|
||||
|
@ -481,15 +481,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
|
|||
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
|
||||
// we need to predict out-of-bag scores of data for boosting
|
||||
if (num_data_ - bag_data_cnt > 0) {
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -503,17 +503,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const {
|
||||
#else
|
||||
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const {
|
||||
#endif // USE_CUDA_EXP
|
||||
#ifdef USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#ifdef USE_CUDA
|
||||
const bool evaluation_on_cuda = metric->IsCUDAMetric();
|
||||
if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
return metric->Eval(score, objective_function_);
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
} else if (boosting_on_gpu_ && !evaluation_on_cuda) {
|
||||
const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_);
|
||||
if (total_size > host_score_.size()) {
|
||||
|
@ -529,7 +529,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor
|
|||
CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__);
|
||||
return metric->Eval(cuda_score_.RawData(), objective_function_);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
std::string GBDT::OutputMetric(int iter) {
|
||||
|
@ -660,14 +660,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
|
|||
num_data = valid_score_updater_[used_idx]->num_data();
|
||||
*out_len = static_cast<int64_t>(num_data) * num_class_;
|
||||
}
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
std::vector<double> host_raw_scores;
|
||||
if (boosting_on_gpu_) {
|
||||
host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0);
|
||||
CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__);
|
||||
raw_scores = host_raw_scores.data();
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
if (objective_function_ != nullptr) {
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (data_size_t i = 0; i < num_data; ++i) {
|
||||
|
@ -730,26 +730,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
|
|||
}
|
||||
training_metrics_.shrink_to_fit();
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
|
||||
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
|
||||
tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
if (train_data != train_data_) {
|
||||
train_data_ = train_data;
|
||||
data_sample_strategy_->UpdateTrainingData(train_data);
|
||||
// not same training data, need reset score and others
|
||||
// create score tracker
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
// update score
|
||||
for (int i = 0; i < iter_; ++i) {
|
||||
|
@ -827,8 +827,8 @@ void GBDT::ResetGradientBuffers() {
|
|||
const bool is_use_subset = data_sample_strategy_->is_use_subset();
|
||||
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
|
||||
if (objective_function_ != nullptr) {
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda") && boosting_on_gpu_) {
|
||||
if (cuda_gradients_.Size() < total_size) {
|
||||
cuda_gradients_.Resize(total_size);
|
||||
cuda_hessians_.Resize(total_size);
|
||||
|
@ -836,16 +836,16 @@ void GBDT::ResetGradientBuffers() {
|
|||
gradients_pointer_ = cuda_gradients_.RawData();
|
||||
hessians_pointer_ = cuda_hessians_.RawData();
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
if (gradients_.size() < total_size) {
|
||||
gradients_.resize(total_size);
|
||||
hessians_.resize(total_size);
|
||||
}
|
||||
gradients_pointer_ = gradients_.data();
|
||||
hessians_pointer_ = hessians_.data();
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
} else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
|
||||
if (gradients_.size() < total_size) {
|
||||
gradients_.resize(total_size);
|
||||
|
|
|
@ -542,7 +542,7 @@ class GBDT : public GBDTBase {
|
|||
/*! \brief Parser config file content */
|
||||
std::string parser_config_str_ = "";
|
||||
|
||||
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#ifdef USE_CUDA
|
||||
/*! \brief First order derivative of training data */
|
||||
std::vector<score_t, CHAllocator<score_t>> gradients_;
|
||||
/*! \brief Second order derivative of training data */
|
||||
|
@ -557,18 +557,18 @@ class GBDT : public GBDTBase {
|
|||
score_t* gradients_pointer_;
|
||||
/*! \brief Pointer to hessian vector, can be on CPU or GPU */
|
||||
score_t* hessians_pointer_;
|
||||
/*! \brief Whether boosting is done on GPU, used for cuda_exp */
|
||||
/*! \brief Whether boosting is done on GPU, used for device_type=cuda */
|
||||
bool boosting_on_gpu_;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
/*! \brief Gradient vector on GPU */
|
||||
CUDAVector<score_t> cuda_gradients_;
|
||||
/*! \brief Hessian vector on GPU */
|
||||
CUDAVector<score_t> cuda_hessians_;
|
||||
/*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */
|
||||
/*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with device_type=cuda */
|
||||
mutable std::vector<double> host_score_;
|
||||
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with cuda_exp */
|
||||
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */
|
||||
mutable CUDAVector<double> cuda_score_;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
/*! \brief Number of training data */
|
||||
data_size_t num_data_;
|
||||
|
|
|
@ -43,33 +43,33 @@ class GOSSStrategy : public SampleStrategy {
|
|||
bag_data_cnt_ = left_cnt;
|
||||
// set bagging data to tree learner
|
||||
if (!is_use_subset_) {
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
|
||||
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
} else {
|
||||
// get subset
|
||||
tmp_subset_->ReSize(bag_data_cnt_);
|
||||
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
|
||||
bag_data_cnt_, false);
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_->device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_->device_type == std::string("cuda")) {
|
||||
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
|
||||
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
|
||||
bag_data_cnt_);
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
|
||||
bag_data_cnt_);
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_algorithms.hpp>
|
||||
|
||||
|
@ -509,4 +509,4 @@ template __device__ double PercentileDevice<double, data_size_t, label_t, double
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_utils.h>
|
||||
|
||||
|
@ -28,4 +28,4 @@ void SetCUDADevice(int gpu_device_id, const char* file, int line) {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -886,7 +886,7 @@ namespace LightGBM {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
template <>
|
||||
const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type,
|
||||
size_t* total_size,
|
||||
|
@ -1081,6 +1081,6 @@ namespace LightGBM {
|
|||
return to_return;
|
||||
}
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
} // namespace LightGBM
|
||||
|
|
|
@ -177,8 +177,6 @@ void GetDeviceType(const std::unordered_map<std::string, std::string>& params, s
|
|||
*device_type = "gpu";
|
||||
} else if (value == std::string("cuda")) {
|
||||
*device_type = "cuda";
|
||||
} else if (value == std::string("cuda_exp")) {
|
||||
*device_type = "cuda_exp";
|
||||
} else {
|
||||
Log::Fatal("Unknown device type %s", value.c_str());
|
||||
}
|
||||
|
@ -260,7 +258,7 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) {
|
|||
GetObjectiveType(params, &objective);
|
||||
GetMetricType(params, objective, &metric);
|
||||
GetDeviceType(params, &device_type);
|
||||
if (device_type == std::string("cuda") || device_type == std::string("cuda_exp")) {
|
||||
if (device_type == std::string("cuda")) {
|
||||
LGBM_config_::current_device = lgbm_device_cuda;
|
||||
}
|
||||
GetTreeLearnerType(params, &tree_learner);
|
||||
|
@ -373,26 +371,21 @@ void Config::CheckParamConflict() {
|
|||
num_leaves = static_cast<int>(full_num_leaves);
|
||||
}
|
||||
}
|
||||
if (device_type == std::string("gpu") || device_type == std::string("cuda")) {
|
||||
if (device_type == std::string("gpu")) {
|
||||
// force col-wise for gpu, and cuda version
|
||||
force_col_wise = true;
|
||||
force_row_wise = false;
|
||||
if (deterministic) {
|
||||
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
|
||||
}
|
||||
} else if (device_type == std::string("cuda_exp")) {
|
||||
// force row-wise for cuda_exp version
|
||||
} else if (device_type == std::string("cuda")) {
|
||||
// force row-wise for cuda version
|
||||
force_col_wise = false;
|
||||
force_row_wise = true;
|
||||
if (deterministic) {
|
||||
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
|
||||
}
|
||||
}
|
||||
// force gpu_use_dp for CUDA
|
||||
if (device_type == std::string("cuda") && !gpu_use_dp) {
|
||||
Log::Warning("CUDA currently requires double precision calculations.");
|
||||
gpu_use_dp = true;
|
||||
}
|
||||
// linear tree learner must be serial type and run on CPU device
|
||||
if (linear_tree) {
|
||||
if (device_type != std::string("cpu")) {
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_column_data.hpp>
|
||||
|
||||
|
@ -308,4 +308,4 @@ void CUDAColumnData::InitColumnMetaInfo() {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
*/
|
||||
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_column_data.hpp>
|
||||
|
||||
|
@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column)
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_metadata.hpp>
|
||||
|
||||
|
@ -89,4 +89,4 @@ void CUDAMetadata::SetInitScore(const double* init_score, data_size_t len) {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_row_data.hpp>
|
||||
|
||||
|
@ -474,4 +474,4 @@ template const uint64_t* CUDARowData::GetPartitionPtr<uint64_t>() const;
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_tree.hpp>
|
||||
|
||||
|
@ -337,4 +337,4 @@ void CUDATree::AsConstantTree(double val) {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
*/
|
||||
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_tree.hpp>
|
||||
|
||||
|
@ -456,4 +456,4 @@ void CUDATree::LaunchAddPredictionToScoreKernel(
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -345,9 +345,9 @@ void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
|
|||
auto features_in_group = OneFeaturePerGroup(used_features);
|
||||
|
||||
auto is_sparse = io_config.is_enable_sparse;
|
||||
if (io_config.device_type == std::string("cuda") || io_config.device_type == std::string("cuda_exp")) {
|
||||
if (io_config.device_type == std::string("cuda")) {
|
||||
LGBM_config_::current_device = lgbm_device_cuda;
|
||||
if ((io_config.device_type == std::string("cuda") || io_config.device_type == std::string("cuda_exp")) && is_sparse) {
|
||||
if ((io_config.device_type == std::string("cuda")) && is_sparse) {
|
||||
Log::Warning("Using sparse features with CUDA is currently not supported.");
|
||||
is_sparse = false;
|
||||
}
|
||||
|
@ -355,8 +355,7 @@ void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
|
|||
|
||||
std::vector<int8_t> group_is_multi_val(used_features.size(), 0);
|
||||
if (io_config.enable_bundle && !used_features.empty()) {
|
||||
bool lgbm_is_gpu_used = io_config.device_type == std::string("gpu") || io_config.device_type == std::string("cuda")
|
||||
|| io_config.device_type == std::string("cuda_exp");
|
||||
bool lgbm_is_gpu_used = io_config.device_type == std::string("gpu") || io_config.device_type == std::string("cuda");
|
||||
features_in_group = FastFeatureBundling(
|
||||
*bin_mappers, sample_non_zero_indices, sample_values, num_per_col,
|
||||
num_sample_col, static_cast<data_size_t>(total_sample_cnt),
|
||||
|
@ -447,14 +446,14 @@ void Dataset::FinishLoad() {
|
|||
}
|
||||
metadata_.FinishLoad();
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (device_type_ == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (device_type_ == std::string("cuda")) {
|
||||
CreateCUDAColumnData();
|
||||
metadata_.CreateCUDAMetadata(gpu_device_id_);
|
||||
} else {
|
||||
cuda_column_data_.reset(nullptr);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
is_finish_load_ = true;
|
||||
}
|
||||
|
||||
|
@ -862,15 +861,15 @@ void Dataset::CopySubrow(const Dataset* fullset,
|
|||
device_type_ = fullset->device_type_;
|
||||
gpu_device_id_ = fullset->gpu_device_id_;
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (device_type_ == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (device_type_ == std::string("cuda")) {
|
||||
if (cuda_column_data_ == nullptr) {
|
||||
cuda_column_data_.reset(new CUDAColumnData(fullset->num_data(), gpu_device_id_));
|
||||
metadata_.CreateCUDAMetadata(gpu_device_id_);
|
||||
}
|
||||
cuda_column_data_->CopySubrow(fullset->cuda_column_data(), used_indices, num_used_indices);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
bool Dataset::SetFloatField(const char* field_name, const float* field_data,
|
||||
|
@ -1508,13 +1507,13 @@ void Dataset::AddFeaturesFrom(Dataset* other) {
|
|||
raw_data_.push_back(other->raw_data_[i]);
|
||||
}
|
||||
}
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (device_type_ == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (device_type_ == std::string("cuda")) {
|
||||
CreateCUDAColumnData();
|
||||
} else {
|
||||
cuda_column_data_ = nullptr;
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
const void* Dataset::GetColWiseData(
|
||||
|
@ -1536,7 +1535,7 @@ const void* Dataset::GetColWiseData(
|
|||
return feature_groups_[feature_group_index]->GetColWiseData(sub_feature_index, bit_type, is_sparse, bin_iterator);
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
void Dataset::CreateCUDAColumnData() {
|
||||
cuda_column_data_.reset(new CUDAColumnData(num_data_, gpu_device_id_));
|
||||
int num_columns = 0;
|
||||
|
@ -1671,6 +1670,6 @@ void Dataset::CreateCUDAColumnData() {
|
|||
feature_to_column);
|
||||
}
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
} // namespace LightGBM
|
||||
|
|
|
@ -279,14 +279,14 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
|
|||
|
||||
dataset->device_type_ = config_.device_type;
|
||||
dataset->gpu_device_id_ = config_.gpu_device_id;
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config_.device_type == std::string("cuda_exp")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config_.device_type == std::string("cuda")) {
|
||||
dataset->CreateCUDAColumnData();
|
||||
dataset->metadata_.CreateCUDAMetadata(dataset->gpu_device_id_);
|
||||
} else {
|
||||
dataset->cuda_column_data_ = nullptr;
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
// check meta data
|
||||
dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices);
|
||||
|
|
|
@ -467,7 +467,7 @@ class DenseBin : public Bin {
|
|||
|
||||
private:
|
||||
data_size_t num_data_;
|
||||
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#ifdef USE_CUDA
|
||||
std::vector<VAL_T, CHAllocator<VAL_T>> data_;
|
||||
#else
|
||||
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
|
||||
|
|
|
@ -18,9 +18,9 @@ Metadata::Metadata() {
|
|||
weight_load_from_file_ = false;
|
||||
query_load_from_file_ = false;
|
||||
init_score_load_from_file_ = false;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
cuda_metadata_ = nullptr;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
void Metadata::Init(const char* data_filename) {
|
||||
|
@ -344,11 +344,11 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) {
|
|||
init_score_[i] = Common::AvoidInf(init_score[i]);
|
||||
}
|
||||
init_score_load_from_file_ = false;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
if (cuda_metadata_ != nullptr) {
|
||||
cuda_metadata_->SetInitScore(init_score_.data(), len);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
void Metadata::InsertInitScores(const double* init_scores, data_size_t start_index, data_size_t len, data_size_t source_size) {
|
||||
|
@ -387,11 +387,11 @@ void Metadata::SetLabel(const label_t* label, data_size_t len) {
|
|||
for (data_size_t i = 0; i < num_data_; ++i) {
|
||||
label_[i] = Common::AvoidInf(label[i]);
|
||||
}
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
if (cuda_metadata_ != nullptr) {
|
||||
cuda_metadata_->SetLabel(label_.data(), len);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
void Metadata::InsertLabels(const label_t* labels, data_size_t start_index, data_size_t len) {
|
||||
|
@ -428,11 +428,11 @@ void Metadata::SetWeights(const label_t* weights, data_size_t len) {
|
|||
}
|
||||
CalculateQueryWeights();
|
||||
weight_load_from_file_ = false;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
if (cuda_metadata_ != nullptr) {
|
||||
cuda_metadata_->SetWeights(weights_.data(), len);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
void Metadata::InsertWeights(const label_t* weights, data_size_t start_index, data_size_t len) {
|
||||
|
@ -477,7 +477,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
|
|||
}
|
||||
CalculateQueryWeights();
|
||||
query_load_from_file_ = false;
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
if (cuda_metadata_ != nullptr) {
|
||||
if (query_weights_.size() > 0) {
|
||||
CHECK_EQ(query_weights_.size(), static_cast<size_t>(num_queries_));
|
||||
|
@ -486,7 +486,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
|
|||
cuda_metadata_->SetQuery(query_boundaries_.data(), nullptr, num_queries_);
|
||||
}
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
void Metadata::InsertQueries(const data_size_t* queries, data_size_t start_index, data_size_t len) {
|
||||
|
@ -635,12 +635,12 @@ void Metadata::FinishLoad() {
|
|||
CalculateQueryBoundaries();
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
void Metadata::CreateCUDAMetadata(const int gpu_device_id) {
|
||||
cuda_metadata_.reset(new CUDAMetadata(gpu_device_id));
|
||||
cuda_metadata_->Init(label_, weights_, query_boundaries_, query_weights_, init_score_);
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
void Metadata::LoadFromMemory(const void* memory) {
|
||||
const char* mem_ptr = reinterpret_cast<const char*>(memory);
|
||||
|
|
|
@ -211,13 +211,13 @@ class MultiValDenseBin : public MultiValBin {
|
|||
|
||||
MultiValDenseBin<VAL_T>* Clone() override;
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
const void* GetRowWiseData(uint8_t* bit_type,
|
||||
size_t* total_size,
|
||||
bool* is_sparse,
|
||||
const void** out_data_ptr,
|
||||
uint8_t* data_ptr_bit_type) const override;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
private:
|
||||
data_size_t num_data_;
|
||||
|
|
|
@ -292,13 +292,13 @@ class MultiValSparseBin : public MultiValBin {
|
|||
MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;
|
||||
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
const void* GetRowWiseData(uint8_t* bit_type,
|
||||
size_t* total_size,
|
||||
bool* is_sparse,
|
||||
const void** out_data_ptr,
|
||||
uint8_t* data_ptr_bit_type) const override;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
private:
|
||||
data_size_t num_data_;
|
||||
|
|
|
@ -382,9 +382,9 @@ void TrainingShareStates::CalcBinOffsets(const std::vector<std::unique_ptr<Featu
|
|||
}
|
||||
num_hist_total_bin_ = static_cast<int>(feature_hist_offsets_.back());
|
||||
}
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
column_hist_offsets_ = *offsets;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
void TrainingShareStates::SetMultiValBin(MultiValBin* bin, data_size_t num_data,
|
||||
|
|
|
@ -53,9 +53,9 @@ Tree::Tree(int max_leaves, bool track_branch_features, bool is_linear)
|
|||
leaf_features_.resize(max_leaves_);
|
||||
leaf_features_inner_.resize(max_leaves_);
|
||||
}
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
is_cuda_tree_ = false;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
}
|
||||
|
||||
int Tree::Split(int leaf, int feature, int real_feature, uint32_t threshold_bin,
|
||||
|
@ -731,9 +731,9 @@ Tree::Tree(const char* str, size_t* used_len) {
|
|||
is_linear_ = false;
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
is_cuda_tree_ = false;
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
if ((num_leaves_ <= 1) && !is_linear_) {
|
||||
return;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_binary_metric.hpp"
|
||||
|
||||
|
@ -28,4 +28,4 @@ std::vector<double> CUDABinaryMetricInterface<HOST_METRIC, CUDA_METRIC>::Eval(co
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_
|
||||
#define LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_metric.hpp>
|
||||
#include <LightGBM/cuda/cuda_utils.h>
|
||||
|
@ -52,6 +52,6 @@ class CUDABinaryLoglossMetric: public CUDABinaryMetricInterface<BinaryLoglossMet
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_binary_metric.hpp"
|
||||
#include "cuda_pointwise_metric.hpp"
|
||||
|
@ -35,4 +35,4 @@ template void CUDAPointwiseMetricInterface<BinaryLoglossMetric, CUDABinaryLoglos
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_algorithms.hpp>
|
||||
|
||||
|
@ -66,4 +66,4 @@ template void CUDAPointwiseMetricInterface<BinaryLoglossMetric, CUDABinaryLoglos
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_
|
||||
#define LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_metric.hpp>
|
||||
#include <LightGBM/cuda/cuda_utils.h>
|
||||
|
@ -38,6 +38,6 @@ class CUDAPointwiseMetricInterface: public CUDAMetricInterface<HOST_METRIC> {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
@ -31,4 +31,4 @@ CUDAL2Metric::CUDAL2Metric(const Config& config): CUDARegressionMetricInterface<
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_METRIC_CUDA_CUDA_REGRESSION_METRIC_HPP_
|
||||
#define LIGHTGBM_METRIC_CUDA_CUDA_REGRESSION_METRIC_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_metric.hpp>
|
||||
#include <LightGBM/cuda/cuda_utils.h>
|
||||
|
@ -54,6 +54,6 @@ class CUDAL2Metric : public CUDARegressionMetricInterface<L2Metric, CUDAL2Metric
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_METRIC_CUDA_CUDA_REGRESSION_METRIC_HPP_
|
||||
|
|
|
@ -17,77 +17,77 @@
|
|||
namespace LightGBM {
|
||||
|
||||
Metric* Metric::CreateMetric(const std::string& type, const Config& config) {
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config.device_type == std::string("cuda_exp") && config.boosting == std::string("gbdt")) {
|
||||
#ifdef USE_CUDA
|
||||
if (config.device_type == std::string("cuda") && config.boosting == std::string("gbdt")) {
|
||||
if (type == std::string("l2")) {
|
||||
return new CUDAL2Metric(config);
|
||||
} else if (type == std::string("rmse")) {
|
||||
return new CUDARMSEMetric(config);
|
||||
} else if (type == std::string("l1")) {
|
||||
Log::Warning("Metric l1 is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric l1 is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new L1Metric(config);
|
||||
} else if (type == std::string("quantile")) {
|
||||
Log::Warning("Metric quantile is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric quantile is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new QuantileMetric(config);
|
||||
} else if (type == std::string("huber")) {
|
||||
Log::Warning("Metric huber is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric huber is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new HuberLossMetric(config);
|
||||
} else if (type == std::string("fair")) {
|
||||
Log::Warning("Metric fair is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric fair is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new FairLossMetric(config);
|
||||
} else if (type == std::string("poisson")) {
|
||||
Log::Warning("Metric poisson is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric poisson is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new PoissonMetric(config);
|
||||
} else if (type == std::string("binary_logloss")) {
|
||||
return new CUDABinaryLoglossMetric(config);
|
||||
} else if (type == std::string("binary_error")) {
|
||||
Log::Warning("Metric binary_error is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric binary_error is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new BinaryErrorMetric(config);
|
||||
} else if (type == std::string("auc")) {
|
||||
Log::Warning("Metric auc is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric auc is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new AUCMetric(config);
|
||||
} else if (type == std::string("average_precision")) {
|
||||
Log::Warning("Metric average_precision is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric average_precision is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new AveragePrecisionMetric(config);
|
||||
} else if (type == std::string("auc_mu")) {
|
||||
Log::Warning("Metric auc_mu is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric auc_mu is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new AucMuMetric(config);
|
||||
} else if (type == std::string("ndcg")) {
|
||||
Log::Warning("Metric ndcg is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric ndcg is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new NDCGMetric(config);
|
||||
} else if (type == std::string("map")) {
|
||||
Log::Warning("Metric map is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric map is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new MapMetric(config);
|
||||
} else if (type == std::string("multi_logloss")) {
|
||||
Log::Warning("Metric multi_logloss is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric multi_logloss is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new MultiSoftmaxLoglossMetric(config);
|
||||
} else if (type == std::string("multi_error")) {
|
||||
Log::Warning("Metric multi_error is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric multi_error is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new MultiErrorMetric(config);
|
||||
} else if (type == std::string("cross_entropy")) {
|
||||
Log::Warning("Metric cross_entropy is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric cross_entropy is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new CrossEntropyMetric(config);
|
||||
} else if (type == std::string("cross_entropy_lambda")) {
|
||||
Log::Warning("Metric cross_entropy_lambda is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric cross_entropy_lambda is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new CrossEntropyLambdaMetric(config);
|
||||
} else if (type == std::string("kullback_leibler")) {
|
||||
Log::Warning("Metric kullback_leibler is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric kullback_leibler is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new KullbackLeiblerDivergence(config);
|
||||
} else if (type == std::string("mape")) {
|
||||
Log::Warning("Metric mape is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric mape is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new MAPEMetric(config);
|
||||
} else if (type == std::string("gamma")) {
|
||||
Log::Warning("Metric gamma is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric gamma is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new GammaMetric(config);
|
||||
} else if (type == std::string("gamma_deviance")) {
|
||||
Log::Warning("Metric gamma_deviance is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric gamma_deviance is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new GammaDevianceMetric(config);
|
||||
} else if (type == std::string("tweedie")) {
|
||||
Log::Warning("Metric tweedie is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
|
||||
Log::Warning("Metric tweedie is not implemented in cuda version. Fall back to evaluation on CPU.");
|
||||
return new TweedieMetric(config);
|
||||
}
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
if (type == std::string("l2")) {
|
||||
return new L2Metric(config);
|
||||
} else if (type == std::string("rmse")) {
|
||||
|
@ -135,9 +135,9 @@ Metric* Metric::CreateMetric(const std::string& type, const Config& config) {
|
|||
} else if (type == std::string("tweedie")) {
|
||||
return new TweedieMetric(config);
|
||||
}
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_binary_objective.hpp"
|
||||
|
||||
|
@ -61,4 +61,4 @@ void CUDABinaryLogloss::Init(const Metadata& metadata, data_size_t num_data) {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -206,4 +206,4 @@ void CUDABinaryLogloss::LaunchResetOVACUDALabelKernel() const {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_
|
||||
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#define GET_GRADIENTS_BLOCK_SIZE_BINARY (1024)
|
||||
#define CALC_INIT_SCORE_BLOCK_SIZE_BINARY (1024)
|
||||
|
@ -58,6 +58,6 @@ class CUDABinaryLogloss : public CUDAObjectiveInterface<BinaryLogloss> {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
||||
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_multiclass_objective.hpp"
|
||||
|
||||
|
@ -59,4 +59,4 @@ const double* CUDAMulticlassOVA::ConvertOutputCUDA(const data_size_t num_data, c
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -105,4 +105,4 @@ const double* CUDAMulticlassSoftmax::LaunchConvertOutputCUDAKernel(
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_MULTICLASS_OBJECTIVE_HPP_
|
||||
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_MULTICLASS_OBJECTIVE_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_objective_function.hpp>
|
||||
|
||||
|
@ -74,5 +74,5 @@ class CUDAMulticlassOVA: public CUDAObjectiveInterface<MulticlassOVA> {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_MULTICLASS_OBJECTIVE_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
@ -64,4 +64,4 @@ void CUDARankXENDCG::GenerateItemRands() const {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_rank_objective.hpp"
|
||||
|
||||
|
@ -658,4 +658,4 @@ void CUDARankXENDCG::LaunchGetGradientsKernel(const double* score, score_t* grad
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_RANK_OBJECTIVE_HPP_
|
||||
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_RANK_OBJECTIVE_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#define NUM_QUERY_PER_BLOCK (10)
|
||||
|
||||
|
@ -118,5 +118,5 @@ class CUDARankXENDCG : public CUDALambdaRankObjectiveInterface<RankXENDCG> {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_RANK_OBJECTIVE_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_regression_objective.hpp"
|
||||
|
||||
|
@ -85,4 +85,4 @@ double CUDARegressionPoissonLoss::LaunchCalcInitScoreKernel(const int class_id)
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_regression_objective.hpp"
|
||||
#include <LightGBM/cuda/cuda_algorithms.hpp>
|
||||
|
@ -353,4 +353,4 @@ const double* CUDARegressionPoissonLoss::LaunchConvertOutputCUDAKernel(const dat
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_REGRESSION_OBJECTIVE_HPP_
|
||||
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_REGRESSION_OBJECTIVE_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#define GET_GRADIENTS_BLOCK_SIZE_REGRESSION (1024)
|
||||
|
||||
|
@ -135,5 +135,5 @@ class CUDARegressionPoissonLoss : public CUDARegressionObjectiveInterface<Regres
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_REGRESSION_OBJECTIVE_HPP_
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
namespace LightGBM {
|
||||
|
||||
ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const Config& config) {
|
||||
#ifdef USE_CUDA_EXP
|
||||
if (config.device_type == std::string("cuda_exp") &&
|
||||
#ifdef USE_CUDA
|
||||
if (config.device_type == std::string("cuda") &&
|
||||
config.data_sample_strategy != std::string("goss") &&
|
||||
config.boosting != std::string("rf")) {
|
||||
if (type == std::string("regression")) {
|
||||
|
@ -27,7 +27,7 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
|
|||
} else if (type == std::string("regression_l1")) {
|
||||
return new CUDARegressionL1loss(config);
|
||||
} else if (type == std::string("quantile")) {
|
||||
Log::Warning("Objective quantile is not implemented in cuda_exp version. Fall back to boosting on CPU.");
|
||||
Log::Warning("Objective quantile is not implemented in cuda version. Fall back to boosting on CPU.");
|
||||
return new RegressionQuantileloss(config);
|
||||
} else if (type == std::string("huber")) {
|
||||
return new CUDARegressionHuberLoss(config);
|
||||
|
@ -46,26 +46,26 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
|
|||
} else if (type == std::string("multiclassova")) {
|
||||
return new CUDAMulticlassOVA(config);
|
||||
} else if (type == std::string("cross_entropy")) {
|
||||
Log::Warning("Objective cross_entropy is not implemented in cuda_exp version. Fall back to boosting on CPU.");
|
||||
Log::Warning("Objective cross_entropy is not implemented in cuda version. Fall back to boosting on CPU.");
|
||||
return new CrossEntropy(config);
|
||||
} else if (type == std::string("cross_entropy_lambda")) {
|
||||
Log::Warning("Objective cross_entropy_lambda is not implemented in cuda_exp version. Fall back to boosting on CPU.");
|
||||
Log::Warning("Objective cross_entropy_lambda is not implemented in cuda version. Fall back to boosting on CPU.");
|
||||
return new CrossEntropyLambda(config);
|
||||
} else if (type == std::string("mape")) {
|
||||
Log::Warning("Objective mape is not implemented in cuda_exp version. Fall back to boosting on CPU.");
|
||||
Log::Warning("Objective mape is not implemented in cuda version. Fall back to boosting on CPU.");
|
||||
return new RegressionMAPELOSS(config);
|
||||
} else if (type == std::string("gamma")) {
|
||||
Log::Warning("Objective gamma is not implemented in cuda_exp version. Fall back to boosting on CPU.");
|
||||
Log::Warning("Objective gamma is not implemented in cuda version. Fall back to boosting on CPU.");
|
||||
return new RegressionGammaLoss(config);
|
||||
} else if (type == std::string("tweedie")) {
|
||||
Log::Warning("Objective tweedie is not implemented in cuda_exp version. Fall back to boosting on CPU.");
|
||||
Log::Warning("Objective tweedie is not implemented in cuda version. Fall back to boosting on CPU.");
|
||||
return new RegressionTweedieLoss(config);
|
||||
} else if (type == std::string("custom")) {
|
||||
Log::Warning("Using customized objective with cuda_exp. This requires copying gradients from CPU to GPU, which can be slow.");
|
||||
Log::Warning("Using customized objective with cuda. This requires copying gradients from CPU to GPU, which can be slow.");
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
if (type == std::string("regression")) {
|
||||
return new RegressionL2loss(config);
|
||||
} else if (type == std::string("regression_l1")) {
|
||||
|
@ -101,9 +101,9 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
|
|||
} else if (type == std::string("custom")) {
|
||||
return nullptr;
|
||||
}
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
}
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
Log::Fatal("Unknown objective type name: %s", type.c_str());
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -383,4 +383,4 @@ void CUDABestSplitFinder::SetUsedFeatureByNode(const std::vector<int8_t>& is_fea
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -1802,4 +1802,4 @@ void CUDABestSplitFinder::LaunchInitCUDARandomKernel() {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_BEST_SPLIT_FINDER_HPP_
|
||||
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_BEST_SPLIT_FINDER_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/bin.h>
|
||||
#include <LightGBM/dataset.h>
|
||||
|
@ -211,5 +211,5 @@ class CUDABestSplitFinder {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_BEST_SPLIT_FINDER_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
@ -370,4 +370,4 @@ void CUDADataPartition::ResetByLeafPred(const std::vector<int>& leaf_pred, int n
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_data_partition.hpp"
|
||||
|
||||
|
@ -1071,4 +1071,4 @@ void CUDADataPartition::LaunchAddPredictionToScoreKernel(const double* leaf_valu
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_DATA_PARTITION_HPP_
|
||||
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_DATA_PARTITION_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/bin.h>
|
||||
#include <LightGBM/meta.h>
|
||||
|
@ -384,5 +384,5 @@ class CUDADataPartition {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_DATA_PARTITION_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_histogram_constructor.hpp"
|
||||
|
||||
|
@ -193,4 +193,4 @@ void CUDAHistogramConstructor::ResetConfig(const Config* config) {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_histogram_constructor.hpp"
|
||||
|
||||
|
@ -429,4 +429,4 @@ void CUDAHistogramConstructor::LaunchSubtractHistogramKernel(
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_HISTOGRAM_CONSTRUCTOR_HPP_
|
||||
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_HISTOGRAM_CONSTRUCTOR_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_row_data.hpp>
|
||||
#include <LightGBM/feature_group.h>
|
||||
|
@ -165,5 +165,5 @@ class CUDAHistogramConstructor {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_HISTOGRAM_CONSTRUCTOR_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_leaf_splits.hpp"
|
||||
|
||||
|
@ -68,4 +68,4 @@ void CUDALeafSplits::Resize(const data_size_t num_data) {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_leaf_splits.hpp"
|
||||
#include <LightGBM/cuda/cuda_algorithms.hpp>
|
||||
|
@ -126,4 +126,4 @@ void CUDALeafSplits::LaunchInitValuesKernal(
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_LEAF_SPLITS_HPP_
|
||||
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_LEAF_SPLITS_HPP_
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_utils.h>
|
||||
#include <LightGBM/bin.h>
|
||||
|
@ -156,5 +156,5 @@ class CUDALeafSplits {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_LEAF_SPLITS_HPP_
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_single_gpu_tree_learner.hpp"
|
||||
|
||||
|
@ -515,4 +515,4 @@ void CUDASingleGPUTreeLearner::CheckSplitValid(
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* license information.
|
||||
*/
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include <LightGBM/cuda/cuda_algorithms.hpp>
|
||||
|
||||
|
@ -258,4 +258,4 @@ void CUDASingleGPUTreeLearner::LaunchConstructBitsetForCategoricalSplitKernel(
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#ifdef USE_CUDA_EXP
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_leaf_splits.hpp"
|
||||
#include "cuda_histogram_constructor.hpp"
|
||||
|
@ -137,7 +137,7 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
|
|||
|
||||
} // namespace LightGBM
|
||||
|
||||
#else // USE_CUDA_EXP
|
||||
#else // USE_CUDA
|
||||
|
||||
// When GPU support is not compiled in, quit with an error message
|
||||
|
||||
|
@ -147,12 +147,12 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
|
|||
public:
|
||||
#pragma warning(disable : 4702)
|
||||
explicit CUDASingleGPUTreeLearner(const Config* tree_config, const bool /*boosting_on_cuda*/) : SerialTreeLearner(tree_config) {
|
||||
Log::Fatal("CUDA Tree Learner experimental version was not enabled in this build.\n"
|
||||
"Please recompile with CMake option -DUSE_CUDA_EXP=1");
|
||||
Log::Fatal("CUDA Tree Learner was not enabled in this build.\n"
|
||||
"Please recompile with CMake option -DUSE_CUDA=1");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA_EXP
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_SINGLE_GPU_TREE_LEARNER_HPP_
|
||||
|
|
|
@ -1,171 +0,0 @@
|
|||
/*!
|
||||
* Copyright (c) 2020 IBM Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
#ifdef USE_CUDA
|
||||
|
||||
#include "cuda_kernel_launcher.h"
|
||||
|
||||
#include <LightGBM/utils/log.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
void cuda_histogram(
|
||||
int histogram_size,
|
||||
data_size_t leaf_num_data,
|
||||
data_size_t num_data,
|
||||
bool use_all_features,
|
||||
bool is_constant_hessian,
|
||||
int num_workgroups,
|
||||
cudaStream_t stream,
|
||||
uint8_t* arg0,
|
||||
uint8_t* arg1,
|
||||
data_size_t arg2,
|
||||
data_size_t* arg3,
|
||||
data_size_t arg4,
|
||||
score_t* arg5,
|
||||
score_t* arg6,
|
||||
score_t arg6_const,
|
||||
char* arg7,
|
||||
volatile int* arg8,
|
||||
void* arg9,
|
||||
size_t exp_workgroups_per_feature) {
|
||||
if (histogram_size == 16) {
|
||||
if (leaf_num_data == num_data) {
|
||||
if (use_all_features) {
|
||||
if (!is_constant_hessian)
|
||||
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
} else {
|
||||
if (!is_constant_hessian)
|
||||
histogram16_fulldata<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram16_fulldata<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
}
|
||||
} else {
|
||||
if (use_all_features) {
|
||||
// seems all features is always enabled, so this should be the same as fulldata
|
||||
if (!is_constant_hessian)
|
||||
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
} else {
|
||||
if (!is_constant_hessian)
|
||||
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
}
|
||||
}
|
||||
} else if (histogram_size == 64) {
|
||||
if (leaf_num_data == num_data) {
|
||||
if (use_all_features) {
|
||||
if (!is_constant_hessian)
|
||||
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
} else {
|
||||
if (!is_constant_hessian)
|
||||
histogram64_fulldata<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram64_fulldata<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
}
|
||||
} else {
|
||||
if (use_all_features) {
|
||||
// seems all features is always enabled, so this should be the same as fulldata
|
||||
if (!is_constant_hessian)
|
||||
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
} else {
|
||||
if (!is_constant_hessian)
|
||||
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (leaf_num_data == num_data) {
|
||||
if (use_all_features) {
|
||||
if (!is_constant_hessian)
|
||||
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
} else {
|
||||
if (!is_constant_hessian)
|
||||
histogram256_fulldata<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram256_fulldata<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
}
|
||||
} else {
|
||||
if (use_all_features) {
|
||||
// seems all features is always enabled, so this should be the same as fulldata
|
||||
if (!is_constant_hessian)
|
||||
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
} else {
|
||||
if (!is_constant_hessian)
|
||||
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
else
|
||||
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
|
||||
arg3, arg4, arg5,
|
||||
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA
|
|
@ -1,70 +0,0 @@
|
|||
/*!
|
||||
* Copyright (c) 2020 IBM Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
#ifndef LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_
|
||||
#define LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <chrono>
|
||||
#include "kernels/histogram_16_64_256.hu" // kernel, acc_type, data_size_t, uchar, score_t
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
struct ThreadData {
|
||||
// device id
|
||||
int device_id;
|
||||
// parameters for cuda_histogram
|
||||
int histogram_size;
|
||||
data_size_t leaf_num_data;
|
||||
data_size_t num_data;
|
||||
bool use_all_features;
|
||||
bool is_constant_hessian;
|
||||
int num_workgroups;
|
||||
cudaStream_t stream;
|
||||
uint8_t* device_features;
|
||||
uint8_t* device_feature_masks;
|
||||
data_size_t* device_data_indices;
|
||||
score_t* device_gradients;
|
||||
score_t* device_hessians;
|
||||
score_t hessians_const;
|
||||
char* device_subhistograms;
|
||||
volatile int* sync_counters;
|
||||
void* device_histogram_outputs;
|
||||
size_t exp_workgroups_per_feature;
|
||||
// cuda events
|
||||
cudaEvent_t* kernel_start;
|
||||
cudaEvent_t* kernel_wait_obj;
|
||||
std::chrono::duration<double, std::milli>* kernel_input_wait_time;
|
||||
// copy histogram
|
||||
size_t output_size;
|
||||
char* host_histogram_output;
|
||||
cudaEvent_t* histograms_wait_obj;
|
||||
};
|
||||
|
||||
|
||||
void cuda_histogram(
|
||||
int histogram_size,
|
||||
data_size_t leaf_num_data,
|
||||
data_size_t num_data,
|
||||
bool use_all_features,
|
||||
bool is_constant_hessian,
|
||||
int num_workgroups,
|
||||
cudaStream_t stream,
|
||||
uint8_t* arg0,
|
||||
uint8_t* arg1,
|
||||
data_size_t arg2,
|
||||
data_size_t* arg3,
|
||||
data_size_t arg4,
|
||||
score_t* arg5,
|
||||
score_t* arg6,
|
||||
score_t arg6_const,
|
||||
char* arg7,
|
||||
volatile int* arg8,
|
||||
void* arg9,
|
||||
size_t exp_workgroups_per_feature);
|
||||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,261 +0,0 @@
|
|||
/*!
|
||||
* Copyright (c) 2020 IBM Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
||||
*/
|
||||
#ifndef LIGHTGBM_TREELEARNER_CUDA_TREE_LEARNER_H_
|
||||
#define LIGHTGBM_TREELEARNER_CUDA_TREE_LEARNER_H_
|
||||
|
||||
#include <LightGBM/utils/random.h>
|
||||
#include <LightGBM/utils/array_args.h>
|
||||
#include <LightGBM/dataset.h>
|
||||
#include <LightGBM/feature_group.h>
|
||||
#include <LightGBM/tree.h>
|
||||
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
||||
#include "feature_histogram.hpp"
|
||||
#include "serial_tree_learner.h"
|
||||
#include "data_partition.hpp"
|
||||
#include "split_info.hpp"
|
||||
#include "leaf_splits.hpp"
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <LightGBM/cuda/vector_cudahost.h>
|
||||
#include "cuda_kernel_launcher.h"
|
||||
|
||||
|
||||
using json11::Json;
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
/*!
|
||||
* \brief CUDA-based parallel learning algorithm.
|
||||
*/
|
||||
class CUDATreeLearner: public SerialTreeLearner {
|
||||
public:
|
||||
explicit CUDATreeLearner(const Config* tree_config);
|
||||
~CUDATreeLearner();
|
||||
void Init(const Dataset* train_data, bool is_constant_hessian) override;
|
||||
void ResetTrainingDataInner(const Dataset* train_data, bool is_constant_hessian, bool reset_multi_val_bin) override;
|
||||
Tree* Train(const score_t* gradients, const score_t *hessians, bool is_first_tree) override;
|
||||
void SetBaggingData(const Dataset* subset, const data_size_t* used_indices, data_size_t num_data) override {
|
||||
SerialTreeLearner::SetBaggingData(subset, used_indices, num_data);
|
||||
if (subset == nullptr && used_indices != nullptr) {
|
||||
if (num_data != num_data_) {
|
||||
use_bagging_ = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
use_bagging_ = false;
|
||||
}
|
||||
|
||||
protected:
|
||||
void BeforeTrain() override;
|
||||
bool BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) override;
|
||||
void FindBestSplits(const Tree* tree) override;
|
||||
void Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) override;
|
||||
void ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) override;
|
||||
|
||||
private:
|
||||
typedef float gpu_hist_t;
|
||||
|
||||
/*!
|
||||
* \brief Find the best number of workgroups processing one feature for maximizing efficiency
|
||||
* \param leaf_num_data The number of data examples on the current leaf being processed
|
||||
* \return Log2 of the best number for workgroups per feature, in range 0...kMaxLogWorkgroupsPerFeature
|
||||
*/
|
||||
int GetNumWorkgroupsPerFeature(data_size_t leaf_num_data);
|
||||
|
||||
/*!
|
||||
* \brief Initialize GPU device
|
||||
* \param num_gpu: number of maximum gpus
|
||||
*/
|
||||
void InitGPU(int num_gpu);
|
||||
|
||||
/*!
|
||||
* \brief Allocate memory for GPU computation // alloc only
|
||||
*/
|
||||
void CountDenseFeatureGroups(); // compute num_dense_feature_group
|
||||
void prevAllocateGPUMemory(); // compute CPU-side param calculation & Pin HostMemory
|
||||
void AllocateGPUMemory();
|
||||
|
||||
/*!
|
||||
* \ ResetGPUMemory
|
||||
*/
|
||||
void ResetGPUMemory();
|
||||
|
||||
/*!
|
||||
* \ copy dense feature from CPU to GPU
|
||||
*/
|
||||
void copyDenseFeature();
|
||||
|
||||
/*!
|
||||
* \brief Compute GPU feature histogram for the current leaf.
|
||||
* Indices, gradients and Hessians have been copied to the device.
|
||||
* \param leaf_num_data Number of data on current leaf
|
||||
* \param use_all_features Set to true to not use feature masks, with a faster kernel
|
||||
*/
|
||||
void GPUHistogram(data_size_t leaf_num_data, bool use_all_features);
|
||||
|
||||
void SetThreadData(ThreadData* thread_data, int device_id, int histogram_size,
|
||||
int leaf_num_data, bool use_all_features,
|
||||
int num_workgroups, int exp_workgroups_per_feature) {
|
||||
ThreadData* td = &thread_data[device_id];
|
||||
td->device_id = device_id;
|
||||
td->histogram_size = histogram_size;
|
||||
td->leaf_num_data = leaf_num_data;
|
||||
td->num_data = num_data_;
|
||||
td->use_all_features = use_all_features;
|
||||
td->is_constant_hessian = share_state_->is_constant_hessian;
|
||||
td->num_workgroups = num_workgroups;
|
||||
td->stream = stream_[device_id];
|
||||
td->device_features = device_features_[device_id];
|
||||
td->device_feature_masks = reinterpret_cast<uint8_t *>(device_feature_masks_[device_id]);
|
||||
td->device_data_indices = device_data_indices_[device_id];
|
||||
td->device_gradients = device_gradients_[device_id];
|
||||
td->device_hessians = device_hessians_[device_id];
|
||||
td->hessians_const = hessians_[0];
|
||||
td->device_subhistograms = device_subhistograms_[device_id];
|
||||
td->sync_counters = sync_counters_[device_id];
|
||||
td->device_histogram_outputs = device_histogram_outputs_[device_id];
|
||||
td->exp_workgroups_per_feature = exp_workgroups_per_feature;
|
||||
|
||||
td->kernel_start = &(kernel_start_[device_id]);
|
||||
td->kernel_wait_obj = &(kernel_wait_obj_[device_id]);
|
||||
td->kernel_input_wait_time = &(kernel_input_wait_time_[device_id]);
|
||||
|
||||
size_t output_size = num_gpu_feature_groups_[device_id] * dword_features_ * device_bin_size_ * hist_bin_entry_sz_;
|
||||
size_t host_output_offset = offset_gpu_feature_groups_[device_id] * dword_features_ * device_bin_size_ * hist_bin_entry_sz_;
|
||||
td->output_size = output_size;
|
||||
td->host_histogram_output = reinterpret_cast<char*>(host_histogram_outputs_) + host_output_offset;
|
||||
td->histograms_wait_obj = &(histograms_wait_obj_[device_id]);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Wait for GPU kernel execution and read histogram
|
||||
* \param histograms Destination of histogram results from GPU.
|
||||
*/
|
||||
template <typename HistType>
|
||||
void WaitAndGetHistograms(FeatureHistogram* leaf_histogram_array);
|
||||
|
||||
/*!
|
||||
* \brief Construct GPU histogram asynchronously.
|
||||
* Interface is similar to Dataset::ConstructHistograms().
|
||||
* \param is_feature_used A predicate vector for enabling each feature
|
||||
* \param data_indices Array of data example IDs to be included in histogram, will be copied to GPU.
|
||||
* Set to nullptr to skip copy to GPU.
|
||||
* \param num_data Number of data examples to be included in histogram
|
||||
* \return true if GPU kernel is launched, false if GPU is not used
|
||||
*/
|
||||
bool ConstructGPUHistogramsAsync(
|
||||
const std::vector<int8_t>& is_feature_used,
|
||||
const data_size_t* data_indices, data_size_t num_data);
|
||||
|
||||
/*! brief Log2 of max number of workgroups per feature*/
|
||||
const int kMaxLogWorkgroupsPerFeature = 10; // 2^10
|
||||
/*! brief Max total number of workgroups with preallocated workspace.
|
||||
* If we use more than this number of workgroups, we have to reallocate subhistograms */
|
||||
std::vector<int> preallocd_max_num_wg_;
|
||||
|
||||
/*! \brief True if bagging is used */
|
||||
bool use_bagging_;
|
||||
|
||||
/*! \brief GPU command queue object */
|
||||
std::vector<cudaStream_t> stream_;
|
||||
|
||||
/*! \brief total number of feature-groups */
|
||||
int num_feature_groups_;
|
||||
/*! \brief total number of dense feature-groups, which will be processed on GPU */
|
||||
int num_dense_feature_groups_;
|
||||
std::vector<int> num_gpu_feature_groups_;
|
||||
std::vector<int> offset_gpu_feature_groups_;
|
||||
/*! \brief On GPU we read one DWORD (4-byte) of features of one example once.
|
||||
* With bin size > 16, there are 4 features per DWORD.
|
||||
* With bin size <=16, there are 8 features per DWORD.
|
||||
*/
|
||||
int dword_features_;
|
||||
/*! \brief Max number of bins of training data, used to determine
|
||||
* which GPU kernel to use */
|
||||
int max_num_bin_;
|
||||
/*! \brief Used GPU kernel bin size (64, 256) */
|
||||
int histogram_size_;
|
||||
int device_bin_size_;
|
||||
/*! \brief Size of histogram bin entry, depending if single or double precision is used */
|
||||
size_t hist_bin_entry_sz_;
|
||||
/*! \brief Indices of all dense feature-groups */
|
||||
std::vector<int> dense_feature_group_map_;
|
||||
/*! \brief Indices of all sparse feature-groups */
|
||||
std::vector<int> sparse_feature_group_map_;
|
||||
/*! \brief GPU memory object holding the training data */
|
||||
std::vector<uint8_t*> device_features_;
|
||||
/*! \brief GPU memory object holding the ordered gradient */
|
||||
std::vector<score_t*> device_gradients_;
|
||||
/*! \brief GPU memory object holding the ordered hessian */
|
||||
std::vector<score_t*> device_hessians_;
|
||||
/*! \brief A vector of feature mask. 1 = feature used, 0 = feature not used */
|
||||
std::vector<char> feature_masks_;
|
||||
/*! \brief GPU memory object holding the feature masks */
|
||||
std::vector<char*> device_feature_masks_;
|
||||
/*! \brief Pointer to pinned memory of feature masks */
|
||||
char* ptr_pinned_feature_masks_ = nullptr;
|
||||
/*! \brief GPU memory object holding indices of the leaf being processed */
|
||||
std::vector<data_size_t*> device_data_indices_;
|
||||
/*! \brief GPU memory object holding counters for workgroup coordination */
|
||||
std::vector<int*> sync_counters_;
|
||||
/*! \brief GPU memory object holding temporary sub-histograms per workgroup */
|
||||
std::vector<char*> device_subhistograms_;
|
||||
/*! \brief Host memory object for histogram output (GPU will write to Host memory directly) */
|
||||
std::vector<void*> device_histogram_outputs_;
|
||||
/*! \brief Host memory pointer for histogram outputs */
|
||||
void *host_histogram_outputs_;
|
||||
/*! CUDA waitlist object for waiting for data transfer before kernel execution */
|
||||
std::vector<cudaEvent_t> kernel_wait_obj_;
|
||||
/*! CUDA waitlist object for reading output histograms after kernel execution */
|
||||
std::vector<cudaEvent_t> histograms_wait_obj_;
|
||||
/*! CUDA Asynchronous waiting object for copying indices */
|
||||
std::vector<cudaEvent_t> indices_future_;
|
||||
/*! Asynchronous waiting object for copying gradients */
|
||||
std::vector<cudaEvent_t> gradients_future_;
|
||||
/*! Asynchronous waiting object for copying Hessians */
|
||||
std::vector<cudaEvent_t> hessians_future_;
|
||||
/*! Asynchronous waiting object for copying dense features */
|
||||
std::vector<cudaEvent_t> features_future_;
|
||||
|
||||
// host-side buffer for converting feature data into featre4 data
|
||||
int nthreads_; // number of Feature4* vector on host4_vecs_
|
||||
std::vector<cudaEvent_t> kernel_start_;
|
||||
std::vector<float> kernel_time_; // measure histogram kernel time
|
||||
std::vector<std::chrono::duration<double, std::milli>> kernel_input_wait_time_;
|
||||
int num_gpu_;
|
||||
int allocated_num_data_; // allocated data instances
|
||||
pthread_t **cpu_threads_; // pthread, 1 cpu thread / gpu
|
||||
};
|
||||
|
||||
} // namespace LightGBM
|
||||
#else // USE_CUDA
|
||||
|
||||
// When GPU support is not compiled in, quit with an error message
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
class CUDATreeLearner: public SerialTreeLearner {
|
||||
public:
|
||||
#pragma warning(disable : 4702)
|
||||
explicit CUDATreeLearner(const Config* tree_config) : SerialTreeLearner(tree_config) {
|
||||
Log::Fatal("CUDA Tree Learner was not enabled in this build.\n"
|
||||
"Please recompile with CMake option -DUSE_CUDA=1");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace LightGBM
|
||||
|
||||
#endif // USE_CUDA
|
||||
#endif // LIGHTGBM_TREELEARNER_CUDA_TREE_LEARNER_H_
|
|
@ -276,7 +276,6 @@ void DataParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf, in
|
|||
}
|
||||
|
||||
// instantiate template classes, otherwise linker cannot find the code
|
||||
template class DataParallelTreeLearner<CUDATreeLearner>;
|
||||
template class DataParallelTreeLearner<GPUTreeLearner>;
|
||||
template class DataParallelTreeLearner<SerialTreeLearner>;
|
||||
|
||||
|
|
|
@ -77,7 +77,6 @@ void FeatureParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(
|
|||
}
|
||||
|
||||
// instantiate template classes, otherwise linker cannot find the code
|
||||
template class FeatureParallelTreeLearner<CUDATreeLearner>;
|
||||
template class FeatureParallelTreeLearner<GPUTreeLearner>;
|
||||
template class FeatureParallelTreeLearner<SerialTreeLearner>;
|
||||
} // namespace LightGBM
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "cuda_tree_learner.h"
|
||||
#include "gpu_tree_learner.h"
|
||||
#include "serial_tree_learner.h"
|
||||
|
||||
|
|
|
@ -344,15 +344,7 @@ void SerialTreeLearner::FindBestSplits(const Tree* tree, const std::set<int>* fo
|
|||
}
|
||||
bool use_subtract = parent_leaf_histogram_array_ != nullptr;
|
||||
|
||||
#ifdef USE_CUDA
|
||||
if (LGBM_config_::current_learner == use_cpu_learner) {
|
||||
SerialTreeLearner::ConstructHistograms(is_feature_used, use_subtract);
|
||||
} else {
|
||||
ConstructHistograms(is_feature_used, use_subtract);
|
||||
}
|
||||
#else
|
||||
ConstructHistograms(is_feature_used, use_subtract);
|
||||
#endif
|
||||
FindBestSplitsFromHistograms(is_feature_used, use_subtract, tree);
|
||||
}
|
||||
|
||||
|
|
|
@ -211,7 +211,7 @@ class SerialTreeLearner: public TreeLearner {
|
|||
std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>> ordered_gradients_;
|
||||
/*! \brief hessians of current iteration, ordered for cache optimized, aligned to 4K page */
|
||||
std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>> ordered_hessians_;
|
||||
#elif defined(USE_CUDA) || defined(USE_CUDA_EXP)
|
||||
#elif defined(USE_CUDA)
|
||||
/*! \brief gradients of current iteration, ordered for cache optimized */
|
||||
std::vector<score_t, CHAllocator<score_t>> ordered_gradients_;
|
||||
/*! \brief hessians of current iteration, ordered for cache optimized */
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
*/
|
||||
#include <LightGBM/tree_learner.h>
|
||||
|
||||
#include "cuda_tree_learner.h"
|
||||
#include "gpu_tree_learner.h"
|
||||
#include "linear_tree_learner.h"
|
||||
#include "parallel_tree_learner.h"
|
||||
|
@ -40,24 +39,14 @@ TreeLearner* TreeLearner::CreateTreeLearner(const std::string& learner_type, con
|
|||
return new VotingParallelTreeLearner<GPUTreeLearner>(config);
|
||||
}
|
||||
} else if (device_type == std::string("cuda")) {
|
||||
if (learner_type == std::string("serial")) {
|
||||
return new CUDATreeLearner(config);
|
||||
} else if (learner_type == std::string("feature")) {
|
||||
return new FeatureParallelTreeLearner<CUDATreeLearner>(config);
|
||||
} else if (learner_type == std::string("data")) {
|
||||
return new DataParallelTreeLearner<CUDATreeLearner>(config);
|
||||
} else if (learner_type == std::string("voting")) {
|
||||
return new VotingParallelTreeLearner<CUDATreeLearner>(config);
|
||||
}
|
||||
} else if (device_type == std::string("cuda_exp")) {
|
||||
if (learner_type == std::string("serial")) {
|
||||
if (config->num_gpu == 1) {
|
||||
return new CUDASingleGPUTreeLearner(config, boosting_on_cuda);
|
||||
} else {
|
||||
Log::Fatal("cuda_exp only supports training on a single GPU.");
|
||||
Log::Fatal("Currently cuda version only supports training on a single GPU.");
|
||||
}
|
||||
} else {
|
||||
Log::Fatal("cuda_exp only supports training on a single machine.");
|
||||
Log::Fatal("Currently cuda version only supports training on a single machine.");
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
|
|
|
@ -501,7 +501,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf,
|
|||
}
|
||||
|
||||
// instantiate template classes, otherwise linker cannot find the code
|
||||
template class VotingParallelTreeLearner<CUDATreeLearner>;
|
||||
template class VotingParallelTreeLearner<GPUTreeLearner>;
|
||||
template class VotingParallelTreeLearner<SerialTreeLearner>;
|
||||
} // namespace LightGBM
|
||||
|
|
|
@ -48,7 +48,7 @@ def test_basic(tmp_path):
|
|||
assert bst.current_iteration() == 20
|
||||
assert bst.num_trees() == 20
|
||||
assert bst.num_model_per_iteration() == 1
|
||||
if getenv('TASK', '') != 'cuda_exp':
|
||||
if getenv('TASK', '') != 'cuda':
|
||||
assert bst.lower_bound() == pytest.approx(-2.9040190126976606)
|
||||
assert bst.upper_bound() == pytest.approx(3.3182142872462883)
|
||||
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче