[CUDA] consolidate CUDA versions (#5677)

* [ci] speed up if-else, swig, and lint conda setup

* add 'source activate'

* python constraint

* start removing cuda v1

* comment out CI

* remove more references

* revert some unnecessary changes

* revert a few more mistakes

* revert another change that ignored params

* sigh

* remove CUDATreeLearner

* fix tests, docs

* fix quoting in setup.py

* restore all CI

* Apply suggestions from code review

Co-authored-by: shiyu1994 <shiyu_k1994@qq.com>

* Apply suggestions from code review

* completely remove cuda_exp, update docs

---------

Co-authored-by: shiyu1994 <shiyu_k1994@qq.com>
This commit is contained in:
James Lamb 2023-01-31 21:27:52 -06:00 коммит произвёл GitHub
Родитель 5ffd757119
Коммит 4f47547c88
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
104 изменённых файлов: 361 добавлений и 1980 удалений

Просмотреть файл

@ -106,7 +106,7 @@ else # Linux
|| exit -1
fi
fi
if [[ $TASK == "cuda" || $TASK == "cuda_exp" ]]; then
if [[ $TASK == "cuda" ]]; then
echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
apt-get update
apt-get install --no-install-recommends -y \

Просмотреть файл

@ -201,41 +201,24 @@ if [[ $TASK == "gpu" ]]; then
elif [[ $METHOD == "source" ]]; then
cmake -DUSE_GPU=ON ..
fi
elif [[ $TASK == "cuda" || $TASK == "cuda_exp" ]]; then
if [[ $TASK == "cuda" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
else
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda_exp";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "cuda_exp"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
# by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
fi
elif [[ $TASK == "cuda" ]]; then
sed -i'.bak' 's/std::string device_type = "cpu";/std::string device_type = "cuda";/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'std::string device_type = "cuda"' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
# by default ``gpu_use_dp=false`` for efficiency. change to ``true`` here for exact results in ci tests
sed -i'.bak' 's/gpu_use_dp = false;/gpu_use_dp = true;/' $BUILD_DIRECTORY/include/LightGBM/config.h
grep -q 'gpu_use_dp = true' $BUILD_DIRECTORY/include/LightGBM/config.h || exit -1 # make sure that changes were really done
if [[ $METHOD == "pip" ]]; then
cd $BUILD_DIRECTORY/python-package && python setup.py sdist || exit -1
if [[ $TASK == "cuda" ]]; then
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda || exit -1
else
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda-exp || exit -1
fi
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v --install-option=--cuda || exit -1
pytest $BUILD_DIRECTORY/tests/python_package_test || exit -1
exit 0
elif [[ $METHOD == "wheel" ]]; then
if [[ $TASK == "cuda" ]]; then
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda || exit -1
else
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda-exp || exit -1
fi
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --cuda || exit -1
pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER*.whl -v || exit -1
pytest $BUILD_DIRECTORY/tests || exit -1
exit 0
elif [[ $METHOD == "source" ]]; then
if [[ $TASK == "cuda" ]]; then
cmake -DUSE_CUDA=ON ..
else
cmake -DUSE_CUDA_EXP=ON ..
fi
cmake -DUSE_CUDA=ON ..
fi
elif [[ $TASK == "mpi" ]]; then
if [[ $METHOD == "pip" ]]; then

20
.github/workflows/cuda.yml поставляемый
Просмотреть файл

@ -28,31 +28,21 @@ jobs:
fail-fast: false
matrix:
include:
- method: source
compiler: gcc
python_version: "3.8"
cuda_version: "11.7.1"
task: cuda
- method: pip
compiler: clang
python_version: "3.9"
cuda_version: "10.0"
task: cuda
- method: wheel
compiler: gcc
python_version: "3.10"
cuda_version: "9.0"
cuda_version: "11.7.1"
task: cuda
- method: source
compiler: gcc
python_version: "3.8"
cuda_version: "11.7.1"
task: cuda_exp
cuda_version: "10.0"
task: cuda
- method: pip
compiler: clang
python_version: "3.9"
cuda_version: "10.0"
task: cuda_exp
cuda_version: "11.7.1"
task: cuda
steps:
- name: Setup or update software on host machine
run: |

Просмотреть файл

@ -4,8 +4,7 @@ option(USE_GPU "Enable GPU-accelerated training" OFF)
option(USE_SWIG "Enable SWIG to generate Java API" OFF)
option(USE_HDFS "Enable HDFS support (EXPERIMENTAL)" OFF)
option(USE_TIMETAG "Set to ON to output time costs" OFF)
option(USE_CUDA "Enable CUDA-accelerated training (EXPERIMENTAL)" OFF)
option(USE_CUDA_EXP "Enable CUDA-accelerated training with more acceleration (EXPERIMENTAL)" OFF)
option(USE_CUDA "Enable CUDA-accelerated training" OFF)
option(USE_DEBUG "Set to ON for Debug mode" OFF)
option(USE_SANITIZER "Use santizer flags" OFF)
set(
@ -31,7 +30,7 @@ elseif(USE_SWIG)
cmake_minimum_required(VERSION 3.8)
elseif(USE_GPU OR APPLE)
cmake_minimum_required(VERSION 3.2)
elseif(USE_CUDA OR USE_CUDA_EXP)
elseif(USE_CUDA)
cmake_minimum_required(VERSION 3.16)
else()
cmake_minimum_required(VERSION 3.0)
@ -137,7 +136,7 @@ else()
add_definitions(-DUSE_SOCKET)
endif()
if(USE_CUDA OR USE_CUDA_EXP)
if(USE_CUDA)
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
enable_language(CUDA)
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
@ -192,12 +191,8 @@ if(__INTEGRATE_OPENCL)
endif()
endif()
if(USE_CUDA OR USE_CUDA_EXP)
if(USE_CUDA)
find_package(CUDA 9.0 REQUIRED)
else()
find_package(CUDA 10.0 REQUIRED)
endif()
if(USE_CUDA)
find_package(CUDA 10.0 REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS} -Xcompiler=-fPIC -Xcompiler=-Wall")
@ -224,11 +219,7 @@ if(USE_CUDA OR USE_CUDA_EXP)
endif()
message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
if(USE_CUDA)
add_definitions(-DUSE_CUDA)
elseif(USE_CUDA_EXP)
add_definitions(-DUSE_CUDA_EXP)
endif()
add_definitions(-DUSE_CUDA)
if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD 11)
@ -411,10 +402,8 @@ file(
src/objective/*.cpp
src/network/*.cpp
src/treelearner/*.cpp
if(USE_CUDA OR USE_CUDA_EXP)
if(USE_CUDA)
src/treelearner/*.cu
endif()
if(USE_CUDA_EXP)
src/boosting/cuda/*.cpp
src/boosting/cuda/*.cu
src/metric/cuda/*.cpp
@ -549,7 +538,7 @@ if(__INTEGRATE_OPENCL)
target_link_libraries(lightgbm_objs PUBLIC ${INTEGRATED_OPENCL_LIBRARIES} ${CMAKE_DL_LIBS})
endif()
if(USE_CUDA OR USE_CUDA_EXP)
if(USE_CUDA)
# Disable cmake warning about policy CMP0104. Refer to issue #3754 and PR #4268.
# Custom target properties does not propagate, thus we need to specify for
# each target that contains or depends on cuda source.

Просмотреть файл

@ -605,8 +605,8 @@ Docker
Refer to `GPU Docker folder <https://github.com/microsoft/LightGBM/tree/master/docker/gpu>`__.
Build CUDA Version (Experimental)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Build CUDA Version
~~~~~~~~~~~~~~~~~~
The `original GPU build <#build-gpu-version>`__ of LightGBM (``device_type=gpu``) is based on OpenCL.
@ -621,7 +621,7 @@ On Linux a CUDA version of LightGBM can be built using **CUDA**, **CMake** and *
The following dependencies should be installed before compilation:
- **CUDA** 9.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers.
- **CUDA** 10.0 or later libraries. Please refer to `this detailed guide`_. Pay great attention to the minimum required versions of host compilers listed in the table from that guide and use only recommended versions of compilers.
- **CMake** 3.16 or later.
@ -636,8 +636,6 @@ To build LightGBM CUDA version, run the following commands:
cmake -DUSE_CUDA=1 ..
make -j4
Recently, a new CUDA version with better efficiency is implemented as an experimental feature. To build the new CUDA version, replace ``-DUSE_CUDA`` with ``-DUSE_CUDA_EXP`` in the above commands. Please note that new version requires **CUDA** 10.0 or later libraries.
**Note**: glibc >= 2.14 is required.
**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).

Просмотреть файл

@ -205,9 +205,15 @@ Core Parameters
- **Note**: please **don't** change this during training, especially when running multiple jobs simultaneously by external packages, otherwise it may cause undesirable errors
- ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">&#x1F517;&#xFE0E;</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, ``cuda``, ``cuda_exp``, aliases: ``device``
- ``device_type`` :raw-html:`<a id="device_type" title="Permalink to this parameter" href="#device_type">&#x1F517;&#xFE0E;</a>`, default = ``cpu``, type = enum, options: ``cpu``, ``gpu``, ``cuda``, aliases: ``device``
- device for the tree learning, you can use GPU to achieve the faster learning
- device for the tree learning
- ``cpu`` supports all LightGBM functionality and is portable across the widest range of operating systems and hardware
- ``cuda`` offers faster training than ``gpu`` or ``cpu``, but only works on GPUs supporting CUDA
- ``gpu`` can be faster than ``cpu`` and works on a wider range of GPUs than CUDA
- **Note**: it is recommended to use the smaller ``max_bin`` (e.g. 63) to get the better speed up
@ -215,10 +221,6 @@ Core Parameters
- **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
- **Note**: ``cuda_exp`` is an experimental CUDA version, the installation guide for ``cuda_exp`` is identical with ``cuda``
- **Note**: ``cuda_exp`` is faster than ``cuda`` and will replace ``cuda`` in the future
- ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = int, aliases: ``random_seed``, ``random_state``
- this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``, etc.

Просмотреть файл

@ -480,13 +480,13 @@ class MultiValBin {
virtual MultiValBin* Clone() = 0;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
virtual const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const = 0;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
inline uint32_t BinMapper::ValueToBin(double value) const {

Просмотреть файл

@ -223,14 +223,15 @@ struct Config {
// [doc-only]
// type = enum
// options = cpu, gpu, cuda, cuda_exp
// options = cpu, gpu, cuda
// alias = device
// desc = device for the tree learning, you can use GPU to achieve the faster learning
// desc = device for the tree learning
// desc = ``cpu`` supports all LightGBM functionality and is portable across the widest range of operating systems and hardware
// desc = ``cuda`` offers faster training than ``gpu`` or ``cpu``, but only works on GPUs supporting CUDA
// desc = ``gpu`` can be faster than ``cpu`` and works on a wider range of GPUs than CUDA
// desc = **Note**: it is recommended to use the smaller ``max_bin`` (e.g. 63) to get the better speed up
// desc = **Note**: for the faster speed, GPU uses 32-bit float point to sum up by default, so this may affect the accuracy for some tasks. You can set ``gpu_use_dp=true`` to enable 64-bit float point, but it will slow down the training
// desc = **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
// desc = **Note**: ``cuda_exp`` is an experimental CUDA version, the installation guide for ``cuda_exp`` is identical with ``cuda``
// desc = **Note**: ``cuda_exp`` is faster than ``cuda`` and will replace ``cuda`` in the future
std::string device_type = "cpu";
// [doc-only]

Просмотреть файл

@ -6,7 +6,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
@ -577,5 +577,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values,
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
@ -137,4 +137,4 @@ class CUDAColumnData {
#endif // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#define LIGHTGBM_CUDA_CUDA_METADATA_HPP_
@ -55,4 +55,4 @@ class CUDAMetadata {
#endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#define LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/metric.h>
@ -36,6 +36,6 @@ class CUDAMetricInterface: public HOST_METRIC {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h>
#include <LightGBM/objective_function.h>
@ -73,6 +73,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_

Просмотреть файл

@ -5,7 +5,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
@ -69,6 +69,6 @@ class CUDARandom {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
@ -176,4 +176,4 @@ class CUDARowData {
} // namespace LightGBM
#endif // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
@ -102,4 +102,4 @@ class CUDASplitInfo {
#endif // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_
#define LIGHTGBM_CUDA_CUDA_TREE_HPP_
@ -170,4 +170,4 @@ class CUDATree : public Tree {
#endif // LIGHTGBM_CUDA_CUDA_TREE_HPP_
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -6,20 +6,15 @@
#ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_
#define LIGHTGBM_CUDA_CUDA_UTILS_H_
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <LightGBM/utils/log.h>
#endif // USE_CUDA || USE_CUDA_EXP
#ifdef USE_CUDA_EXP
#include <vector>
#endif // USE_CUDA_EXP
namespace LightGBM {
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#define CUDASUCCESS_OR_FATAL(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) {
if (code != cudaSuccess) {
@ -27,9 +22,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort =
if (abort) exit(code);
}
}
#endif // USE_CUDA || USE_CUDA_EXP
#ifdef USE_CUDA_EXP
#define CUDASUCCESS_OR_FATAL_OUTER(ans) { gpuAssert((ans), file, line); }
void SetCUDADevice(int gpu_device_id, const char* file, int line);
@ -184,8 +177,8 @@ class CUDAVector {
size_t size_;
};
#endif // USE_CUDA_EXP
} // namespace LightGBM
#endif // USE_CUDA
#endif // LIGHTGBM_CUDA_CUDA_UTILS_H_

Просмотреть файл

@ -7,7 +7,7 @@
#include <LightGBM/utils/common.h>
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
#endif
@ -43,7 +43,7 @@ struct CHAllocator {
T* ptr;
if (n == 0) return NULL;
n = SIZE_ALIGNED(n);
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#ifdef USE_CUDA
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaError_t ret = cudaHostAlloc(&ptr, n*sizeof(T), cudaHostAllocPortable);
if (ret != cudaSuccess) {
@ -62,7 +62,7 @@ struct CHAllocator {
void deallocate(T* p, std::size_t n) {
(void)n; // UNUSED
if (p == NULL) return;
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#ifdef USE_CUDA
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaPointerAttributes attributes;
cudaPointerGetAttributes(&attributes, p);

Просмотреть файл

@ -277,13 +277,13 @@ class Metadata {
/*! \brief Disable copy */
Metadata(const Metadata&) = delete;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
CUDAMetadata* cuda_metadata() const { return cuda_metadata_.get(); }
void CreateCUDAMetadata(const int gpu_device_id);
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
/*! \brief Load wights from file */
@ -329,9 +329,9 @@ class Metadata {
bool weight_load_from_file_;
bool query_load_from_file_;
bool init_score_load_from_file_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::unique_ptr<CUDAMetadata> cuda_metadata_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
@ -910,13 +910,13 @@ class Dataset {
return feature_groups_[feature_group_index]->feature_min_bin(sub_feature_index);
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const CUDAColumnData* cuda_column_data() const {
return cuda_column_data_.get();
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
void CreateCUDAColumnData();
@ -968,9 +968,9 @@ class Dataset {
/*! \brief mutex for threading safe call */
std::mutex mutex_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::unique_ptr<CUDAColumnData> cuda_column_data_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
std::string parser_config_str_;
};

Просмотреть файл

@ -97,7 +97,7 @@ class ObjectiveFunction {
*/
virtual bool IsCUDAObjective() const { return false; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
/*!
* \brief Convert output for CUDA version
*/
@ -107,7 +107,7 @@ class ObjectiveFunction {
virtual bool NeedConvertOutputCUDA () const { return false; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
} // namespace LightGBM

Просмотреть файл

@ -38,9 +38,9 @@ class SampleStrategy {
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
CUDAVector<data_size_t>& cuda_bag_data_indices() { return cuda_bag_data_indices_; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
objective_function_ = objective_function;
@ -72,10 +72,10 @@ class SampleStrategy {
/*! \brief whether need to resize the gradient vectors */
bool need_resize_gradients_;
#ifdef USE_CUDA_EXP
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda_exp */
#ifdef USE_CUDA
/*! \brief Buffer for bag_data_indices_ on GPU, used only with cuda */
CUDAVector<data_size_t> cuda_bag_data_indices_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
} // namespace LightGBM

Просмотреть файл

@ -126,7 +126,7 @@ class MultiValBinWrapper {
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const void* GetRowWiseData(
uint8_t* bit_type,
size_t* total_size,
@ -142,7 +142,7 @@ class MultiValBinWrapper {
return multi_val_bin_->GetRowWiseData(bit_type, total_size, is_sparse, out_data_ptr, data_ptr_bit_type);
}
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
bool is_use_subcol_ = false;
@ -183,9 +183,9 @@ struct TrainingShareStates {
const std::vector<uint32_t>& feature_hist_offsets() const { return feature_hist_offsets_; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const std::vector<uint32_t>& column_hist_offsets() const { return column_hist_offsets_; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
bool IsSparseRowwise() {
return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
@ -235,7 +235,7 @@ struct TrainingShareStates {
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
@ -250,13 +250,13 @@ struct TrainingShareStates {
return nullptr;
}
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
std::vector<uint32_t> feature_hist_offsets_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::vector<uint32_t> column_hist_offsets_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
int num_hist_total_bin_ = 0;
std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;

Просмотреть файл

@ -319,9 +319,9 @@ class Tree {
inline bool is_linear() const { return is_linear_; }
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
inline bool is_cuda_tree() const { return is_cuda_tree_; }
#endif // USE_CUDA_EXP
#endif // USE_CUDA
inline void SetIsLinear(bool is_linear) {
is_linear_ = is_linear;
@ -532,10 +532,10 @@ class Tree {
std::vector<std::vector<int>> leaf_features_;
/* \brief features used in leaf linear models; indexing is relative to used_features_ */
std::vector<std::vector<int>> leaf_features_inner_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
/*! \brief Marks whether this tree is a CUDATree */
bool is_cuda_tree_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
};
inline void Tree::Split(int leaf, int feature, int real_feature,

Просмотреть файл

@ -121,11 +121,9 @@ Build CUDA Version
All requirements from `Build from Sources section <#build-from-sources>`__ apply for this installation option as well, and `CMake`_ (version 3.16 or higher) is strongly required.
**CUDA** library (version 9.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version-experimental>`__.
**CUDA** library (version 10.0 or higher) is needed: details for installation can be found in `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-cuda-version>`__.
Recently, a new CUDA version with better efficiency is implemented as an experimental feature. To build the new CUDA version, replace ``--cuda`` with ``--cuda-exp`` in the above commands. Please note that new version requires **CUDA** 10.0 or later libraries. Note that this new version uses twice the memory, since it stores data row-wise as well as column-wise in memory to improve performance (see this `issue <https://github.com/microsoft/LightGBM/issues/5318>`__ for discussion).
To use the regular or experimental CUDA versions within Python, pass ``{"device": "cuda"}`` or ``{"device": "cuda_exp"}`` respectively as parameters.
To use the CUDA version within Python, pass ``{"device": "cuda"}`` in parameters.
Build HDFS Version
~~~~~~~~~~~~~~~~~~
@ -211,8 +209,6 @@ Run ``python setup.py install --gpu`` to enable GPU support. All requirements fr
Run ``python setup.py install --cuda`` to enable CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
Run ``python setup.py install --cuda-exp`` to enable the new experimental version of CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
Run ``python setup.py install --hdfs`` to enable HDFS support. All requirements from `Build HDFS Version section <#build-hdfs-version>`__ apply for this installation option as well.
Run ``python setup.py install --bit32``, if you want to use 32-bit version. All requirements from `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__ apply for this installation option as well.

Просмотреть файл

@ -21,7 +21,6 @@ LIGHTGBM_OPTIONS = [
('integrated-opencl', None, 'Compile integrated OpenCL version'),
('gpu', 'g', 'Compile GPU version'),
('cuda', None, 'Compile CUDA version'),
('cuda-exp', None, 'Compile CUDA Experimental version'),
('mpi', None, 'Compile MPI version'),
('nomp', None, 'Compile version without OpenMP support'),
('hdfs', 'h', 'Compile HDFS version'),
@ -106,7 +105,6 @@ def compile_cpp(
use_mingw: bool = False,
use_gpu: bool = False,
use_cuda: bool = False,
use_cuda_exp: bool = False,
use_mpi: bool = False,
use_hdfs: bool = False,
boost_root: Optional[str] = None,
@ -148,8 +146,6 @@ def compile_cpp(
cmake_cmd.append(f"-DOpenCL_LIBRARY={opencl_library}")
elif use_cuda:
cmake_cmd.append("-DUSE_CUDA=ON")
elif use_cuda_exp:
cmake_cmd.append("-DUSE_CUDA_EXP=ON")
if use_mpi:
cmake_cmd.append("-DUSE_MPI=ON")
if nomp:
@ -171,7 +167,7 @@ def compile_cpp(
else:
status = 1
lib_path = CURRENT_DIR / "compile" / "windows" / "x64" / "DLL" / "lib_lightgbm.dll"
if not any((use_gpu, use_cuda, use_cuda_exp, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)):
if not any((use_gpu, use_cuda, use_mpi, use_hdfs, nomp, bit32, integrated_opencl)):
logger.info("Starting to compile with MSBuild from existing solution file.")
platform_toolsets = ("v143", "v142", "v141", "v140")
for pt in platform_toolsets:
@ -235,7 +231,6 @@ class CustomInstall(install):
self.integrated_opencl = False
self.gpu = False
self.cuda = False
self.cuda_exp = False
self.boost_root = None
self.boost_dir = None
self.boost_include_dir = None
@ -260,7 +255,7 @@ class CustomInstall(install):
LOG_PATH.touch()
if not self.precompile:
copy_files(integrated_opencl=self.integrated_opencl, use_gpu=self.gpu)
compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_cuda_exp=self.cuda_exp, use_mpi=self.mpi,
compile_cpp(use_mingw=self.mingw, use_gpu=self.gpu, use_cuda=self.cuda, use_mpi=self.mpi,
use_hdfs=self.hdfs, boost_root=self.boost_root, boost_dir=self.boost_dir,
boost_include_dir=self.boost_include_dir, boost_librarydir=self.boost_librarydir,
opencl_include_dir=self.opencl_include_dir, opencl_library=self.opencl_library,
@ -281,7 +276,6 @@ class CustomBdistWheel(bdist_wheel):
self.integrated_opencl = False
self.gpu = False
self.cuda = False
self.cuda_exp = False
self.boost_root = None
self.boost_dir = None
self.boost_include_dir = None
@ -304,7 +298,6 @@ class CustomBdistWheel(bdist_wheel):
install.integrated_opencl = self.integrated_opencl
install.gpu = self.gpu
install.cuda = self.cuda
install.cuda_exp = self.cuda_exp
install.boost_root = self.boost_root
install.boost_dir = self.boost_dir
install.boost_include_dir = self.boost_include_dir

Просмотреть файл

@ -36,7 +36,7 @@ Application::Application(int argc, char** argv) {
Log::Fatal("No training/prediction data, application quit");
}
if (config_.device_type == std::string("cuda") || config_.device_type == std::string("cuda_exp")) {
if (config_.device_type == std::string("cuda")) {
LGBM_config_::current_device = lgbm_device_cuda;
}
}

Просмотреть файл

@ -47,33 +47,33 @@ class BaggingSampleStrategy : public SampleStrategy {
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner
if (!is_use_subset_) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
}
}
@ -103,11 +103,11 @@ class BaggingSampleStrategy : public SampleStrategy {
bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
}
bag_data_indices_.resize(num_data_);
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
cuda_bag_data_indices_.Resize(num_data_);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
bagging_runner_.ReSize(num_data_);
bagging_rands_.clear();
for (int i = 0;
@ -118,7 +118,7 @@ class BaggingSampleStrategy : public SampleStrategy {
double average_bag_rate =
(static_cast<double>(bag_data_cnt_) / num_data_) / config_->bagging_freq;
is_use_subset_ = false;
if (config_->device_type != std::string("cuda_exp")) {
if (config_->device_type != std::string("cuda")) {
const int group_threshold_usesubset = 100;
const double average_bag_rate_threshold = 0.5;
if (average_bag_rate <= average_bag_rate_threshold
@ -141,9 +141,9 @@ class BaggingSampleStrategy : public SampleStrategy {
} else {
bag_data_cnt_ = num_data_;
bag_data_indices_.clear();
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
cuda_bag_data_indices_.Clear();
#endif // USE_CUDA_EXP
#endif // USE_CUDA
bagging_runner_.ReSize(0);
is_use_subset_ = false;
}

Просмотреть файл

@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp"
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
namespace LightGBM {
@ -91,4 +91,4 @@ inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -5,7 +5,7 @@
#include "cuda_score_updater.hpp"
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
namespace LightGBM {
@ -42,4 +42,4 @@ void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -6,7 +6,7 @@
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h>
@ -60,6 +60,6 @@ class CUDAScoreUpdater: public ScoreUpdater {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_

Просмотреть файл

@ -68,14 +68,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
es_first_metric_only_ = config_->first_metric_only;
shrinkage_rate_ = config_->learning_rate;
if (config_->device_type == std::string("cuda") || config_->device_type == std::string("cuda_exp")) {
if (config_->device_type == std::string("cuda")) {
LGBM_config_::current_learner = use_cuda_learner;
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
const int gpu_device_id = config_->gpu_device_id >= 0 ? config_->gpu_device_id : 0;
CUDASUCCESS_OR_FATAL(cudaSetDevice(gpu_device_id));
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
// load forced_splits file
@ -116,15 +116,15 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
}
training_metrics_.shrink_to_fit();
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
num_data_ = train_data_->num_data();
@ -186,11 +186,11 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
}
// for a validation dataset, we need its score and metric
auto new_score_updater =
#ifdef USE_CUDA_EXP
config_->device_type == std::string("cuda_exp") ?
#ifdef USE_CUDA
config_->device_type == std::string("cuda") ?
std::unique_ptr<CUDAScoreUpdater>(new CUDAScoreUpdater(valid_data, num_tree_per_iteration_,
objective_function_ != nullptr && objective_function_->IsCUDAObjective())) :
#endif // USE_CUDA_EXP
#endif // USE_CUDA
std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_tree_per_iteration_));
// update score
for (int i = 0; i < iter_; ++i) {
@ -481,15 +481,15 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
// we need to predict out-of-bag scores of data for boosting
if (num_data_ - bag_data_cnt > 0) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
} else {
@ -503,17 +503,17 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
}
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t num_data) const {
#else
std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score, const data_size_t /*num_data*/) const {
#endif // USE_CUDA_EXP
#ifdef USE_CUDA_EXP
#endif // USE_CUDA
#ifdef USE_CUDA
const bool evaluation_on_cuda = metric->IsCUDAMetric();
if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
return metric->Eval(score, objective_function_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
} else if (boosting_on_gpu_ && !evaluation_on_cuda) {
const size_t total_size = static_cast<size_t>(num_data) * static_cast<size_t>(num_tree_per_iteration_);
if (total_size > host_score_.size()) {
@ -529,7 +529,7 @@ std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* scor
CopyFromHostToCUDADevice<double>(cuda_score_.RawData(), score, total_size, __FILE__, __LINE__);
return metric->Eval(cuda_score_.RawData(), objective_function_);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
std::string GBDT::OutputMetric(int iter) {
@ -660,14 +660,14 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
num_data = valid_score_updater_[used_idx]->num_data();
*out_len = static_cast<int64_t>(num_data) * num_class_;
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
std::vector<double> host_raw_scores;
if (boosting_on_gpu_) {
host_raw_scores.resize(static_cast<size_t>(*out_len), 0.0);
CopyFromCUDADeviceToHost<double>(host_raw_scores.data(), raw_scores, static_cast<size_t>(*out_len), __FILE__, __LINE__);
raw_scores = host_raw_scores.data();
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (objective_function_ != nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
@ -730,26 +730,26 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
}
training_metrics_.shrink_to_fit();
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
!data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (train_data != train_data_) {
train_data_ = train_data;
data_sample_strategy_->UpdateTrainingData(train_data);
// not same training data, need reset score and others
// create score tracker
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
// update score
for (int i = 0; i < iter_; ++i) {
@ -827,8 +827,8 @@ void GBDT::ResetGradientBuffers() {
const bool is_use_subset = data_sample_strategy_->is_use_subset();
const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
if (objective_function_ != nullptr) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda") && boosting_on_gpu_) {
if (cuda_gradients_.Size() < total_size) {
cuda_gradients_.Resize(total_size);
cuda_hessians_.Resize(total_size);
@ -836,16 +836,16 @@ void GBDT::ResetGradientBuffers() {
gradients_pointer_ = cuda_gradients_.RawData();
hessians_pointer_ = cuda_hessians_.RawData();
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (gradients_.size() < total_size) {
gradients_.resize(total_size);
hessians_.resize(total_size);
}
gradients_pointer_ = gradients_.data();
hessians_pointer_ = hessians_.data();
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
if (gradients_.size() < total_size) {
gradients_.resize(total_size);

Просмотреть файл

@ -542,7 +542,7 @@ class GBDT : public GBDTBase {
/*! \brief Parser config file content */
std::string parser_config_str_ = "";
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#ifdef USE_CUDA
/*! \brief First order derivative of training data */
std::vector<score_t, CHAllocator<score_t>> gradients_;
/*! \brief Second order derivative of training data */
@ -557,18 +557,18 @@ class GBDT : public GBDTBase {
score_t* gradients_pointer_;
/*! \brief Pointer to hessian vector, can be on CPU or GPU */
score_t* hessians_pointer_;
/*! \brief Whether boosting is done on GPU, used for cuda_exp */
/*! \brief Whether boosting is done on GPU, used for device_type=cuda */
bool boosting_on_gpu_;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
/*! \brief Gradient vector on GPU */
CUDAVector<score_t> cuda_gradients_;
/*! \brief Hessian vector on GPU */
CUDAVector<score_t> cuda_hessians_;
/*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */
/*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with device_type=cuda */
mutable std::vector<double> host_score_;
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with cuda_exp */
/*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with device_type=cuda */
mutable CUDAVector<double> cuda_score_;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
/*! \brief Number of training data */
data_size_t num_data_;

Просмотреть файл

@ -43,33 +43,33 @@ class GOSSStrategy : public SampleStrategy {
bag_data_cnt_ = left_cnt;
// set bagging data to tree learner
if (!is_use_subset_) {
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
#ifdef USE_CUDA_EXP
if (config_->device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_->device_type == std::string("cuda")) {
CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
bag_data_cnt_);
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
bag_data_cnt_);
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
}

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_algorithms.hpp>
@ -509,4 +509,4 @@ template __device__ double PercentileDevice<double, data_size_t, label_t, double
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h>
@ -28,4 +28,4 @@ void SetCUDADevice(int gpu_device_id, const char* file, int line) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -886,7 +886,7 @@ namespace LightGBM {
return nullptr;
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
template <>
const void* MultiValDenseBin<uint8_t>::GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
@ -1081,6 +1081,6 @@ namespace LightGBM {
return to_return;
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} // namespace LightGBM

Просмотреть файл

@ -177,8 +177,6 @@ void GetDeviceType(const std::unordered_map<std::string, std::string>& params, s
*device_type = "gpu";
} else if (value == std::string("cuda")) {
*device_type = "cuda";
} else if (value == std::string("cuda_exp")) {
*device_type = "cuda_exp";
} else {
Log::Fatal("Unknown device type %s", value.c_str());
}
@ -260,7 +258,7 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) {
GetObjectiveType(params, &objective);
GetMetricType(params, objective, &metric);
GetDeviceType(params, &device_type);
if (device_type == std::string("cuda") || device_type == std::string("cuda_exp")) {
if (device_type == std::string("cuda")) {
LGBM_config_::current_device = lgbm_device_cuda;
}
GetTreeLearnerType(params, &tree_learner);
@ -373,26 +371,21 @@ void Config::CheckParamConflict() {
num_leaves = static_cast<int>(full_num_leaves);
}
}
if (device_type == std::string("gpu") || device_type == std::string("cuda")) {
if (device_type == std::string("gpu")) {
// force col-wise for gpu, and cuda version
force_col_wise = true;
force_row_wise = false;
if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
}
} else if (device_type == std::string("cuda_exp")) {
// force row-wise for cuda_exp version
} else if (device_type == std::string("cuda")) {
// force row-wise for cuda version
force_col_wise = false;
force_row_wise = true;
if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
}
}
// force gpu_use_dp for CUDA
if (device_type == std::string("cuda") && !gpu_use_dp) {
Log::Warning("CUDA currently requires double precision calculations.");
gpu_use_dp = true;
}
// linear tree learner must be serial type and run on CPU device
if (linear_tree) {
if (device_type != std::string("cpu")) {

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp>
@ -308,4 +308,4 @@ void CUDAColumnData::InitColumnMetaInfo() {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_column_data.hpp>
@ -58,4 +58,4 @@ void CUDAColumnData::LaunchCopySubrowKernel(void* const* in_cuda_data_by_column)
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_metadata.hpp>
@ -89,4 +89,4 @@ void CUDAMetadata::SetInitScore(const double* init_score, data_size_t len) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_row_data.hpp>
@ -474,4 +474,4 @@ template const uint64_t* CUDARowData::GetPartitionPtr<uint64_t>() const;
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_tree.hpp>
@ -337,4 +337,4 @@ void CUDATree::AsConstantTree(double val) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_tree.hpp>
@ -456,4 +456,4 @@ void CUDATree::LaunchAddPredictionToScoreKernel(
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -345,9 +345,9 @@ void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
auto features_in_group = OneFeaturePerGroup(used_features);
auto is_sparse = io_config.is_enable_sparse;
if (io_config.device_type == std::string("cuda") || io_config.device_type == std::string("cuda_exp")) {
if (io_config.device_type == std::string("cuda")) {
LGBM_config_::current_device = lgbm_device_cuda;
if ((io_config.device_type == std::string("cuda") || io_config.device_type == std::string("cuda_exp")) && is_sparse) {
if ((io_config.device_type == std::string("cuda")) && is_sparse) {
Log::Warning("Using sparse features with CUDA is currently not supported.");
is_sparse = false;
}
@ -355,8 +355,7 @@ void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
std::vector<int8_t> group_is_multi_val(used_features.size(), 0);
if (io_config.enable_bundle && !used_features.empty()) {
bool lgbm_is_gpu_used = io_config.device_type == std::string("gpu") || io_config.device_type == std::string("cuda")
|| io_config.device_type == std::string("cuda_exp");
bool lgbm_is_gpu_used = io_config.device_type == std::string("gpu") || io_config.device_type == std::string("cuda");
features_in_group = FastFeatureBundling(
*bin_mappers, sample_non_zero_indices, sample_values, num_per_col,
num_sample_col, static_cast<data_size_t>(total_sample_cnt),
@ -447,14 +446,14 @@ void Dataset::FinishLoad() {
}
metadata_.FinishLoad();
#ifdef USE_CUDA_EXP
if (device_type_ == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData();
metadata_.CreateCUDAMetadata(gpu_device_id_);
} else {
cuda_column_data_.reset(nullptr);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
is_finish_load_ = true;
}
@ -862,15 +861,15 @@ void Dataset::CopySubrow(const Dataset* fullset,
device_type_ = fullset->device_type_;
gpu_device_id_ = fullset->gpu_device_id_;
#ifdef USE_CUDA_EXP
if (device_type_ == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (device_type_ == std::string("cuda")) {
if (cuda_column_data_ == nullptr) {
cuda_column_data_.reset(new CUDAColumnData(fullset->num_data(), gpu_device_id_));
metadata_.CreateCUDAMetadata(gpu_device_id_);
}
cuda_column_data_->CopySubrow(fullset->cuda_column_data(), used_indices, num_used_indices);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
bool Dataset::SetFloatField(const char* field_name, const float* field_data,
@ -1508,13 +1507,13 @@ void Dataset::AddFeaturesFrom(Dataset* other) {
raw_data_.push_back(other->raw_data_[i]);
}
}
#ifdef USE_CUDA_EXP
if (device_type_ == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (device_type_ == std::string("cuda")) {
CreateCUDAColumnData();
} else {
cuda_column_data_ = nullptr;
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
const void* Dataset::GetColWiseData(
@ -1536,7 +1535,7 @@ const void* Dataset::GetColWiseData(
return feature_groups_[feature_group_index]->GetColWiseData(sub_feature_index, bit_type, is_sparse, bin_iterator);
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
void Dataset::CreateCUDAColumnData() {
cuda_column_data_.reset(new CUDAColumnData(num_data_, gpu_device_id_));
int num_columns = 0;
@ -1671,6 +1670,6 @@ void Dataset::CreateCUDAColumnData() {
feature_to_column);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
} // namespace LightGBM

Просмотреть файл

@ -279,14 +279,14 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
dataset->device_type_ = config_.device_type;
dataset->gpu_device_id_ = config_.gpu_device_id;
#ifdef USE_CUDA_EXP
if (config_.device_type == std::string("cuda_exp")) {
#ifdef USE_CUDA
if (config_.device_type == std::string("cuda")) {
dataset->CreateCUDAColumnData();
dataset->metadata_.CreateCUDAMetadata(dataset->gpu_device_id_);
} else {
dataset->cuda_column_data_ = nullptr;
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
// check meta data
dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices);

Просмотреть файл

@ -467,7 +467,7 @@ class DenseBin : public Bin {
private:
data_size_t num_data_;
#if defined(USE_CUDA) || defined(USE_CUDA_EXP)
#ifdef USE_CUDA
std::vector<VAL_T, CHAllocator<VAL_T>> data_;
#else
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;

Просмотреть файл

@ -18,9 +18,9 @@ Metadata::Metadata() {
weight_load_from_file_ = false;
query_load_from_file_ = false;
init_score_load_from_file_ = false;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
cuda_metadata_ = nullptr;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
void Metadata::Init(const char* data_filename) {
@ -344,11 +344,11 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) {
init_score_[i] = Common::AvoidInf(init_score[i]);
}
init_score_load_from_file_ = false;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetInitScore(init_score_.data(), len);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
void Metadata::InsertInitScores(const double* init_scores, data_size_t start_index, data_size_t len, data_size_t source_size) {
@ -387,11 +387,11 @@ void Metadata::SetLabel(const label_t* label, data_size_t len) {
for (data_size_t i = 0; i < num_data_; ++i) {
label_[i] = Common::AvoidInf(label[i]);
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetLabel(label_.data(), len);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
void Metadata::InsertLabels(const label_t* labels, data_size_t start_index, data_size_t len) {
@ -428,11 +428,11 @@ void Metadata::SetWeights(const label_t* weights, data_size_t len) {
}
CalculateQueryWeights();
weight_load_from_file_ = false;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
if (cuda_metadata_ != nullptr) {
cuda_metadata_->SetWeights(weights_.data(), len);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
void Metadata::InsertWeights(const label_t* weights, data_size_t start_index, data_size_t len) {
@ -477,7 +477,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
}
CalculateQueryWeights();
query_load_from_file_ = false;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
if (cuda_metadata_ != nullptr) {
if (query_weights_.size() > 0) {
CHECK_EQ(query_weights_.size(), static_cast<size_t>(num_queries_));
@ -486,7 +486,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
cuda_metadata_->SetQuery(query_boundaries_.data(), nullptr, num_queries_);
}
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
void Metadata::InsertQueries(const data_size_t* queries, data_size_t start_index, data_size_t len) {
@ -635,12 +635,12 @@ void Metadata::FinishLoad() {
CalculateQueryBoundaries();
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
void Metadata::CreateCUDAMetadata(const int gpu_device_id) {
cuda_metadata_.reset(new CUDAMetadata(gpu_device_id));
cuda_metadata_->Init(label_, weights_, query_boundaries_, query_weights_, init_score_);
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
void Metadata::LoadFromMemory(const void* memory) {
const char* mem_ptr = reinterpret_cast<const char*>(memory);

Просмотреть файл

@ -211,13 +211,13 @@ class MultiValDenseBin : public MultiValBin {
MultiValDenseBin<VAL_T>* Clone() override;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const override;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
data_size_t num_data_;

Просмотреть файл

@ -292,13 +292,13 @@ class MultiValSparseBin : public MultiValBin {
MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const override;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
private:
data_size_t num_data_;

Просмотреть файл

@ -382,9 +382,9 @@ void TrainingShareStates::CalcBinOffsets(const std::vector<std::unique_ptr<Featu
}
num_hist_total_bin_ = static_cast<int>(feature_hist_offsets_.back());
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
column_hist_offsets_ = *offsets;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
void TrainingShareStates::SetMultiValBin(MultiValBin* bin, data_size_t num_data,

Просмотреть файл

@ -53,9 +53,9 @@ Tree::Tree(int max_leaves, bool track_branch_features, bool is_linear)
leaf_features_.resize(max_leaves_);
leaf_features_inner_.resize(max_leaves_);
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
is_cuda_tree_ = false;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
}
int Tree::Split(int leaf, int feature, int real_feature, uint32_t threshold_bin,
@ -731,9 +731,9 @@ Tree::Tree(const char* str, size_t* used_len) {
is_linear_ = false;
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
is_cuda_tree_ = false;
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if ((num_leaves_ <= 1) && !is_linear_) {
return;

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_binary_metric.hpp"
@ -28,4 +28,4 @@ std::vector<double> CUDABinaryMetricInterface<HOST_METRIC, CUDA_METRIC>::Eval(co
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_
#define LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_metric.hpp>
#include <LightGBM/cuda/cuda_utils.h>
@ -52,6 +52,6 @@ class CUDABinaryLoglossMetric: public CUDABinaryMetricInterface<BinaryLoglossMet
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_binary_metric.hpp"
#include "cuda_pointwise_metric.hpp"
@ -35,4 +35,4 @@ template void CUDAPointwiseMetricInterface<BinaryLoglossMetric, CUDABinaryLoglos
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_algorithms.hpp>
@ -66,4 +66,4 @@ template void CUDAPointwiseMetricInterface<BinaryLoglossMetric, CUDABinaryLoglos
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_
#define LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_metric.hpp>
#include <LightGBM/cuda/cuda_utils.h>
@ -38,6 +38,6 @@ class CUDAPointwiseMetricInterface: public CUDAMetricInterface<HOST_METRIC> {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <vector>
@ -31,4 +31,4 @@ CUDAL2Metric::CUDAL2Metric(const Config& config): CUDARegressionMetricInterface<
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_METRIC_CUDA_CUDA_REGRESSION_METRIC_HPP_
#define LIGHTGBM_METRIC_CUDA_CUDA_REGRESSION_METRIC_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_metric.hpp>
#include <LightGBM/cuda/cuda_utils.h>
@ -54,6 +54,6 @@ class CUDAL2Metric : public CUDARegressionMetricInterface<L2Metric, CUDAL2Metric
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_METRIC_CUDA_CUDA_REGRESSION_METRIC_HPP_

Просмотреть файл

@ -17,77 +17,77 @@
namespace LightGBM {
Metric* Metric::CreateMetric(const std::string& type, const Config& config) {
#ifdef USE_CUDA_EXP
if (config.device_type == std::string("cuda_exp") && config.boosting == std::string("gbdt")) {
#ifdef USE_CUDA
if (config.device_type == std::string("cuda") && config.boosting == std::string("gbdt")) {
if (type == std::string("l2")) {
return new CUDAL2Metric(config);
} else if (type == std::string("rmse")) {
return new CUDARMSEMetric(config);
} else if (type == std::string("l1")) {
Log::Warning("Metric l1 is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric l1 is not implemented in cuda version. Fall back to evaluation on CPU.");
return new L1Metric(config);
} else if (type == std::string("quantile")) {
Log::Warning("Metric quantile is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric quantile is not implemented in cuda version. Fall back to evaluation on CPU.");
return new QuantileMetric(config);
} else if (type == std::string("huber")) {
Log::Warning("Metric huber is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric huber is not implemented in cuda version. Fall back to evaluation on CPU.");
return new HuberLossMetric(config);
} else if (type == std::string("fair")) {
Log::Warning("Metric fair is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric fair is not implemented in cuda version. Fall back to evaluation on CPU.");
return new FairLossMetric(config);
} else if (type == std::string("poisson")) {
Log::Warning("Metric poisson is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric poisson is not implemented in cuda version. Fall back to evaluation on CPU.");
return new PoissonMetric(config);
} else if (type == std::string("binary_logloss")) {
return new CUDABinaryLoglossMetric(config);
} else if (type == std::string("binary_error")) {
Log::Warning("Metric binary_error is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric binary_error is not implemented in cuda version. Fall back to evaluation on CPU.");
return new BinaryErrorMetric(config);
} else if (type == std::string("auc")) {
Log::Warning("Metric auc is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric auc is not implemented in cuda version. Fall back to evaluation on CPU.");
return new AUCMetric(config);
} else if (type == std::string("average_precision")) {
Log::Warning("Metric average_precision is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric average_precision is not implemented in cuda version. Fall back to evaluation on CPU.");
return new AveragePrecisionMetric(config);
} else if (type == std::string("auc_mu")) {
Log::Warning("Metric auc_mu is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric auc_mu is not implemented in cuda version. Fall back to evaluation on CPU.");
return new AucMuMetric(config);
} else if (type == std::string("ndcg")) {
Log::Warning("Metric ndcg is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric ndcg is not implemented in cuda version. Fall back to evaluation on CPU.");
return new NDCGMetric(config);
} else if (type == std::string("map")) {
Log::Warning("Metric map is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric map is not implemented in cuda version. Fall back to evaluation on CPU.");
return new MapMetric(config);
} else if (type == std::string("multi_logloss")) {
Log::Warning("Metric multi_logloss is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric multi_logloss is not implemented in cuda version. Fall back to evaluation on CPU.");
return new MultiSoftmaxLoglossMetric(config);
} else if (type == std::string("multi_error")) {
Log::Warning("Metric multi_error is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric multi_error is not implemented in cuda version. Fall back to evaluation on CPU.");
return new MultiErrorMetric(config);
} else if (type == std::string("cross_entropy")) {
Log::Warning("Metric cross_entropy is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric cross_entropy is not implemented in cuda version. Fall back to evaluation on CPU.");
return new CrossEntropyMetric(config);
} else if (type == std::string("cross_entropy_lambda")) {
Log::Warning("Metric cross_entropy_lambda is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric cross_entropy_lambda is not implemented in cuda version. Fall back to evaluation on CPU.");
return new CrossEntropyLambdaMetric(config);
} else if (type == std::string("kullback_leibler")) {
Log::Warning("Metric kullback_leibler is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric kullback_leibler is not implemented in cuda version. Fall back to evaluation on CPU.");
return new KullbackLeiblerDivergence(config);
} else if (type == std::string("mape")) {
Log::Warning("Metric mape is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric mape is not implemented in cuda version. Fall back to evaluation on CPU.");
return new MAPEMetric(config);
} else if (type == std::string("gamma")) {
Log::Warning("Metric gamma is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric gamma is not implemented in cuda version. Fall back to evaluation on CPU.");
return new GammaMetric(config);
} else if (type == std::string("gamma_deviance")) {
Log::Warning("Metric gamma_deviance is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric gamma_deviance is not implemented in cuda version. Fall back to evaluation on CPU.");
return new GammaDevianceMetric(config);
} else if (type == std::string("tweedie")) {
Log::Warning("Metric tweedie is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
Log::Warning("Metric tweedie is not implemented in cuda version. Fall back to evaluation on CPU.");
return new TweedieMetric(config);
}
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (type == std::string("l2")) {
return new L2Metric(config);
} else if (type == std::string("rmse")) {
@ -135,9 +135,9 @@ Metric* Metric::CreateMetric(const std::string& type, const Config& config) {
} else if (type == std::string("tweedie")) {
return new TweedieMetric(config);
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
return nullptr;
}

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_binary_objective.hpp"
@ -61,4 +61,4 @@ void CUDABinaryLogloss::Init(const Metadata& metadata, data_size_t num_data) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <algorithm>
@ -206,4 +206,4 @@ void CUDABinaryLogloss::LaunchResetOVACUDALabelKernel() const {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#define GET_GRADIENTS_BLOCK_SIZE_BINARY (1024)
#define CALC_INIT_SCORE_BLOCK_SIZE_BINARY (1024)
@ -58,6 +58,6 @@ class CUDABinaryLogloss : public CUDAObjectiveInterface<BinaryLogloss> {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_multiclass_objective.hpp"
@ -59,4 +59,4 @@ const double* CUDAMulticlassOVA::ConvertOutputCUDA(const data_size_t num_data, c
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <algorithm>
@ -105,4 +105,4 @@ const double* CUDAMulticlassSoftmax::LaunchConvertOutputCUDAKernel(
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -5,7 +5,7 @@
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_MULTICLASS_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_MULTICLASS_OBJECTIVE_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_objective_function.hpp>
@ -74,5 +74,5 @@ class CUDAMulticlassOVA: public CUDAObjectiveInterface<MulticlassOVA> {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_MULTICLASS_OBJECTIVE_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <string>
#include <vector>
@ -64,4 +64,4 @@ void CUDARankXENDCG::GenerateItemRands() const {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_rank_objective.hpp"
@ -658,4 +658,4 @@ void CUDARankXENDCG::LaunchGetGradientsKernel(const double* score, score_t* grad
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_RANK_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_RANK_OBJECTIVE_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#define NUM_QUERY_PER_BLOCK (10)
@ -118,5 +118,5 @@ class CUDARankXENDCG : public CUDALambdaRankObjectiveInterface<RankXENDCG> {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_RANK_OBJECTIVE_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_regression_objective.hpp"
@ -85,4 +85,4 @@ double CUDARegressionPoissonLoss::LaunchCalcInitScoreKernel(const int class_id)
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_regression_objective.hpp"
#include <LightGBM/cuda/cuda_algorithms.hpp>
@ -353,4 +353,4 @@ const double* CUDARegressionPoissonLoss::LaunchConvertOutputCUDAKernel(const dat
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_REGRESSION_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_CUDA_CUDA_REGRESSION_OBJECTIVE_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#define GET_GRADIENTS_BLOCK_SIZE_REGRESSION (1024)
@ -135,5 +135,5 @@ class CUDARegressionPoissonLoss : public CUDARegressionObjectiveInterface<Regres
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_REGRESSION_OBJECTIVE_HPP_

Просмотреть файл

@ -18,8 +18,8 @@
namespace LightGBM {
ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const Config& config) {
#ifdef USE_CUDA_EXP
if (config.device_type == std::string("cuda_exp") &&
#ifdef USE_CUDA
if (config.device_type == std::string("cuda") &&
config.data_sample_strategy != std::string("goss") &&
config.boosting != std::string("rf")) {
if (type == std::string("regression")) {
@ -27,7 +27,7 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
} else if (type == std::string("regression_l1")) {
return new CUDARegressionL1loss(config);
} else if (type == std::string("quantile")) {
Log::Warning("Objective quantile is not implemented in cuda_exp version. Fall back to boosting on CPU.");
Log::Warning("Objective quantile is not implemented in cuda version. Fall back to boosting on CPU.");
return new RegressionQuantileloss(config);
} else if (type == std::string("huber")) {
return new CUDARegressionHuberLoss(config);
@ -46,26 +46,26 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
} else if (type == std::string("multiclassova")) {
return new CUDAMulticlassOVA(config);
} else if (type == std::string("cross_entropy")) {
Log::Warning("Objective cross_entropy is not implemented in cuda_exp version. Fall back to boosting on CPU.");
Log::Warning("Objective cross_entropy is not implemented in cuda version. Fall back to boosting on CPU.");
return new CrossEntropy(config);
} else if (type == std::string("cross_entropy_lambda")) {
Log::Warning("Objective cross_entropy_lambda is not implemented in cuda_exp version. Fall back to boosting on CPU.");
Log::Warning("Objective cross_entropy_lambda is not implemented in cuda version. Fall back to boosting on CPU.");
return new CrossEntropyLambda(config);
} else if (type == std::string("mape")) {
Log::Warning("Objective mape is not implemented in cuda_exp version. Fall back to boosting on CPU.");
Log::Warning("Objective mape is not implemented in cuda version. Fall back to boosting on CPU.");
return new RegressionMAPELOSS(config);
} else if (type == std::string("gamma")) {
Log::Warning("Objective gamma is not implemented in cuda_exp version. Fall back to boosting on CPU.");
Log::Warning("Objective gamma is not implemented in cuda version. Fall back to boosting on CPU.");
return new RegressionGammaLoss(config);
} else if (type == std::string("tweedie")) {
Log::Warning("Objective tweedie is not implemented in cuda_exp version. Fall back to boosting on CPU.");
Log::Warning("Objective tweedie is not implemented in cuda version. Fall back to boosting on CPU.");
return new RegressionTweedieLoss(config);
} else if (type == std::string("custom")) {
Log::Warning("Using customized objective with cuda_exp. This requires copying gradients from CPU to GPU, which can be slow.");
Log::Warning("Using customized objective with cuda. This requires copying gradients from CPU to GPU, which can be slow.");
return nullptr;
}
} else {
#endif // USE_CUDA_EXP
#endif // USE_CUDA
if (type == std::string("regression")) {
return new RegressionL2loss(config);
} else if (type == std::string("regression_l1")) {
@ -101,9 +101,9 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
} else if (type == std::string("custom")) {
return nullptr;
}
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
}
#endif // USE_CUDA_EXP
#endif // USE_CUDA
Log::Fatal("Unknown objective type name: %s", type.c_str());
return nullptr;
}

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <algorithm>
@ -383,4 +383,4 @@ void CUDABestSplitFinder::SetUsedFeatureByNode(const std::vector<int8_t>& is_fea
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <algorithm>
@ -1802,4 +1802,4 @@ void CUDABestSplitFinder::LaunchInitCUDARandomKernel() {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -7,7 +7,7 @@
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_BEST_SPLIT_FINDER_HPP_
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_BEST_SPLIT_FINDER_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/bin.h>
#include <LightGBM/dataset.h>
@ -211,5 +211,5 @@ class CUDABestSplitFinder {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_BEST_SPLIT_FINDER_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <algorithm>
#include <memory>
@ -370,4 +370,4 @@ void CUDADataPartition::ResetByLeafPred(const std::vector<int>& leaf_pred, int n
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_data_partition.hpp"
@ -1071,4 +1071,4 @@ void CUDADataPartition::LaunchAddPredictionToScoreKernel(const double* leaf_valu
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -6,7 +6,7 @@
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_DATA_PARTITION_HPP_
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_DATA_PARTITION_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/bin.h>
#include <LightGBM/meta.h>
@ -384,5 +384,5 @@ class CUDADataPartition {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_DATA_PARTITION_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_histogram_constructor.hpp"
@ -193,4 +193,4 @@ void CUDAHistogramConstructor::ResetConfig(const Config* config) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_histogram_constructor.hpp"
@ -429,4 +429,4 @@ void CUDAHistogramConstructor::LaunchSubtractHistogramKernel(
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -6,7 +6,7 @@
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_HISTOGRAM_CONSTRUCTOR_HPP_
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_HISTOGRAM_CONSTRUCTOR_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_row_data.hpp>
#include <LightGBM/feature_group.h>
@ -165,5 +165,5 @@ class CUDAHistogramConstructor {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_HISTOGRAM_CONSTRUCTOR_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_leaf_splits.hpp"
@ -68,4 +68,4 @@ void CUDALeafSplits::Resize(const data_size_t num_data) {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -5,7 +5,7 @@
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_leaf_splits.hpp"
#include <LightGBM/cuda/cuda_algorithms.hpp>
@ -126,4 +126,4 @@ void CUDALeafSplits::LaunchInitValuesKernal(
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -6,7 +6,7 @@
#ifndef LIGHTGBM_TREELEARNER_CUDA_CUDA_LEAF_SPLITS_HPP_
#define LIGHTGBM_TREELEARNER_CUDA_CUDA_LEAF_SPLITS_HPP_
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_utils.h>
#include <LightGBM/bin.h>
@ -156,5 +156,5 @@ class CUDALeafSplits {
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_LEAF_SPLITS_HPP_

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_single_gpu_tree_learner.hpp"
@ -515,4 +515,4 @@ void CUDASingleGPUTreeLearner::CheckSplitValid(
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -4,7 +4,7 @@
* license information.
*/
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include <LightGBM/cuda/cuda_algorithms.hpp>
@ -258,4 +258,4 @@ void CUDASingleGPUTreeLearner::LaunchConstructBitsetForCategoricalSplitKernel(
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA

Просмотреть файл

@ -9,7 +9,7 @@
#include <memory>
#include <vector>
#ifdef USE_CUDA_EXP
#ifdef USE_CUDA
#include "cuda_leaf_splits.hpp"
#include "cuda_histogram_constructor.hpp"
@ -137,7 +137,7 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
} // namespace LightGBM
#else // USE_CUDA_EXP
#else // USE_CUDA
// When GPU support is not compiled in, quit with an error message
@ -147,12 +147,12 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
public:
#pragma warning(disable : 4702)
explicit CUDASingleGPUTreeLearner(const Config* tree_config, const bool /*boosting_on_cuda*/) : SerialTreeLearner(tree_config) {
Log::Fatal("CUDA Tree Learner experimental version was not enabled in this build.\n"
"Please recompile with CMake option -DUSE_CUDA_EXP=1");
Log::Fatal("CUDA Tree Learner was not enabled in this build.\n"
"Please recompile with CMake option -DUSE_CUDA=1");
}
};
} // namespace LightGBM
#endif // USE_CUDA_EXP
#endif // USE_CUDA
#endif // LIGHTGBM_TREELEARNER_CUDA_CUDA_SINGLE_GPU_TREE_LEARNER_HPP_

Просмотреть файл

@ -1,171 +0,0 @@
/*!
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifdef USE_CUDA
#include "cuda_kernel_launcher.h"
#include <LightGBM/utils/log.h>
#include <cuda_runtime.h>
#include <cstdio>
namespace LightGBM {
void cuda_histogram(
int histogram_size,
data_size_t leaf_num_data,
data_size_t num_data,
bool use_all_features,
bool is_constant_hessian,
int num_workgroups,
cudaStream_t stream,
uint8_t* arg0,
uint8_t* arg1,
data_size_t arg2,
data_size_t* arg3,
data_size_t arg4,
score_t* arg5,
score_t* arg6,
score_t arg6_const,
char* arg7,
volatile int* arg8,
void* arg9,
size_t exp_workgroups_per_feature) {
if (histogram_size == 16) {
if (leaf_num_data == num_data) {
if (use_all_features) {
if (!is_constant_hessian)
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
} else {
if (!is_constant_hessian)
histogram16_fulldata<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram16_fulldata<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
}
} else {
if (use_all_features) {
// seems all features is always enabled, so this should be the same as fulldata
if (!is_constant_hessian)
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
} else {
if (!is_constant_hessian)
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram16<<<num_workgroups, 16, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
}
}
} else if (histogram_size == 64) {
if (leaf_num_data == num_data) {
if (use_all_features) {
if (!is_constant_hessian)
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
} else {
if (!is_constant_hessian)
histogram64_fulldata<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram64_fulldata<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
}
} else {
if (use_all_features) {
// seems all features is always enabled, so this should be the same as fulldata
if (!is_constant_hessian)
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
} else {
if (!is_constant_hessian)
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram64<<<num_workgroups, 64, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
}
}
} else {
if (leaf_num_data == num_data) {
if (use_all_features) {
if (!is_constant_hessian)
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
} else {
if (!is_constant_hessian)
histogram256_fulldata<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram256_fulldata<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
}
} else {
if (use_all_features) {
// seems all features is always enabled, so this should be the same as fulldata
if (!is_constant_hessian)
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
} else {
if (!is_constant_hessian)
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
else
histogram256<<<num_workgroups, 256, 0, stream>>>(arg0, arg1, arg2,
arg3, arg4, arg5,
arg6_const, arg7, arg8, static_cast<acc_type*>(arg9), exp_workgroups_per_feature);
}
}
}
}
} // namespace LightGBM
#endif // USE_CUDA

Просмотреть файл

@ -1,70 +0,0 @@
/*!
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_
#define LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_
#ifdef USE_CUDA
#include <chrono>
#include "kernels/histogram_16_64_256.hu" // kernel, acc_type, data_size_t, uchar, score_t
namespace LightGBM {
struct ThreadData {
// device id
int device_id;
// parameters for cuda_histogram
int histogram_size;
data_size_t leaf_num_data;
data_size_t num_data;
bool use_all_features;
bool is_constant_hessian;
int num_workgroups;
cudaStream_t stream;
uint8_t* device_features;
uint8_t* device_feature_masks;
data_size_t* device_data_indices;
score_t* device_gradients;
score_t* device_hessians;
score_t hessians_const;
char* device_subhistograms;
volatile int* sync_counters;
void* device_histogram_outputs;
size_t exp_workgroups_per_feature;
// cuda events
cudaEvent_t* kernel_start;
cudaEvent_t* kernel_wait_obj;
std::chrono::duration<double, std::milli>* kernel_input_wait_time;
// copy histogram
size_t output_size;
char* host_histogram_output;
cudaEvent_t* histograms_wait_obj;
};
void cuda_histogram(
int histogram_size,
data_size_t leaf_num_data,
data_size_t num_data,
bool use_all_features,
bool is_constant_hessian,
int num_workgroups,
cudaStream_t stream,
uint8_t* arg0,
uint8_t* arg1,
data_size_t arg2,
data_size_t* arg3,
data_size_t arg4,
score_t* arg5,
score_t* arg6,
score_t arg6_const,
char* arg7,
volatile int* arg8,
void* arg9,
size_t exp_workgroups_per_feature);
} // namespace LightGBM
#endif // USE_CUDA
#endif // LIGHTGBM_TREELEARNER_CUDA_KERNEL_LAUNCHER_H_

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,261 +0,0 @@
/*!
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_TREELEARNER_CUDA_TREE_LEARNER_H_
#define LIGHTGBM_TREELEARNER_CUDA_TREE_LEARNER_H_
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/dataset.h>
#include <LightGBM/feature_group.h>
#include <LightGBM/tree.h>
#include <string>
#include <cmath>
#include <cstdio>
#include <memory>
#include <random>
#include <vector>
#ifdef USE_CUDA
#include <cuda_runtime.h>
#endif
#include "feature_histogram.hpp"
#include "serial_tree_learner.h"
#include "data_partition.hpp"
#include "split_info.hpp"
#include "leaf_splits.hpp"
#ifdef USE_CUDA
#include <LightGBM/cuda/vector_cudahost.h>
#include "cuda_kernel_launcher.h"
using json11::Json;
namespace LightGBM {
/*!
* \brief CUDA-based parallel learning algorithm.
*/
class CUDATreeLearner: public SerialTreeLearner {
public:
explicit CUDATreeLearner(const Config* tree_config);
~CUDATreeLearner();
void Init(const Dataset* train_data, bool is_constant_hessian) override;
void ResetTrainingDataInner(const Dataset* train_data, bool is_constant_hessian, bool reset_multi_val_bin) override;
Tree* Train(const score_t* gradients, const score_t *hessians, bool is_first_tree) override;
void SetBaggingData(const Dataset* subset, const data_size_t* used_indices, data_size_t num_data) override {
SerialTreeLearner::SetBaggingData(subset, used_indices, num_data);
if (subset == nullptr && used_indices != nullptr) {
if (num_data != num_data_) {
use_bagging_ = true;
return;
}
}
use_bagging_ = false;
}
protected:
void BeforeTrain() override;
bool BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) override;
void FindBestSplits(const Tree* tree) override;
void Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) override;
void ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) override;
private:
typedef float gpu_hist_t;
/*!
* \brief Find the best number of workgroups processing one feature for maximizing efficiency
* \param leaf_num_data The number of data examples on the current leaf being processed
* \return Log2 of the best number for workgroups per feature, in range 0...kMaxLogWorkgroupsPerFeature
*/
int GetNumWorkgroupsPerFeature(data_size_t leaf_num_data);
/*!
* \brief Initialize GPU device
* \param num_gpu: number of maximum gpus
*/
void InitGPU(int num_gpu);
/*!
* \brief Allocate memory for GPU computation // alloc only
*/
void CountDenseFeatureGroups(); // compute num_dense_feature_group
void prevAllocateGPUMemory(); // compute CPU-side param calculation & Pin HostMemory
void AllocateGPUMemory();
/*!
* \ ResetGPUMemory
*/
void ResetGPUMemory();
/*!
* \ copy dense feature from CPU to GPU
*/
void copyDenseFeature();
/*!
* \brief Compute GPU feature histogram for the current leaf.
* Indices, gradients and Hessians have been copied to the device.
* \param leaf_num_data Number of data on current leaf
* \param use_all_features Set to true to not use feature masks, with a faster kernel
*/
void GPUHistogram(data_size_t leaf_num_data, bool use_all_features);
void SetThreadData(ThreadData* thread_data, int device_id, int histogram_size,
int leaf_num_data, bool use_all_features,
int num_workgroups, int exp_workgroups_per_feature) {
ThreadData* td = &thread_data[device_id];
td->device_id = device_id;
td->histogram_size = histogram_size;
td->leaf_num_data = leaf_num_data;
td->num_data = num_data_;
td->use_all_features = use_all_features;
td->is_constant_hessian = share_state_->is_constant_hessian;
td->num_workgroups = num_workgroups;
td->stream = stream_[device_id];
td->device_features = device_features_[device_id];
td->device_feature_masks = reinterpret_cast<uint8_t *>(device_feature_masks_[device_id]);
td->device_data_indices = device_data_indices_[device_id];
td->device_gradients = device_gradients_[device_id];
td->device_hessians = device_hessians_[device_id];
td->hessians_const = hessians_[0];
td->device_subhistograms = device_subhistograms_[device_id];
td->sync_counters = sync_counters_[device_id];
td->device_histogram_outputs = device_histogram_outputs_[device_id];
td->exp_workgroups_per_feature = exp_workgroups_per_feature;
td->kernel_start = &(kernel_start_[device_id]);
td->kernel_wait_obj = &(kernel_wait_obj_[device_id]);
td->kernel_input_wait_time = &(kernel_input_wait_time_[device_id]);
size_t output_size = num_gpu_feature_groups_[device_id] * dword_features_ * device_bin_size_ * hist_bin_entry_sz_;
size_t host_output_offset = offset_gpu_feature_groups_[device_id] * dword_features_ * device_bin_size_ * hist_bin_entry_sz_;
td->output_size = output_size;
td->host_histogram_output = reinterpret_cast<char*>(host_histogram_outputs_) + host_output_offset;
td->histograms_wait_obj = &(histograms_wait_obj_[device_id]);
}
/*!
* \brief Wait for GPU kernel execution and read histogram
* \param histograms Destination of histogram results from GPU.
*/
template <typename HistType>
void WaitAndGetHistograms(FeatureHistogram* leaf_histogram_array);
/*!
* \brief Construct GPU histogram asynchronously.
* Interface is similar to Dataset::ConstructHistograms().
* \param is_feature_used A predicate vector for enabling each feature
* \param data_indices Array of data example IDs to be included in histogram, will be copied to GPU.
* Set to nullptr to skip copy to GPU.
* \param num_data Number of data examples to be included in histogram
* \return true if GPU kernel is launched, false if GPU is not used
*/
bool ConstructGPUHistogramsAsync(
const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data);
/*! brief Log2 of max number of workgroups per feature*/
const int kMaxLogWorkgroupsPerFeature = 10; // 2^10
/*! brief Max total number of workgroups with preallocated workspace.
* If we use more than this number of workgroups, we have to reallocate subhistograms */
std::vector<int> preallocd_max_num_wg_;
/*! \brief True if bagging is used */
bool use_bagging_;
/*! \brief GPU command queue object */
std::vector<cudaStream_t> stream_;
/*! \brief total number of feature-groups */
int num_feature_groups_;
/*! \brief total number of dense feature-groups, which will be processed on GPU */
int num_dense_feature_groups_;
std::vector<int> num_gpu_feature_groups_;
std::vector<int> offset_gpu_feature_groups_;
/*! \brief On GPU we read one DWORD (4-byte) of features of one example once.
* With bin size > 16, there are 4 features per DWORD.
* With bin size <=16, there are 8 features per DWORD.
*/
int dword_features_;
/*! \brief Max number of bins of training data, used to determine
* which GPU kernel to use */
int max_num_bin_;
/*! \brief Used GPU kernel bin size (64, 256) */
int histogram_size_;
int device_bin_size_;
/*! \brief Size of histogram bin entry, depending if single or double precision is used */
size_t hist_bin_entry_sz_;
/*! \brief Indices of all dense feature-groups */
std::vector<int> dense_feature_group_map_;
/*! \brief Indices of all sparse feature-groups */
std::vector<int> sparse_feature_group_map_;
/*! \brief GPU memory object holding the training data */
std::vector<uint8_t*> device_features_;
/*! \brief GPU memory object holding the ordered gradient */
std::vector<score_t*> device_gradients_;
/*! \brief GPU memory object holding the ordered hessian */
std::vector<score_t*> device_hessians_;
/*! \brief A vector of feature mask. 1 = feature used, 0 = feature not used */
std::vector<char> feature_masks_;
/*! \brief GPU memory object holding the feature masks */
std::vector<char*> device_feature_masks_;
/*! \brief Pointer to pinned memory of feature masks */
char* ptr_pinned_feature_masks_ = nullptr;
/*! \brief GPU memory object holding indices of the leaf being processed */
std::vector<data_size_t*> device_data_indices_;
/*! \brief GPU memory object holding counters for workgroup coordination */
std::vector<int*> sync_counters_;
/*! \brief GPU memory object holding temporary sub-histograms per workgroup */
std::vector<char*> device_subhistograms_;
/*! \brief Host memory object for histogram output (GPU will write to Host memory directly) */
std::vector<void*> device_histogram_outputs_;
/*! \brief Host memory pointer for histogram outputs */
void *host_histogram_outputs_;
/*! CUDA waitlist object for waiting for data transfer before kernel execution */
std::vector<cudaEvent_t> kernel_wait_obj_;
/*! CUDA waitlist object for reading output histograms after kernel execution */
std::vector<cudaEvent_t> histograms_wait_obj_;
/*! CUDA Asynchronous waiting object for copying indices */
std::vector<cudaEvent_t> indices_future_;
/*! Asynchronous waiting object for copying gradients */
std::vector<cudaEvent_t> gradients_future_;
/*! Asynchronous waiting object for copying Hessians */
std::vector<cudaEvent_t> hessians_future_;
/*! Asynchronous waiting object for copying dense features */
std::vector<cudaEvent_t> features_future_;
// host-side buffer for converting feature data into featre4 data
int nthreads_; // number of Feature4* vector on host4_vecs_
std::vector<cudaEvent_t> kernel_start_;
std::vector<float> kernel_time_; // measure histogram kernel time
std::vector<std::chrono::duration<double, std::milli>> kernel_input_wait_time_;
int num_gpu_;
int allocated_num_data_; // allocated data instances
pthread_t **cpu_threads_; // pthread, 1 cpu thread / gpu
};
} // namespace LightGBM
#else // USE_CUDA
// When GPU support is not compiled in, quit with an error message
namespace LightGBM {
class CUDATreeLearner: public SerialTreeLearner {
public:
#pragma warning(disable : 4702)
explicit CUDATreeLearner(const Config* tree_config) : SerialTreeLearner(tree_config) {
Log::Fatal("CUDA Tree Learner was not enabled in this build.\n"
"Please recompile with CMake option -DUSE_CUDA=1");
}
};
} // namespace LightGBM
#endif // USE_CUDA
#endif // LIGHTGBM_TREELEARNER_CUDA_TREE_LEARNER_H_

Просмотреть файл

@ -276,7 +276,6 @@ void DataParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf, in
}
// instantiate template classes, otherwise linker cannot find the code
template class DataParallelTreeLearner<CUDATreeLearner>;
template class DataParallelTreeLearner<GPUTreeLearner>;
template class DataParallelTreeLearner<SerialTreeLearner>;

Просмотреть файл

@ -77,7 +77,6 @@ void FeatureParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(
}
// instantiate template classes, otherwise linker cannot find the code
template class FeatureParallelTreeLearner<CUDATreeLearner>;
template class FeatureParallelTreeLearner<GPUTreeLearner>;
template class FeatureParallelTreeLearner<SerialTreeLearner>;
} // namespace LightGBM

Просмотреть файл

@ -12,7 +12,6 @@
#include <memory>
#include <vector>
#include "cuda_tree_learner.h"
#include "gpu_tree_learner.h"
#include "serial_tree_learner.h"

Просмотреть файл

@ -344,15 +344,7 @@ void SerialTreeLearner::FindBestSplits(const Tree* tree, const std::set<int>* fo
}
bool use_subtract = parent_leaf_histogram_array_ != nullptr;
#ifdef USE_CUDA
if (LGBM_config_::current_learner == use_cpu_learner) {
SerialTreeLearner::ConstructHistograms(is_feature_used, use_subtract);
} else {
ConstructHistograms(is_feature_used, use_subtract);
}
#else
ConstructHistograms(is_feature_used, use_subtract);
#endif
FindBestSplitsFromHistograms(is_feature_used, use_subtract, tree);
}

Просмотреть файл

@ -211,7 +211,7 @@ class SerialTreeLearner: public TreeLearner {
std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>> ordered_gradients_;
/*! \brief hessians of current iteration, ordered for cache optimized, aligned to 4K page */
std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>> ordered_hessians_;
#elif defined(USE_CUDA) || defined(USE_CUDA_EXP)
#elif defined(USE_CUDA)
/*! \brief gradients of current iteration, ordered for cache optimized */
std::vector<score_t, CHAllocator<score_t>> ordered_gradients_;
/*! \brief hessians of current iteration, ordered for cache optimized */

Просмотреть файл

@ -4,7 +4,6 @@
*/
#include <LightGBM/tree_learner.h>
#include "cuda_tree_learner.h"
#include "gpu_tree_learner.h"
#include "linear_tree_learner.h"
#include "parallel_tree_learner.h"
@ -40,24 +39,14 @@ TreeLearner* TreeLearner::CreateTreeLearner(const std::string& learner_type, con
return new VotingParallelTreeLearner<GPUTreeLearner>(config);
}
} else if (device_type == std::string("cuda")) {
if (learner_type == std::string("serial")) {
return new CUDATreeLearner(config);
} else if (learner_type == std::string("feature")) {
return new FeatureParallelTreeLearner<CUDATreeLearner>(config);
} else if (learner_type == std::string("data")) {
return new DataParallelTreeLearner<CUDATreeLearner>(config);
} else if (learner_type == std::string("voting")) {
return new VotingParallelTreeLearner<CUDATreeLearner>(config);
}
} else if (device_type == std::string("cuda_exp")) {
if (learner_type == std::string("serial")) {
if (config->num_gpu == 1) {
return new CUDASingleGPUTreeLearner(config, boosting_on_cuda);
} else {
Log::Fatal("cuda_exp only supports training on a single GPU.");
Log::Fatal("Currently cuda version only supports training on a single GPU.");
}
} else {
Log::Fatal("cuda_exp only supports training on a single machine.");
Log::Fatal("Currently cuda version only supports training on a single machine.");
}
}
return nullptr;

Просмотреть файл

@ -501,7 +501,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf,
}
// instantiate template classes, otherwise linker cannot find the code
template class VotingParallelTreeLearner<CUDATreeLearner>;
template class VotingParallelTreeLearner<GPUTreeLearner>;
template class VotingParallelTreeLearner<SerialTreeLearner>;
} // namespace LightGBM

Просмотреть файл

@ -48,7 +48,7 @@ def test_basic(tmp_path):
assert bst.current_iteration() == 20
assert bst.num_trees() == 20
assert bst.num_model_per_iteration() == 1
if getenv('TASK', '') != 'cuda_exp':
if getenv('TASK', '') != 'cuda':
assert bst.lower_bound() == pytest.approx(-2.9040190126976606)
assert bst.upper_bound() == pytest.approx(3.3182142872462883)

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше