зеркало из https://github.com/microsoft/LightGBM.git
Родитель
da174b8d06
Коммит
525f8b4b80
|
@ -2,7 +2,6 @@ option(USE_MPI "Enable MPI-based distributed learning" OFF)
|
|||
option(USE_OPENMP "Enable OpenMP" ON)
|
||||
option(USE_GPU "Enable GPU-accelerated training" OFF)
|
||||
option(USE_SWIG "Enable SWIG to generate Java API" OFF)
|
||||
option(USE_HDFS "Enable HDFS support (EXPERIMENTAL)" OFF)
|
||||
option(USE_TIMETAG "Set to ON to output time costs" OFF)
|
||||
option(USE_CUDA "Enable CUDA-accelerated training " OFF)
|
||||
option(USE_DEBUG "Set to ON for Debug mode" OFF)
|
||||
|
@ -294,21 +293,6 @@ if(USE_CUDA)
|
|||
endforeach()
|
||||
endif()
|
||||
|
||||
# Optional HDFS support (USE_HDFS=ON): print a deprecation notice, then locate
# JNI plus the HDFS header and library. All three lookups are marked REQUIRED.
if(USE_HDFS)
|
||||
message(
|
||||
DEPRECATION
|
||||
"HDFS support in LightGBM is deprecated, and will be removed in a future release.\
|
||||
See https://github.com/microsoft/LightGBM/issues/6436.
|
||||
"
|
||||
)
|
||||
# HDFS_INCLUDE_DIR and HDFS_LIB are the variables filled in by the two lookups
# below; the include directory and the USE_HDFS compile definition are then
# added with the directory-scoped commands.
find_package(JNI REQUIRED)
|
||||
find_path(HDFS_INCLUDE_DIR hdfs.h REQUIRED)
|
||||
find_library(HDFS_LIB NAMES hdfs REQUIRED)
|
||||
include_directories(${HDFS_INCLUDE_DIR})
|
||||
add_definitions(-DUSE_HDFS)
|
||||
# Libraries to link when HDFS is enabled: the HDFS library plus the JVM library.
# NOTE(review): JAVA_JVM_LIBRARY is presumably set by find_package(JNI) - confirm.
set(HDFS_CXX_LIBRARIES ${HDFS_LIB} ${JAVA_JVM_LIBRARY})
|
||||
endif()
|
||||
|
||||
include(CheckCXXSourceCompiles)
|
||||
check_cxx_source_compiles("
|
||||
#include <xmmintrin.h>
|
||||
|
@ -647,10 +631,6 @@ if(USE_CUDA)
|
|||
target_link_libraries(_lightgbm PRIVATE ${histograms})
|
||||
endif()
|
||||
|
||||
# When HDFS support is enabled, link the libraries listed in HDFS_CXX_LIBRARIES
# into lightgbm_objs with PUBLIC visibility.
if(USE_HDFS)
|
||||
target_link_libraries(lightgbm_objs PUBLIC ${HDFS_CXX_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
if(MINGW OR CYGWIN)
|
||||
target_link_libraries(lightgbm_objs PUBLIC ws2_32 iphlpapi)
|
||||
|
|
|
@ -40,8 +40,6 @@
|
|||
# Compile CUDA version.
|
||||
# --gpu
|
||||
# Compile GPU version.
|
||||
# --hdfs
|
||||
# Compile HDFS version.
|
||||
# --integrated-opencl
|
||||
# Compile integrated OpenCL version.
|
||||
# --mingw
|
||||
|
@ -148,9 +146,6 @@ while [ $# -gt 0 ]; do
|
|||
--gpu)
|
||||
BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.USE_GPU=ON"
|
||||
;;
|
||||
--hdfs)
|
||||
BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.USE_HDFS=ON"
|
||||
;;
|
||||
--integrated-opencl)
|
||||
BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.__INTEGRATE_OPENCL=ON"
|
||||
;;
|
||||
|
|
|
@ -628,41 +628,6 @@ Windows
|
|||
The CUDA version is not supported on Windows.
|
||||
Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows.
|
||||
|
||||
Build HDFS Version
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. warning::
|
||||
HDFS support in LightGBM is deprecated, and will be removed in a future release.
|
||||
See https://github.com/microsoft/LightGBM/issues/6436.
|
||||
|
||||
The HDFS version of LightGBM was tested on CDH-5.14.4 cluster.
|
||||
|
||||
Linux
|
||||
^^^^^
|
||||
|
||||
On Linux, an HDFS version of LightGBM can be built using **CMake** and **gcc**.
|
||||
|
||||
1. Install `CMake`_.
|
||||
|
||||
2. Run the following commands:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
git clone --recursive https://github.com/microsoft/LightGBM
|
||||
cd LightGBM
|
||||
cmake -B build -S . -DUSE_HDFS=ON
|
||||
# if you have installed HDFS to a customized location, you should specify paths to HDFS headers (hdfs.h) and library (libhdfs.so) like the following:
|
||||
# cmake \
|
||||
# -DUSE_HDFS=ON \
|
||||
# -DHDFS_LIB="/opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/lib64/libhdfs.so" \
|
||||
# -DHDFS_INCLUDE_DIR="/opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/include/" \
|
||||
# ..
|
||||
cmake --build build -j4
|
||||
|
||||
**Note**: glibc >= 2.14 is required.
|
||||
|
||||
**Note**: In some rare cases you may need to install the OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp``).
|
||||
|
||||
Build Java Wrapper
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
|
|
@ -29,7 +29,6 @@ $(function() {
|
|||
'#build-mpi-version',
|
||||
'#build-gpu-version',
|
||||
'#build-cuda-version',
|
||||
'#build-hdfs-version',
|
||||
'#build-java-wrapper',
|
||||
'#build-c-unit-tests'
|
||||
];
|
||||
|
|
|
@ -155,23 +155,6 @@ All requirements from `Build from Sources section <#build-from-sources>`__ apply
|
|||
|
||||
To use the CUDA version within Python, pass ``{"device": "cuda"}`` in parameters.
|
||||
|
||||
Build HDFS Version
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. warning::
|
||||
HDFS support in LightGBM is deprecated, and will be removed in a future release.
|
||||
See https://github.com/microsoft/LightGBM/issues/6436.
|
||||
|
||||
.. code:: sh
|
||||
|
||||
pip install lightgbm --config-settings=cmake.define.USE_HDFS=ON
|
||||
|
||||
All requirements from `Build from Sources section <#build-from-sources>`__ apply for this installation option as well.
|
||||
|
||||
The **HDFS** library is needed: installation details can be found in the `Installation Guide <https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#build-hdfs-version>`__.
|
||||
|
||||
Note that the installation process of the HDFS version was tested only on **Linux**.
|
||||
|
||||
Build with MinGW-w64 on Windows
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
@ -247,8 +230,6 @@ Run ``sh ./build-python.sh install --gpu`` to enable GPU support. All requiremen
|
|||
|
||||
Run ``sh ./build-python.sh install --cuda`` to enable CUDA support. All requirements from `Build CUDA Version section <#build-cuda-version>`__ apply for this installation option as well.
|
||||
|
||||
Run ``sh ./build-python.sh install --hdfs`` to enable HDFS support. All requirements from `Build HDFS Version section <#build-hdfs-version>`__ apply for this installation option as well.
|
||||
|
||||
Run ``sh ./build-python.sh install --bit32`` if you want to use the 32-bit version. All requirements from `Build 32-bit Version with 32-bit Python section <#build-32-bit-version-with-32-bit-python>`__ apply for this installation option as well.
|
||||
|
||||
Run ``sh ./build-python.sh install --time-costs``, if you want to output time costs for different internal routines. All requirements from `Build with Time Costs Output section <#build-with-time-costs-output>`__ apply for this installation option as well.
|
||||
|
|
|
@ -11,10 +11,6 @@
|
|||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
#ifdef USE_HDFS
|
||||
#include <hdfs.h>
|
||||
#endif
|
||||
|
||||
namespace LightGBM {
|
||||
|
||||
struct LocalFile : VirtualFileReader, VirtualFileWriter {
|
||||
|
@ -56,142 +52,17 @@ struct LocalFile : VirtualFileReader, VirtualFileWriter {
|
|||
const std::string mode_;
|
||||
};
|
||||
|
||||
const char* kHdfsProto = "hdfs://";
|
||||
|
||||
#ifdef USE_HDFS
|
||||
const size_t kHdfsProtoLength = static_cast<size_t>(strlen(kHdfsProto));
|
||||
|
||||
struct HDFSFile : VirtualFileReader, VirtualFileWriter {
|
||||
HDFSFile(const std::string& filename, int flags)
|
||||
: filename_(filename), flags_(flags) {}
|
||||
~HDFSFile() {
|
||||
if (file_ != NULL) {
|
||||
hdfsCloseFile(fs_, file_);
|
||||
}
|
||||
}
|
||||
|
||||
bool Init() {
|
||||
if (file_ == NULL) {
|
||||
if (fs_ == NULL) {
|
||||
fs_ = GetHDFSFileSystem(filename_);
|
||||
}
|
||||
if (fs_ != NULL &&
|
||||
(flags_ == O_WRONLY || 0 == hdfsExists(fs_, filename_.c_str()))) {
|
||||
file_ = hdfsOpenFile(fs_, filename_.c_str(), flags_, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
return file_ != NULL;
|
||||
}
|
||||
|
||||
bool Exists() const {
|
||||
if (fs_ == NULL) {
|
||||
fs_ = GetHDFSFileSystem(filename_);
|
||||
}
|
||||
return fs_ != NULL && 0 == hdfsExists(fs_, filename_.c_str());
|
||||
}
|
||||
|
||||
size_t Read(void* data, size_t bytes) const {
|
||||
return FileOperation<void*>(data, bytes, &hdfsRead);
|
||||
}
|
||||
|
||||
size_t Write(const void* data, size_t bytes) const {
|
||||
return FileOperation<const void*>(data, bytes, &hdfsWrite);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename BufferType>
|
||||
using fileOp = tSize (*)(hdfsFS, hdfsFile, BufferType, tSize);
|
||||
|
||||
template <typename BufferType>
|
||||
inline size_t FileOperation(BufferType data, size_t bytes,
|
||||
fileOp<BufferType> op) const {
|
||||
char* buffer = const_cast<char*>(static_cast<const char*>(data));
|
||||
size_t remain = bytes;
|
||||
while (remain != 0) {
|
||||
size_t nmax = static_cast<size_t>(std::numeric_limits<tSize>::max());
|
||||
tSize ret = op(fs_, file_, buffer, std::min(nmax, remain));
|
||||
if (ret > 0) {
|
||||
size_t n = static_cast<size_t>(ret);
|
||||
remain -= n;
|
||||
buffer += n;
|
||||
} else if (ret == 0) {
|
||||
break;
|
||||
} else if (errno != EINTR) {
|
||||
Log::Fatal("Failed HDFS file operation [%s]", strerror(errno));
|
||||
}
|
||||
}
|
||||
return bytes - remain;
|
||||
}
|
||||
|
||||
static hdfsFS GetHDFSFileSystem(const std::string& uri) {
|
||||
size_t end = uri.find("/", kHdfsProtoLength);
|
||||
if (uri.find(kHdfsProto) != 0 || end == std::string::npos) {
|
||||
Log::Warning("Bad HDFS uri, no namenode found [%s]", uri.c_str());
|
||||
return NULL;
|
||||
}
|
||||
std::string hostport = uri.substr(kHdfsProtoLength, end - kHdfsProtoLength);
|
||||
if (fs_cache_.count(hostport) == 0) {
|
||||
fs_cache_[hostport] = MakeHDFSFileSystem(hostport);
|
||||
}
|
||||
return fs_cache_[hostport];
|
||||
}
|
||||
|
||||
static hdfsFS MakeHDFSFileSystem(const std::string& hostport) {
|
||||
std::istringstream iss(hostport);
|
||||
std::string host;
|
||||
tPort port = 0;
|
||||
std::getline(iss, host, ':');
|
||||
iss >> port;
|
||||
hdfsFS fs = iss.eof() ? hdfsConnect(host.c_str(), port) : NULL;
|
||||
if (fs == NULL) {
|
||||
Log::Warning("Could not connect to HDFS namenode [%s]", hostport.c_str());
|
||||
}
|
||||
return fs;
|
||||
}
|
||||
|
||||
mutable hdfsFS fs_ = NULL;
|
||||
hdfsFile file_ = NULL;
|
||||
const std::string filename_;
|
||||
const int flags_;
|
||||
static std::unordered_map<std::string, hdfsFS> fs_cache_;
|
||||
};
|
||||
|
||||
// Storage for the connection cache shared by all HDFSFile instances; it starts empty.
std::unordered_map<std::string, hdfsFS> HDFSFile::fs_cache_{};
|
||||
|
||||
#define WITH_HDFS(x) x
|
||||
#else
|
||||
#define WITH_HDFS(x) Log::Fatal("HDFS support is not enabled")
|
||||
#endif // USE_HDFS
|
||||
|
||||
std::unique_ptr<VirtualFileReader> VirtualFileReader::Make(
|
||||
const std::string& filename) {
|
||||
#ifdef USE_HDFS
|
||||
if (0 == filename.find(kHdfsProto)) {
|
||||
WITH_HDFS(return std::unique_ptr<VirtualFileReader>(
|
||||
new HDFSFile(filename, O_RDONLY)));
|
||||
}
|
||||
#endif
|
||||
return std::unique_ptr<VirtualFileReader>(new LocalFile(filename, "rb"));
|
||||
}
|
||||
|
||||
std::unique_ptr<VirtualFileWriter> VirtualFileWriter::Make(
|
||||
const std::string& filename) {
|
||||
#ifdef USE_HDFS
|
||||
if (0 == filename.find(kHdfsProto)) {
|
||||
WITH_HDFS(return std::unique_ptr<VirtualFileWriter>(
|
||||
new HDFSFile(filename, O_WRONLY)));
|
||||
}
|
||||
#endif
|
||||
return std::unique_ptr<VirtualFileWriter>(new LocalFile(filename, "wb"));
|
||||
}
|
||||
|
||||
bool VirtualFileWriter::Exists(const std::string& filename) {
|
||||
#ifdef USE_HDFS
|
||||
if (0 == filename.find(kHdfsProto)) {
|
||||
WITH_HDFS(HDFSFile file(filename, O_RDONLY); return file.Exists());
|
||||
}
|
||||
#endif
|
||||
LocalFile file(filename, "rb");
|
||||
return file.Exists();
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче