LightGBM/build_r.R

259 строки
7.0 KiB
R
Исходник Обычный вид История

# For macOS users who have decided to use gcc
# (replace 8 with version of gcc installed on your machine)
# NOTE: your gcc / g++ from Homebrew is probably in /usr/local/bin
#export CXX=/usr/local/bin/g++-8 CC=/usr/local/bin/gcc-8
# Sys.setenv("CXX" = "/usr/local/bin/g++-8")
# Sys.setenv("CC" = "/usr/local/bin/gcc-8")
# Command-line flags understood by this script. Anything else is rejected
# further down.
recognized_args <- c("--skip-install", "--use-gpu", "--use-mingw", "--use-msys2")
args <- commandArgs(trailingOnly = TRUE)

# Scratch locations (relative to the current working directory) where the
# package is assembled before it is built
TEMP_R_DIR <- file.path(getwd(), "lightgbm_r")
TEMP_SOURCE_DIR <- file.path(TEMP_R_DIR, "src")

# Translate the flags into booleans used by the rest of the script
INSTALL_AFTER_BUILD <- !is.element("--skip-install", args)
USING_GPU <- is.element("--use-gpu", args)
USING_MINGW <- is.element("--use-mingw", args)
USING_MSYS2 <- is.element("--use-msys2", args)

# Fail fast on anything that is not a known flag
unrecognized_args <- setdiff(args, recognized_args)
if (length(unrecognized_args) > 0L) {
    stop("Unrecognized arguments: ", toString(unrecognized_args))
}
# [description] Rewrite assignments of the form `<variable_name> <- ...` in
#               install.libs.R code so they assign `value` instead. Used to
#               bake command-line flags into that file.
# [param] variable_name Name of the variable whose assignment is rewritten.
#                       It is used as part of a regular expression.
# [param] value         New right-hand side; converted with as.character().
# [param] content       Character vector of code lines.
# [return] `content` with every match, from the variable name through the
#          end of the string, replaced by the new assignment.
.replace_flag <- function(variable_name, value, content) {
    assignment_regex <- paste0(variable_name, " <-.*")
    new_assignment <- paste0(variable_name, " <- ", as.character(value))
    gsub(pattern = assignment_regex, replacement = new_assignment, x = content)
}
# Read the install.libs.R template, then bake each command-line flag into it
install_libs_content <- readLines(file.path("R-package", "src", "install.libs.R"))
flag_values <- list(
    use_gpu = USING_GPU,
    use_mingw = USING_MINGW,
    use_msys2 = USING_MSYS2
)
for (flag_name in names(flag_values)) {
    install_libs_content <- .replace_flag(
        flag_name,
        flag_values[[flag_name]],
        install_libs_content
    )
}
# [description] File operations such as file.copy() report a problem by
#               returning FALSE instead of raising an error. This helper
#               converts such a result into an R error.
# [param] res Logical vector returned by a file operation.
# [return] NULL, invisibly, when every element of `res` is TRUE.
.handle_result <- function(res) {
    every_step_ok <- all(res)
    if (every_step_ok) {
        return(invisible(NULL))
    }
    stop("Copying files failed!")
}
# [description] Run an external command and turn a non-zero exit status
#               into an R error.
#
# system() will not raise an R exception if the process called
# fails. Wrapping it here to get that behavior.
#
# system() introduces a lot of overhead, at least on Windows,
# so trying processx if it is available
#
# [param] cmd    Program to run.
# [param] args   Character vector of arguments. They are handed over as-is
#                (joined with single spaces for system(), passed verbatim
#                to processx), so no quoting is added here.
# [param] strict If TRUE (the default), a non-zero exit code raises an error.
# [return] The command's exit code, invisibly.
.run_shell_command <- function(cmd, args, strict = TRUE) {
    on_windows <- .Platform$OS.type == "windows"
    # 'processx' is an optional dependency that is only used on Windows.
    # requireNamespace() checks for it without attaching it to the search
    # path, and the check is skipped entirely on other platforms.
    use_processx <- on_windows && requireNamespace("processx", quietly = TRUE)
    if (use_processx) {
        result <- processx::run(
            command = cmd
            , args = args
            , windows_verbatim_args = TRUE
            , error_on_status = FALSE
            , echo = TRUE
        )
        exit_code <- result$status
    } else {
        if (on_windows) {
            message(paste0(
                "Using system() to run shell commands. Installing "
                , "'processx' with install.packages('processx') might "
                , "make this faster."
            ))
        }
        cmd <- paste0(cmd, " ", paste0(args, collapse = " "))
        exit_code <- system(cmd)
    }
    # Anything other than a clean 0 counts as a failure, including a
    # missing (NA) status
    command_succeeded <- isTRUE(exit_code == 0L)
    if (!command_succeeded && isTRUE(strict)) {
        stop(paste0("Command failed with exit code: ", exit_code))
    }
    return(invisible(exit_code))
}
# Start from a clean scratch folder: drop whatever is left there, then
# create it again
unlink(x = TEMP_R_DIR, recursive = TRUE)
dir.create(TEMP_R_DIR)

# Copy the contents of R-package/ into the scratch folder
.handle_result(
    file.copy(
        from = "R-package/./",
        to = paste0(TEMP_R_DIR, "/"),
        recursive = TRUE,
        overwrite = TRUE
    )
)

# Replace the copied src/install.libs.R with the version that has the
# command-line flags baked in
patched_install_libs <- file.path(TEMP_SOURCE_DIR, "install.libs.R")
writeLines(text = install_libs_content, con = patched_install_libs)
# Add blank Makevars files: the copies kept under inst/ are placed
# in the package's src/ directory
for (makevars_name in c("Makevars", "Makevars.win")) {
    .handle_result(
        file.copy(
            from = file.path(TEMP_R_DIR, "inst", makevars_name),
            to = file.path(TEMP_SOURCE_DIR, makevars_name),
            overwrite = TRUE
        )
    )
}

# Copy the top-level include/ and src/ directories into the package's
# src/ directory
for (source_tree in c("include/", "src/")) {
    .handle_result(
        file.copy(
            from = source_tree,
            to = paste0(TEMP_SOURCE_DIR, "/"),
            recursive = TRUE,
            overwrite = TRUE
        )
    )
}
# compute/ is a submodule with boost, only needed if
# building the R package with GPU support
if (USING_GPU) {
    .handle_result(
        file.copy(
            from = "compute/",
            to = paste0(TEMP_SOURCE_DIR, "/"),
            recursive = TRUE,
            overwrite = TRUE
        )
    )
}

# CMakeLists.txt goes into inst/bin/
cmake_target_dir <- file.path(TEMP_R_DIR, "inst", "bin/")
.handle_result(
    file.copy(from = "CMakeLists.txt", to = cmake_target_dir, overwrite = TRUE)
)

# remove CRAN-specific files
cran_only_files <- c(
    file.path(TEMP_R_DIR, c("cleanup", "configure", "configure.ac", "configure.win")),
    file.path(TEMP_SOURCE_DIR, c("Makevars.in", "Makevars.win.in"))
)
.handle_result(file.remove(cran_only_files))
Fix model locale issue and improve model R/W performance. (#3405) * Fix LightGBM models locale sensitivity and improve R/W performance. When Java is used, the default C++ locale is broken. This is true for Java providers that use the C API or even Python models that require JEP. This patch solves that issue making the model reads/writes insensitive to such settings. To achieve it, within the model read/write codebase: - C++ streams are imbued with the classic locale - Calls to functions that are dependent on the locale are replaced - The default locale is not changed! This approach means: - The user's locale is never tampered with, avoiding issues such as https://github.com/microsoft/LightGBM/issues/2979 with the previous approach https://github.com/microsoft/LightGBM/pull/2891 - Datasets can still be read according the user's locale - The model file has a single format independent of locale Changes: - Add CommonC namespace which provides faster locale-independent versions of Common's methods - Model code makes conversions through CommonC - Cleanup unused Common methods - Performance improvements. Use fast libraries for locale-agnostic conversion: - value->string: https://github.com/fmtlib/fmt - string->double: https://github.com/lemire/fast_double_parser (10x faster double parsing according to their benchmark) Bugfixes: - https://github.com/microsoft/LightGBM/issues/2500 - https://github.com/microsoft/LightGBM/issues/2890 - https://github.com/ninia/jep/issues/205 (as it is related to LGBM as well) * Align CommonC namespace * Add new external_libs/ to python setup * Try fast_double_parser fix #1 Testing commit e09e5aad828bcb16bea7ed0ed8322e019112fdbe If it works it should fix more LGBM builds * CMake: Attempt to link fmt without explicit PUBLIC tag * Exclude external_libs from linting * Add exernal_libs to MANIFEST.in * Set dynamic linking option for fmt. 
* linting issues * Try to fix lint includes * Try to pass fPIC with static fmt lib * Try CMake P_I_C option with fmt library * [R-package] Add CMake support for R and CRAN * Cleanup CMakeLists * Try fmt hack to remove stdout * Switch to header-only mode * Add PRIVATE argument to target_link_libraries * use fmt in header-only mode * Remove CMakeLists comment * Change OpenMP to PUBLIC linking in Mac * Update fmt submodule to 7.1.2 * Use fmt in header-only-mode * Remove fmt from CMakeLists.txt * Upgrade fast_double_parser to v0.2.0 * Revert "Add PRIVATE argument to target_link_libraries" This reverts commit 3dd45dde7b92531b2530ab54522bb843c56227a7. * Address James Lamb's comments * Update R-package/.Rbuildignore Co-authored-by: James Lamb <jaylamb20@gmail.com> * Upgrade to fast_double_parser v0.3.0 - Solaris support * Use legacy code only in Solaris * Fix lint issues * Fix comment * Address StrikerRUS's comments (solaris ifdef). * Change header guards Co-authored-by: James Lamb <jaylamb20@gmail.com>
2020-12-08 16:36:24 +03:00
#------------#
# submodules #
#------------#
.handle_result(
    file.copy(
        from = "external_libs/",
        to = paste0(TEMP_SOURCE_DIR, "/"),
        recursive = TRUE,
        overwrite = TRUE
    )
)

# copy files into the place CMake expects: each of these files is copied
# one level down, into src/src/, and the original is then deleted
r_glue_files <- c("lightgbm_R.cpp", "lightgbm_R.h", "R_object_helper.h")
for (glue_file in r_glue_files) {
    current_location <- file.path(TEMP_SOURCE_DIR, glue_file)
    cmake_location <- file.path(TEMP_SOURCE_DIR, "src", glue_file)
    .handle_result(
        file.copy(from = current_location, to = cmake_location, overwrite = TRUE)
    )
    .handle_result(file.remove(current_location))
}

# make-r-def.R is placed in inst/bin/ as well
.handle_result(
    file.copy(
        from = file.path("R-package", "inst", "make-r-def.R"),
        to = file.path(TEMP_R_DIR, "inst", "bin/"),
        overwrite = TRUE
    )
)
# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable, so "rc" in the first line of VERSION.txt
# is swapped for "-"
LGB_VERSION <- gsub("rc", "-", readLines("VERSION.txt")[1L])

# DESCRIPTION has placeholders for version
# and date so it doesn't have to be updated manually.
# Fill both in and write the file back in place.
DESCRIPTION_FILE <- file.path(TEMP_R_DIR, "DESCRIPTION")
build_date <- as.character(Sys.Date())
description_contents <- readLines(DESCRIPTION_FILE)
description_contents <- gsub("~~VERSION~~", LGB_VERSION, description_contents)
description_contents <- gsub("~~DATE~~", build_date, description_contents)
writeLines(description_contents, DESCRIPTION_FILE)
# NOTE: --keep-empty-dirs is necessary to keep the deep paths expected
#       by CMake while also meeting the CRAN req to create object files
#       on demand
.run_shell_command("R", c("CMD", "build", TEMP_R_DIR, "--keep-empty-dirs"))

# Install the package
#
# The tarball's file name is derived from the version recorded in
# DESCRIPTION. The pattern is anchored to the start of the line so that
# only the 'Version' field itself can match, not another line that happens
# to contain the text "Version: ".
description_lines <- readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
version_lines <- grep("^Version: ", description_lines, value = TRUE)
if (length(version_lines) != 1L) {
    stop(sprintf(
        "Expected exactly one 'Version: ' line in DESCRIPTION, found %d"
        , length(version_lines)
    ))
}
version <- trimws(sub("^Version: ", "", version_lines))

# The built tarball is expected in the current working directory
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))
install_cmd <- "R"
install_args <- c("CMD", "INSTALL", "--no-multiarch", "--with-keep.source", tarball)
if (INSTALL_AFTER_BUILD) {
    .run_shell_command(install_cmd, install_args)
} else {
    # --skip-install was given: only tell the user how to install it later
    cmd <- paste0(install_cmd, " ", paste0(install_args, collapse = " "))
    print(sprintf("Skipping installation. Install the package with command '%s'", cmd))
}