LightGBM/build-cran-package.sh

148 строки
4.3 KiB
Bash
Исходник Обычный вид История

#!/bin/sh

# [description]
#     Prepare a source distribution of the R package
#     to be submitted to CRAN.
#
#     The package is assembled in ./lightgbm_r (recreated from scratch on
#     every run). The script reads VERSION.txt, R-package/, include/ and src/
#     relative to the current directory, so it has to be started from the
#     directory that contains them.
#
# [usage]
#     sh build-cran-package.sh

# Abort on the first failing command.
set -e

ORIG_WD=$(pwd)
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# Always start from an empty staging directory.
if test -d "${TEMP_R_DIR}"; then
  rm -r "${TEMP_R_DIR}"
fi
mkdir -p "${TEMP_R_DIR}"

CURRENT_DATE=$(date +'%Y-%m-%d')

# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable.
# sed reads the file directly (no 'cat |' pipeline) so that a missing or
# unreadable VERSION.txt makes the assignment fail and 'set -e' stops the
# script instead of continuing with an empty version string.
LGB_VERSION=$(sed "s/rc/-/g" VERSION.txt)

# Move relevant files: the R package skeleton first, then the C++ headers
# and sources into its src/ directory.
cp -R R-package/* "${TEMP_R_DIR}"
cp -R include "${TEMP_R_DIR}/src/"
cp -R src/* "${TEMP_R_DIR}/src/"
Fix model locale issue and improve model R/W performance. (#3405) * Fix LightGBM models locale sensitivity and improve R/W performance. When Java is used, the default C++ locale is broken. This is true for Java providers that use the C API or even Python models that require JEP. This patch solves that issue making the model reads/writes insensitive to such settings. To achieve it, within the model read/write codebase: - C++ streams are imbued with the classic locale - Calls to functions that are dependent on the locale are replaced - The default locale is not changed! This approach means: - The user's locale is never tampered with, avoiding issues such as https://github.com/microsoft/LightGBM/issues/2979 with the previous approach https://github.com/microsoft/LightGBM/pull/2891 - Datasets can still be read according the user's locale - The model file has a single format independent of locale Changes: - Add CommonC namespace which provides faster locale-independent versions of Common's methods - Model code makes conversions through CommonC - Cleanup unused Common methods - Performance improvements. Use fast libraries for locale-agnostic conversion: - value->string: https://github.com/fmtlib/fmt - string->double: https://github.com/lemire/fast_double_parser (10x faster double parsing according to their benchmark) Bugfixes: - https://github.com/microsoft/LightGBM/issues/2500 - https://github.com/microsoft/LightGBM/issues/2890 - https://github.com/ninia/jep/issues/205 (as it is related to LGBM as well) * Align CommonC namespace * Add new external_libs/ to python setup * Try fast_double_parser fix #1 Testing commit e09e5aad828bcb16bea7ed0ed8322e019112fdbe If it works it should fix more LGBM builds * CMake: Attempt to link fmt without explicit PUBLIC tag * Exclude external_libs from linting * Add exernal_libs to MANIFEST.in * Set dynamic linking option for fmt. 
* linting issues * Try to fix lint includes * Try to pass fPIC with static fmt lib * Try CMake P_I_C option with fmt library * [R-package] Add CMake support for R and CRAN * Cleanup CMakeLists * Try fmt hack to remove stdout * Switch to header-only mode * Add PRIVATE argument to target_link_libraries * use fmt in header-only mode * Remove CMakeLists comment * Change OpenMP to PUBLIC linking in Mac * Update fmt submodule to 7.1.2 * Use fmt in header-only-mode * Remove fmt from CMakeLists.txt * Upgrade fast_double_parser to v0.2.0 * Revert "Add PRIVATE argument to target_link_libraries" This reverts commit 3dd45dde7b92531b2530ab54522bb843c56227a7. * Address James Lamb's comments * Update R-package/.Rbuildignore Co-authored-by: James Lamb <jaylamb20@gmail.com> * Upgrade to fast_double_parser v0.3.0 - Solaris support * Use legacy code only in Solaris * Fix lint issues * Fix comment * Address StrikerRUS's comments (solaris ifdef). * Change header guards Co-authored-by: James Lamb <jaylamb20@gmail.com>
2020-12-08 16:36:24 +03:00
# Vendor the third-party headers into the package's include tree.
# All expansions are quoted because TEMP_R_DIR is derived from the current
# working directory, which may contain whitespace.
cp \
  external_libs/fast_double_parser/include/fast_double_parser.h \
  "${TEMP_R_DIR}/src/include/LightGBM"

mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt"
cp \
  external_libs/fmt/include/fmt/*.h \
  "${TEMP_R_DIR}/src/include/LightGBM/fmt/"

# including only specific files from Eigen, to keep the R package
# small and avoid redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

modules="Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD"
# ${modules} is intentionally unquoted: it is a space-separated list that
# must be split into one word per Eigen module.
for eigen_module in ${modules}; do
  # top-level module header
  cp "external_libs/eigen/Eigen/${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"
  # 'Dense' gets no src/ copy (presumably because Eigen ships no src/Dense
  # directory for it -- confirm against the Eigen tree).
  if [ "${eigen_module}" != "Dense" ]; then
    mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
    cp -R "external_libs/eigen/Eigen/src/${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
  fi
done

# Shared implementation directories that are not modules themselves.
for eigen_support_dir in misc plugins; do
  mkdir -p "${EIGEN_R_DIR}/src/${eigen_support_dir}"
  cp -R "external_libs/eigen/Eigen/src/${eigen_support_dir}"/* "${EIGEN_R_DIR}/src/${eigen_support_dir}/"
done
# Everything below operates on the staged copy of the package.
# Quoted so that a path containing whitespace is passed as one argument.
cd "${TEMP_R_DIR}"

# Remove files not needed for CRAN
echo "Removing files not needed for CRAN"
rm -r inst/ pkgdown/
rm \
  src/install.libs.R \
  cran-comments.md \
  AUTOCONF_UBUNTU_VERSION \
  recreate-configure.sh

# files only used by the lightgbm CLI aren't needed for
# the R package
rm \
  src/application/application.cpp \
  src/include/LightGBM/application.h \
  src/main.cpp

# configure.ac and DESCRIPTION have placeholders for version
# and date so they don't have to be updated manually.
# 'sed -i.bak' (suffix attached to -i) is used because a bare '-i' is not
# accepted the same way by GNU and BSD sed.
sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" configure.ac
sed \
  -i.bak \
  -e "s/~~VERSION~~/${LGB_VERSION}/" \
  -e "s/~~DATE~~/${CURRENT_DATE}/" \
  DESCRIPTION

# The backups created by sed above are not part of the package.
rm -f configure.ac.bak DESCRIPTION.bak
# Blank out 'clang diagnostic', 'diag_suppress', 'GCC diagnostic',
# 'region', 'endregion', and 'warning' pragmas.
# This won't change the correctness of the code. CRAN does
# not allow you to use compiler flag '-Wno-unknown-pragmas' or
# pragmas that suppress warnings.
#
# Each matching line is replaced by an empty line rather than deleted, so
# line numbers in the sources stay unchanged.
#
# find hands the file names straight to sed ('-exec ... {} +') instead of
# going through 'for file in $(find ...)', which would split paths on
# whitespace.
echo "Removing unknown pragmas in headers"
find . \
  \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
  -exec sed \
    -i.bak \
    -e 's/^.*#pragma clang diagnostic.*$//' \
    -e 's/^.*#pragma diag_suppress.*$//' \
    -e 's/^.*#pragma GCC diagnostic.*$//' \
    -e 's/^.*#pragma region.*$//' \
    -e 's/^.*#pragma endregion.*$//' \
    -e 's/^.*#pragma warning.*$//' \
    {} +

# Remove the sed backups. The -name tests must be grouped: without the
# parentheses '-exec' binds only to the last '-name' (implicit AND has
# higher precedence than -o), so only '*.cpp.bak' files would be deleted.
find . \
  \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
  -exec rm -f {} +
Fix model locale issue and improve model R/W performance. (#3405) * Fix LightGBM models locale sensitivity and improve R/W performance. When Java is used, the default C++ locale is broken. This is true for Java providers that use the C API or even Python models that require JEP. This patch solves that issue making the model reads/writes insensitive to such settings. To achieve it, within the model read/write codebase: - C++ streams are imbued with the classic locale - Calls to functions that are dependent on the locale are replaced - The default locale is not changed! This approach means: - The user's locale is never tampered with, avoiding issues such as https://github.com/microsoft/LightGBM/issues/2979 with the previous approach https://github.com/microsoft/LightGBM/pull/2891 - Datasets can still be read according the user's locale - The model file has a single format independent of locale Changes: - Add CommonC namespace which provides faster locale-independent versions of Common's methods - Model code makes conversions through CommonC - Cleanup unused Common methods - Performance improvements. Use fast libraries for locale-agnostic conversion: - value->string: https://github.com/fmtlib/fmt - string->double: https://github.com/lemire/fast_double_parser (10x faster double parsing according to their benchmark) Bugfixes: - https://github.com/microsoft/LightGBM/issues/2500 - https://github.com/microsoft/LightGBM/issues/2890 - https://github.com/ninia/jep/issues/205 (as it is related to LGBM as well) * Align CommonC namespace * Add new external_libs/ to python setup * Try fast_double_parser fix #1 Testing commit e09e5aad828bcb16bea7ed0ed8322e019112fdbe If it works it should fix more LGBM builds * CMake: Attempt to link fmt without explicit PUBLIC tag * Exclude external_libs from linting * Add exernal_libs to MANIFEST.in * Set dynamic linking option for fmt. 
* linting issues * Try to fix lint includes * Try to pass fPIC with static fmt lib * Try CMake P_I_C option with fmt library * [R-package] Add CMake support for R and CRAN * Cleanup CMakeLists * Try fmt hack to remove stdout * Switch to header-only mode * Add PRIVATE argument to target_link_libraries * use fmt in header-only mode * Remove CMakeLists comment * Change OpenMP to PUBLIC linking in Mac * Update fmt submodule to 7.1.2 * Use fmt in header-only-mode * Remove fmt from CMakeLists.txt * Upgrade fast_double_parser to v0.2.0 * Revert "Add PRIVATE argument to target_link_libraries" This reverts commit 3dd45dde7b92531b2530ab54522bb843c56227a7. * Address James Lamb's comments * Update R-package/.Rbuildignore Co-authored-by: James Lamb <jaylamb20@gmail.com> * Upgrade to fast_double_parser v0.3.0 - Solaris support * Use legacy code only in Solaris * Fix lint issues * Fix comment * Address StrikerRUS's comments (solaris ifdef). * Change header guards Co-authored-by: James Lamb <jaylamb20@gmail.com>
2020-12-08 16:36:24 +03:00
# common.h refers to fmt/format.h and fast_double_parser.h through relative
# paths starting with '..'. Inside the R package those headers live under
# src/include/LightGBM/, so point the paths there instead.
# Both substitutions are applied in a single pass over the file.
sed \
  -i.bak \
  -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \
  -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \
  src/include/LightGBM/utils/common.h

# This backup is created after the '*.h.bak' sweep has already run, so it
# has to be removed here.
rm -f src/include/LightGBM/utils/common.h.bak
# A package built through 'configure' appears to always end up with a
# shared library named after the package (<packagename>.so/dll), i.e.
# 'lightgbm.so'. The R sources and NAMESPACE, however, refer to
# 'lib_lightgbm', the name this project has historically used for
# installation. Rewrite those references so they match the library that
# is actually produced.
echo "Changing lib_lightgbm to lightgbm"
for r_source in R/*.R NAMESPACE; do
  sed -i.bak -e 's/lib_lightgbm/lightgbm/' "${r_source}"
done
# 'processx' is listed as a 'Suggests' dependency in DESCRIPTION
# because it is used in install.libs.R, a file that is not
# included in the CRAN distribution of the package.
# Delete every DESCRIPTION line that mentions it.
sed \
  -i.bak \
  -e '/processx/d' \
  DESCRIPTION

# Remove the backups left behind by 'sed -i.bak', including the
# DESCRIPTION.bak created just above.
echo "Cleaning sed backup files"
rm -f R/*.R.bak NAMESPACE.bak DESCRIPTION.bak

# Build the source tarball from the directory the script was started in.
# Quoted so that a path containing whitespace is passed as one argument.
cd "${ORIG_WD}"

R CMD build \
  --keep-empty-dirs \
  lightgbm_r

echo "Done building R package"