2020-07-29 05:05:59 +03:00
|
|
|
#!/bin/sh
#
# [description]
#     Assemble a source distribution of the R package
#     in the form that is submitted to CRAN.
#
# [usage]
#     sh build-cran-package.sh

# stop at the first command that exits with a non-zero status
set -o errexit
|
|
|
|
|
|
|
|
# Directory the script was started from; the script returns here before
# running the build.
ORIG_WD=$(pwd)

# Staging directory that receives the CRAN-ready copy of the package.
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# Always start from an empty staging directory.
# Every expansion is quoted: with an unquoted path containing whitespace,
# 'test -d' receives several words, errors out, and the stale directory
# would be kept instead of being removed.
if [ -d "${TEMP_R_DIR}" ]; then
    rm -r "${TEMP_R_DIR}"
fi
mkdir -p "${TEMP_R_DIR}"
|
|
|
|
|
2020-08-25 22:36:37 +03:00
|
|
|
# Build date in YYYY-MM-DD form; substituted for the '~~DATE~~' placeholder.
CURRENT_DATE=$(date +'%Y-%m-%d')

# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable
#
# sed reads VERSION.txt itself rather than through 'cat |': in a pipeline only
# the last command's status counts, so a missing VERSION.txt would go
# unnoticed and the version would silently be empty. Reading the file directly
# makes the assignment fail, which 'set -e' turns into an abort.
LGB_VERSION=$(sed "s/rc/-/g" VERSION.txt)
|
|
|
|
|
2020-07-29 05:05:59 +03:00
|
|
|
# move relevant files
#
# Destination paths are quoted so that a checkout located under a path
# containing whitespace is passed to cp/mkdir as a single argument.

# contents of R-package/ form the root of the staged package
cp -R R-package/* "${TEMP_R_DIR}"

# the 'include' directory and the contents of 'src' go under the package's src/
cp -R include "${TEMP_R_DIR}/src/"
cp -R src/* "${TEMP_R_DIR}/src/"

# single header from external_libs/fast_double_parser
cp \
    external_libs/fast_double_parser/include/fast_double_parser.h \
    "${TEMP_R_DIR}/src/include/LightGBM"

# all top-level '*.h' headers from external_libs/fmt
mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt"
cp \
    external_libs/fmt/include/fmt/*.h \
    "${TEMP_R_DIR}/src/include/LightGBM/fmt/"
|
|
|
|
|
2021-01-18 15:44:38 +03:00
|
|
|
# including only specific files from Eigen, to keep the R package
# small and avoid redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

# Names of the Eigen entry-point files to copy. ${modules} is deliberately
# left unquoted in the 'for' so that it is split into one word per module;
# every other expansion is quoted so paths with whitespace survive.
modules="Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD"
for eigen_module in ${modules}; do
    cp "external_libs/eigen/Eigen/${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"
    # 'Dense' is the one module for which no src/<module>/ directory is copied
    if [ "${eigen_module}" != "Dense" ]; then
        mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
        cp -R "external_libs/eigen/Eigen/src/${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
    fi
done

# two further src/ subdirectories that are copied unconditionally
for eigen_subdir in misc plugins; do
    mkdir -p "${EIGEN_R_DIR}/src/${eigen_subdir}"
    cp -R "external_libs/eigen/Eigen/src/${eigen_subdir}"/* "${EIGEN_R_DIR}/src/${eigen_subdir}/"
done
|
2021-01-18 15:44:38 +03:00
|
|
|
|
2020-07-29 05:05:59 +03:00
|
|
|
# Work inside the staging directory from here on. The path is quoted so that
# a directory name containing whitespace is passed to 'cd' as one argument.
cd "${TEMP_R_DIR}"

# Remove files not needed for CRAN
echo "Removing files not needed for CRAN"
rm -r \
    inst/ \
    pkgdown/
rm \
    src/install.libs.R \
    cran-comments.md \
    AUTOCONF_UBUNTU_VERSION \
    recreate-configure.sh

# files only used by the lightgbm CLI aren't needed for
# the R package
rm \
    src/application/application.cpp \
    src/include/LightGBM/application.h \
    src/main.cpp
|
|
|
|
|
2020-08-25 22:36:37 +03:00
|
|
|
# configure.ac and DESCRIPTION contain '~~VERSION~~' (and, for DESCRIPTION,
# '~~DATE~~') placeholders. They are filled in here so that neither file
# has to be edited by hand.
for versioned_file in configure.ac DESCRIPTION; do
    sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" "${versioned_file}"
done
sed -i.bak -e "s/~~DATE~~/${CURRENT_DATE}/" DESCRIPTION
|
|
|
|
|
2020-10-16 22:03:06 +03:00
|
|
|
# Remove 'region', 'endregion', and 'warning' pragmas, together with the
# 'clang diagnostic', 'GCC diagnostic' and 'diag_suppress' pragmas.
# This won't change the correctness of the code. CRAN does
# not allow you to use compiler flag '-Wno-unknown-pragmas' or
# pragmas that suppress warnings.
#
# remove_unknown_pragmas DIR
#     In every '*.h', '*.hpp' and '*.cpp' file below DIR, replace each line
#     containing one of the pragmas above with an empty line, then delete
#     the '.bak' backups written by 'sed -i.bak'.
#
# The '-name' tests are grouped with '\( ... \)'. In find, the implicit AND
# binds tighter than '-o', so without the grouping '-exec' would apply to the
# last '-name' test only (and '*.h.bak' / '*.hpp.bak' would be left behind).
# find hands the paths straight to sed/rm, so file names containing
# whitespace are not split the way 'for f in $(find ...)' would split them.
remove_unknown_pragmas() {
    find "$1" \
        \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
        -exec sed \
            -i.bak \
            -e 's/^.*#pragma clang diagnostic.*$//' \
            -e 's/^.*#pragma diag_suppress.*$//' \
            -e 's/^.*#pragma GCC diagnostic.*$//' \
            -e 's/^.*#pragma region.*$//' \
            -e 's/^.*#pragma endregion.*$//' \
            -e 's/^.*#pragma warning.*$//' \
            {} +
    find "$1" \
        \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
        -exec rm {} +
}

echo "Removing unknown pragmas in headers"
remove_unknown_pragmas .
|
2020-07-29 05:05:59 +03:00
|
|
|
|
2020-12-08 16:36:24 +03:00
|
|
|
# In common.h, rewrite the relative ('..'-prefixed) paths that end in
# 'fmt/format.h' and 'fast_double_parser.h' to 'LightGBM/fmt/format.h' and
# 'LightGBM/fast_double_parser.h' respectively.
common_header=src/include/LightGBM/utils/common.h
sed -i.bak -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' "${common_header}"
sed -i.bak -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' "${common_header}"
|
|
|
|
|
2020-07-29 05:05:59 +03:00
|
|
|
# A 'configure'-based R package build seems to always produce a shared
# library named after the package (<packagename>.so/dll). The package
# sources refer to 'lib_lightgbm' instead, in keeping with how this project
# has historically handled installation, so the first 'lib_lightgbm' on each
# line of the R sources and of NAMESPACE is rewritten to 'lightgbm'.
echo "Changing lib_lightgbm to lightgbm"
for rename_target in R/*.R NAMESPACE; do
    sed -i.bak -e 's/lib_lightgbm/lightgbm/' "${rename_target}"
done
|
|
|
|
|
|
|
|
# DESCRIPTION lists 'processx' as a 'Suggests' dependency only because
# install.libs.R uses it. That file is not included in the CRAN
# distribution of the package, so every DESCRIPTION line mentioning
# 'processx' is deleted.
sed -i.bak '/processx/d' DESCRIPTION
|
|
|
|
|
|
|
|
# Delete the '.bak' copies that 'sed -i.bak' left next to the R sources
# and NAMESPACE.
echo "Cleaning sed backup files"
rm R/*.R.bak NAMESPACE.bak
|
|
|
|
|
|
|
|
# Return to the directory the script was started from; 'lightgbm_r' below is
# given relative to it. The path is quoted so that a directory name
# containing whitespace is passed to 'cd' as a single argument.
cd "${ORIG_WD}"

# Build the source package from the staging directory.
R CMD build \
    --keep-empty-dirs \
    lightgbm_r

echo "Done building R package"
|