2020-07-29 05:05:59 +03:00
|
|
|
#!/bin/sh
|
|
|
|
|
|
|
|
# [description]
|
|
|
|
# Prepare a source distribution of the R package
|
|
|
|
# to be submitted to CRAN.
|
|
|
|
#
|
2021-10-31 06:31:35 +03:00
|
|
|
# [arguments]
|
|
|
|
#
|
|
|
|
# --r-executable Customize the R executable used by `R CMD build`.
|
|
|
|
# Useful if building the R package in an environment with
|
|
|
|
# non-standard builds of R, such as those provided in
|
|
|
|
# https://github.com/wch/r-debug.
|
|
|
|
#
|
2020-07-29 05:05:59 +03:00
|
|
|
# [usage]
|
2021-10-31 06:31:35 +03:00
|
|
|
#
|
|
|
|
# # default usage
|
2020-07-29 05:05:59 +03:00
|
|
|
# sh build-cran-package.sh
|
2021-10-31 06:31:35 +03:00
|
|
|
#
|
|
|
|
# # custom R build
|
|
|
|
# sh build-cran-package.sh --r-executable=RDvalgrind
|
2020-07-29 05:05:59 +03:00
|
|
|
|
|
|
|
# abort on the first command that exits with a non-zero status
set -e

# R executable handed to `R CMD build`; override with --r-executable=<name>
LGB_R_EXECUTABLE=R

# Parse command-line arguments. Only the '--r-executable=<value>' form is
# recognized; anything else is reported on stderr and aborts the script.
while [ $# -gt 0 ]; do
    case "$1" in
        --r-executable=*)
            # keep everything after the first '='
            LGB_R_EXECUTABLE="${1#*=}"
            ;;
        *)
            echo "invalid argument '${1}'" >&2
            # exit statuses are limited to 0-255; 'exit -1' is rejected
            # by some /bin/sh implementations
            exit 1
            ;;
    esac
    shift
done

echo "Building lightgbm with R executable: ${LGB_R_EXECUTABLE}"
|
|
|
|
|
2021-08-02 20:43:41 +03:00
|
|
|
# Remember the directory the script was launched from, and stage the
# package in a 'lightgbm_r' directory directly below it.
ORIG_WD="$(pwd)"
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# always start from an empty staging directory
if [ -d "${TEMP_R_DIR}" ]; then
    rm -r "${TEMP_R_DIR}"
fi
mkdir -p "${TEMP_R_DIR}"
|
2020-07-29 05:05:59 +03:00
|
|
|
|
2020-08-25 22:36:37 +03:00
|
|
|
# build date in YYYY-MM-DD form
CURRENT_DATE=$(date +'%Y-%m-%d')

# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable
#
# sed reads VERSION.txt directly (no 'cat |' pipeline) so that a missing
# or unreadable file makes the assignment itself fail, instead of
# silently producing an empty version string
LGB_VERSION=$(sed "s/rc/-/g" VERSION.txt)
|
|
|
|
|
2020-07-29 05:05:59 +03:00
|
|
|
# Stage the sources: the contents of R-package/, then the 'include'
# directory and the contents of src/ underneath the package's src/
cp -R R-package/* "${TEMP_R_DIR}"
cp -R include "${TEMP_R_DIR}/src/"
cp -R src/* "${TEMP_R_DIR}/src/"

# fast_double_parser header from external_libs/ goes into include/LightGBM
cp external_libs/fast_double_parser/include/fast_double_parser.h "${TEMP_R_DIR}/src/include/LightGBM"

# fmt headers from external_libs/ go into include/LightGBM/fmt
mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt"
cp external_libs/fmt/include/fmt/*.h "${TEMP_R_DIR}/src/include/LightGBM/fmt/"
|
2020-12-08 16:36:24 +03:00
|
|
|
|
2021-01-18 15:44:38 +03:00
|
|
|
# Only a hand-picked subset of Eigen is shipped: this keeps the R package
# small and avoids redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

modules="Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD"
for eigen_module in ${modules}; do
    # top-level header file of the module
    cp external_libs/eigen/Eigen/"${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"

    # every module except 'Dense' also has its src/ directory copied
    case "${eigen_module}" in
        Dense)
            ;;
        *)
            mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
            cp -R external_libs/eigen/Eigen/src/"${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
            ;;
    esac
done

# src/ subdirectories that are copied in addition to the modules above
for eigen_subdir in misc plugins; do
    mkdir -p "${EIGEN_R_DIR}/src/${eigen_subdir}"
    cp -R external_libs/eigen/Eigen/src/"${eigen_subdir}"/* "${EIGEN_R_DIR}/src/${eigen_subdir}/"
done
|
2021-01-18 15:44:38 +03:00
|
|
|
|
2021-08-02 20:43:41 +03:00
|
|
|
# everything below operates on paths relative to the staging directory
cd "${TEMP_R_DIR}"

# Remove files not needed for CRAN
echo "Removing files not needed for CRAN"
rm src/install.libs.R
for cran_unused_dir in inst/ pkgdown/; do
    rm -r "${cran_unused_dir}"
done
for cran_unused_file in cran-comments.md AUTOCONF_UBUNTU_VERSION recreate-configure.sh; do
    rm "${cran_unused_file}"
done

# files only used by the lightgbm CLI aren't needed for
# the R package
for cli_only_file in \
    src/application/application.cpp \
    src/include/LightGBM/application.h \
    src/main.cpp
do
    rm "${cli_only_file}"
done
|
|
|
|
|
2020-08-25 22:36:37 +03:00
|
|
|
# configure.ac and DESCRIPTION carry a ~~VERSION~~ placeholder, and
# DESCRIPTION additionally a ~~DATE~~ placeholder, so neither value
# has to be updated by hand
for versioned_file in configure.ac DESCRIPTION; do
    sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" "${versioned_file}"
done
sed -i.bak -e "s/~~DATE~~/${CURRENT_DATE}/" DESCRIPTION
|
|
|
|
|
2020-10-16 22:03:06 +03:00
|
|
|
# Remove 'region', 'endregion', and 'warning' pragmas, as well as the
# 'clang diagnostic', 'GCC diagnostic' and 'diag_suppress' pragmas.
# This won't change the correctness of the code. CRAN does
# not allow you to use compiler flag '-Wno-unknown-pragmas' or
# pragmas that suppress warnings.
#
# Arguments:
#   $1 - directory searched recursively for *.h, *.hpp and *.cpp files
# Effects:
#   every line containing one of the pragmas above is replaced by an
#   empty line; the '.bak' copies written by sed are deleted afterwards
remove_unknown_pragmas() {
    # '-exec ... {} +' passes each path to sed as its own argument, so
    # paths containing whitespace are handled. The parentheses are
    # required: without them the action would only be attached to the
    # last '-name' test.
    find "$1" \
        \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
        -exec sed \
            -i.bak \
            -e 's/^.*#pragma clang diagnostic.*$//' \
            -e 's/^.*#pragma diag_suppress.*$//' \
            -e 's/^.*#pragma GCC diagnostic.*$//' \
            -e 's/^.*#pragma region.*$//' \
            -e 's/^.*#pragma endregion.*$//' \
            -e 's/^.*#pragma warning.*$//' \
            {} +

    # delete the backups for all three extensions, not just '*.cpp.bak'
    find "$1" \
        \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
        -exec rm {} +
}

echo "Removing unknown pragmas in headers"
remove_unknown_pragmas .
|
2020-07-29 05:05:59 +03:00
|
|
|
|
2020-12-08 16:36:24 +03:00
|
|
|
# In common.h, rewrite references that start with '..' and end in
# fmt/format.h or fast_double_parser.h so that they use the
# 'LightGBM/'-prefixed locations instead
LGB_COMMON_HEADER=src/include/LightGBM/utils/common.h
sed -i.bak -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' "${LGB_COMMON_HEADER}"
sed -i.bak -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' "${LGB_COMMON_HEADER}"
|
|
|
|
|
2020-07-29 05:05:59 +03:00
|
|
|
# When building an R package with 'configure', it seems
# you're guaranteed to get a shared library called
# <packagename>.so/dll. The package source code expects
# 'lib_lightgbm.so', not 'lightgbm.so', to comply with the way
# this project has historically handled installation
echo "Changing lib_lightgbm to lightgbm"
# same substitution for every R source file, then for NAMESPACE
for file in R/*.R NAMESPACE; do
    sed -i.bak -e 's/lib_lightgbm/lightgbm/' "${file}"
done
|
|
|
|
|
|
|
|
# 'processx' is listed as a 'Suggests' dependency in DESCRIPTION
# because it is used in install.libs.R, a file that is not
# included in the CRAN distribution of the package
#
# delete every DESCRIPTION line that mentions it
sed -i.bak -e '/processx/d' DESCRIPTION
|
|
|
|
|
|
|
|
echo "Cleaning sed backup files"
rm R/*.R.bak
rm NAMESPACE.bak

# go back to the launch directory, where 'lightgbm_r' is a relative path
cd "${ORIG_WD}"

# build the source package from the staged directory
"${LGB_R_EXECUTABLE}" CMD build --keep-empty-dirs lightgbm_r

echo "Done building R package"
|