зеркало из https://github.com/Azure/aztk.git
Feature: Sparklyr (#243)
* Added rstudio server script * Added rstudio server port to aztk sdk * Added R dockerfiles * Added new line on dockerfiles * Pointing dockerfiles to new aztk-base * allow any user or application in the server to write to the history server log directory
This commit is contained in:
Родитель
c12ecebad2
Коммит
0efadefb98
|
@ -48,6 +48,7 @@ def __docker_run_cmd(docker_repo: str = None, gpu_enabled: bool = False, file_mo
|
|||
cmd.add_option('-p', '7077:7077') # Spark Master
|
||||
cmd.add_option('-p', '4040:4040') # Job UI
|
||||
cmd.add_option('-p', '8888:8888') # Jupyter UI
|
||||
cmd.add_option('-p', '8787:8787') # Rstudio Server
|
||||
cmd.add_option('-p', '18080:18080') # Spark History Server UI
|
||||
cmd.add_option('-p', '3022:3022') # Docker SSH
|
||||
cmd.add_option('-p', '8020:8020') # Namenode IPC: ClientProtocol
|
||||
|
@ -138,6 +139,7 @@ def generate_cluster_start_task(
|
|||
spark_worker_ui_port = constants.DOCKER_SPARK_WORKER_UI_PORT
|
||||
spark_jupyter_port = constants.DOCKER_SPARK_JUPYTER_PORT
|
||||
spark_job_ui_port = constants.DOCKER_SPARK_JOB_UI_PORT
|
||||
spark_rstudio_server_port = constants.DOCKER_SPARK_RSTUDIO_SERVER_PORT
|
||||
|
||||
# TODO use certificate
|
||||
environment_settings = [
|
||||
|
@ -159,6 +161,8 @@ def generate_cluster_start_task(
|
|||
name="SPARK_JUPYTER_PORT", value=spark_jupyter_port),
|
||||
batch_models.EnvironmentSetting(
|
||||
name="SPARK_JOB_UI_PORT", value=spark_job_ui_port),
|
||||
batch_models.EnvironmentSetting(
|
||||
name="SPARK_RSTUDIO_SERVER_PORT", value=spark_rstudio_server_port),
|
||||
] + __get_docker_credentials(spark_client)
|
||||
|
||||
# start task command
|
||||
|
|
|
@ -10,6 +10,7 @@ DOCKER_SPARK_CONTAINER_NAME = "spark"
|
|||
# DOCKER SPARK
|
||||
DOCKER_SPARK_WEB_UI_PORT = 8080
|
||||
DOCKER_SPARK_WORKER_UI_PORT = 8081
|
||||
DOCKER_SPARK_RSTUDIO_SERVER_PORT = 8787
|
||||
DOCKER_SPARK_JUPYTER_PORT = 8888
|
||||
DOCKER_SPARK_JOB_UI_PORT = 4040
|
||||
DOCKER_SPARK_JOB_UI_HISTORY_PORT = 18080
|
||||
|
|
|
@ -193,7 +193,8 @@ class SshConfig:
|
|||
self.web_ui_port = '8080'
|
||||
self.jupyter_port = '8888'
|
||||
self.name_node_ui_port = '50070'
|
||||
|
||||
self.rstudio_server_port = '8787'
|
||||
|
||||
def _read_config_file(self, path: str = aztk.utils.constants.DEFAULT_SSH_CONFIG_PATH):
|
||||
"""
|
||||
Reads the config file in the .aztk/ directory (.aztk/cluster.yaml)
|
||||
|
@ -235,13 +236,16 @@ class SshConfig:
|
|||
if config.get('name_node_ui_port') is not None:
|
||||
self.name_node_ui_port = config['name_node_ui_port']
|
||||
|
||||
if config.get('rstudio_server_port') is not None:
|
||||
self.rstudio_server_port = config['rstudio_server_port']
|
||||
|
||||
if config.get('host') is not None:
|
||||
self.host = config['host']
|
||||
|
||||
if config.get('connect') is not None:
|
||||
self.connect = config['connect']
|
||||
|
||||
def merge(self, cluster_id, username, job_ui_port, job_history_ui_port, web_ui_port, jupyter_port, name_node_ui_port, host, connect):
|
||||
def merge(self, cluster_id, username, job_ui_port, job_history_ui_port, web_ui_port, jupyter_port, name_node_ui_port, rstudio_server_port, host, connect):
|
||||
"""
|
||||
Merges fields with args object
|
||||
"""
|
||||
|
@ -256,6 +260,7 @@ class SshConfig:
|
|||
web_ui_port=web_ui_port,
|
||||
jupyter_port=jupyter_port,
|
||||
name_node_ui_port=name_node_ui_port,
|
||||
rstudio_server_port=rstudio_server_port,
|
||||
host=host,
|
||||
connect=connect
|
||||
)
|
||||
|
|
|
@ -21,6 +21,8 @@ def setup_parser(parser: argparse.ArgumentParser):
|
|||
help='Local port to port jupyter to')
|
||||
parser.add_argument('--namenodeui',
|
||||
help='Local port to port HDFS NameNode UI to')
|
||||
parser.add_argument('--rstudioserver',
|
||||
help='Local port to port rstudio server to')
|
||||
parser.add_argument('-u', '--username',
|
||||
help='Username to spark cluster')
|
||||
parser.add_argument('--host', dest="host",
|
||||
|
@ -46,6 +48,7 @@ def execute(args: typing.NamedTuple):
|
|||
web_ui_port=args.webui,
|
||||
jupyter_port=args.jupyter,
|
||||
name_node_ui_port=args.namenodeui,
|
||||
rstudio_server_port=args.rstudioserver,
|
||||
host=args.host,
|
||||
connect=args.connect
|
||||
)
|
||||
|
@ -57,7 +60,8 @@ def execute(args: typing.NamedTuple):
|
|||
log.info("open jobui: %s%s", http_prefix, ssh_conf.job_ui_port)
|
||||
log.info("open jobhistoryui: %s%s", http_prefix, ssh_conf.job_history_ui_port)
|
||||
log.info("open jupyter: %s%s", http_prefix, ssh_conf.jupyter_port)
|
||||
log.info("open jupyter: %s%s", http_prefix, ssh_conf.name_node_ui_port)
|
||||
log.info("open namenodeui: %s%s", http_prefix, ssh_conf.name_node_ui_port)
|
||||
log.info("open rstudio server: %s%s", http_prefix, ssh_conf.rstudio_server_port)
|
||||
log.info("ssh username: %s", ssh_conf.username)
|
||||
log.info("connect: %s", ssh_conf.connect)
|
||||
log.info("-------------------------------------------")
|
||||
|
@ -72,6 +76,7 @@ def execute(args: typing.NamedTuple):
|
|||
jobhistoryui=ssh_conf.job_history_ui_port,
|
||||
namenodeui=ssh_conf.name_node_ui_port,
|
||||
jupyter=ssh_conf.jupyter_port,
|
||||
rstudioserver=ssh_conf.rstudio_server_port,
|
||||
username=ssh_conf.username,
|
||||
host=ssh_conf.host,
|
||||
connect=ssh_conf.connect)
|
||||
|
|
|
@ -122,6 +122,7 @@ def ssh_in_master(
|
|||
jobhistoryui: str = None,
|
||||
jupyter: str = None,
|
||||
namenodeui: str = None,
|
||||
rstudioserver: str = None,
|
||||
ports=None,
|
||||
host: bool = False,
|
||||
connect: bool = True):
|
||||
|
@ -131,7 +132,8 @@ def ssh_in_master(
|
|||
:param username: Username to use to ssh
|
||||
:param webui: Port for the spark master web ui (Local port)
|
||||
:param jobui: Port for the job web ui (Local port)
|
||||
:param jupyter: Port for jupyter(Local port)
|
||||
:param jupyter: Port for jupyter (Local port)
|
||||
:param rstudioserver: Port for rstudio server (Local port)
|
||||
:param ports: a list of local and remote ports
|
||||
:type ports: [[<local-port>, <remote-port>]]
|
||||
"""
|
||||
|
@ -150,6 +152,7 @@ def ssh_in_master(
|
|||
|
||||
spark_web_ui_port = aztk.utils.constants.DOCKER_SPARK_WEB_UI_PORT
|
||||
spark_worker_ui_port = aztk.utils.constants.DOCKER_SPARK_WORKER_UI_PORT
|
||||
spark_rstudio_server_port = aztk.utils.constants.DOCKER_SPARK_RSTUDIO_SERVER_PORT
|
||||
spark_jupyter_port = aztk.utils.constants.DOCKER_SPARK_JUPYTER_PORT
|
||||
spark_job_ui_port = aztk.utils.constants.DOCKER_SPARK_JOB_UI_PORT
|
||||
spark_job_history_ui_port = aztk.utils.constants.DOCKER_SPARK_JOB_UI_HISTORY_PORT
|
||||
|
@ -173,6 +176,8 @@ def ssh_in_master(
|
|||
jupyter, spark_jupyter_port), enable=bool(jupyter))
|
||||
ssh_command.add_option("-L", "{0}:localhost:{1}".format(
|
||||
namenodeui, spark_namenode_ui_port), enable=bool(namenodeui))
|
||||
ssh_command.add_option("-L", "{0}:localhost:{1}".format(
|
||||
rstudioserver, spark_rstudio_server_port), enable=bool(rstudioserver))
|
||||
|
||||
if ports is not None:
|
||||
for port in ports:
|
||||
|
|
|
@ -20,5 +20,8 @@ jupyter_port: 8888
|
|||
# name_node_ui_port: <local port to which the Name Node UI is forwarded>
|
||||
name_node_ui_port: 50070
|
||||
|
||||
# rstudio_server_port: <local port to which the rstudio server is forwarded>
|
||||
rstudio_server_port: 8787
|
||||
|
||||
# connect: <true/false, connect to spark master or print connection string (--no-connect)>
|
||||
connect: true
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#!/bin/bash

# Custom script: on the master node, download, install and start RStudio Server,
# and create a default "rstudio" login for it. Nodes where IS_MASTER is not "1"
# do nothing.
#
# Requirements, as visible from the commands below (the Docker image must
# provide them; RStudio Server itself is downloaded here, not pre-installed):
#   - RSTUDIO_SERVER_VERSION is set in the environment (selects the .deb file)
#   - wget, gdebi, useradd and chpasswd are available
#   - a "staff" group exists for the default user
#
# This custom script has been tested to work on the following docker images:
#  - jiata/aztk-r:0.1.0-spark2.2.0-r3.4.1
#  - jiata/aztk-r:0.1.0-spark2.1.0-r3.4.1
#  - jiata/aztk-r:0.1.0-spark1.6.3-r3.4.1

if [ "$IS_MASTER" = "1" ]; then

    # Stop with a clear message when the version is missing, instead of
    # requesting the malformed "rstudio-server--amd64.deb" URL.
    : "${RSTUDIO_SERVER_VERSION:?RSTUDIO_SERVER_VERSION must be set to the RStudio Server version to install}"
    rstudio_deb="rstudio-server-${RSTUDIO_SERVER_VERSION}-amd64.deb"

    ## Download and install Rstudio Server
    wget "https://download2.rstudio.org/${rstudio_deb}"
    gdebi "${rstudio_deb}" --non-interactive
    # Appended to the server config; presumably turns off RStudio Server's
    # AppArmor integration -- confirm against the RStudio Server admin guide.
    echo "server-app-armor-enabled=0" | tee -a /etc/rstudio/rserver.conf
    rm "${rstudio_deb}"

    ## Preparing default user for Rstudio Server
    # From this point on, any failing command aborts the script.
    # NOTE(review): the install steps above run without `set -e`; kept as in
    # the original in case that ordering is deliberate.
    set -e
    useradd -m -d /home/rstudio rstudio -g staff
    # NOTE(review): well-known default credentials (rstudio/rstudio); anyone who
    # can reach the RStudio Server port can log in with them.
    echo rstudio:rstudio | chpasswd

    rstudio-server start

fi
|
|
@ -0,0 +1,126 @@
|
|||
# Ubuntu 16.04 (Xenial)
FROM aztk/base:spark1.6.3

# Builds R from source on top of the aztk Spark base image and installs
# littler, docopt, tidyverse and sparklyr from a date-locked MRAN snapshot.
# RSTUDIO_SERVER_VERSION is only exported as an environment variable here;
# nothing in this file downloads RStudio Server itself.

# modify these ARGs at build time to specify your desired versions of R and
# RStudio Server; BUILD_DATE selects the MRAN snapshot (defaults to the build day)
ARG R_VERSION=3.4.1
ARG RSTUDIO_SERVER_VERSION=1.1.383
ARG BUILD_DATE

# set env vars
ENV DEBIAN_FRONTEND noninteractive
ENV BUILD_DATE ${BUILD_DATE:-}
ENV RSTUDIO_SERVER_VERSION $RSTUDIO_SERVER_VERSION
ENV R_VERSION $R_VERSION

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash-completion \
        ca-certificates \
        file \
        fonts-texgyre \
        g++ \
        gfortran \
        gsfonts \
        libcurl3 \
        libopenblas-dev \
        libpangocairo-1.0-0 \
        libpng16-16 \
        locales \
        make \
        unzip \
        zip \
        libcurl4-openssl-dev \
        libxml2-dev \
        libapparmor1 \
        gdebi-core \
        lsb-release \
        psmisc \
        sudo \
    && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \
    && locale-gen en_US.utf8 \
    && /usr/sbin/update-locale LANG=en_US.UTF-8 \
    && BUILDDEPS="libcairo2-dev \
        libpango1.0-dev \
        libjpeg-dev \
        libicu-dev \
        libpcre3-dev \
        libpng-dev \
        libtiff5-dev \
        liblzma-dev \
        libx11-dev \
        libxt-dev \
        perl \
        tcl8.6-dev \
        tk8.6-dev \
        texinfo \
        texlive-extra-utils \
        texlive-fonts-recommended \
        texlive-fonts-extra \
        texlive-latex-recommended \
        x11proto-core-dev \
        xauth \
        xfonts-base \
        xvfb" \
    && apt-get install -y --no-install-recommends $BUILDDEPS \
    ## Download source code
    && cd tmp/ \
    && majorVersion=$(echo $R_VERSION | cut -f1 -d.) \
    && curl -O https://cran.r-project.org/src/base/R-${majorVersion}/R-${R_VERSION}.tar.gz \
    ## Extract source code
    && tar -xf R-${R_VERSION}.tar.gz \
    && cd R-${R_VERSION} \
    ## Set compiler flags
    && R_PAPERSIZE=letter \
        R_BATCHSAVE="--no-save --no-restore" \
        R_BROWSER=xdg-open \
        PAGER=/usr/bin/pager \
        PERL=/usr/bin/perl \
        R_UNZIPCMD=/usr/bin/unzip \
        R_ZIPCMD=/usr/bin/zip \
        R_PRINTCMD=/usr/bin/lpr \
        LIBnn=lib \
        AWK=/usr/bin/awk \
        CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
        CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
        ## Configure options
        ./configure --enable-R-shlib \
            --enable-memory-profiling \
            --with-readline \
            --with-blas="-lopenblas" \
            --disable-nls \
            --without-recommended-packages \
    ## Build and install
    && make \
    && make install \
    ## Add a default CRAN mirror
    && echo "options(repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \
    ## Add a library directory (for user-installed packages)
    && mkdir -p /usr/local/lib/R/site-library \
    && chown root:staff /usr/local/lib/R/site-library \
    && chmod g+wx /usr/local/lib/R/site-library \
    ## Fix library path
    && echo "R_LIBS_USER='/usr/local/lib/R/site-library'" >> /usr/local/lib/R/etc/Renviron \
    && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \
    ## install packages from date-locked MRAN snapshot of CRAN
    ## Default BUILD_DATE to today when it was not supplied. Done with a
    ## parameter expansion rather than `[ -z ... ] && ... || true`: in an &&
    ## chain that trailing `|| true` also swallowed the failure of every
    ## earlier step (apt-get, configure, make, ...) and let the build go on.
    && BUILD_DATE="${BUILD_DATE:-$(TZ="America/Los_Angeles" date -I)}" \
    && MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE} \
    && echo MRAN=$MRAN >> /etc/environment \
    && export MRAN=$MRAN \
    && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl'); Sys.setenv(SPARK_HOME ='"$SPARK_HOME"')" >> /usr/local/lib/R/etc/Rprofile.site \
    ## Use littler installation scripts
    && Rscript -e "install.packages(c('littler', 'docopt', 'tidyverse', 'sparklyr'), repo = '$MRAN')" \
    && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \
    && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \
    && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \
    ## TEMPORARY WORKAROUND to get more robust error handling for install2.r prior to littler update
    ## -o names the output file (-O takes no argument, so the path was parsed
    ## as a URL and nothing was written); -L follows GitHub's redirect to the
    ## raw file; -f fails the build on an HTTP error instead of saving the
    ## error page.
    && curl -fL -o /usr/local/bin/install2.r https://github.com/eddelbuettel/littler/raw/master/inst/examples/install2.r \
    && chmod +x /usr/local/bin/install2.r \
    ## Clean up from R source install
    && cd / \
    && rm -rf /tmp/* \
    && apt-get autoremove -y \
    && apt-get autoclean -y \
    && rm -rf /var/lib/apt/lists/*

CMD ["/bin/bash"]
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
# Ubuntu 16.04 (Xenial)
FROM aztk/base:spark2.1.0

# Builds R from source on top of the aztk Spark base image and installs
# littler, docopt, tidyverse and sparklyr from a date-locked MRAN snapshot.
# RSTUDIO_SERVER_VERSION is only exported as an environment variable here;
# nothing in this file downloads RStudio Server itself.

# modify these ARGs at build time to specify your desired versions of R and
# RStudio Server; BUILD_DATE selects the MRAN snapshot (defaults to the build day)
ARG R_VERSION=3.4.1
ARG RSTUDIO_SERVER_VERSION=1.1.383
ARG BUILD_DATE

# set env vars
ENV DEBIAN_FRONTEND noninteractive
ENV BUILD_DATE ${BUILD_DATE:-}
ENV RSTUDIO_SERVER_VERSION $RSTUDIO_SERVER_VERSION
ENV R_VERSION $R_VERSION

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash-completion \
        ca-certificates \
        file \
        fonts-texgyre \
        g++ \
        gfortran \
        gsfonts \
        libcurl3 \
        libopenblas-dev \
        libpangocairo-1.0-0 \
        libpng16-16 \
        locales \
        make \
        unzip \
        zip \
        libcurl4-openssl-dev \
        libxml2-dev \
        libapparmor1 \
        gdebi-core \
        lsb-release \
        psmisc \
        sudo \
    && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \
    && locale-gen en_US.utf8 \
    && /usr/sbin/update-locale LANG=en_US.UTF-8 \
    && BUILDDEPS="libcairo2-dev \
        libpango1.0-dev \
        libjpeg-dev \
        libicu-dev \
        libpcre3-dev \
        libpng-dev \
        libtiff5-dev \
        liblzma-dev \
        libx11-dev \
        libxt-dev \
        perl \
        tcl8.6-dev \
        tk8.6-dev \
        texinfo \
        texlive-extra-utils \
        texlive-fonts-recommended \
        texlive-fonts-extra \
        texlive-latex-recommended \
        x11proto-core-dev \
        xauth \
        xfonts-base \
        xvfb" \
    && apt-get install -y --no-install-recommends $BUILDDEPS \
    ## Download source code
    && cd tmp/ \
    && majorVersion=$(echo $R_VERSION | cut -f1 -d.) \
    && curl -O https://cran.r-project.org/src/base/R-${majorVersion}/R-${R_VERSION}.tar.gz \
    ## Extract source code
    && tar -xf R-${R_VERSION}.tar.gz \
    && cd R-${R_VERSION} \
    ## Set compiler flags
    && R_PAPERSIZE=letter \
        R_BATCHSAVE="--no-save --no-restore" \
        R_BROWSER=xdg-open \
        PAGER=/usr/bin/pager \
        PERL=/usr/bin/perl \
        R_UNZIPCMD=/usr/bin/unzip \
        R_ZIPCMD=/usr/bin/zip \
        R_PRINTCMD=/usr/bin/lpr \
        LIBnn=lib \
        AWK=/usr/bin/awk \
        CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
        CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
        ## Configure options
        ./configure --enable-R-shlib \
            --enable-memory-profiling \
            --with-readline \
            --with-blas="-lopenblas" \
            --disable-nls \
            --without-recommended-packages \
    ## Build and install
    && make \
    && make install \
    ## Add a default CRAN mirror
    && echo "options(repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \
    ## Add a library directory (for user-installed packages)
    && mkdir -p /usr/local/lib/R/site-library \
    && chown root:staff /usr/local/lib/R/site-library \
    && chmod g+wx /usr/local/lib/R/site-library \
    ## Fix library path
    && echo "R_LIBS_USER='/usr/local/lib/R/site-library'" >> /usr/local/lib/R/etc/Renviron \
    && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \
    ## install packages from date-locked MRAN snapshot of CRAN
    ## Default BUILD_DATE to today when it was not supplied. Done with a
    ## parameter expansion rather than `[ -z ... ] && ... || true`: in an &&
    ## chain that trailing `|| true` also swallowed the failure of every
    ## earlier step (apt-get, configure, make, ...) and let the build go on.
    && BUILD_DATE="${BUILD_DATE:-$(TZ="America/Los_Angeles" date -I)}" \
    && MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE} \
    && echo MRAN=$MRAN >> /etc/environment \
    && export MRAN=$MRAN \
    && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl'); Sys.setenv(SPARK_HOME ='"$SPARK_HOME"')" >> /usr/local/lib/R/etc/Rprofile.site \
    ## Use littler installation scripts
    && Rscript -e "install.packages(c('littler', 'docopt', 'tidyverse', 'sparklyr'), repo = '$MRAN')" \
    && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \
    && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \
    && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \
    ## TEMPORARY WORKAROUND to get more robust error handling for install2.r prior to littler update
    ## -o names the output file (-O takes no argument, so the path was parsed
    ## as a URL and nothing was written); -L follows GitHub's redirect to the
    ## raw file; -f fails the build on an HTTP error instead of saving the
    ## error page.
    && curl -fL -o /usr/local/bin/install2.r https://github.com/eddelbuettel/littler/raw/master/inst/examples/install2.r \
    && chmod +x /usr/local/bin/install2.r \
    ## Clean up from R source install
    && cd / \
    && rm -rf /tmp/* \
    && apt-get autoremove -y \
    && apt-get autoclean -y \
    && rm -rf /var/lib/apt/lists/*

CMD ["/bin/bash"]
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
# Ubuntu 16.04 (Xenial)
FROM aztk/base:spark2.2.0

# Builds R from source on top of the aztk Spark base image and installs
# littler, docopt, tidyverse and sparklyr from a date-locked MRAN snapshot.
# RSTUDIO_SERVER_VERSION is only exported as an environment variable here;
# nothing in this file downloads RStudio Server itself.

# modify these ARGs at build time to specify your desired versions of R and
# RStudio Server; BUILD_DATE selects the MRAN snapshot (defaults to the build day)
ARG R_VERSION=3.4.1
ARG RSTUDIO_SERVER_VERSION=1.1.383
ARG BUILD_DATE

# set env vars
ENV DEBIAN_FRONTEND noninteractive
ENV BUILD_DATE ${BUILD_DATE:-}
ENV RSTUDIO_SERVER_VERSION $RSTUDIO_SERVER_VERSION
ENV R_VERSION $R_VERSION

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash-completion \
        ca-certificates \
        file \
        fonts-texgyre \
        g++ \
        gfortran \
        gsfonts \
        libcurl3 \
        libopenblas-dev \
        libpangocairo-1.0-0 \
        libpng16-16 \
        locales \
        make \
        unzip \
        zip \
        libcurl4-openssl-dev \
        libxml2-dev \
        libapparmor1 \
        gdebi-core \
        lsb-release \
        psmisc \
        sudo \
    && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \
    && locale-gen en_US.utf8 \
    && /usr/sbin/update-locale LANG=en_US.UTF-8 \
    && BUILDDEPS="libcairo2-dev \
        libpango1.0-dev \
        libjpeg-dev \
        libicu-dev \
        libpcre3-dev \
        libpng-dev \
        libtiff5-dev \
        liblzma-dev \
        libx11-dev \
        libxt-dev \
        perl \
        tcl8.6-dev \
        tk8.6-dev \
        texinfo \
        texlive-extra-utils \
        texlive-fonts-recommended \
        texlive-fonts-extra \
        texlive-latex-recommended \
        x11proto-core-dev \
        xauth \
        xfonts-base \
        xvfb" \
    && apt-get install -y --no-install-recommends $BUILDDEPS \
    ## Download source code
    && cd tmp/ \
    && majorVersion=$(echo $R_VERSION | cut -f1 -d.) \
    && curl -O https://cran.r-project.org/src/base/R-${majorVersion}/R-${R_VERSION}.tar.gz \
    ## Extract source code
    && tar -xf R-${R_VERSION}.tar.gz \
    && cd R-${R_VERSION} \
    ## Set compiler flags
    && R_PAPERSIZE=letter \
        R_BATCHSAVE="--no-save --no-restore" \
        R_BROWSER=xdg-open \
        PAGER=/usr/bin/pager \
        PERL=/usr/bin/perl \
        R_UNZIPCMD=/usr/bin/unzip \
        R_ZIPCMD=/usr/bin/zip \
        R_PRINTCMD=/usr/bin/lpr \
        LIBnn=lib \
        AWK=/usr/bin/awk \
        CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
        CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
        ## Configure options
        ./configure --enable-R-shlib \
            --enable-memory-profiling \
            --with-readline \
            --with-blas="-lopenblas" \
            --disable-nls \
            --without-recommended-packages \
    ## Build and install
    && make \
    && make install \
    ## Add a default CRAN mirror
    && echo "options(repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \
    ## Add a library directory (for user-installed packages)
    && mkdir -p /usr/local/lib/R/site-library \
    && chown root:staff /usr/local/lib/R/site-library \
    && chmod g+wx /usr/local/lib/R/site-library \
    ## Fix library path
    && echo "R_LIBS_USER='/usr/local/lib/R/site-library'" >> /usr/local/lib/R/etc/Renviron \
    && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \
    ## install packages from date-locked MRAN snapshot of CRAN
    ## Default BUILD_DATE to today when it was not supplied. Done with a
    ## parameter expansion rather than `[ -z ... ] && ... || true`: in an &&
    ## chain that trailing `|| true` also swallowed the failure of every
    ## earlier step (apt-get, configure, make, ...) and let the build go on.
    && BUILD_DATE="${BUILD_DATE:-$(TZ="America/Los_Angeles" date -I)}" \
    && MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE} \
    && echo MRAN=$MRAN >> /etc/environment \
    && export MRAN=$MRAN \
    && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl'); Sys.setenv(SPARK_HOME ='"$SPARK_HOME"')" >> /usr/local/lib/R/etc/Rprofile.site \
    ## Use littler installation scripts
    && Rscript -e "install.packages(c('littler', 'docopt', 'tidyverse', 'sparklyr'), repo = '$MRAN')" \
    && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \
    && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \
    && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \
    ## TEMPORARY WORKAROUND to get more robust error handling for install2.r prior to littler update
    ## -o names the output file (-O takes no argument, so the path was parsed
    ## as a URL and nothing was written); -L follows GitHub's redirect to the
    ## raw file; -f fails the build on an HTTP error instead of saving the
    ## error page.
    && curl -fL -o /usr/local/bin/install2.r https://github.com/eddelbuettel/littler/raw/master/inst/examples/install2.r \
    && chmod +x /usr/local/bin/install2.r \
    ## Clean up from R source install
    && cd / \
    && rm -rf /tmp/* \
    && apt-get autoremove -y \
    && apt-get autoclean -y \
    && rm -rf /var/lib/apt/lists/*

CMD ["/bin/bash"]
|
||||
|
|
@ -150,7 +150,7 @@ def copy_spark_env():
|
|||
spark_env_path_dest = os.path.join(spark_home, 'conf/spark-env.sh')
|
||||
copyfile(spark_env_path_src, spark_env_path_dest)
|
||||
|
||||
|
||||
|
||||
def copy_spark_defaults():
|
||||
spark_default_path_src = os.path.join(os.environ['DOCKER_WORKING_DIR'], 'conf/spark-defaults.conf')
|
||||
spark_default_path_dest = os.path.join(spark_home, 'conf/spark-defaults.conf')
|
||||
|
@ -202,5 +202,8 @@ def configure_history_server_log_path(path_to_log_file):
|
|||
else:
|
||||
print('Create direcotory {}.'.format(directory))
|
||||
os.makedirs(directory)
|
||||
|
||||
# Make sure the directory can be accessed by all users
|
||||
os.chmod(directory, mode=0o777)
|
||||
else:
|
||||
print('Skipping. The eventLog directory is not local.')
|
||||
|
|
Загрузка…
Ссылка в новой задаче