diff --git a/aztk/spark/helpers/create_cluster.py b/aztk/spark/helpers/create_cluster.py index f58d002f..c125c5a1 100644 --- a/aztk/spark/helpers/create_cluster.py +++ b/aztk/spark/helpers/create_cluster.py @@ -48,6 +48,7 @@ def __docker_run_cmd(docker_repo: str = None, gpu_enabled: bool = False, file_mo cmd.add_option('-p', '7077:7077') # Spark Master cmd.add_option('-p', '4040:4040') # Job UI cmd.add_option('-p', '8888:8888') # Jupyter UI + cmd.add_option('-p', '8787:8787') # Rstudio Server cmd.add_option('-p', '18080:18080') # Spark History Server UI cmd.add_option('-p', '3022:3022') # Docker SSH cmd.add_option('-p', '8020:8020') # Namenode IPC: ClientProtocol @@ -138,6 +139,7 @@ def generate_cluster_start_task( spark_worker_ui_port = constants.DOCKER_SPARK_WORKER_UI_PORT spark_jupyter_port = constants.DOCKER_SPARK_JUPYTER_PORT spark_job_ui_port = constants.DOCKER_SPARK_JOB_UI_PORT + spark_rstudio_server_port = constants.DOCKER_SPARK_RSTUDIO_SERVER_PORT # TODO use certificate environment_settings = [ @@ -159,6 +161,8 @@ def generate_cluster_start_task( name="SPARK_JUPYTER_PORT", value=spark_jupyter_port), batch_models.EnvironmentSetting( name="SPARK_JOB_UI_PORT", value=spark_job_ui_port), + batch_models.EnvironmentSetting( + name="SPARK_RSTUDIO_SERVER_PORT", value=spark_rstudio_server_port), ] + __get_docker_credentials(spark_client) # start task command diff --git a/aztk/utils/constants.py b/aztk/utils/constants.py index d0c50f51..1f136976 100644 --- a/aztk/utils/constants.py +++ b/aztk/utils/constants.py @@ -10,6 +10,7 @@ DOCKER_SPARK_CONTAINER_NAME = "spark" # DOCKER SPARK DOCKER_SPARK_WEB_UI_PORT = 8080 DOCKER_SPARK_WORKER_UI_PORT = 8081 +DOCKER_SPARK_RSTUDIO_SERVER_PORT = 8787 DOCKER_SPARK_JUPYTER_PORT = 8888 DOCKER_SPARK_JOB_UI_PORT = 4040 DOCKER_SPARK_JOB_UI_HISTORY_PORT = 18080 diff --git a/cli/config.py b/cli/config.py index ac1c869d..54cdaf35 100644 --- a/cli/config.py +++ b/cli/config.py @@ -193,7 +193,8 @@ class SshConfig: self.web_ui_port = '8080' self.jupyter_port = '8888' self.name_node_ui_port = '50070' - + self.rstudio_server_port = '8787' + def _read_config_file(self, path: str = aztk.utils.constants.DEFAULT_SSH_CONFIG_PATH): """ Reads the config file in the .aztk/ directory (.aztk/cluster.yaml) @@ -235,13 +236,16 @@ class SshConfig: if config.get('name_node_ui_port') is not None: self.name_node_ui_port = config['name_node_ui_port'] + if config.get('rstudio_server_port') is not None: + self.rstudio_server_port = config['rstudio_server_port'] + if config.get('host') is not None: self.host = config['host'] if config.get('connect') is not None: self.connect = config['connect'] - def merge(self, cluster_id, username, job_ui_port, job_history_ui_port, web_ui_port, jupyter_port, name_node_ui_port, host, connect): + def merge(self, cluster_id, username, job_ui_port, job_history_ui_port, web_ui_port, jupyter_port, name_node_ui_port, rstudio_server_port, host, connect): """ Merges fields with args object """ @@ -256,6 +260,7 @@ class SshConfig: web_ui_port=web_ui_port, jupyter_port=jupyter_port, name_node_ui_port=name_node_ui_port, + rstudio_server_port=rstudio_server_port, host=host, connect=connect ) diff --git a/cli/spark/endpoints/cluster_ssh.py b/cli/spark/endpoints/cluster_ssh.py index 0f33809c..40495861 100644 --- a/cli/spark/endpoints/cluster_ssh.py +++ b/cli/spark/endpoints/cluster_ssh.py @@ -21,6 +21,8 @@ def setup_parser(parser: argparse.ArgumentParser): help='Local port to port jupyter to') parser.add_argument('--namenodeui', help='Local port to port HDFS NameNode UI to') + parser.add_argument('--rstudioserver', + help='Local port to port rstudio server to') parser.add_argument('-u', '--username', help='Username to spark cluster') parser.add_argument('--host', dest="host", @@ -46,6 +48,7 @@ def execute(args: typing.NamedTuple): web_ui_port=args.webui, jupyter_port=args.jupyter, name_node_ui_port=args.namenodeui, + rstudio_server_port=args.rstudioserver, host=args.host, connect=args.connect ) @@ -57,7 +60,8 @@ def execute(args: typing.NamedTuple): log.info("open jobui: %s%s", http_prefix, ssh_conf.job_ui_port) log.info("open jobhistoryui: %s%s", http_prefix, ssh_conf.job_history_ui_port) log.info("open jupyter: %s%s", http_prefix, ssh_conf.jupyter_port) - log.info("open jupyter: %s%s", http_prefix, ssh_conf.name_node_ui_port) + log.info("open namenodeui: %s%s", http_prefix, ssh_conf.name_node_ui_port) + log.info("open rstudio server: %s%s", http_prefix, ssh_conf.rstudio_server_port) log.info("ssh username: %s", ssh_conf.username) log.info("connect: %s", ssh_conf.connect) log.info("-------------------------------------------") @@ -72,6 +76,7 @@ def execute(args: typing.NamedTuple): jobhistoryui=ssh_conf.job_history_ui_port, namenodeui=ssh_conf.name_node_ui_port, jupyter=ssh_conf.jupyter_port, + rstudioserver=ssh_conf.rstudio_server_port, username=ssh_conf.username, host=ssh_conf.host, connect=ssh_conf.connect) diff --git a/cli/utils.py b/cli/utils.py index 7447c813..0bdac7a3 100644 --- a/cli/utils.py +++ b/cli/utils.py @@ -122,6 +122,7 @@ def ssh_in_master( jobhistoryui: str = None, jupyter: str = None, namenodeui: str = None, + rstudioserver: str = None, ports=None, host: bool = False, connect: bool = True): @@ -131,7 +132,8 @@ def ssh_in_master( :param username: Username to use to ssh :param webui: Port for the spark master web ui (Local port) :param jobui: Port for the job web ui (Local port) - :param jupyter: Port for jupyter(Local port) + :param jupyter: Port for jupyter (Local port) + :param rstudioserver: Port for rstudio server (Local port) :param ports: an list of local and remote ports :type ports: [[, ]] """ @@ -150,6 +152,7 @@ def ssh_in_master( spark_web_ui_port = aztk.utils.constants.DOCKER_SPARK_WEB_UI_PORT spark_worker_ui_port = aztk.utils.constants.DOCKER_SPARK_WORKER_UI_PORT + spark_rstudio_server_port = aztk.utils.constants.DOCKER_SPARK_RSTUDIO_SERVER_PORT spark_jupyter_port = aztk.utils.constants.DOCKER_SPARK_JUPYTER_PORT spark_job_ui_port = aztk.utils.constants.DOCKER_SPARK_JOB_UI_PORT spark_job_history_ui_port = aztk.utils.constants.DOCKER_SPARK_JOB_UI_HISTORY_PORT @@ -173,6 +176,8 @@ def ssh_in_master( jupyter, spark_jupyter_port), enable=bool(jupyter)) ssh_command.add_option("-L", "{0}:localhost:{1}".format( namenodeui, spark_namenode_ui_port), enable=bool(namenodeui)) + ssh_command.add_option("-L", "{0}:localhost:{1}".format( + rstudioserver, spark_rstudio_server_port), enable=bool(rstudioserver)) if ports is not None: for port in ports: diff --git a/config/ssh.yaml b/config/ssh.yaml index 1233eddc..909f41c8 100644 --- a/config/ssh.yaml +++ b/config/ssh.yaml @@ -20,5 +20,8 @@ jupyter_port: 8888 # name_node_ui_port: name_node_ui_port: 50070 +# rstudio_server_port: +rstudio_server_port: 8787 + # connect: connect: true diff --git a/custom-scripts/rstudio_server.sh b/custom-scripts/rstudio_server.sh new file mode 100644 index 00000000..fa69b937 --- /dev/null +++ b/custom-scripts/rstudio_server.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# This custom script only works on images where rstudio server is pre-installed on the Docker image +# +# This custom script has been tested to work on the following docker images: +# - jiata/aztk-r:0.1.0-spark2.2.0-r3.4.1 +# - jiata/aztk-r:0.1.0-spark2.1.0-r3.4.1 +# - jiata/aztk-r:0.1.0-spark1.6.3-r3.4.1 + +if [ "$IS_MASTER" = "1" ]; then + + ## Download and install Rstudio Server + wget https://download2.rstudio.org/rstudio-server-$RSTUDIO_SERVER_VERSION-amd64.deb + gdebi rstudio-server-$RSTUDIO_SERVER_VERSION-amd64.deb --non-interactive + echo "server-app-armor-enabled=0" | tee -a /etc/rstudio/rserver.conf + rm rstudio-server-$RSTUDIO_SERVER_VERSION-amd64.deb + + ## Preparing default user for Rstudio Server + set -e + useradd -m -d /home/rstudio rstudio -g staff + echo rstudio:rstudio | chpasswd + + rstudio-server start + +fi diff --git a/docker-image/r/spark1.6.3/Dockerfile b/docker-image/r/spark1.6.3/Dockerfile new file mode 100644 index 00000000..9c2a2519 --- /dev/null +++ b/docker-image/r/spark1.6.3/Dockerfile @@ -0,0 +1,126 @@ +# Ubuntu 16.04 (Xenial) +FROM aztk/base:spark1.6.3 + +# modify these ARGs on build time to specify your desired versions of Spark/Hadoop +ARG R_VERSION=3.4.1 +ARG RSTUDIO_SERVER_VERSION=1.1.383 +ARG BUILD_DATE + +# set env vars +ENV DEBIAN_FRONTEND noninteractive +ENV BUILD_DATE ${BUILD_DATE:-} +ENV RSTUDIO_SERVER_VERSION $RSTUDIO_SERVER_VERSION +ENV R_VERSION $R_VERSION + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash-completion \ + ca-certificates \ + file \ + fonts-texgyre \ + g++ \ + gfortran \ + gsfonts \ + libcurl3 \ + libopenblas-dev \ + libpangocairo-1.0-0 \ + libpng16-16 \ + locales \ + make \ + unzip \ + zip \ + libcurl4-openssl-dev \ + libxml2-dev \ + libapparmor1 \ + gdebi-core \ + lsb-release \ + psmisc \ + sudo \ + && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \ + && locale-gen en_US.utf8 \ + && /usr/sbin/update-locale LANG=en_US.UTF-8 \ + && BUILDDEPS="libcairo2-dev \ + libpango1.0-dev \ + libjpeg-dev \ + libicu-dev \ + libpcre3-dev \ + libpng-dev \ + libtiff5-dev \ + liblzma-dev \ + libx11-dev \ + libxt-dev \ + perl \ + tcl8.6-dev \ + tk8.6-dev \ + texinfo \ + texlive-extra-utils \ + texlive-fonts-recommended \ + texlive-fonts-extra \ + texlive-latex-recommended \ + x11proto-core-dev \ + xauth \ + xfonts-base \ + xvfb" \ + && apt-get install -y --no-install-recommends $BUILDDEPS \ + ## Download source code + && cd tmp/ \ + && majorVersion=$(echo $R_VERSION | cut -f1 -d.) \ + && curl -O https://cran.r-project.org/src/base/R-${majorVersion}/R-${R_VERSION}.tar.gz \ + ## Extract source code + && tar -xf R-${R_VERSION}.tar.gz \ + && cd R-${R_VERSION} \ + ## Set compiler flags + && R_PAPERSIZE=letter \ + R_BATCHSAVE="--no-save --no-restore" \ + R_BROWSER=xdg-open \ + PAGER=/usr/bin/pager \ + PERL=/usr/bin/perl \ + R_UNZIPCMD=/usr/bin/unzip \ + R_ZIPCMD=/usr/bin/zip \ + R_PRINTCMD=/usr/bin/lpr \ + LIBnn=lib \ + AWK=/usr/bin/awk \ + CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + ## Configure options + ./configure --enable-R-shlib \ + --enable-memory-profiling \ + --with-readline \ + --with-blas="-lopenblas" \ + --disable-nls \ + --without-recommended-packages \ + ## Build and install + && make \ + && make install \ + ## Add a default CRAN mirror + && echo "options(repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \ + ## Add a library directory (for user-installed packages) + && mkdir -p /usr/local/lib/R/site-library \ + && chown root:staff /usr/local/lib/R/site-library \ + && chmod g+wx /usr/local/lib/R/site-library \ + ## Fix library path + && echo "R_LIBS_USER='/usr/local/lib/R/site-library'" >> /usr/local/lib/R/etc/Renviron \ + && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \ + ## install packages from date-locked MRAN snapshot of CRAN + && [ -z "$BUILD_DATE" ] && BUILD_DATE=$(TZ="America/Los_Angeles" date -I) || true \ + && MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE} \ + && echo MRAN=$MRAN >> /etc/environment \ + && export MRAN=$MRAN \ + && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl'); Sys.setenv(SPARK_HOME ='"$SPARK_HOME"')" >> /usr/local/lib/R/etc/Rprofile.site \ + ## Use littler installation scripts + && Rscript -e "install.packages(c('littler', 'docopt', 'tidyverse', 'sparklyr'), repo = '$MRAN')" \ + && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \ + && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \ + && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \ + ## TEMPORARY WORKAROUND to get more robust error handling for install2.r prior to littler update + && curl -O /usr/local/bin/install2.r https://github.com/eddelbuettel/littler/raw/master/inst/examples/install2.r \ + && chmod +x /usr/local/bin/install2.r \ + ## Clean up from R source install + && cd / \ + && rm -rf /tmp/* \ + && apt-get autoremove -y \ + && apt-get autoclean -y \ + && rm -rf /var/lib/apt/lists/* + + CMD ["/bin/bash"] + \ No newline at end of file diff --git a/docker-image/r/spark2.1.0/Dockerfile b/docker-image/r/spark2.1.0/Dockerfile new file mode 100644 index 00000000..00cc6470 --- /dev/null +++ b/docker-image/r/spark2.1.0/Dockerfile @@ -0,0 +1,126 @@ +# Ubuntu 16.04 (Xenial) +FROM aztk/base:spark2.1.0 + +# modify these ARGs on build time to specify your desired versions of Spark/Hadoop +ARG R_VERSION=3.4.1 +ARG RSTUDIO_SERVER_VERSION=1.1.383 +ARG BUILD_DATE + +# set env vars +ENV DEBIAN_FRONTEND noninteractive +ENV BUILD_DATE ${BUILD_DATE:-} +ENV RSTUDIO_SERVER_VERSION $RSTUDIO_SERVER_VERSION +ENV R_VERSION $R_VERSION + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash-completion \ + ca-certificates \ + file \ + fonts-texgyre \ + g++ \ + gfortran \ + gsfonts \ + libcurl3 \ + libopenblas-dev \ + libpangocairo-1.0-0 \ + libpng16-16 \ + locales \ + make \ + unzip \ + zip \ + libcurl4-openssl-dev \ + libxml2-dev \ + libapparmor1 \ + gdebi-core \ + lsb-release \ + psmisc \ + sudo \ + && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \ + && locale-gen en_US.utf8 \ + && /usr/sbin/update-locale LANG=en_US.UTF-8 \ + && BUILDDEPS="libcairo2-dev \ + libpango1.0-dev \ + libjpeg-dev \ + libicu-dev \ + libpcre3-dev \ + libpng-dev \ + libtiff5-dev \ + liblzma-dev \ + libx11-dev \ + libxt-dev \ + perl \ + tcl8.6-dev \ + tk8.6-dev \ + texinfo \ + texlive-extra-utils \ + texlive-fonts-recommended \ + texlive-fonts-extra \ + texlive-latex-recommended \ + x11proto-core-dev \ + xauth \ + xfonts-base \ + xvfb" \ + && apt-get install -y --no-install-recommends $BUILDDEPS \ + ## Download source code + && cd tmp/ \ + && majorVersion=$(echo $R_VERSION | cut -f1 -d.) \ + && curl -O https://cran.r-project.org/src/base/R-${majorVersion}/R-${R_VERSION}.tar.gz \ + ## Extract source code + && tar -xf R-${R_VERSION}.tar.gz \ + && cd R-${R_VERSION} \ + ## Set compiler flags + && R_PAPERSIZE=letter \ + R_BATCHSAVE="--no-save --no-restore" \ + R_BROWSER=xdg-open \ + PAGER=/usr/bin/pager \ + PERL=/usr/bin/perl \ + R_UNZIPCMD=/usr/bin/unzip \ + R_ZIPCMD=/usr/bin/zip \ + R_PRINTCMD=/usr/bin/lpr \ + LIBnn=lib \ + AWK=/usr/bin/awk \ + CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + ## Configure options + ./configure --enable-R-shlib \ + --enable-memory-profiling \ + --with-readline \ + --with-blas="-lopenblas" \ + --disable-nls \ + --without-recommended-packages \ + ## Build and install + && make \ + && make install \ + ## Add a default CRAN mirror + && echo "options(repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \ + ## Add a library directory (for user-installed packages) + && mkdir -p /usr/local/lib/R/site-library \ + && chown root:staff /usr/local/lib/R/site-library \ + && chmod g+wx /usr/local/lib/R/site-library \ + ## Fix library path + && echo "R_LIBS_USER='/usr/local/lib/R/site-library'" >> /usr/local/lib/R/etc/Renviron \ + && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \ + ## install packages from date-locked MRAN snapshot of CRAN + && [ -z "$BUILD_DATE" ] && BUILD_DATE=$(TZ="America/Los_Angeles" date -I) || true \ + && MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE} \ + && echo MRAN=$MRAN >> /etc/environment \ + && export MRAN=$MRAN \ + && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl'); Sys.setenv(SPARK_HOME ='"$SPARK_HOME"')" >> /usr/local/lib/R/etc/Rprofile.site \ + ## Use littler installation scripts + && Rscript -e "install.packages(c('littler', 'docopt', 'tidyverse', 'sparklyr'), repo = '$MRAN')" \ + && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \ + && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \ + && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \ + ## TEMPORARY WORKAROUND to get more robust error handling for install2.r prior to littler update + && curl -O /usr/local/bin/install2.r https://github.com/eddelbuettel/littler/raw/master/inst/examples/install2.r \ + && chmod +x /usr/local/bin/install2.r \ + ## Clean up from R source install + && cd / \ + && rm -rf /tmp/* \ + && apt-get autoremove -y \ + && apt-get autoclean -y \ + && rm -rf /var/lib/apt/lists/* + + CMD ["/bin/bash"] + \ No newline at end of file diff --git a/docker-image/r/spark2.2.0/Dockerfile b/docker-image/r/spark2.2.0/Dockerfile new file mode 100644 index 00000000..d573b2ea --- /dev/null +++ b/docker-image/r/spark2.2.0/Dockerfile @@ -0,0 +1,126 @@ +# Ubuntu 16.04 (Xenial) +FROM aztk/base:spark2.2.0 + +# modify these ARGs on build time to specify your desired versions of Spark/Hadoop +ARG R_VERSION=3.4.1 +ARG RSTUDIO_SERVER_VERSION=1.1.383 +ARG BUILD_DATE + +# set env vars +ENV DEBIAN_FRONTEND noninteractive +ENV BUILD_DATE ${BUILD_DATE:-} +ENV RSTUDIO_SERVER_VERSION $RSTUDIO_SERVER_VERSION +ENV R_VERSION $R_VERSION + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash-completion \ + ca-certificates \ + file \ + fonts-texgyre \ + g++ \ + gfortran \ + gsfonts \ + libcurl3 \ + libopenblas-dev \ + libpangocairo-1.0-0 \ + libpng16-16 \ + locales \ + make \ + unzip \ + zip \ + libcurl4-openssl-dev \ + libxml2-dev \ + libapparmor1 \ + gdebi-core \ + lsb-release \ + psmisc \ + sudo \ + && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \ + && locale-gen en_US.utf8 \ + && /usr/sbin/update-locale LANG=en_US.UTF-8 \ + && BUILDDEPS="libcairo2-dev \ + libpango1.0-dev \ + libjpeg-dev \ + libicu-dev \ + libpcre3-dev \ + libpng-dev \ + libtiff5-dev \ + liblzma-dev \ + libx11-dev \ + libxt-dev \ + perl \ + tcl8.6-dev \ + tk8.6-dev \ + texinfo \ + texlive-extra-utils \ + texlive-fonts-recommended \ + texlive-fonts-extra \ + texlive-latex-recommended \ + x11proto-core-dev \ + xauth \ + xfonts-base \ + xvfb" \ + && apt-get install -y --no-install-recommends $BUILDDEPS \ + ## Download source code + && cd tmp/ \ + && majorVersion=$(echo $R_VERSION | cut -f1 -d.) \ + && curl -O https://cran.r-project.org/src/base/R-${majorVersion}/R-${R_VERSION}.tar.gz \ + ## Extract source code + && tar -xf R-${R_VERSION}.tar.gz \ + && cd R-${R_VERSION} \ + ## Set compiler flags + && R_PAPERSIZE=letter \ + R_BATCHSAVE="--no-save --no-restore" \ + R_BROWSER=xdg-open \ + PAGER=/usr/bin/pager \ + PERL=/usr/bin/perl \ + R_UNZIPCMD=/usr/bin/unzip \ + R_ZIPCMD=/usr/bin/zip \ + R_PRINTCMD=/usr/bin/lpr \ + LIBnn=lib \ + AWK=/usr/bin/awk \ + CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + ## Configure options + ./configure --enable-R-shlib \ + --enable-memory-profiling \ + --with-readline \ + --with-blas="-lopenblas" \ + --disable-nls \ + --without-recommended-packages \ + ## Build and install + && make \ + && make install \ + ## Add a default CRAN mirror + && echo "options(repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \ + ## Add a library directory (for user-installed packages) + && mkdir -p /usr/local/lib/R/site-library \ + && chown root:staff /usr/local/lib/R/site-library \ + && chmod g+wx /usr/local/lib/R/site-library \ + ## Fix library path + && echo "R_LIBS_USER='/usr/local/lib/R/site-library'" >> /usr/local/lib/R/etc/Renviron \ + && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \ + ## install packages from date-locked MRAN snapshot of CRAN + && [ -z "$BUILD_DATE" ] && BUILD_DATE=$(TZ="America/Los_Angeles" date -I) || true \ + && MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE} \ + && echo MRAN=$MRAN >> /etc/environment \ + && export MRAN=$MRAN \ + && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl'); Sys.setenv(SPARK_HOME ='"$SPARK_HOME"')" >> /usr/local/lib/R/etc/Rprofile.site \ + ## Use littler installation scripts + && Rscript -e "install.packages(c('littler', 'docopt', 'tidyverse', 'sparklyr'), repo = '$MRAN')" \ + && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \ + && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \ + && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \ + ## TEMPORARY WORKAROUND to get more robust error handling for install2.r prior to littler update + && curl -O /usr/local/bin/install2.r https://github.com/eddelbuettel/littler/raw/master/inst/examples/install2.r \ + && chmod +x /usr/local/bin/install2.r \ + ## Clean up from R source install + && cd / \ + && rm -rf /tmp/* \ + && apt-get autoremove -y \ + && apt-get autoclean -y \ + && rm -rf /var/lib/apt/lists/* + + CMD ["/bin/bash"] + \ No newline at end of file diff --git a/node_scripts/install/spark.py b/node_scripts/install/spark.py index cf494d21..9d61766d 100644 --- a/node_scripts/install/spark.py +++ b/node_scripts/install/spark.py @@ -150,7 +150,7 @@ def copy_spark_env(): spark_env_path_dest = os.path.join(spark_home, 'conf/spark-env.sh') copyfile(spark_env_path_src, spark_env_path_dest) - + def copy_spark_defaults(): spark_default_path_src = os.path.join(os.environ['DOCKER_WORKING_DIR'], 'conf/spark-defaults.conf') spark_default_path_dest = os.path.join(spark_home, 'conf/spark-defaults.conf') @@ -202,5 +202,8 @@ def configure_history_server_log_path(path_to_log_file): else: print('Create direcotory {}.'.format(directory)) os.makedirs(directory) + + # Make sure the directory can be accessed by all users + os.chmod(directory, mode=0o777) else: print('Skipping. The eventLog directory is not local.')