spark/deployment/install-worker.sh

#!/bin/bash
##############################################################################
# Description:
# This is a helper script to install the worker binaries on your Apache Spark cluster
#
# Usage:
# ./install-worker.sh <release-provider> <path-to-worker-release> <local-worker-installation-path>
#
# Sample usage:
# ./install-worker.sh \
#     github \
#     https://github.com/dotnet/spark/releases/download/v0.1.0/Microsoft.Spark.Worker.netcoreapp2.1.linux-x64-0.1.0.tar.gz \
#     /usr/local/bin
#
# or, if your Worker release lives on a filesystem such as ABFS, the path would
# look like this:
# ./install-worker.sh \
#     azure \
#     abfs://<blobcontainer>@<gen2storageaccount>.dfs.core.windows.net/<path>/Microsoft.Spark.Worker.netcoreapp2.1.linux-x64-0.1.0.tar.gz \
#     /usr/local/bin
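#
# or, for a release hosted on AWS S3 (hypothetical bucket/path, shown only for
# illustration; it is consumed by the aws branch below via 'aws s3 cp'):
# ./install-worker.sh \
#     aws \
#     s3://<bucket>/<path>/Microsoft.Spark.Worker.netcoreapp2.1.linux-x64-0.1.0.tar.gz \
#     /usr/local/bin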
#
##############################################################################
# Do not abort the script on individual command failures; errors are checked
# explicitly where it matters.
set +e
# Uncomment if you want full tracing (for debugging purposes)
#set -o xtrace
# Cloud Provider
CLOUD_PROVIDER=$1
# Path where packaged worker file (tgz) exists.
SRC_WORKER_PATH_OR_URI=$2
# The path on the executor nodes where Microsoft.Spark.Worker executable is installed.
WORKER_INSTALLATION_PATH=$3
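# Fail fast if any required argument is missing. This guard is a minimal
# robustness sketch added here; it is not part of the original script.
if [ $# -lt 3 ]; then
    echo >&2 "Usage: $0 <release-provider> <path-to-worker-release> <local-worker-installation-path>"
    exit 1
fi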
# The path where all the dependent libraries are installed so that they don't
# pollute the $WORKER_INSTALLATION_PATH.
SPARKDOTNET_ROOT=$WORKER_INSTALLATION_PATH/spark-dotnet
# Temporary worker file.
TEMP_WORKER_FILENAME=/tmp/temp_worker.tgz
# Extract version
IFS='-' read -ra BASE_FILENAME <<< "$(basename "$SRC_WORKER_PATH_OR_URI" .tar.gz)"
VERSION=${BASE_FILENAME[2]}
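# Worked example of the extraction above: for
#   Microsoft.Spark.Worker.netcoreapp2.1.linux-x64-0.1.0.tar.gz
# the basename minus .tar.gz splits on '-' into
#   Microsoft.Spark.Worker.netcoreapp2.1.linux / x64 / 0.1.0
# so VERSION=0.1.0.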
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
[[ ${#VERSION_CHECK[@]} -eq 3 ]] || { echo >&2 "Unexpected worker package version '$VERSION'. Please raise an issue at https://github.com/dotnet/spark"; exit 1; }
# Path of the final destination for the worker binaries
# (the one we just downloaded and extracted)
DEST_WORKER_PATH=$SPARKDOTNET_ROOT/Microsoft.Spark.Worker-$VERSION
DEST_WORKER_BINARY=$DEST_WORKER_PATH/Microsoft.Spark.Worker
# Clean up any existing files.
sudo rm -f "$WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker"
sudo rm -rf "$SPARKDOTNET_ROOT"
# Copy the worker file to a local temporary file.
if [ $"${CLOUD_PROVIDER,,}" = "github" ]; then
wget $SRC_WORKER_PATH_OR_URI -O $TEMP_WORKER_FILENAME
elif [ "${CLOUD_PROVIDER,,}" = "azure" ]; then
hdfs dfs -get $SRC_WORKER_PATH_OR_URI $TEMP_WORKER_FILENAME
elif [ "${CLOUD_PROVIDER,,}" = "aws" ]; then
aws s3 cp $SRC_WORKER_PATH_OR_URI $TEMP_WORKER_FILENAME
else
cp -f $SRC_WORKER_PATH_OR_URI $TEMP_WORKER_FILENAME
fi
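# Sanity check (an added sketch, not in the original script): make sure the
# fetch/copy above actually produced a non-empty archive before extracting it.
if [ ! -s "$TEMP_WORKER_FILENAME" ]; then
    echo >&2 "Failed to fetch worker package from $SRC_WORKER_PATH_OR_URI"
    exit 1
fi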
# Untar the file.
sudo mkdir -p "$SPARKDOTNET_ROOT"
sudo tar xzf "$TEMP_WORKER_FILENAME" -C "$SPARKDOTNET_ROOT"
# Make the file executable since dotnet doesn't set this correctly.
sudo chmod 755 "$DEST_WORKER_BINARY"
# Create a symlink.
sudo ln -sf "$DEST_WORKER_BINARY" "$WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker"
# Remove the temporary worker file.
sudo rm "$TEMP_WORKER_FILENAME"
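# Optional post-install verification (a sketch, not part of the original
# script): confirm the symlink resolves to an executable worker binary.
if [ ! -x "$WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker" ]; then
    echo >&2 "Verification failed: $WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker is not executable."
    exit 1
fi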