Fix scripts to be Shellcheck clean (#178)
Parent: a98bbb5242
Commit: c1a92e4138
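The hunks below apply a small set of recurring ShellCheck fixes across the repository's shell scripts: quoting parameter expansions, replacing legacy backticks with $(...), forwarding arguments as "$@" instead of $*, and testing commands directly rather than inspecting $? afterwards. A minimal before/after sketch of those patterns; the command and variable names here are placeholders, not taken from any particular script in this diff:

# before (typical ShellCheck findings)
out=`some_command $arg`
some_command $arg
if [ $? -eq 0 ]; then
    wrapper.sh $*
fi

# after
out=$(some_command "$arg")
if some_command "$arg"; then
    wrapper.sh "$@"
fi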
@@ -386,7 +386,7 @@ SOFTWARE.

pykwalify (https://github.com/Grokzen/pykwalify)

-Copyright (c) 2013-2015 Johan Andersson
+Copyright (c) 2013-2018 Johan Andersson

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation

@@ -753,7 +753,7 @@ ruamel.yaml (https://bitbucket.org/ruamel/yaml)

The MIT License (MIT)

-Copyright (c) 2014-2017 Anthon van der Neut, Ruamel bvba
+Copyright (c) 2014-2018 Anthon van der Neut, Ruamel bvba

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -3,4 +3,4 @@
set -e
set -o pipefail

-python3 /opt/batch-shipyard/recurrent_job_manager.py $*
+python3 /opt/batch-shipyard/recurrent_job_manager.py "$@"
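The change above swaps $* for "$@" when forwarding the wrapper's arguments to Python. The difference matters whenever an argument contains whitespace: "$@" re-emits each original argument as a single word, while unquoted $* lets the shell re-split them. A small illustrative snippet (the demo function is hypothetical):

demo() {
    printf '[%s]\n' "$@"
}
set -- "my file.txt" other
demo "$@"   # prints [my file.txt] and [other]
demo $*     # prints [my], [file.txt] and [other]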
@@ -10,8 +10,8 @@ for spec in "$@"; do
IFS=',' read -ra parts <<< "$spec"
# encrypt,creds,jobid,taskid,include,exclude,dst
encrypt=${parts[0],,}
-if [ $encrypt == "true" ]; then
-SHIPYARD_BATCH_ENV=`echo ${parts[1]} | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
+if [ "$encrypt" == "true" ]; then
+SHIPYARD_BATCH_ENV=$(echo "${parts[1]}" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
else
SHIPYARD_BATCH_ENV=${parts[1]}
fi

@@ -23,20 +23,20 @@ for spec in "$@"; do
dst=${parts[6]}

include=
-if [ ! -z $incl ]; then
+if [ ! -z "$incl" ]; then
include="--include $incl"
fi
exclude=
-if [ ! -z $excl ]; then
+if [ ! -z "$excl" ]; then
exclude="--exclude $excl"
fi
# create destination directory
dest=
-if [ ! -z $dst ]; then
+if [ ! -z "$dst" ]; then
dest="--dst $dst"
-mkdir -p $dst
+mkdir -p "$dst"
fi
# ingress data from batch task
export SHIPYARD_BATCH_ENV=$SHIPYARD_BATCH_ENV
-python3 /opt/batch-shipyard/task_file_mover.py $jobid $taskid $include $exclude $dest
+python3 /opt/batch-shipyard/task_file_mover.py "$jobid" "$taskid" "$include" "$exclude" "$dest"
done
@@ -1,5 +1,9 @@
#!/usr/bin/env bash

+# this script runs in the context of env vars imported inside of a
+# Docker run env, thus disable ref but not assigned shellcheck warnings.
+# shellcheck disable=SC2154
+
set -e
set -o pipefail

@@ -13,16 +17,16 @@ cd /opt/batch-shipyard
# add timing markers
if [ ! -z ${SHIPYARD_TIMING+x} ]; then
# backfill node prep start
-python3 perf.py nodeprep start $prefix --ts $npstart --message "offer=$offer,sku=$sku"
+python3 perf.py nodeprep start "$prefix" --ts "$npstart" --message "offer=$offer,sku=$sku"
# backfill docker run pull start
-python3 perf.py shipyard pull-start $prefix --ts $drpstart
+python3 perf.py shipyard pull-start "$prefix" --ts "$drpstart"
# mark docker run pull end
-python3 perf.py shipyard pull-end $prefix
+python3 perf.py shipyard pull-end "$prefix"
# mark node prep finished
-python3 perf.py nodeprep end $prefix
+python3 perf.py nodeprep end "$prefix"
# mark cascade start time
-python3 perf.py cascade start $prefix
+python3 perf.py cascade start "$prefix"
fi

# execute cascade
-python3 cascade.py $p2p --ipaddress $ipaddress $prefix
+python3 cascade.py "$p2p" --ipaddress "$ipaddress" "$prefix"
@@ -50,21 +50,17 @@ DEPENDENCIES=(
https://github.com/requests/requests/raw/master/LICENSE
ruamel.yaml
https://bitbucket.org/ruamel/yaml
-https://bitbucket.org/ruamel/yaml/raw/ef15acf88b039656570f9b1f45b5e7394c154997/LICENSE
+https://bitbucket.org/ruamel/yaml/raw/8d3f84d78aff534cbc881fa509ade31a5edc451d/LICENSE
)
DEPLEN=${#DEPENDENCIES[@]}

add_attribution() {
name=$1
url=$2
-license=$(curl -fSsL $3)
+license=$(curl -fSsL "$3")

-echo "" >> $TPNFILE
-echo "-------------------------------------------------------------------------------" >> $TPNFILE
-echo "" >> $TPNFILE
-echo "$name ($url)" >> $TPNFILE
-echo "" >> $TPNFILE
-echo "$license" >> $TPNFILE
+{ echo ""; echo "-------------------------------------------------------------------------------"; \
+echo ""; echo "$name ($url)"; echo ""; echo "$license"; } >> $TPNFILE
}

cat << 'EOF' > $TPNFILE

@@ -89,13 +85,12 @@ Redmond, WA 98052 USA
Please write "source for [Third Party IP]" in the memo line of your payment.
EOF

-echo -n "Generating $(($DEPLEN / 3)) attributions: ["
+echo -n "Generating $((DEPLEN / 3)) attributions: ["
i=0
-while [ $i -lt $DEPLEN ]; do
-add_attribution ${DEPENDENCIES[$i]} ${DEPENDENCIES[$(($i+1))]} ${DEPENDENCIES[$(($i+2))]}
-i=$(($i + 3))
+while [ $i -lt "$DEPLEN" ]; do
+add_attribution "${DEPENDENCIES[$i]}" "${DEPENDENCIES[$((i+1))]}" "${DEPENDENCIES[$((i+2))]}"
+i=$((i + 3))
echo -n "."
done
-echo "" >> $TPNFILE
-echo "-------------------------------------------------------------------------------" >> $TPNFILE
+{ echo ""; echo "-------------------------------------------------------------------------------"; } >> $TPNFILE
echo "] done."
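The attribution generator above, and the sshd configuration tweak later in this commit, both collapse a run of repeated append redirections into a single grouped redirect, the pattern ShellCheck nudges toward (SC2129): the commands run as a group and the output file is opened once. A minimal sketch with a placeholder output file:

outfile=/tmp/example.txt
# instead of: echo "one" >> "$outfile"; echo "two" >> "$outfile"; echo "three" >> "$outfile"
{
    echo "one"
    echo "two"
    echo "three"
} >> "$outfile"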
@@ -1,4 +1,4 @@
#!/usr/bin/env sh

printenv
-docker build --build-arg GIT_BRANCH=$SOURCE_BRANCH --build-arg GIT_COMMIT=$GIT_SHA1 -t $IMAGE_NAME .
+docker build --build-arg GIT_BRANCH="$SOURCE_BRANCH" --build-arg GIT_COMMIT="$GIT_SHA1" -t "$IMAGE_NAME" .
install.sh (60 changed lines)
@ -1,5 +1,8 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# shellcheck disable=SC1090
|
||||
# shellcheck disable=SC1091
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
|
@ -47,7 +50,7 @@ shift $((OPTIND-1))
|
|||
# non-cloud shell environment checks
|
||||
if [ ! -z $SUDO ]; then
|
||||
# check to ensure this is not being run directly as root
|
||||
if [ $(id -u) -eq 0 ]; then
|
||||
if [ "$(id -u)" -eq 0 ]; then
|
||||
echo "Installation cannot be performed as root or via sudo."
|
||||
echo "Please install as a regular user."
|
||||
exit 1
|
||||
|
@ -62,7 +65,7 @@ if [ ! -z $SUDO ]; then
|
|||
fi
|
||||
|
||||
# check that shipyard.py is in cwd
|
||||
if [ ! -f $PWD/shipyard.py ]; then
|
||||
if [ ! -f "${PWD}"/shipyard.py ]; then
|
||||
echo "shipyard.py not found in $PWD."
|
||||
echo "Please run install.sh from the same directory as shipyard.py."
|
||||
exit 1
|
||||
|
@ -79,8 +82,7 @@ fi
|
|||
# check for anaconda
|
||||
set +e
|
||||
ANACONDA=0
|
||||
$PYTHON -c "from __future__ import print_function; import sys; print(sys.version)" | grep -Ei 'anaconda|continuum|conda-forge'
|
||||
if [ $? -eq 0 ]; then
|
||||
if $PYTHON -c "from __future__ import print_function; import sys; print(sys.version)" | grep -Ei 'anaconda|continuum|conda-forge'; then
|
||||
# check for conda
|
||||
if hash conda 2> /dev/null; then
|
||||
echo "Anaconda environment detected."
|
||||
|
@ -88,7 +90,7 @@ if [ $? -eq 0 ]; then
|
|||
echo "Anaconda environment detected, but conda command not found."
|
||||
exit 1
|
||||
fi
|
||||
if [ -z $VENV_NAME ]; then
|
||||
if [ -z "$VENV_NAME" ]; then
|
||||
echo "Virtual environment name must be supplied for Anaconda installations."
|
||||
exit 1
|
||||
fi
|
||||
|
@ -99,7 +101,7 @@ set -e
|
|||
|
||||
# perform some virtual env parameter checks
|
||||
INSTALL_VENV_BIN=0
|
||||
if [ ! -z $VENV_NAME ]; then
|
||||
if [ ! -z "$VENV_NAME" ]; then
|
||||
# check if virtual env, env is not named shipyard
|
||||
if [ "$VENV_NAME" == "shipyard" ]; then
|
||||
echo "Virtual environment name cannot be shipyard. Please use a different virtual environment name."
|
||||
|
@ -139,7 +141,7 @@ if [ -z ${DISTRIB_ID+x} ] || [ -z ${DISTRIB_RELEASE+x} ]; then
|
|||
fi
|
||||
|
||||
# lowercase vars
|
||||
if [ $DISTRIB_ID != "Darwin" ]; then
|
||||
if [ "$DISTRIB_ID" != "Darwin" ]; then
|
||||
DISTRIB_ID=${DISTRIB_ID,,}
|
||||
DISTRIB_RELEASE=${DISTRIB_RELEASE,,}
|
||||
fi
|
||||
|
@ -147,8 +149,8 @@ fi
|
|||
echo "Detected OS: $DISTRIB_ID $DISTRIB_RELEASE"
|
||||
|
||||
# install requisite packages from distro repo
|
||||
if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
||||
if [ $DISTRIB_ID == "ubuntu" ] || [ $DISTRIB_ID == "debian" ]; then
|
||||
if [ ! -z $SUDO ] || [ "$(id -u)" -eq 0 ]; then
|
||||
if [ "$DISTRIB_ID" == "ubuntu" ] || [ "$DISTRIB_ID" == "debian" ]; then
|
||||
$SUDO apt-get update
|
||||
if [ $ANACONDA -eq 1 ]; then
|
||||
PYTHON_PKGS=
|
||||
|
@ -165,10 +167,11 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
|||
fi
|
||||
fi
|
||||
fi
|
||||
# shellcheck disable=SC2086
|
||||
$SUDO apt-get install -y --no-install-recommends \
|
||||
build-essential libssl-dev libffi-dev openssl \
|
||||
openssh-client rsync $PYTHON_PKGS
|
||||
elif [ $DISTRIB_ID == "centos" ] || [ $DISTRIB_ID == "rhel" ]; then
|
||||
elif [ "$DISTRIB_ID" == "centos" ] || [ "$DISTRIB_ID" == "rhel" ]; then
|
||||
$SUDO yum makecache fast
|
||||
if [ $ANACONDA -eq 1 ]; then
|
||||
PYTHON_PKGS=
|
||||
|
@ -176,14 +179,12 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
|||
if [ $PYTHON == "python" ]; then
|
||||
PYTHON_PKGS="python-devel"
|
||||
else
|
||||
yum list installed epel-release
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! yum list installed epel-release; then
|
||||
echo "epel-release package not installed."
|
||||
echo "Please install the epel-release package or refer to the Installation documentation for manual installation steps".
|
||||
exit 1
|
||||
fi
|
||||
yum list installed python34
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! yum list installed python34; then
|
||||
echo "python34 epel package not installed."
|
||||
echo "Please install the python34 epel package or refer to the Installation documentation for manual installation steps."
|
||||
exit 1
|
||||
|
@ -191,12 +192,13 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
|||
PYTHON_PKGS="python34-devel"
|
||||
fi
|
||||
fi
|
||||
# shellcheck disable=SC2086
|
||||
$SUDO yum install -y gcc openssl-devel libffi-devel openssl \
|
||||
openssh-clients rsync $PYTHON_PKGS
|
||||
if [ $ANACONDA -eq 0 ]; then
|
||||
curl -fSsL --tlsv1 https://bootstrap.pypa.io/get-pip.py | $SUDO $PYTHON
|
||||
fi
|
||||
elif [ $DISTRIB_ID == "opensuse" ] || [ $DISTRIB_ID == "sles" ]; then
|
||||
elif [ "$DISTRIB_ID" == "opensuse" ] || [ "$DISTRIB_ID" == "sles" ]; then
|
||||
$SUDO zypper ref
|
||||
if [ $ANACONDA -eq 1 ]; then
|
||||
PYTHON_PKGS=
|
||||
|
@ -207,12 +209,13 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
|||
PYTHON_PKGS="python3-devel"
|
||||
fi
|
||||
fi
|
||||
# shellcheck disable=SC2086
|
||||
$SUDO zypper -n in gcc libopenssl-devel libffi48-devel openssl \
|
||||
openssh rsync $PYTHON_PKGS
|
||||
if [ $ANACONDA -eq 0 ]; then
|
||||
curl -fSsL --tlsv1 https://bootstrap.pypa.io/get-pip.py | $SUDO $PYTHON
|
||||
fi
|
||||
elif [ $DISTRIB_ID == "Darwin" ]; then
|
||||
elif [ "$DISTRIB_ID" == "Darwin" ]; then
|
||||
# check for pip, otherwise install it
|
||||
if hash $PIP 2> /dev/null; then
|
||||
echo "$PIP detected."
|
||||
|
@ -228,10 +231,10 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
|||
fi
|
||||
|
||||
# create virtual env if required and install required python packages
|
||||
if [ ! -z $VENV_NAME ]; then
|
||||
if [ ! -z "$VENV_NAME" ]; then
|
||||
# install virtual env if required
|
||||
if [ $INSTALL_VENV_BIN -eq 1 ]; then
|
||||
if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
||||
if [ ! -z $SUDO ] || [ "$(id -u)" -eq 0 ]; then
|
||||
$SUDO $PIP install virtualenv
|
||||
else
|
||||
$PIP install --user virtualenv
|
||||
|
@ -239,12 +242,12 @@ if [ ! -z $VENV_NAME ]; then
|
|||
fi
|
||||
if [ $ANACONDA -eq 0 ]; then
|
||||
# create venv if it doesn't exist
|
||||
if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
|
||||
virtualenv -p $PYTHON $VENV_NAME
|
||||
if [ ! -z $SUDO ] || [ "$(id -u)" -eq 0 ]; then
|
||||
virtualenv -p $PYTHON "$VENV_NAME"
|
||||
else
|
||||
$HOME/.local/bin/virtualenv -p $PYTHON $VENV_NAME
|
||||
"${HOME}"/.local/bin/virtualenv -p $PYTHON "$VENV_NAME"
|
||||
fi
|
||||
source $VENV_NAME/bin/activate
|
||||
source "${VENV_NAME}"/bin/activate
|
||||
$PIP install --upgrade pip setuptools
|
||||
set +e
|
||||
$PIP uninstall -y azure-storage
|
||||
|
@ -258,9 +261,9 @@ if [ ! -z $VENV_NAME ]; then
|
|||
echo "Creating conda env for Python $pyver"
|
||||
# create conda env
|
||||
set +e
|
||||
conda create --yes --name $VENV_NAME python=$pyver
|
||||
conda create --yes --name "$VENV_NAME" python="${pyver}"
|
||||
set -e
|
||||
source activate $VENV_NAME
|
||||
source activate "$VENV_NAME"
|
||||
conda install --yes pip
|
||||
# temporary workaround with pip requirements upgrading setuptools and
|
||||
# conda pip failing to reference the old setuptools version
|
||||
|
@ -270,7 +273,7 @@ if [ ! -z $VENV_NAME ]; then
|
|||
set -e
|
||||
$PIP install --upgrade -r requirements.txt
|
||||
$PIP install --upgrade --no-deps -r req_nodeps.txt
|
||||
source deactivate $VENV_NAME
|
||||
source deactivate "$VENV_NAME"
|
||||
fi
|
||||
else
|
||||
$SUDO $PIP install --upgrade pip
|
||||
|
@ -302,7 +305,7 @@ fi
|
|||
|
||||
EOF
|
||||
|
||||
if [ ! -z $VENV_NAME ]; then
|
||||
if [ ! -z "$VENV_NAME" ]; then
|
||||
if [ $ANACONDA -eq 0 ]; then
|
||||
cat >> shipyard << 'EOF'
|
||||
source $BATCH_SHIPYARD_ROOT_DIR/$VENV_NAME/bin/activate
|
||||
|
@ -324,7 +327,7 @@ python3 $BATCH_SHIPYARD_ROOT_DIR/shipyard.py $*
|
|||
EOF
|
||||
fi
|
||||
|
||||
if [ ! -z $VENV_NAME ]; then
|
||||
if [ ! -z "$VENV_NAME" ]; then
|
||||
if [ $ANACONDA -eq 0 ]; then
|
||||
cat >> shipyard << 'EOF'
|
||||
deactivate
|
||||
|
@ -339,7 +342,8 @@ fi
|
|||
chmod 755 shipyard
|
||||
|
||||
echo ""
|
||||
if [ -z $VENV_NAME ]; then
|
||||
if [ -z "$VENV_NAME" ]; then
|
||||
# shellcheck disable=SC2016
|
||||
echo '>> Please add $HOME/.local/bin to your $PATH. You can do this '
|
||||
echo '>> permanently in your shell rc script, e.g., .bashrc for bash shells.'
|
||||
echo ""
|
||||
|
|
|
@ -11,26 +11,26 @@ log() {
|
|||
|
||||
# decrypt passwords if necessary
|
||||
if [ "$1" == "-e" ]; then
|
||||
if [ ! -z $DOCKER_LOGIN_PASSWORD ]; then
|
||||
DOCKER_LOGIN_PASSWORD=$(echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
|
||||
if [ ! -z "$DOCKER_LOGIN_PASSWORD" ]; then
|
||||
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
|
||||
fi
|
||||
if [ ! -z $SINGULARITY_LOGIN_PASSWORD ]; then
|
||||
SINGULARITY_LOGIN_PASSWORD=$(echo $SINGULARITY_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
|
||||
if [ ! -z "$SINGULARITY_LOGIN_PASSWORD" ]; then
|
||||
SINGULARITY_LOGIN_PASSWORD=$(echo "$SINGULARITY_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
|
||||
fi
|
||||
fi
|
||||
|
||||
# login to Docker registries
|
||||
if [ ! -z $DOCKER_LOGIN_PASSWORD ]; then
|
||||
if [ ! -z "$DOCKER_LOGIN_PASSWORD" ]; then
|
||||
# parse env vars
|
||||
IFS=',' read -ra servers <<< "${DOCKER_LOGIN_SERVER}"
|
||||
IFS=',' read -ra users <<< "${DOCKER_LOGIN_USERNAME}"
|
||||
IFS=',' read -ra passwords <<< "${DOCKER_LOGIN_PASSWORD}"
|
||||
# loop through each server and login
|
||||
nservers=${#servers[@]}
|
||||
if [ $nservers -ge 1 ]; then
|
||||
if [ "$nservers" -ge 1 ]; then
|
||||
log DEBUG "Logging into $nservers Docker registry servers..."
|
||||
for i in $(seq 0 $((nservers-1))); do
|
||||
docker login --username ${users[$i]} --password ${passwords[$i]} ${servers[$i]}
|
||||
docker login --username "${users[$i]}" --password "${passwords[$i]}" "${servers[$i]}"
|
||||
done
|
||||
log INFO "Docker registry logins completed."
|
||||
fi
|
||||
|
@ -39,14 +39,14 @@ else
|
|||
fi
|
||||
|
||||
# "login" to Singularity registries
|
||||
if [ ! -z $SINGULARITY_LOGIN_PASSWORD ]; then
|
||||
if [ ! -z "$SINGULARITY_LOGIN_PASSWORD" ]; then
|
||||
# parse env vars
|
||||
IFS=',' read -ra servers <<< "${SINGULARITY_LOGIN_SERVER}"
|
||||
IFS=',' read -ra users <<< "${SINGULARITY_LOGIN_USERNAME}"
|
||||
IFS=',' read -ra passwords <<< "${SINGULARITY_LOGIN_PASSWORD}"
|
||||
# loop through each server and login
|
||||
nservers=${#servers[@]}
|
||||
if [ $nservers -ge 1 ]; then
|
||||
if [ "$nservers" -ge 1 ]; then
|
||||
log DEBUG "Creating export script into $nservers Singularity registry servers..."
|
||||
touch singularity-registry-login
|
||||
for i in $(seq 0 $((nservers-1))); do
|
||||
|
|
|
@ -12,13 +12,13 @@ for spec in "$@"; do
|
|||
kind=${parts[1]}
|
||||
encrypted=${parts[2],,}
|
||||
|
||||
if [ $encrypted == "true" ]; then
|
||||
if [ "$encrypted" == "true" ]; then
|
||||
cipher=${parts[3]}
|
||||
local_path=${parts[4]}
|
||||
eo=${parts[5]}
|
||||
# decrypt ciphertext
|
||||
privatekey=$AZ_BATCH_NODE_STARTUP_DIR/certs/key.pem
|
||||
cipher=`echo $cipher | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
|
||||
cipher=$(echo "$cipher" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
|
||||
IFS=',' read -ra storage <<< "$cipher"
|
||||
sa=${storage[0]}
|
||||
ep=${storage[1]}
|
||||
|
@ -36,12 +36,12 @@ for spec in "$@"; do
|
|||
fi
|
||||
|
||||
wd=$(dirname "$local_path")
|
||||
if [ $kind == "i" ]; then
|
||||
if [ "$kind" == "i" ]; then
|
||||
# create destination working directory
|
||||
mkdir -p $wd
|
||||
mkdir -p "$wd"
|
||||
# ingress data from storage
|
||||
action=download
|
||||
elif [ $kind == "e" ]; then
|
||||
elif [ "$kind" == "e" ]; then
|
||||
# egress from compute node to storage
|
||||
action=upload
|
||||
else
|
||||
|
@ -50,8 +50,8 @@ for spec in "$@"; do
|
|||
fi
|
||||
|
||||
# execute blobxfer
|
||||
docker run --rm -t -v $wd:$wd -w $wd alfpark/blobxfer:$bxver \
|
||||
$action --storage-account $sa --sas $saskey --endpoint $ep \
|
||||
--remote-path $remote_path --local-path $local_path \
|
||||
--no-progress-bar $eo
|
||||
docker run --rm -t -v "$wd":"$wd" -w "$wd" alfpark/blobxfer:"$bxver" \
|
||||
"$action" --storage-account "$sa" --sas "$saskey" --endpoint "$ep" \
|
||||
--remote-path "$remote_path" --local-path "$local_path" \
|
||||
--no-progress-bar "$eo"
|
||||
done
|
||||
|
|
|
@ -7,10 +7,10 @@ voltype=$1
|
|||
mntpath=$2
|
||||
|
||||
# get my ip address
|
||||
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
|
||||
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
|
||||
|
||||
# if master, peer and create volume
|
||||
if [ $AZ_BATCH_IS_CURRENT_NODE_MASTER == "true" ]; then
|
||||
if [ "$AZ_BATCH_IS_CURRENT_NODE_MASTER" == "true" ]; then
|
||||
# construct brick locations
|
||||
IFS=',' read -ra HOSTS <<< "$AZ_BATCH_HOST_LIST"
|
||||
bricks=
|
||||
|
@ -18,20 +18,20 @@ if [ $AZ_BATCH_IS_CURRENT_NODE_MASTER == "true" ]; then
|
|||
do
|
||||
bricks+=" $node:$mntpath/gluster/brick"
|
||||
# probe peer
|
||||
if [ $node != $ipaddress ]; then
|
||||
if [ "$node" != "$ipaddress" ]; then
|
||||
echo "probing $node"
|
||||
gluster peer probe $node
|
||||
gluster peer probe "$node"
|
||||
fi
|
||||
done
|
||||
numnodes=${#HOSTS[@]}
|
||||
numpeers=$(($numnodes - 1))
|
||||
numpeers=$((numnodes - 1))
|
||||
echo "waiting for $numpeers peers to reach connected state..."
|
||||
# get peer info
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
numready=`gluster peer status | grep -e '^State: Peer in Cluster' | wc -l`
|
||||
if [ $numready == $numpeers ]; then
|
||||
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
|
||||
if [ "$numready" == "$numpeers" ]; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
|
@ -42,10 +42,10 @@ if [ $AZ_BATCH_IS_CURRENT_NODE_MASTER == "true" ]; then
|
|||
sleep 5
|
||||
# create volume
|
||||
echo "creating gv0 ($bricks)"
|
||||
gluster volume create gv0 $voltype $numnodes transport tcp$bricks
|
||||
gluster volume create gv0 "$voltype" "$numnodes" transport tcp"$bricks"
|
||||
# modify volume properties: the uid/gid mapping is UNDOCUMENTED behavior
|
||||
gluster volume set gv0 storage.owner-uid `id -u _azbatch`
|
||||
gluster volume set gv0 storage.owner-gid `id -g _azbatch`
|
||||
gluster volume set gv0 storage.owner-uid "$(id -u _azbatch)"
|
||||
gluster volume set gv0 storage.owner-gid "$(id -g _azbatch)"
|
||||
# start volume
|
||||
echo "starting gv0"
|
||||
gluster volume start gv0
|
||||
|
@ -56,8 +56,7 @@ echo "waiting for gv0 volume..."
|
|||
set +e
|
||||
while :
|
||||
do
|
||||
gluster volume info gv0
|
||||
if [ $? -eq 0 ]; then
|
||||
if gluster volume info gv0; then
|
||||
# delay to wait for subvolumes
|
||||
sleep 5
|
||||
break
|
||||
|
@ -68,8 +67,8 @@ set -e
|
|||
|
||||
# add gv0 to /etc/fstab for auto-mount on reboot
|
||||
mountpoint=$AZ_BATCH_NODE_SHARED_DIR/.gluster/gv0
|
||||
mkdir -p $mountpoint
|
||||
chmod 775 $mountpoint
|
||||
mkdir -p "$mountpoint"
|
||||
chmod 775 "$mountpoint"
|
||||
echo "adding $mountpoint to fstab"
|
||||
echo "$ipaddress:/gv0 $mountpoint glusterfs defaults,_netdev 0 0" >> /etc/fstab
|
||||
|
||||
|
@ -79,12 +78,11 @@ START=$(date -u +"%s")
|
|||
set +e
|
||||
while :
|
||||
do
|
||||
mount $mountpoint
|
||||
if [ $? -eq 0 ]; then
|
||||
if mount "$mountpoint"; then
|
||||
break
|
||||
else
|
||||
NOW=$(date -u +"%s")
|
||||
DIFF=$((($NOW-$START)/60))
|
||||
DIFF=$(((NOW-START)/60))
|
||||
# fail after 5 minutes of attempts
|
||||
if [ $DIFF -ge 5 ]; then
|
||||
echo "could not mount gluster volume: $mountpoint"
|
||||
|
@ -94,7 +92,7 @@ do
|
|||
fi
|
||||
done
|
||||
set -e
|
||||
chmod 775 $mountpoint
|
||||
chmod 775 "$mountpoint"
|
||||
|
||||
# touch file noting success
|
||||
touch .glusterfs_success
|
||||
|
|
|
@ -8,7 +8,7 @@ shift
|
|||
mntpath=$1
|
||||
shift
|
||||
numnodes=$1
|
||||
numpeers=$(($numnodes - 1))
|
||||
numpeers=$((numnodes - 1))
|
||||
shift
|
||||
masterip=$1
|
||||
shift
|
||||
|
@ -18,14 +18,14 @@ echo "temp disk mountpoint: $mntpath"
|
|||
echo "master ip: $masterip"
|
||||
|
||||
# get my ip address
|
||||
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
|
||||
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
|
||||
echo "ip address: $ipaddress"
|
||||
|
||||
# check if my ip address is a new node
|
||||
domount=0
|
||||
for i in "$@"
|
||||
do
|
||||
if [ $i == $ipaddress ]; then
|
||||
if [ "$i" == "$ipaddress" ]; then
|
||||
domount=1
|
||||
break
|
||||
fi
|
||||
|
@ -33,22 +33,22 @@ done
|
|||
echo "mount: $domount"
|
||||
|
||||
# master peers and adds the bricks
|
||||
if [ $masterip == $ipaddress ]; then
|
||||
if [ "$masterip" == "$ipaddress" ]; then
|
||||
# probe new nodes
|
||||
bricks=
|
||||
for node in "$@"
|
||||
do
|
||||
bricks+=" $node:$mntpath/gluster/brick"
|
||||
echo "probing $node"
|
||||
gluster peer probe $node
|
||||
gluster peer probe "$node"
|
||||
done
|
||||
|
||||
# get peer info
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
numready=`gluster peer status | grep -e '^State: Peer in Cluster' | wc -l`
|
||||
if [ $numready == $numpeers ]; then
|
||||
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
|
||||
if [ "$numready" == "$numpeers" ]; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
|
@ -60,7 +60,7 @@ if [ $masterip == $ipaddress ]; then
|
|||
sleep 5
|
||||
|
||||
# add bricks to volume
|
||||
gluster volume add-brick $voltype $numnodes gv0$bricks
|
||||
gluster volume add-brick "$voltype" "$numnodes" gv0"$bricks"
|
||||
|
||||
# get volume info
|
||||
gluster volume info
|
||||
|
@ -71,7 +71,7 @@ echo "waiting for gv0 volume..."
|
|||
set +e
|
||||
while :
|
||||
do
|
||||
numbricks=`gluster volume info gv0 | grep -e '^Number of Bricks:' | cut -d' ' -f4`
|
||||
numbricks=$(gluster volume info gv0 | grep -e '^Number of Bricks:' | cut -d' ' -f4)
|
||||
if [ "$numbricks" == "$numnodes" ]; then
|
||||
# delay to wait for subvolumes
|
||||
sleep 5
|
||||
|
@ -85,8 +85,8 @@ set -e
|
|||
if [ $domount -eq 1 ]; then
|
||||
# add gv0 to /etc/fstab for auto-mount on reboot
|
||||
mountpoint=$AZ_BATCH_NODE_SHARED_DIR/.gluster/gv0
|
||||
mkdir -p $mountpoint
|
||||
chmod 775 $mountpoint
|
||||
mkdir -p "$mountpoint"
|
||||
chmod 775 "$mountpoint"
|
||||
echo "adding $mountpoint to fstab"
|
||||
echo "$ipaddress:/gv0 $mountpoint glusterfs defaults,_netdev 0 0" >> /etc/fstab
|
||||
|
||||
|
@ -96,12 +96,11 @@ if [ $domount -eq 1 ]; then
|
|||
set +e
|
||||
while :
|
||||
do
|
||||
mount $mountpoint
|
||||
if [ $? -eq 0 ]; then
|
||||
if mount "$mountpoint"; then
|
||||
break
|
||||
else
|
||||
NOW=$(date -u +"%s")
|
||||
DIFF=$((($NOW-$START)/60))
|
||||
DIFF=$(((NOW-START)/60))
|
||||
# fail after 5 minutes of attempts
|
||||
if [ $DIFF -ge 5 ]; then
|
||||
echo "could not mount gluster volume: $mountpoint"
|
||||
|
@ -111,7 +110,7 @@ if [ $domount -eq 1 ]; then
|
|||
fi
|
||||
done
|
||||
set -e
|
||||
chmod 775 $mountpoint
|
||||
chmod 775 "$mountpoint"
|
||||
fi
|
||||
|
||||
# touch file noting success
|
||||
@@ -6,7 +6,7 @@ set -o pipefail
offer=$1
sku=$2

-if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
+if [ "$offer" == "ubuntuserver" ] || [ "$offer" == "debian" ]; then
if [[ $sku == 14.04* ]]; then
srvrestart="initctl restart sshd"
mkdir /tmp/hpnssh

@@ -32,9 +32,7 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
exit 1
fi
# modify sshd config settings
-echo "HPNDisabled=no" >> /etc/ssh/sshd_config
-echo "TcpRcvBufPoll=yes" >> /etc/ssh/sshd_config
-echo "NoneEnabled=yes" >> /etc/ssh/sshd_config
+{ echo "HPNDisabled=no"; echo "TcpRcvBufPoll=yes"; echo "NoneEnabled=yes"; } >> /etc/ssh/sshd_config
# restart sshd
$srvrestart
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then
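The node preparation script that follows replaces post-hoc checks of $? with direct command tests and, inside its multi-distro retry helpers, captures each branch's exit status into a local rc variable immediately after the command runs. A minimal sketch of that retry pattern, using placeholder package-manager calls rather than the script's actual helper names:

retry_install() {
    local retries=120
    local rc
    set +e
    while [ $retries -gt 0 ]; do
        if [ -x /usr/bin/apt-get ]; then
            apt-get install -y --no-install-recommends "$@"
            rc=$?
        else
            yum install -y "$@"
            rc=$?
        fi
        # test the captured status instead of $? after the fi
        if [ $rc -eq 0 ]; then
            break
        fi
        retries=$((retries-1))
        sleep 1
    done
    set -e
}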
@ -1,5 +1,7 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
|
@ -99,7 +101,7 @@ while getopts "h?abcde:fg:m:no:p:s:t:v:wx:" opt; do
|
|||
t)
|
||||
p2p=${OPTARG,,}
|
||||
IFS=':' read -ra p2pflags <<< "$p2p"
|
||||
if [ ${p2pflags[0]} == "true" ]; then
|
||||
if [ "${p2pflags[0]}" == "true" ]; then
|
||||
p2penabled=1
|
||||
else
|
||||
p2penabled=0
|
||||
|
@ -119,15 +121,15 @@ done
|
|||
shift $((OPTIND-1))
|
||||
[ "$1" = "--" ] && shift
|
||||
# check args
|
||||
if [ -z $offer ]; then
|
||||
if [ -z "$offer" ]; then
|
||||
log ERROR "vm offer not specified"
|
||||
exit 1
|
||||
fi
|
||||
if [ -z $sku ]; then
|
||||
if [ -z "$sku" ]; then
|
||||
log ERROR "vm sku not specified"
|
||||
exit 1
|
||||
fi
|
||||
if [ -z $version ]; then
|
||||
if [ -z "$version" ]; then
|
||||
log ERROR "batch-shipyard version not specified"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -168,7 +170,7 @@ check_for_buggy_ntfs_mount() {
|
|||
|
||||
save_startup_to_volatile() {
|
||||
set +e
|
||||
touch $AZ_BATCH_NODE_ROOT_DIR/volatile/startup/.save
|
||||
touch "${AZ_BATCH_NODE_ROOT_DIR}"/volatile/startup/.save
|
||||
set -e
|
||||
}
|
||||
|
||||
|
@ -176,14 +178,15 @@ ensure_nvidia_driver_installed() {
|
|||
check_for_nvidia_card
|
||||
# ensure that nvidia drivers are loaded
|
||||
set +e
|
||||
local out=$(lsmod)
|
||||
local out
|
||||
out=$(lsmod)
|
||||
echo "$out" | grep -i nvidia > /dev/null
|
||||
local rc=$?
|
||||
set -e
|
||||
echo "$out"
|
||||
if [ $rc -ne 0 ]; then
|
||||
log WARNING "Nvidia driver not present!"
|
||||
install_nvidia_software $1 $2
|
||||
install_nvidia_software "$1" "$2"
|
||||
else
|
||||
log INFO "Nvidia driver detected"
|
||||
nvidia-smi
|
||||
|
@ -192,7 +195,8 @@ ensure_nvidia_driver_installed() {
|
|||
|
||||
check_for_nvidia_card() {
|
||||
set +e
|
||||
local out=$(lspci)
|
||||
local out
|
||||
out=$(lspci)
|
||||
echo "$out" | grep -i nvidia > /dev/null
|
||||
local rc=$?
|
||||
set -e
|
||||
|
@ -220,7 +224,7 @@ install_nvidia_software() {
|
|||
rmmod nouveau
|
||||
set -e
|
||||
# purge nouveau off system
|
||||
if [ $offer == "ubuntuserver" ]; then
|
||||
if [ "$offer" == "ubuntuserver" ]; then
|
||||
apt-get --purge remove xserver-xorg-video-nouveau xserver-xorg-video-nouveau-hwe-16.04
|
||||
elif [[ $offer == centos* ]]; then
|
||||
yum erase -y xorg-x11-drv-nouveau
|
||||
|
@ -237,33 +241,34 @@ alias nouveau off
|
|||
alias lbm-nouveau off
|
||||
EOF
|
||||
# get development essentials for nvidia driver
|
||||
if [ $offer == "ubuntuserver" ]; then
|
||||
install_packages $offer build-essential
|
||||
if [ "$offer" == "ubuntuserver" ]; then
|
||||
install_packages "$offer" build-essential
|
||||
elif [[ $offer == centos* ]]; then
|
||||
local kernel_devel_package="kernel-devel-$(uname -r)"
|
||||
if [[ $offer == "centos-hpc" ]] || [[ $sku == "7.4" ]]; then
|
||||
install_packages $offer $kernel_devel_package
|
||||
elif [ $sku == "7.3" ]; then
|
||||
download_file http://vault.centos.org/7.3.1611/updates/x86_64/Packages/${kernel_devel_package}.rpm
|
||||
install_local_packages $offer ${kernel_devel_package}.rpm
|
||||
local kernel_devel_package
|
||||
kernel_devel_package="kernel-devel-$(uname -r)"
|
||||
if [[ $offer == "centos-hpc" ]] || [[ "$sku" == "7.4" ]]; then
|
||||
install_packages "$offer" "$kernel_devel_package"
|
||||
elif [ "$sku" == "7.3" ]; then
|
||||
download_file http://vault.centos.org/7.3.1611/updates/x86_64/Packages/"${kernel_devel_package}".rpm
|
||||
install_local_packages "$offer" "${kernel_devel_package}".rpm
|
||||
else
|
||||
log ERROR "CentOS $sku not supported for GPU"
|
||||
exit 1
|
||||
fi
|
||||
install_packages $offer gcc binutils make
|
||||
install_packages "$offer" gcc binutils make
|
||||
fi
|
||||
# get additional dependency if NV-series VMs
|
||||
if [ $is_viz == "True" ]; then
|
||||
if [ $offer == "ubuntuserver" ]; then
|
||||
install_packages $offer xserver-xorg-dev
|
||||
if [ "$is_viz" == "True" ]; then
|
||||
if [ "$offer" == "ubuntuserver" ]; then
|
||||
install_packages "$offer" xserver-xorg-dev
|
||||
elif [[ $offer == centos* ]]; then
|
||||
install_packages $offer xorg-x11-server-devel
|
||||
install_packages "$offer" xorg-x11-server-devel
|
||||
fi
|
||||
fi
|
||||
# install driver
|
||||
./$nvdriver -s
|
||||
./"${nvdriver}" -s
|
||||
# add flag to config for GRID driver
|
||||
if [ $is_viz == "True" ]; then
|
||||
if [ "$is_viz" == "True" ]; then
|
||||
cp /etc/nvidia/gridd.conf.template /etc/nvidia/gridd.conf
|
||||
echo "IgnoreSP=TRUE" >> /etc/nvidia/gridd.conf
|
||||
fi
|
||||
|
@ -271,15 +276,15 @@ EOF
|
|||
nvidia-persistenced --user root
|
||||
nvidia-smi -pm 1
|
||||
# install nvidia-docker
|
||||
if [ $offer == "ubuntuserver" ]; then
|
||||
add_repo $offer https://nvidia.github.io/nvidia-docker/gpgkey
|
||||
if [ "$offer" == "ubuntuserver" ]; then
|
||||
add_repo "$offer" https://nvidia.github.io/nvidia-docker/gpgkey
|
||||
curl -fSsL https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64/nvidia-docker.list | \
|
||||
tee /etc/apt/sources.list.d/nvidia-docker.list
|
||||
elif [[ $offer == centos* ]]; then
|
||||
add_repo $offer https://nvidia.github.io/nvidia-docker/centos7/x86_64/nvidia-docker.repo
|
||||
add_repo "$offer" https://nvidia.github.io/nvidia-docker/centos7/x86_64/nvidia-docker.repo
|
||||
fi
|
||||
refresh_package_index $offer
|
||||
install_packages $offer nvidia-docker2
|
||||
refresh_package_index "$offer"
|
||||
install_packages "$offer" nvidia-docker2
|
||||
# merge daemon configs if necessary
|
||||
set +e
|
||||
grep \"graph\" /etc/docker/daemon.json
|
||||
|
@ -287,7 +292,7 @@ EOF
|
|||
set -e
|
||||
if [ $rc -ne 0 ]; then
|
||||
log DEBUG "Graph root not detected in Docker daemon.json"
|
||||
if [ $offer == "ubuntuserver" ]; then
|
||||
if [ "$offer" == "ubuntuserver" ]; then
|
||||
python -c "import json;a=json.load(open('/etc/docker/daemon.json.dpkg-old'));b=json.load(open('/etc/docker/daemon.json'));a.update(b);f=open('/etc/docker/daemon.json','w');json.dump(a,f);f.close();"
|
||||
rm -f /etc/docker/daemon.json.dpkg-old
|
||||
elif [[ $offer == centos* ]]; then
|
||||
|
@ -298,7 +303,8 @@ EOF
|
|||
fi
|
||||
pkill -SIGHUP dockerd
|
||||
nvidia-docker version
|
||||
local rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
|
||||
local rootdir
|
||||
rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
|
||||
log DEBUG "Graph root: $rootdir"
|
||||
nvidia-smi
|
||||
}
|
||||
|
@ -315,21 +321,21 @@ mount_azureblob_container() {
|
|||
log INFO "Mounting Azure Blob Containers"
|
||||
local offer=$1
|
||||
local sku=$2
|
||||
if [ $offer == "ubuntuserver" ]; then
|
||||
if [ "$offer" == "ubuntuserver" ]; then
|
||||
debfile=packages-microsoft-prod.deb
|
||||
if [ ! -f ${debfile} ]; then
|
||||
download_file https://packages.microsoft.com/config/ubuntu/16.04/${debfile}
|
||||
install_local_packages $offer ${debfile}
|
||||
refresh_package_index $offer
|
||||
install_packages $offer blobfuse
|
||||
install_local_packages "$offer" ${debfile}
|
||||
refresh_package_index "$offer"
|
||||
install_packages "$offer" blobfuse
|
||||
fi
|
||||
elif [[ $offer == "rhel" ]] || [[ $offer == centos* ]]; then
|
||||
elif [[ "$offer" == "rhel" ]] || [[ $offer == centos* ]]; then
|
||||
rpmfile=packages-microsoft-prod.rpm
|
||||
if [ ! -f ${rpmfile} ]; then
|
||||
download_file https://packages.microsoft.com/config/rhel/7/${rpmfile}
|
||||
install_local_packages $offer ${rpmfile}
|
||||
refresh_package_index $offer
|
||||
install_packages $offer blobfuse
|
||||
install_local_packages "$offer" ${rpmfile}
|
||||
refresh_package_index "$offer"
|
||||
install_packages "$offer" blobfuse
|
||||
fi
|
||||
else
|
||||
echo "ERROR: unsupported distribution for Azure blob: $offer $sku"
|
||||
|
@ -339,8 +345,8 @@ mount_azureblob_container() {
|
|||
./azureblob-mount.sh
|
||||
chmod 700 azureblob-mount.sh
|
||||
chown root:root azureblob-mount.sh
|
||||
chmod 600 *.cfg
|
||||
chown root:root *.cfg
|
||||
chmod 600 ./*.cfg
|
||||
chown root:root ./*.cfg
|
||||
}
|
||||
|
||||
download_file() {
|
||||
|
@ -348,8 +354,7 @@ download_file() {
|
|||
local retries=10
|
||||
set +e
|
||||
while [ $retries -gt 0 ]; do
|
||||
curl -fSsLO $1
|
||||
if [ $? -eq 0 ]; then
|
||||
if curl -fSsLO "$1"; then
|
||||
break
|
||||
fi
|
||||
retries=$((retries-1))
|
||||
|
@ -367,15 +372,19 @@ add_repo() {
|
|||
local url=$2
|
||||
set +e
|
||||
local retries=120
|
||||
local rc
|
||||
while [ $retries -gt 0 ]; do
|
||||
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
|
||||
curl -fSsL $url | apt-key add -
|
||||
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then
|
||||
yum-config-manager --add-repo $url
|
||||
if [[ "$offer" == "ubuntuserver" ]] || [[ "$offer" == "debian" ]]; then
|
||||
curl -fSsL "$url" | apt-key add -
|
||||
rc=$?
|
||||
elif [[ $offer == centos* ]] || [[ "$offer" == "rhel" ]] || [[ "$offer" == "oracle-linux" ]]; then
|
||||
yum-config-manager --add-repo "$url"
|
||||
rc=$?
|
||||
elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
|
||||
zypper addrepo $url
|
||||
zypper addrepo "$url"
|
||||
rc=$?
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
if [ $rc -eq 0 ]; then
|
||||
break
|
||||
fi
|
||||
retries=$((retries-1))
|
||||
|
@ -392,15 +401,19 @@ refresh_package_index() {
|
|||
local offer=$1
|
||||
set +e
|
||||
local retries=120
|
||||
local rc
|
||||
while [ $retries -gt 0 ]; do
|
||||
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
|
||||
if [[ "$offer" == "ubuntuserver" ]] || [[ "$offer" == "debian" ]]; then
|
||||
apt-get update
|
||||
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then
|
||||
rc=$?
|
||||
elif [[ $offer == centos* ]] || [[ "$offer" == "rhel" ]] || [[ "$offer" == "oracle-linux" ]]; then
|
||||
yum makecache -y fast
|
||||
rc=$?
|
||||
elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
|
||||
zypper -n --gpg-auto-import-keys ref
|
||||
rc=$?
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
if [ $rc -eq 0 ]; then
|
||||
break
|
||||
fi
|
||||
retries=$((retries-1))
|
||||
|
@ -418,15 +431,19 @@ install_packages() {
|
|||
shift
|
||||
set +e
|
||||
local retries=120
|
||||
local rc
|
||||
while [ $retries -gt 0 ]; do
|
||||
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
|
||||
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends $*
|
||||
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then
|
||||
yum install -y $*
|
||||
if [[ "$offer" == "ubuntuserver" ]] || [[ "$offer" == "debian" ]]; then
|
||||
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends "$@"
|
||||
rc=$?
|
||||
elif [[ $offer == centos* ]] || [[ "$offer" == "rhel" ]] || [[ "$offer" == "oracle-linux" ]]; then
|
||||
yum install -y "$@"
|
||||
rc=$?
|
||||
elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
|
||||
zypper -n in $*
|
||||
zypper -n in "$@"
|
||||
rc=$?
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
if [ $rc -eq 0 ]; then
|
||||
break
|
||||
fi
|
||||
retries=$((retries-1))
|
||||
|
@ -444,16 +461,19 @@ install_local_packages() {
|
|||
shift
|
||||
set +e
|
||||
local retries=120
|
||||
local rc
|
||||
while [ $retries -gt 0 ]; do
|
||||
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
|
||||
dpkg -i $*
|
||||
dpkg -i "$@"
|
||||
rc=$?
|
||||
else
|
||||
rpm -Uvh --nodeps $*
|
||||
rpm -Uvh --nodeps "$@"
|
||||
rc=$?
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
if [ $rc -eq 0 ]; then
|
||||
break
|
||||
fi
|
||||
retries=retries-1
|
||||
retries=$((retries-1))
|
||||
if [ $retries -eq 0 ]; then
|
||||
log ERROR "Could not install local packages: $*"
|
||||
exit 1
|
||||
|
@ -469,7 +489,8 @@ docker_pull_image() {
|
|||
set +e
|
||||
local retries=60
|
||||
while [ $retries -gt 0 ]; do
|
||||
local pull_out=$(docker pull $image 2>&1)
|
||||
local pull_out
|
||||
pull_out=$(docker pull "$image" 2>&1)
|
||||
local rc=$?
|
||||
if [ $rc -eq 0 ]; then
|
||||
echo "$pull_out"
|
||||
|
@ -477,18 +498,24 @@ docker_pull_image() {
|
|||
fi
|
||||
# non-zero exit code: check if pull output has toomanyrequests,
|
||||
# connection resets, or image config error
|
||||
if [[ ! -z "$(grep 'toomanyrequests' <<<$pull_out)" ]] || [[ ! -z "$(grep 'connection reset by peer' <<<$pull_out)" ]] || [[ ! -z "$(grep 'error pulling image configuration' <<<$pull_out)" ]]; then
|
||||
local tmr
|
||||
tmr=$(grep 'toomanyrequests' <<<"$pull_out")
|
||||
local crbp
|
||||
crbp=$(grep 'connection reset by peer' <<<"$pull_out")
|
||||
local epic
|
||||
epic=$(grep 'error pulling image configuration' <<<"$pull_out")
|
||||
if [[ ! -z "$tmr" ]] || [[ ! -z "$crbp" ]] || [[ ! -z "$epic" ]]; then
|
||||
log WARNING "will retry: $pull_out"
|
||||
else
|
||||
log ERROR "$pull_out"
|
||||
exit $rc
|
||||
fi
|
||||
retries=retries-1
|
||||
retries=$((retries-1))
|
||||
if [ $retries -le 0 ]; then
|
||||
log ERROR "Could not pull docker image: $image"
|
||||
exit $rc
|
||||
fi
|
||||
sleep $[($RANDOM % 5) + 1]s
|
||||
sleep $((RANDOM % 5 + 1))s
|
||||
done
|
||||
set -e
|
||||
}
|
||||
|
@ -499,12 +526,12 @@ singularity_setup() {
|
|||
shift
|
||||
local sku=$1
|
||||
shift
|
||||
if [ $offer == "ubuntu" ]; then
|
||||
if [ "$offer" == "ubuntu" ]; then
|
||||
if [[ $sku != 16.04* ]]; then
|
||||
log WARNING "Singularity not supported on $offer $sku"
|
||||
fi
|
||||
singularity_basedir=/mnt/singularity
|
||||
elif [[ $offer == "centos" ]] || [[ $offer == "rhel" ]]; then
|
||||
elif [[ "$offer" == "centos" ]] || [[ "$offer" == "rhel" ]]; then
|
||||
if [[ $sku != 7* ]]; then
|
||||
log WARNING "Singularity not supported on $offer $sku"
|
||||
return
|
||||
|
@ -561,22 +588,23 @@ process_fstab_entry() {
|
|||
local mountpoint=$2
|
||||
local fstab_entry=$3
|
||||
log INFO "Creating host directory for $desc at $mountpoint"
|
||||
mkdir -p $mountpoint
|
||||
chmod 777 $mountpoint
|
||||
mkdir -p "$mountpoint"
|
||||
chmod 777 "$mountpoint"
|
||||
echo "INFO: Adding $mountpoint to fstab"
|
||||
echo $fstab_entry >> /etc/fstab
|
||||
echo "$fstab_entry" >> /etc/fstab
|
||||
tail -n1 /etc/fstab
|
||||
echo "INFO: Mounting $mountpoint"
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
mount $mountpoint
|
||||
if [ $? -eq 0 ]; then
|
||||
if mount "$mountpoint"; then
|
||||
break
|
||||
else
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 5 minutes of attempts
|
||||
if [ $DIFF -ge 5 ]; then
|
||||
echo "ERROR: Could not mount $desc on $mountpoint"
|
||||
|
@ -594,8 +622,7 @@ check_for_docker_host_engine() {
|
|||
# start docker service
|
||||
systemctl start docker.service
|
||||
systemctl status docker.service
|
||||
docker version
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! docker version; then
|
||||
log ERROR "Docker not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -629,14 +656,14 @@ check_for_buggy_ntfs_mount
|
|||
save_startup_to_volatile
|
||||
|
||||
# set python env vars
|
||||
LC_ALL=en_US.UTF-8
|
||||
PYTHONASYNCIODEBUG=1
|
||||
export LC_ALL=en_US.UTF-8
|
||||
export PYTHONASYNCIODEBUG=1
|
||||
|
||||
# store node prep start
|
||||
if command -v python3 > /dev/null 2>&1; then
|
||||
npstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
|
||||
npstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
|
||||
else
|
||||
npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
|
||||
npstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
|
||||
fi
|
||||
|
||||
# set node prep status files
|
||||
|
@ -644,17 +671,17 @@ nodeprepfinished=$AZ_BATCH_NODE_SHARED_DIR/.node_prep_finished
|
|||
cascadefailed=$AZ_BATCH_NODE_SHARED_DIR/.cascade_failed
|
||||
|
||||
# decrypt encrypted creds
|
||||
if [ ! -z $encrypted ]; then
|
||||
if [ ! -z "$encrypted" ]; then
|
||||
# convert pfx to pem
|
||||
pfxfile=$AZ_BATCH_CERTIFICATES_DIR/sha1-$encrypted.pfx
|
||||
privatekey=$AZ_BATCH_CERTIFICATES_DIR/key.pem
|
||||
openssl pkcs12 -in $pfxfile -out $privatekey -nodes -password file:$pfxfile.pw
|
||||
openssl pkcs12 -in "$pfxfile" -out "$privatekey" -nodes -password file:"${pfxfile}".pw
|
||||
# remove pfx-related files
|
||||
rm -f $pfxfile $pfxfile.pw
|
||||
rm -f "$pfxfile" "${pfxfile}".pw
|
||||
# decrypt creds
|
||||
SHIPYARD_STORAGE_ENV=`echo $SHIPYARD_STORAGE_ENV | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
|
||||
SHIPYARD_STORAGE_ENV=$(echo "$SHIPYARD_STORAGE_ENV" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
|
||||
if [ ! -z ${DOCKER_LOGIN_USERNAME+x} ]; then
|
||||
DOCKER_LOGIN_PASSWORD=`echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
|
||||
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -666,7 +693,7 @@ if [ $p2penabled -eq 1 ]; then
|
|||
fi
|
||||
|
||||
# create shared mount points
|
||||
mkdir -p $MOUNTS_PATH
|
||||
mkdir -p "$MOUNTS_PATH"
|
||||
|
||||
# mount azure resources (this must be done every boot)
|
||||
if [ $azurefile -eq 1 ]; then
|
||||
|
@ -677,19 +704,19 @@ if [ $azureblob -eq 1 ]; then
|
|||
fi
|
||||
|
||||
# check if we're coming up from a reboot
|
||||
if [ -f $cascadefailed ]; then
|
||||
if [ -f "$cascadefailed" ]; then
|
||||
log ERROR "$cascadefailed file exists, assuming cascade failure during node prep"
|
||||
exit 1
|
||||
elif [ -f $nodeprepfinished ]; then
|
||||
elif [ -f "$nodeprepfinished" ]; then
|
||||
# mount any storage clusters
|
||||
if [ ! -z $sc_args ]; then
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
|
||||
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
|
||||
IFS='#' read -ra fstabs <<< "$fstab_mounts"
|
||||
i=0
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
mount $MOUNTS_PATH/${sc[1]}
|
||||
mount "${MOUNTS_PATH}"/"${sc[1]}"
|
||||
done
|
||||
fi
|
||||
# mount any custom mounts
|
||||
|
@ -699,13 +726,13 @@ elif [ -f $nodeprepfinished ]; then
|
|||
# eval and split fstab var to expand vars
|
||||
fstab_entry=$(eval echo "$fstab")
|
||||
IFS=' ' read -ra parts <<< "$fstab_entry"
|
||||
mount ${parts[1]}
|
||||
mount "${parts[1]}"
|
||||
done
|
||||
fi
|
||||
# start docker engine
|
||||
check_for_docker_host_engine
|
||||
# ensure nvidia software has been installed
|
||||
if [ ! -z $gpu ]; then
|
||||
if [ ! -z "$gpu" ]; then
|
||||
ensure_nvidia_driver_installed $offer $sku
|
||||
fi
|
||||
log INFO "$nodeprepfinished file exists, assuming successful completion of node prep"
|
||||
|
@ -713,10 +740,10 @@ elif [ -f $nodeprepfinished ]; then
|
|||
fi
|
||||
|
||||
# get ip address of eth0
|
||||
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
|
||||
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
|
||||
|
||||
# one-time setup
|
||||
if [ ! -f $nodeprepfinished ]; then
|
||||
if [ ! -f "$nodeprepfinished" ]; then
|
||||
# set up hpn-ssh
|
||||
if [ $hpnssh -eq 1 ]; then
|
||||
./shipyard_hpnssh.sh $offer $sku
|
||||
|
@ -747,8 +774,8 @@ EOF
|
|||
fi
|
||||
|
||||
# install docker host engine
|
||||
if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
||||
DEBIAN_FRONTEND=noninteractive
|
||||
if [ "$offer" == "ubuntuserver" ] || [ "$offer" == "debian" ]; then
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
# name will be appended to dockerversion
|
||||
dockerversion=18.03.0~ce-0~
|
||||
name=
|
||||
|
@ -802,7 +829,7 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
|||
log ERROR "unsupported sku: $sku for offer: $offer"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -z $gpu ] && [ $name != "ubuntu-xenial" ]; then
|
||||
if [ ! -z "$gpu" ] && [ "$name" != "ubuntu-xenial" ]; then
|
||||
log ERROR "gpu unsupported on this sku: $sku for offer $offer"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -818,8 +845,8 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
|||
refresh_package_index $offer
|
||||
# install required software first
|
||||
install_packages $offer apt-transport-https ca-certificates curl gnupg2 software-properties-common
|
||||
if [ $name == "ubuntu-trusty" ]; then
|
||||
install_packages $offer linux-image-extra-$(uname -r) linux-image-extra-virtual
|
||||
if [ "$name" == "ubuntu-trusty" ]; then
|
||||
install_packages $offer linux-image-extra-"$(uname -r)" linux-image-extra-virtual
|
||||
fi
|
||||
# add gpgkey for repo
|
||||
add_repo $offer $gpgkey
|
||||
|
@ -851,11 +878,11 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
|||
$srvstatus
|
||||
docker info
|
||||
# install gpu related items
|
||||
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
|
||||
if [ ! -z "$gpu" ] && [ ! -f "$nodeprepfinished" ]; then
|
||||
install_nvidia_software $offer $sku
|
||||
fi
|
||||
# set up glusterfs
|
||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f "$nodeprepfinished" ]; then
|
||||
install_packages $offer glusterfs-server
|
||||
if [[ ! -z $gfsenable ]]; then
|
||||
$gfsenable
|
||||
|
@ -865,13 +892,13 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
|
|||
mkdir -p /mnt/gluster
|
||||
fi
|
||||
# install dependencies for storage cluster mount
|
||||
if [ ! -z $sc_args ]; then
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
server_type=${sc[0]}
|
||||
if [ $server_type == "nfs" ]; then
|
||||
if [ "$server_type" == "nfs" ]; then
|
||||
install_packages $offer nfs-common nfs4-acl-tools
|
||||
elif [ $server_type == "glusterfs" ]; then
|
||||
elif [ "$server_type" == "glusterfs" ]; then
|
||||
install_packages $offer glusterfs-client acl
|
||||
else
|
||||
log ERROR "Unknown file server type ${sc[0]} for ${sc[1]}"
|
||||
|
@ -954,11 +981,11 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
|
|||
$srvstatus
|
||||
docker info
|
||||
# install gpu related items
|
||||
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
|
||||
if [ ! -z "$gpu" ] && [ ! -f "$nodeprepfinished" ]; then
|
||||
install_nvidia_software $offer $sku
|
||||
fi
|
||||
# set up glusterfs
|
||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
|
||||
if [ $gluster_on_compute -eq 1 ] && [ ! -f "$nodeprepfinished" ]; then
|
||||
install_packages $offer epel-release centos-release-gluster38
|
||||
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
|
||||
install_packages $offer --enablerepo=centos-gluster38,epel glusterfs-server
|
||||
|
@ -969,16 +996,16 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
|
|||
mkdir -p /mnt/resource/gluster
|
||||
fi
|
||||
# install dependencies for storage cluster mount
|
||||
if [ ! -z $sc_args ]; then
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
server_type=${sc[0]}
|
||||
if [ $server_type == "nfs" ]; then
|
||||
if [ "$server_type" == "nfs" ]; then
|
||||
install_packages $offer nfs-utils nfs4-acl-tools
|
||||
systemctl daemon-reload
|
||||
$rpcbindenable
|
||||
systemctl start rpcbind
|
||||
elif [ $server_type == "glusterfs" ]; then
|
||||
elif [ "$server_type" == "glusterfs" ]; then
|
||||
install_packages $offer epel-release centos-release-gluster38
|
||||
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
|
||||
install_packages $offer --enablerepo=centos-gluster38,epel glusterfs-server acl
|
||||
|
@ -996,7 +1023,7 @@ elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
|
|||
exit 1
|
||||
fi
|
||||
# gpu is not supported on these offers
|
||||
if [ ! -z $gpu ]; then
|
||||
if [ ! -z "$gpu" ]; then
|
||||
log ERROR "gpu unsupported on this sku: $sku for offer $offer"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -1004,7 +1031,7 @@ elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
|
|||
if [ $networkopt -eq 1 ]; then
|
||||
sysctl -p
|
||||
fi
|
||||
if [ ! -f $nodeprepfinished ]; then
|
||||
if [ ! -f "$nodeprepfinished" ]; then
|
||||
# add Virtualization:containers repo for recent docker builds
|
||||
repodir=
|
||||
if [[ $offer == opensuse* ]]; then
|
||||
|
@ -1062,16 +1089,16 @@ elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
|
|||
mkdir -p /mnt/resource/gluster
|
||||
fi
|
||||
# install dependencies for storage cluster mount
|
||||
if [ ! -z $sc_args ]; then
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
server_type=${sc[0]}
|
||||
if [ $server_type == "nfs" ]; then
|
||||
if [ "$server_type" == "nfs" ]; then
|
||||
install_packages $offer nfs-client nfs4-acl-tools
|
||||
systemctl daemon-reload
|
||||
systemctl enable rpcbind
|
||||
systemctl start rpcbind
|
||||
elif [ $server_type == "glusterfs" ]; then
|
||||
elif [ "$server_type" == "glusterfs" ]; then
|
||||
add_repo $offer http://download.opensuse.org/repositories/filesystems/$repodir/filesystems.repo
|
||||
zypper -n --gpg-auto-import-keys ref
|
||||
install_packages $offer glusterfs acl
|
||||
|
@ -1099,11 +1126,11 @@ else
|
|||
fi
|
||||
|
||||
# retrieve docker images related to data movement
|
||||
docker_pull_image alfpark/blobxfer:$blobxferversion
|
||||
docker_pull_image alfpark/batch-shipyard:${version}-cargo
|
||||
docker_pull_image alfpark/blobxfer:"${blobxferversion}"
|
||||
docker_pull_image alfpark/batch-shipyard:"${version}"-cargo
|
||||
|
||||
# set up singularity
|
||||
singularity_setup $DISTRIB_ID $DISTRIB_RELEASE
|
||||
singularity_setup "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
|
||||
# login to registry servers (do not specify -e as creds have been decrypted)
|
||||
./registry_login.sh
|
||||
|
@ -1112,16 +1139,16 @@ if [ -f singularity-registry-login ]; then
|
|||
fi
|
||||
|
||||
# mount any storage clusters
|
||||
if [ ! -z $sc_args ]; then
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
|
||||
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
|
||||
IFS='#' read -ra fstabs <<< "$fstab_mounts"
|
||||
i=0
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
fstab_entry="${fstabs[$i]}"
|
||||
process_fstab_entry "$sc_arg" "$MOUNTS_PATH/${sc[1]}" "$fstab_entry"
|
||||
i=$(($i + 1))
|
||||
i=$((i + 1))
|
||||
done
|
||||
fi
|
||||
|
||||
|
@ -1137,9 +1164,9 @@ if [ ! -z "$SHIPYARD_CUSTOM_MOUNTS_FSTAB" ]; then
|
|||
fi
|
||||
|
||||
# touch node prep finished file to preserve idempotency
|
||||
touch $nodeprepfinished
|
||||
touch "$nodeprepfinished"
|
||||
# touch cascade failed file, this will be removed once cascade is successful
|
||||
touch $cascadefailed
|
||||
touch "$cascadefailed"
|
||||
|
||||
# execute cascade
|
||||
set +e
|
||||
|
@ -1154,9 +1181,9 @@ if [ $cascadecontainer -eq 1 ]; then
|
|||
fi
|
||||
# store docker cascade start
|
||||
if command -v python3 > /dev/null 2>&1; then
|
||||
drpstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
|
||||
drpstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
|
||||
else
|
||||
drpstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
|
||||
drpstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
|
||||
fi
|
||||
# create env file
|
||||
envfile=.cascade_envfile
|
||||
|
@ -1168,14 +1195,14 @@ sku=$sku
|
|||
npstart=$npstart
|
||||
drpstart=$drpstart
|
||||
p2p=$p2p
|
||||
`env | grep SHIPYARD_`
|
||||
`env | grep AZ_BATCH_`
|
||||
`env | grep DOCKER_LOGIN_`
|
||||
`env | grep SINGULARITY_`
|
||||
$(env | grep SHIPYARD_)
|
||||
$(env | grep AZ_BATCH_)
|
||||
$(env | grep DOCKER_LOGIN_)
|
||||
$(env | grep SINGULARITY_)
|
||||
EOF
|
||||
chmod 600 $envfile
|
||||
# pull image
|
||||
docker_pull_image alfpark/batch-shipyard:${version}-cascade
|
||||
docker_pull_image alfpark/batch-shipyard:"${version}"-cascade
|
||||
# set singularity options
|
||||
singularity_binds=
|
||||
if [ ! -z $singularity_basedir ]; then
|
||||
|
@ -1183,30 +1210,31 @@ EOF
|
|||
-v $singularity_basedir:$singularity_basedir \
|
||||
-v $singularity_basedir/mnt:/var/lib/singularity/mnt"
|
||||
fi
|
||||
log DEBUG "Starting Cascade"
|
||||
# launch container
|
||||
log DEBUG "Starting Cascade"
|
||||
# shellcheck disable=SC2086
|
||||
docker run $detached --net=host --env-file $envfile \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v /etc/passwd:/etc/passwd:ro \
|
||||
-v /etc/group:/etc/group:ro \
|
||||
$singularity_binds \
|
||||
-v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR \
|
||||
-w $AZ_BATCH_TASK_WORKING_DIR \
|
||||
${singularity_binds} \
|
||||
-v "$AZ_BATCH_NODE_ROOT_DIR":"$AZ_BATCH_NODE_ROOT_DIR" \
|
||||
-w "$AZ_BATCH_TASK_WORKING_DIR" \
|
||||
-p 6881-6891:6881-6891 -p 6881-6891:6881-6891/udp \
|
||||
alfpark/batch-shipyard:${version}-cascade &
|
||||
alfpark/batch-shipyard:"${version}"-cascade &
|
||||
cascadepid=$!
|
||||
else
|
||||
# add timings
|
||||
if [ ! -z ${SHIPYARD_TIMING+x} ]; then
|
||||
# backfill node prep start
|
||||
./perf.py nodeprep start $prefix --ts $npstart --message "offer=$offer,sku=$sku"
|
||||
./perf.py nodeprep start "$prefix" --ts "$npstart" --message "offer=$offer,sku=$sku"
|
||||
# mark node prep finished
|
||||
./perf.py nodeprep end $prefix
|
||||
./perf.py nodeprep end "$prefix"
|
||||
# mark start cascade
|
||||
./perf.py cascade start $prefix
|
||||
./perf.py cascade start "$prefix"
|
||||
fi
|
||||
log DEBUG "Starting Cascade"
|
||||
./cascade.py $p2p --ipaddress $ipaddress $prefix &
|
||||
./cascade.py "$p2p" --ipaddress "$ipaddress" "$prefix" &
|
||||
cascadepid=$!
|
||||
fi
|
||||
|
||||
|
@ -1216,20 +1244,20 @@ if [ $p2penabled -eq 0 ]; then
rc=$?
if [ $rc -ne 0 ]; then
log ERROR "cascade exited with non-zero exit code: $rc"
rm -f $nodeprepfinished
rm -f "$nodeprepfinished"
exit $rc
fi
fi
set -e

# remove cascade failed file
rm -f $cascadefailed
rm -f "$cascadefailed"

# block for images if necessary
$AZ_BATCH_TASK_WORKING_DIR/wait_for_images.sh $block
"${AZ_BATCH_TASK_WORKING_DIR}"/wait_for_images.sh "$block"

# clean up cascade env file if block
if [ ! -z $block ]; then
if [ ! -z "$block" ]; then
if [ $cascadecontainer -eq 1 ]; then
rm -f $envfile
fi

@ -1,5 +1,7 @@
#!/usr/bin/env bash

# shellcheck disable=SC1091

set -e
set -o pipefail

@ -74,7 +76,7 @@ while getopts "h?abcef:m:np:t:v:x:" opt; do
t)
p2p=${OPTARG,,}
IFS=':' read -ra p2pflags <<< "$p2p"
if [ ${p2pflags[0]} == "true" ]; then
if [ "${p2pflags[0]}" == "true" ]; then
p2penabled=1
else
p2penabled=0
@ -94,8 +96,7 @@ shift $((OPTIND-1))
check_for_buggy_ntfs_mount() {
# Check to ensure sdb1 mount is not mounted as ntfs
set +e
mount | grep /dev/sdb1 | grep fuseblk
if [ $? -eq 0 ]; then
if mount | grep /dev/sdb1 | grep fuseblk; then
log ERROR "/dev/sdb1 temp disk is mounted as fuseblk/ntfs"
exit 1
fi
@ -104,7 +105,7 @@ check_for_buggy_ntfs_mount() {

save_startup_to_volatile() {
set +e
touch $AZ_BATCH_NODE_ROOT_DIR/volatile/startup/.save
touch "${AZ_BATCH_NODE_ROOT_DIR}"/volatile/startup/.save
set -e
}

@ -127,7 +128,7 @@ net.ipv4.tcp_abort_on_overflow=1
net.ipv4.route.flush=1
EOF
fi
if [ "$1" == "ubuntu" ] && [ "$2" == 14.04* ]; then
if [[ "$1" == "ubuntu" ]] && [[ "$2" == 14.04* ]]; then
service procps start
else
service procps reload
@ -139,7 +140,7 @@ blacklist_kernel_upgrade() {
|
|||
shift
|
||||
local sku=$1
|
||||
shift
|
||||
if [ $offer != "ubuntu" ]; then
|
||||
if [ "$offer" != "ubuntu" ]; then
|
||||
log DEBUG "No kernel upgrade blacklist required on $offer $sku"
|
||||
return
|
||||
fi
|
||||
|
@ -148,15 +149,14 @@ blacklist_kernel_upgrade() {
|
|||
local rc=$?
|
||||
set -e
|
||||
if [ $rc -ne 0 ]; then
|
||||
sed -i "/^Unattended-Upgrade::Package-Blacklist {/alinux-azure\nlinux-cloud-tools-azure\nlinux-headers-azure\nlinux-image-azure\nlinux-tools-azure" /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
sed -i "/^Unattended-Upgrade::Package-Blacklist {/a\"linux-azure\";\\n\"linux-cloud-tools-azure\";\\n\"linux-headers-azure\";\\n\"linux-image-azure\";\\n\"linux-tools-azure\";" /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
log INFO "Added linux-azure to package blacklist for unattended upgrades"
|
||||
fi
|
||||
}
|
||||
|
||||
check_for_nvidia_docker() {
|
||||
set +e
|
||||
nvidia-docker version
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! nvidia-docker version; then
|
||||
log ERROR "nvidia-docker2 not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -165,7 +165,8 @@ check_for_nvidia_docker() {
|
|||
|
||||
check_for_nvidia_driver() {
|
||||
set +e
|
||||
local out=$(lsmod)
|
||||
local out
|
||||
out=$(lsmod)
|
||||
echo "$out" | grep -i nvidia > /dev/null
|
||||
local rc=$?
|
||||
set -e
|
||||
|
@ -182,7 +183,8 @@ check_for_nvidia() {
|
|||
log INFO "Checking for Nvidia Hardware"
|
||||
# first check for card
|
||||
set +e
|
||||
local out=$(lspci)
|
||||
local out
|
||||
out=$(lspci)
|
||||
echo "$out" | grep -i nvidia > /dev/null
|
||||
local rc=$?
|
||||
set -e
|
||||
|
@ -190,7 +192,7 @@ check_for_nvidia() {
|
|||
if [ $rc -ne 0 ]; then
|
||||
log INFO "No Nvidia card(s) detected!"
|
||||
else
|
||||
blacklist_kernel_upgrade $1 $2
|
||||
blacklist_kernel_upgrade "$1" "$2"
|
||||
check_for_nvidia_driver
|
||||
# enable persistence mode
|
||||
nvidia-smi -pm 1
|
||||
|
@ -200,7 +202,8 @@ check_for_nvidia() {
|
|||
|
||||
check_docker_root_dir() {
|
||||
set +e
|
||||
local rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
|
||||
local rootdir
|
||||
rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
|
||||
set -e
|
||||
log DEBUG "Graph root: $rootdir"
|
||||
if [ -z "$rootdir" ]; then
|
||||
|
@ -217,8 +220,7 @@ check_for_docker_host_engine() {
|
|||
# start docker service
|
||||
systemctl start docker.service
|
||||
systemctl status docker.service
|
||||
docker version
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! docker version; then
|
||||
log ERROR "Docker not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -241,16 +243,16 @@ check_for_glusterfs_on_compute() {
|
|||
|
||||
check_for_storage_cluster_software() {
|
||||
local rc=0
|
||||
if [ ! -z $sc_args ]; then
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
local server_type=${sc[0]}
|
||||
if [ $server_type == "nfs" ]; then
|
||||
if [ "$server_type" == "nfs" ]; then
|
||||
set +e
|
||||
mount.nfs4 -V
|
||||
local rc=$?
|
||||
set -e
|
||||
elif [ $server_type == "glusterfs" ]; then
|
||||
elif [ "$server_type" == "glusterfs" ]; then
|
||||
set +e
|
||||
glusterfs -V
|
||||
local rc=$?
|
||||
|
@ -281,8 +283,8 @@ mount_azureblob_container() {
|
|||
./azureblob-mount.sh
|
||||
chmod 700 azureblob-mount.sh
|
||||
chown root:root azureblob-mount.sh
|
||||
chmod 600 *.cfg
|
||||
chown root:root *.cfg
|
||||
chmod 600 ./*.cfg
|
||||
chown root:root ./*.cfg
|
||||
}
|
||||
|
||||
docker_pull_image() {
|
||||
|
@ -291,7 +293,8 @@ docker_pull_image() {
|
|||
set +e
|
||||
local retries=60
|
||||
while [ $retries -gt 0 ]; do
|
||||
local pull_out=$(docker pull $image 2>&1)
|
||||
local pull_out
|
||||
pull_out=$(docker pull "$image" 2>&1)
|
||||
local rc=$?
|
||||
if [ $rc -eq 0 ]; then
|
||||
echo "$pull_out"
|
||||
|
@ -299,18 +302,24 @@ docker_pull_image() {
fi
# non-zero exit code: check if pull output has toomanyrequests,
# connection resets, or image config error
if [[ ! -z "$(grep 'toomanyrequests' <<<$pull_out)" ]] || [[ ! -z "$(grep 'connection reset by peer' <<<$pull_out)" ]] || [[ ! -z "$(grep 'error pulling image configuration' <<<$pull_out)" ]]; then
local tmr
tmr=$(grep 'toomanyrequests' <<<"$pull_out")
local crbp
crbp=$(grep 'connection reset by peer' <<<"$pull_out")
local epic
epic=$(grep 'error pulling image configuration' <<<"$pull_out")
if [[ ! -z "$tmr" ]] || [[ ! -z "$crbp" ]] || [[ ! -z "$epic" ]]; then
log WARNING "will retry: $pull_out"
else
log ERROR "$pull_out"
exit $rc
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -le 0 ]; then
log ERROR "Could not pull docker image: $image"
exit $rc
fi
sleep $[($RANDOM % 5) + 1]s
sleep $((RANDOM % 5 + 1))s
done
set -e
}
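For reference, the retry loop above converges on a few ShellCheck-clean idioms: $((...)) arithmetic in place of the deprecated $[...] form and the broken "retries=retries-1" string assignment, quoted expansions, and command output captured into variables before testing. A compact sketch of the same pattern as a generic helper (hypothetical, not part of this commit):

# retry an arbitrary command with a small random backoff (1-5 seconds)
retry_with_jitter() {
    local retries=5
    while [ "$retries" -gt 0 ]; do
        if "$@"; then
            return 0
        fi
        retries=$((retries - 1))
        sleep $((RANDOM % 5 + 1))s
    done
    return 1
}
# example usage: retry_with_jitter docker pull "alfpark/blobxfer:$blobxferversion"
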
@ -321,7 +330,7 @@ singularity_setup() {
|
|||
shift
|
||||
local sku=$1
|
||||
shift
|
||||
if [ $offer == "ubuntu" ]; then
|
||||
if [ "$offer" == "ubuntu" ]; then
|
||||
if [[ $sku != 16.04* ]]; then
|
||||
log WARNING "Singularity not supported on $offer $sku"
|
||||
return
|
||||
|
@ -384,22 +393,23 @@ process_fstab_entry() {
|
|||
local mountpoint=$2
|
||||
local fstab_entry=$3
|
||||
log INFO "Creating host directory for $desc at $mountpoint"
|
||||
mkdir -p $mountpoint
|
||||
chmod 777 $mountpoint
|
||||
mkdir -p "$mountpoint"
|
||||
chmod 777 "$mountpoint"
|
||||
log INFO "Adding $mountpoint to fstab"
|
||||
echo $fstab_entry >> /etc/fstab
|
||||
echo "$fstab_entry" >> /etc/fstab
|
||||
tail -n1 /etc/fstab
|
||||
log INFO "Mounting $mountpoint"
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
mount $mountpoint
|
||||
if [ $? -eq 0 ]; then
|
||||
if mount "$mountpoint"; then
|
||||
break
|
||||
else
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 5 minutes of attempts
|
||||
if [ $DIFF -ge 5 ]; then
|
||||
log ERROR "Could not mount $desc on $mountpoint"
|
||||
|
@ -458,14 +468,14 @@ check_for_buggy_ntfs_mount
|
|||
save_startup_to_volatile
|
||||
|
||||
# set python env vars
|
||||
LC_ALL=en_US.UTF-8
|
||||
PYTHONASYNCIODEBUG=1
|
||||
export LC_ALL=en_US.UTF-8
|
||||
export PYTHONASYNCIODEBUG=1
|
||||
|
||||
# store node prep start
|
||||
if command -v python3 > /dev/null 2>&1; then
|
||||
npstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
|
||||
npstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
|
||||
else
|
||||
npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
|
||||
npstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
|
||||
fi
|
||||
|
||||
# set node prep status files
|
||||
|
@ -473,20 +483,20 @@ nodeprepfinished=$AZ_BATCH_NODE_SHARED_DIR/.node_prep_finished
|
|||
cascadefailed=$AZ_BATCH_NODE_SHARED_DIR/.cascade_failed
|
||||
|
||||
# create shared mount points
|
||||
mkdir -p $MOUNTS_PATH
|
||||
mkdir -p "$MOUNTS_PATH"
|
||||
|
||||
# decrypt encrypted creds
|
||||
if [ ! -z $encrypted ]; then
|
||||
if [ ! -z "$encrypted" ]; then
|
||||
# convert pfx to pem
|
||||
pfxfile=$AZ_BATCH_CERTIFICATES_DIR/sha1-$encrypted.pfx
|
||||
privatekey=$AZ_BATCH_CERTIFICATES_DIR/key.pem
|
||||
openssl pkcs12 -in $pfxfile -out $privatekey -nodes -password file:$pfxfile.pw
|
||||
openssl pkcs12 -in "$pfxfile" -out "$privatekey" -nodes -password file:"${pfxfile}".pw
|
||||
# remove pfx-related files
|
||||
rm -f $pfxfile $pfxfile.pw
|
||||
rm -f "$pfxfile" "${pfxfile}".pw
|
||||
# decrypt creds
|
||||
SHIPYARD_STORAGE_ENV=`echo $SHIPYARD_STORAGE_ENV | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
|
||||
SHIPYARD_STORAGE_ENV=$(echo "$SHIPYARD_STORAGE_ENV" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
|
||||
if [ ! -z ${DOCKER_LOGIN_USERNAME+x} ]; then
|
||||
DOCKER_LOGIN_PASSWORD=`echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
|
||||
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -499,33 +509,33 @@ fi
|
|||
|
||||
# check for docker host engine
|
||||
check_for_docker_host_engine
|
||||
check_docker_root_dir $DISTRIB_ID
|
||||
check_docker_root_dir "$DISTRIB_ID"
|
||||
|
||||
# check for nvidia card/driver/docker
|
||||
check_for_nvidia $DISTRIB_ID $DISTRIB_RELEASE
|
||||
check_for_nvidia "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
|
||||
# mount azure resources (this must be done every boot)
|
||||
if [ $azurefile -eq 1 ]; then
|
||||
mount_azurefile_share $DISTRIB_ID $DISTRIB_RELEASE
|
||||
mount_azurefile_share "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
fi
|
||||
if [ $azureblob -eq 1 ]; then
|
||||
mount_azureblob_container $DISTRIB_ID $DISTRIB_RELEASE
|
||||
mount_azureblob_container "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
fi
|
||||
|
||||
# check if we're coming up from a reboot
|
||||
if [ -f $cascadefailed ]; then
|
||||
if [ -f "$cascadefailed" ]; then
|
||||
log ERROR "$cascadefailed file exists, assuming cascade failure during node prep"
|
||||
exit 1
|
||||
elif [ -f $nodeprepfinished ]; then
|
||||
elif [ -f "$nodeprepfinished" ]; then
|
||||
# mount any storage clusters
|
||||
if [ ! -z $sc_args ]; then
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
|
||||
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
|
||||
IFS='#' read -ra fstabs <<< "$fstab_mounts"
|
||||
i=0
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
mount $MOUNTS_PATH/${sc[1]}
|
||||
mount "${MOUNTS_PATH}"/"${sc[1]}"
|
||||
done
|
||||
fi
|
||||
# mount any custom mounts
|
||||
|
@ -535,7 +545,7 @@ elif [ -f $nodeprepfinished ]; then
|
|||
# eval and split fstab var to expand vars
|
||||
fstab_entry=$(eval echo "$fstab")
|
||||
IFS=' ' read -ra parts <<< "$fstab_entry"
|
||||
mount ${parts[1]}
|
||||
mount "${parts[1]}"
|
||||
done
|
||||
fi
|
||||
log INFO "$nodeprepfinished file exists, assuming successful completion of node prep"
|
||||
|
@ -543,13 +553,13 @@ elif [ -f $nodeprepfinished ]; then
|
|||
fi
|
||||
|
||||
# get ip address of eth0
|
||||
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
|
||||
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
|
||||
|
||||
# one-time setup
|
||||
if [ $networkopt -eq 1 ]; then
|
||||
# do not fail script if this function fails
|
||||
set +e
|
||||
optimize_tcp_network_settings $DISTRIB_ID $DISTRIB_RELEASE
|
||||
optimize_tcp_network_settings "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
set -e
|
||||
# set sudoers to not require tty
|
||||
sed -i 's/^Defaults[ ]*requiretty/# Defaults requiretty/g' /etc/sudoers
|
||||
|
@ -564,16 +574,16 @@ fi
|
|||
check_for_storage_cluster_software
|
||||
|
||||
# mount any storage clusters
|
||||
if [ ! -z $sc_args ]; then
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
|
||||
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
|
||||
IFS='#' read -ra fstabs <<< "$fstab_mounts"
|
||||
i=0
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
fstab_entry="${fstabs[$i]}"
|
||||
process_fstab_entry "$sc_arg" "$MOUNTS_PATH/${sc[1]}" "$fstab_entry"
|
||||
i=$(($i + 1))
|
||||
i=$((i + 1))
|
||||
done
|
||||
fi
|
||||
|
||||
|
@ -589,11 +599,11 @@ if [ ! -z "$SHIPYARD_CUSTOM_MOUNTS_FSTAB" ]; then
|
|||
fi
|
||||
|
||||
# retrieve docker images related to data movement
|
||||
docker_pull_image alfpark/blobxfer:$blobxferversion
|
||||
docker_pull_image alfpark/batch-shipyard:${version}-cargo
|
||||
docker_pull_image alfpark/blobxfer:"${blobxferversion}"
|
||||
docker_pull_image alfpark/batch-shipyard:"${version}"-cargo
|
||||
|
||||
# set up singularity
|
||||
singularity_setup $DISTRIB_ID $DISTRIB_RELEASE
|
||||
singularity_setup "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
|
||||
# login to registry servers (do not specify -e as creds have been decrypted)
|
||||
./registry_login.sh
|
||||
|
@ -602,9 +612,9 @@ if [ -f singularity-registry-login ]; then
|
|||
fi
|
||||
|
||||
# touch node prep finished file to preserve idempotency
|
||||
touch $nodeprepfinished
|
||||
touch "$nodeprepfinished"
|
||||
# touch cascade failed file, this will be removed once cascade is successful
|
||||
touch $cascadefailed
|
||||
touch "$cascadefailed"
|
||||
|
||||
# execute cascade
|
||||
set +e
|
||||
|
@ -618,9 +628,9 @@ else
|
|||
fi
|
||||
# store docker cascade start
|
||||
if command -v python3 > /dev/null 2>&1; then
|
||||
drpstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
|
||||
drpstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
|
||||
else
|
||||
drpstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
|
||||
drpstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
|
||||
fi
|
||||
# create env file
|
||||
envfile=.cascade_envfile
|
||||
|
@ -632,14 +642,14 @@ sku=$sku
|
|||
npstart=$npstart
|
||||
drpstart=$drpstart
|
||||
p2p=$p2p
|
||||
`env | grep SHIPYARD_`
|
||||
`env | grep AZ_BATCH_`
|
||||
`env | grep DOCKER_LOGIN_`
|
||||
`env | grep SINGULARITY_`
|
||||
$(env | grep SHIPYARD_)
|
||||
$(env | grep AZ_BATCH_)
|
||||
$(env | grep DOCKER_LOGIN_)
|
||||
$(env | grep SINGULARITY_)
|
||||
EOF
|
||||
chmod 600 $envfile
|
||||
# pull image
|
||||
docker_pull_image alfpark/batch-shipyard:${version}-cascade
|
||||
docker_pull_image alfpark/batch-shipyard:"${version}"-cascade
|
||||
# set singularity options
|
||||
singularity_binds=
|
||||
if [ ! -z $singularity_basedir ]; then
|
||||
|
@ -649,15 +659,16 @@ if [ ! -z $singularity_basedir ]; then
|
|||
fi
|
||||
# launch container
|
||||
log DEBUG "Starting Cascade"
|
||||
# shellcheck disable=SC2086
|
||||
docker run $detached --net=host --env-file $envfile \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v /etc/passwd:/etc/passwd:ro \
|
||||
-v /etc/group:/etc/group:ro \
|
||||
$singularity_binds \
|
||||
-v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR \
|
||||
-w $AZ_BATCH_TASK_WORKING_DIR \
|
||||
${singularity_binds} \
|
||||
-v "$AZ_BATCH_NODE_ROOT_DIR":"$AZ_BATCH_NODE_ROOT_DIR" \
|
||||
-w "$AZ_BATCH_TASK_WORKING_DIR" \
|
||||
-p 6881-6891:6881-6891 -p 6881-6891:6881-6891/udp \
|
||||
alfpark/batch-shipyard:${version}-cascade &
|
||||
alfpark/batch-shipyard:"${version}"-cascade &
|
||||
cascadepid=$!
|
||||
|
||||
# if not in p2p mode, then wait for cascade exit
|
||||
|
@ -666,19 +677,19 @@ if [ $p2penabled -eq 0 ]; then
|
|||
rc=$?
|
||||
if [ $rc -ne 0 ]; then
|
||||
log ERROR "cascade exited with non-zero exit code: $rc"
|
||||
rm -f $nodeprepfinished
|
||||
rm -f "$nodeprepfinished"
|
||||
exit $rc
|
||||
fi
|
||||
fi
|
||||
set -e
|
||||
|
||||
# remove cascade failed file
|
||||
rm -f $cascadefailed
|
||||
rm -f "$cascadefailed"
|
||||
|
||||
# block for images if necessary
|
||||
$AZ_BATCH_TASK_WORKING_DIR/wait_for_images.sh $block
|
||||
"${AZ_BATCH_TASK_WORKING_DIR}"/wait_for_images.sh "$block"
|
||||
|
||||
# clean up cascade env file if block
|
||||
if [ ! -z $block ]; then
|
||||
if [ ! -z "$block" ]; then
|
||||
rm -f $envfile
|
||||
fi
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
|
@ -76,8 +78,7 @@ shift $((OPTIND-1))
|
|||
check_for_buggy_ntfs_mount() {
|
||||
# Check to ensure sdb1 mount is not mounted as ntfs
|
||||
set +e
|
||||
mount | grep /dev/sdb1 | grep fuseblk
|
||||
if [ $? -eq 0 ]; then
|
||||
if mount | grep /dev/sdb1 | grep fuseblk; then
|
||||
log ERROR "/dev/sdb1 temp disk is mounted as fuseblk/ntfs"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -86,7 +87,7 @@ check_for_buggy_ntfs_mount() {
|
|||
|
||||
save_startup_to_volatile() {
|
||||
set +e
|
||||
touch $AZ_BATCH_NODE_ROOT_DIR/volatile/startup/.save
|
||||
touch "${AZ_BATCH_NODE_ROOT_DIR}"/volatile/startup/.save
|
||||
set -e
|
||||
}
|
||||
|
||||
|
@ -109,7 +110,7 @@ net.ipv4.tcp_abort_on_overflow=1
|
|||
net.ipv4.route.flush=1
|
||||
EOF
|
||||
fi
|
||||
if [ "$1" == "ubuntu" ] && [ "$2" == 14.04* ]; then
|
||||
if [[ "$1" == "ubuntu" ]] && [[ "$2" == 14.04* ]]; then
|
||||
service procps start
|
||||
else
|
||||
service procps reload
|
||||
|
@ -121,7 +122,7 @@ blacklist_kernel_upgrade() {
|
|||
shift
|
||||
local sku=$1
|
||||
shift
|
||||
if [ $offer != "ubuntu" ]; then
|
||||
if [ "$offer" != "ubuntu" ]; then
|
||||
log DEBUG "No kernel upgrade blacklist required on $offer $sku"
|
||||
return
|
||||
fi
|
||||
|
@ -130,15 +131,14 @@ blacklist_kernel_upgrade() {
|
|||
local rc=$?
|
||||
set -e
|
||||
if [ $rc -ne 0 ]; then
|
||||
sed -i "/^Unattended-Upgrade::Package-Blacklist {/alinux-azure\nlinux-cloud-tools-azure\nlinux-headers-azure\nlinux-image-azure\nlinux-tools-azure" /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
sed -i "/^Unattended-Upgrade::Package-Blacklist {/a\"linux-azure\";\\n\"linux-cloud-tools-azure\";\\n\"linux-headers-azure\";\\n\"linux-image-azure\";\\n\"linux-tools-azure\";" /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
log INFO "Added linux-azure to package blacklist for unattended upgrades"
|
||||
fi
|
||||
}
|
||||
|
||||
check_for_nvidia_docker() {
|
||||
set +e
|
||||
nvidia-docker version
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! nvidia-docker version; then
|
||||
log ERROR "nvidia-docker2 not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -147,7 +147,8 @@ check_for_nvidia_docker() {
|
|||
|
||||
check_for_nvidia_driver() {
|
||||
set +e
|
||||
local out=$(lsmod)
|
||||
local out
|
||||
out=$(lsmod)
|
||||
echo "$out" | grep -i nvidia > /dev/null
|
||||
local rc=$?
|
||||
set -e
|
||||
|
@ -164,7 +165,8 @@ check_for_nvidia() {
|
|||
log INFO "Checking for Nvidia Hardware"
|
||||
# first check for card
|
||||
set +e
|
||||
local out=$(lspci)
|
||||
local out
|
||||
out=$(lspci)
|
||||
echo "$out" | grep -i nvidia > /dev/null
|
||||
local rc=$?
|
||||
set -e
|
||||
|
@ -172,7 +174,7 @@ check_for_nvidia() {
|
|||
if [ $rc -ne 0 ]; then
|
||||
log INFO "No Nvidia card(s) detected!"
|
||||
else
|
||||
blacklist_kernel_upgrade $1 $2
|
||||
blacklist_kernel_upgrade "$1" "$2"
|
||||
check_for_nvidia_driver
|
||||
# enable persistence mode
|
||||
nvidia-smi -pm 1
|
||||
|
@ -182,7 +184,8 @@ check_for_nvidia() {
|
|||
|
||||
check_docker_root_dir() {
|
||||
set +e
|
||||
local rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
|
||||
local rootdir
|
||||
rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
|
||||
set -e
|
||||
log DEBUG "Graph root: $rootdir"
|
||||
if [ -z "$rootdir" ]; then
|
||||
|
@ -198,14 +201,12 @@ check_for_docker_host_engine() {
|
|||
set +e
|
||||
# enable and start docker service if custom image
|
||||
if [ $custom_image -eq 1 ]; then
|
||||
docker version --format '{{.Server.Version}}'
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! docker version --format '{{.Server.Version}}'; then
|
||||
systemctl start docker.service
|
||||
fi
|
||||
fi
|
||||
systemctl status docker.service
|
||||
docker version --format '{{.Server.Version}}'
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! docker version --format '{{.Server.Version}}'; then
|
||||
log ERROR "Docker not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -227,7 +228,8 @@ docker_pull_image() {
|
|||
set +e
|
||||
local retries=60
|
||||
while [ $retries -gt 0 ]; do
|
||||
local pull_out=$(docker pull $image 2>&1)
|
||||
local pull_out
|
||||
pull_out=$(docker pull "$image" 2>&1)
|
||||
local rc=$?
|
||||
if [ $rc -eq 0 ]; then
|
||||
echo "$pull_out"
|
||||
|
@ -235,18 +237,24 @@ docker_pull_image() {
|
|||
fi
|
||||
# non-zero exit code: check if pull output has toomanyrequests,
|
||||
# connection resets, or image config error
|
||||
if [[ ! -z "$(grep 'toomanyrequests' <<<$pull_out)" ]] || [[ ! -z "$(grep 'connection reset by peer' <<<$pull_out)" ]] || [[ ! -z "$(grep 'error pulling image configuration' <<<$pull_out)" ]]; then
|
||||
local tmr
|
||||
tmr=$(grep 'toomanyrequests' <<<"$pull_out")
|
||||
local crbp
|
||||
crbp=$(grep 'connection reset by peer' <<<"$pull_out")
|
||||
local epic
|
||||
epic=$(grep 'error pulling image configuration' <<<"$pull_out")
|
||||
if [[ ! -z "$tmr" ]] || [[ ! -z "$crbp" ]] || [[ ! -z "$epic" ]]; then
|
||||
log WARNING "will retry: $pull_out"
|
||||
else
|
||||
log ERROR "$pull_out"
|
||||
exit $rc
|
||||
fi
|
||||
retries=retries-1
|
||||
retries=$((retries-1))
|
||||
if [ $retries -le 0 ]; then
|
||||
log ERROR "Could not pull docker image: $image"
|
||||
exit $rc
|
||||
fi
|
||||
sleep $[($RANDOM % 5) + 1]s
|
||||
sleep $((RANDOM % 5 + 1))s
|
||||
done
|
||||
set -e
|
||||
}
|
||||
|
@ -256,16 +264,19 @@ install_local_packages() {
|
|||
shift
|
||||
set +e
|
||||
local retries=120
|
||||
local rc
|
||||
while [ $retries -gt 0 ]; do
|
||||
if [[ $distrib == "ubuntu" ]]; then
|
||||
dpkg -i $*
|
||||
dpkg -i "$@"
|
||||
rc=$?
|
||||
else
|
||||
rpm -Uvh --nodeps $*
|
||||
rpm -Uvh --nodeps "$@"
|
||||
rc=$?
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
if [ $rc -eq 0 ]; then
|
||||
break
|
||||
fi
|
||||
retries=retries-1
|
||||
retries=$((retries-1))
|
||||
if [ $retries -eq 0 ]; then
|
||||
log ERROR "Could not install local packages: $*"
|
||||
exit 1
|
||||
|
@ -280,16 +291,19 @@ install_packages() {
|
|||
shift
|
||||
set +e
|
||||
local retries=30
|
||||
local rc
|
||||
while [ $retries -gt 0 ]; do
|
||||
if [[ $distrib == "ubuntu" ]]; then
|
||||
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends $*
|
||||
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends "$@"
|
||||
rc=$?
|
||||
elif [[ $distrib == centos* ]]; then
|
||||
yum install -y $*
|
||||
yum install -y "$@"
|
||||
rc=$?
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
if [ $rc -eq 0 ]; then
|
||||
break
|
||||
fi
|
||||
retries=retries-1
|
||||
retries=$((retries-1))
|
||||
if [ $retries -eq 0 ]; then
|
||||
log ERROR "Could not install packages: $*"
|
||||
exit 1
|
||||
|
@ -303,19 +317,22 @@ refresh_package_index() {
|
|||
local distrib=$1
|
||||
set +e
|
||||
local retries=120
|
||||
local rc
|
||||
while [ $retries -gt 0 ]; do
|
||||
if [[ $distrib == "ubuntu" ]]; then
|
||||
apt-get update
|
||||
rc=$?
|
||||
elif [[ $distrib == centos* ]]; then
|
||||
yum makecache -y fast
|
||||
rc=$?
|
||||
else
|
||||
log ERROR "Unknown distribution for refresh: $distrib"
|
||||
exit 1
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
if [ $rc -eq 0 ]; then
|
||||
break
|
||||
fi
|
||||
retries=retries-1
|
||||
retries=$((retries-1))
|
||||
if [ $retries -eq 0 ]; then
|
||||
log ERROR "Could not update package index"
|
||||
exit 1
|
||||
|
@ -329,21 +346,21 @@ mount_azureblob_container() {
|
|||
log INFO "Mounting Azure Blob Containers"
|
||||
local distrib=$1
|
||||
local release=$2
|
||||
if [ $distrib == "ubuntu" ]; then
|
||||
if [ "$distrib" == "ubuntu" ]; then
|
||||
local debfile=packages-microsoft-prod.deb
|
||||
if [ ! -f ${debfile} ]; then
|
||||
download_file https://packages.microsoft.com/config/ubuntu/16.04/${debfile}
|
||||
install_local_packages $distrib ${debfile}
|
||||
refresh_package_index $distrib
|
||||
install_packages $distrib blobfuse
|
||||
download_file https://packages.microsoft.com/config/ubuntu/16.04/"${debfile}"
|
||||
install_local_packages "$distrib" "${debfile}"
|
||||
refresh_package_index "$distrib"
|
||||
install_packages "$distrib" blobfuse
|
||||
fi
|
||||
elif [[ $distrib == centos* ]]; then
|
||||
local rpmfile=packages-microsoft-prod.rpm
|
||||
if [ ! -f ${rpmfile} ]; then
|
||||
download_file https://packages.microsoft.com/config/rhel/7/${rpmfile}
|
||||
install_local_packages $distrib ${rpmfile}
|
||||
refresh_package_index $distrib
|
||||
install_packages $distrib blobfuse
|
||||
download_file https://packages.microsoft.com/config/rhel/7/"${rpmfile}"
|
||||
install_local_packages "$distrib" "${rpmfile}"
|
||||
refresh_package_index "$distrib"
|
||||
install_packages "$distrib" blobfuse
|
||||
fi
|
||||
else
|
||||
log ERROR "unsupported distribution for Azure blob: $distrib $release"
|
||||
|
@ -353,8 +370,8 @@ mount_azureblob_container() {
|
|||
./azureblob-mount.sh
|
||||
chmod 700 azureblob-mount.sh
|
||||
chown root:root azureblob-mount.sh
|
||||
chmod 600 *.cfg
|
||||
chown root:root *.cfg
|
||||
chmod 600 ./*.cfg
|
||||
chown root:root ./*.cfg
|
||||
}
|
||||
|
||||
download_file() {
|
||||
|
@ -362,11 +379,10 @@ download_file() {
|
|||
local retries=10
|
||||
set +e
|
||||
while [ $retries -gt 0 ]; do
|
||||
curl -fSsLO $1
|
||||
if [ $? -eq 0 ]; then
|
||||
if curl -fSsLO "$1"; then
|
||||
break
|
||||
fi
|
||||
retries=retries-1
|
||||
retries=$((retries-1))
|
||||
if [ $retries -eq 0 ]; then
|
||||
log ERROR "Could not download: $1"
|
||||
exit 1
|
||||
|
@ -381,22 +397,23 @@ process_fstab_entry() {
|
|||
local mountpoint=$2
|
||||
local fstab_entry=$3
|
||||
log INFO "Creating host directory for $desc at $mountpoint"
|
||||
mkdir -p $mountpoint
|
||||
chmod 777 $mountpoint
|
||||
mkdir -p "$mountpoint"
|
||||
chmod 777 "$mountpoint"
|
||||
log INFO "Adding $mountpoint to fstab"
|
||||
echo $fstab_entry >> /etc/fstab
|
||||
echo "$fstab_entry" >> /etc/fstab
|
||||
tail -n1 /etc/fstab
|
||||
log INFO "Mounting $mountpoint"
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
mount $mountpoint
|
||||
if [ $? -eq 0 ]; then
|
||||
if mount "$mountpoint"; then
|
||||
break
|
||||
else
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 5 minutes of attempts
|
||||
if [ $DIFF -ge 5 ]; then
|
||||
log ERROR "Could not mount $desc on $mountpoint"
|
||||
|
@ -454,62 +471,55 @@ check_for_buggy_ntfs_mount
|
|||
save_startup_to_volatile
|
||||
|
||||
# set python env vars
|
||||
LC_ALL=en_US.UTF-8
|
||||
PYTHONASYNCIODEBUG=1
|
||||
|
||||
# store node prep start
|
||||
if command -v python3 > /dev/null 2>&1; then
|
||||
npstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
|
||||
else
|
||||
npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
|
||||
fi
|
||||
export LC_ALL=en_US.UTF-8
|
||||
export PYTHONASYNCIODEBUG=1
|
||||
|
||||
# set node prep status files
|
||||
nodeprepfinished=$AZ_BATCH_NODE_SHARED_DIR/.node_prep_finished
|
||||
|
||||
# create shared mount points
|
||||
mkdir -p $MOUNTS_PATH
|
||||
mkdir -p "$MOUNTS_PATH"
|
||||
|
||||
# decrypt encrypted creds
|
||||
if [ ! -z $encrypted ]; then
|
||||
if [ ! -z "$encrypted" ]; then
|
||||
# convert pfx to pem
|
||||
pfxfile=$AZ_BATCH_CERTIFICATES_DIR/sha1-$encrypted.pfx
|
||||
privatekey=$AZ_BATCH_CERTIFICATES_DIR/key.pem
|
||||
openssl pkcs12 -in $pfxfile -out $privatekey -nodes -password file:$pfxfile.pw
|
||||
openssl pkcs12 -in "$pfxfile" -out "$privatekey" -nodes -password file:"${pfxfile}".pw
|
||||
# remove pfx-related files
|
||||
rm -f $pfxfile $pfxfile.pw
|
||||
rm -f "$pfxfile" "${pfxfile}".pw
|
||||
# decrypt creds
|
||||
if [ ! -z ${DOCKER_LOGIN_USERNAME+x} ]; then
|
||||
DOCKER_LOGIN_PASSWORD=`echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
|
||||
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
|
||||
fi
|
||||
fi
|
||||
|
||||
# check for docker host engine
|
||||
check_for_docker_host_engine
|
||||
check_docker_root_dir $DISTRIB_ID
|
||||
check_docker_root_dir "$DISTRIB_ID"
|
||||
|
||||
# check for nvidia card/driver/docker
|
||||
check_for_nvidia $DISTRIB_ID $DISTRIB_RELEASE
|
||||
check_for_nvidia "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
|
||||
# mount azure resources (this must be done every boot)
|
||||
if [ $azurefile -eq 1 ]; then
|
||||
mount_azurefile_share $DISTRIB_ID $DISTRIB_RELEASE
|
||||
mount_azurefile_share "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
fi
|
||||
if [ $azureblob -eq 1 ]; then
|
||||
mount_azureblob_container $DISTRIB_ID $DISTRIB_RELEASE
|
||||
mount_azureblob_container "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
fi
|
||||
|
||||
# check if we're coming up from a reboot
|
||||
if [ -f $nodeprepfinished ]; then
|
||||
if [ -f "$nodeprepfinished" ]; then
|
||||
# mount any storage clusters
|
||||
if [ ! -z $sc_args ]; then
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
|
||||
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
|
||||
IFS='#' read -ra fstabs <<< "$fstab_mounts"
|
||||
i=0
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
mount $MOUNTS_PATH/${sc[1]}
|
||||
mount "${MOUNTS_PATH}"/"${sc[1]}"
|
||||
done
|
||||
fi
|
||||
# mount any custom mounts
|
||||
|
@ -519,7 +529,7 @@ if [ -f $nodeprepfinished ]; then
|
|||
# eval and split fstab var to expand vars
|
||||
fstab_entry=$(eval echo "$fstab")
|
||||
IFS=' ' read -ra parts <<< "$fstab_entry"
|
||||
mount ${parts[1]}
|
||||
mount "${parts[1]}"
|
||||
done
|
||||
fi
|
||||
log INFO "$nodeprepfinished file exists, assuming successful completion of node prep"
|
||||
|
@ -530,7 +540,7 @@ fi
|
|||
if [ $networkopt -eq 1 ]; then
|
||||
# do not fail script if this function fails
|
||||
set +e
|
||||
optimize_tcp_network_settings $DISTRIB_ID $DISTRIB_RELEASE
|
||||
optimize_tcp_network_settings "$DISTRIB_ID" "$DISTRIB_RELEASE"
|
||||
set -e
|
||||
# set sudoers to not require tty
|
||||
sed -i 's/^Defaults[ ]*requiretty/# Defaults requiretty/g' /etc/sudoers
|
||||
|
@ -539,16 +549,16 @@ fi
|
|||
# install gluster on compute software
|
||||
if [ $custom_image -eq 0 ]; then
|
||||
if [ $gluster_on_compute -eq 1 ]; then
|
||||
if [ $DISTRIB_ID == "ubuntu" ]; then
|
||||
install_packages $DISTRIB_ID glusterfs-server
|
||||
if [ "$DISTRIB_ID" == "ubuntu" ]; then
|
||||
install_packages "$DISTRIB_ID" glusterfs-server
|
||||
systemctl enable glusterfs-server
|
||||
systemctl start glusterfs-server
|
||||
# create brick directory
|
||||
mkdir -p /mnt/gluster
|
||||
elif [[ $DISTRIB_ID == centos* ]]; then
|
||||
install_packages $DISTRIB_ID epel-release centos-release-gluster38
|
||||
install_packages "$DISTRIB_ID" epel-release centos-release-gluster38
|
||||
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
|
||||
install_packages $DISTRIB_ID --enablerepo=centos-gluster38,epel glusterfs-server
|
||||
install_packages "$DISTRIB_ID" --enablerepo=centos-gluster38,epel glusterfs-server
|
||||
systemctl daemon-reload
|
||||
chkconfig glusterd on
|
||||
systemctl start glusterd
|
||||
|
@ -560,33 +570,33 @@ fi
|
|||
|
||||
# install storage cluster software
|
||||
if [ $custom_image -eq 0 ]; then
|
||||
if [ ! -z $sc_args ]; then
|
||||
if [ $DISTRIB_ID == "ubuntu" ]; then
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
if [ "$DISTRIB_ID" == "ubuntu" ]; then
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
server_type=${sc[0]}
|
||||
if [ $server_type == "nfs" ]; then
|
||||
install_packages $DISTRIB_ID nfs-common nfs4-acl-tools
|
||||
elif [ $server_type == "glusterfs" ]; then
|
||||
install_packages $DISTRIB_ID glusterfs-client acl
|
||||
if [ "$server_type" == "nfs" ]; then
|
||||
install_packages "$DISTRIB_ID" nfs-common nfs4-acl-tools
|
||||
elif [ "$server_type" == "glusterfs" ]; then
|
||||
install_packages "$DISTRIB_ID" glusterfs-client acl
|
||||
else
|
||||
log ERROR "Unknown file server type ${sc[0]} for ${sc[1]}"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
elif [[ $DISTRIB_ID == centos* ]]; then
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
server_type=${sc[0]}
|
||||
if [ $server_type == "nfs" ]; then
|
||||
install_packages $DISTRIB_ID nfs-utils nfs4-acl-tools
|
||||
if [ "$server_type" == "nfs" ]; then
|
||||
install_packages "$DISTRIB_ID" nfs-utils nfs4-acl-tools
|
||||
systemctl daemon-reload
|
||||
systemctl enable rpcbind
|
||||
systemctl start rpcbind
|
||||
elif [ $server_type == "glusterfs" ]; then
|
||||
install_packages $DISTRIB_ID epel-release centos-release-gluster38
|
||||
elif [ "$server_type" == "glusterfs" ]; then
|
||||
install_packages "$DISTRIB_ID" epel-release centos-release-gluster38
|
||||
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
|
||||
install_packages $DISTRIB_ID --enablerepo=centos-gluster38,epel glusterfs-server acl
|
||||
install_packages "$DISTRIB_ID" --enablerepo=centos-gluster38,epel glusterfs-server acl
|
||||
else
|
||||
log ERROR "Unknown file server type ${sc[0]} for ${sc[1]}"
|
||||
exit 1
|
||||
|
@ -597,16 +607,16 @@ if [ $custom_image -eq 0 ]; then
|
|||
fi
|
||||
|
||||
# mount any storage clusters
|
||||
if [ ! -z $sc_args ]; then
|
||||
if [ ! -z "$sc_args" ]; then
|
||||
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
|
||||
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
|
||||
IFS='#' read -ra fstabs <<< "$fstab_mounts"
|
||||
i=0
|
||||
for sc_arg in ${sc_args[@]}; do
|
||||
for sc_arg in "${sc_args[@]}"; do
|
||||
IFS=':' read -ra sc <<< "$sc_arg"
|
||||
fstab_entry="${fstabs[$i]}"
|
||||
process_fstab_entry "$sc_arg" "$MOUNTS_PATH/${sc[1]}" "$fstab_entry"
|
||||
i=$(($i + 1))
|
||||
i=$((i + 1))
|
||||
done
|
||||
fi
|
||||
|
||||
|
@ -622,8 +632,8 @@ if [ ! -z "$SHIPYARD_CUSTOM_MOUNTS_FSTAB" ]; then
|
|||
fi
|
||||
|
||||
# retrieve docker images related to data movement
|
||||
docker_pull_image alfpark/blobxfer:$blobxferversion
|
||||
docker_pull_image alfpark/batch-shipyard:${version}-cargo
|
||||
docker_pull_image alfpark/blobxfer:"${blobxferversion}"
|
||||
docker_pull_image alfpark/batch-shipyard:"${version}"-cargo
|
||||
|
||||
# login to registry servers (do not specify -e as creds have been decrypted)
|
||||
./registry_login.sh
|
||||
|
@ -633,4 +643,4 @@ if [ -f singularity-registry-login ]; then
|
|||
fi
|
||||
|
||||
# touch node prep finished file to preserve idempotency
|
||||
touch $nodeprepfinished
|
||||
touch "$nodeprepfinished"
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
set -e
|
||||
set -o pipefail
|
||||
|
||||
DEBIAN_FRONTEND=noninteractive
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# constants
|
||||
gluster_brick_mountpath=/gluster/brick
|
||||
|
@ -20,27 +20,27 @@ volume_type=
|
|||
gluster_peer_probe() {
|
||||
# detach peer if it was connected already
|
||||
set +e
|
||||
gluster peer detach $1 2>&1
|
||||
gluster peer detach "$1" 2>&1
|
||||
set -e
|
||||
echo "Attempting to peer with $1"
|
||||
peered=0
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
# attempt to ping before peering
|
||||
ping -c 2 $1 > /dev/null
|
||||
if [ $? -eq 0 ]; then
|
||||
gluster peer probe $1 2>&1
|
||||
if [ $? -eq 0 ]; then
|
||||
if ping -c 2 "$1" > /dev/null; then
|
||||
if gluster peer probe "$1" 2>&1; then
|
||||
peered=1
|
||||
fi
|
||||
fi
|
||||
if [ $peered -eq 1 ]; then
|
||||
break
|
||||
else
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 15 minutes of attempts
|
||||
if [ $DIFF -ge 15 ]; then
|
||||
echo "Could not probe peer $1"
|
||||
|
@ -54,14 +54,15 @@ gluster_peer_probe() {
}

gluster_poll_for_connections() {
local numpeers=$(($vm_count - 1))
local numpeers=$((vm_count - 1))
echo "Waiting for $numpeers peers to reach connected state..."
# get peer info
set +e
while :
do
local numready=$(gluster peer status | grep -e '^State: Peer in Cluster' | wc -l)
if [ $numready == $numpeers ]; then
local numready
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
if [ "$numready" == "$numpeers" ]; then
break
fi
sleep 1
@ -79,7 +80,7 @@ gluster_add_bricks() {
|
|||
IFS=',' read -ra hosts <<< "$hostnames"
|
||||
# cross-validate length
|
||||
if [ ${#peers[@]} -ne ${#hosts[@]} ]; then
|
||||
echo "${peers[@]} length does not match ${hosts[@]} length"
|
||||
echo "${peers[*]} length does not match ${hosts[*]} length"
|
||||
exit 1
|
||||
fi
|
||||
# construct brick locations
|
||||
|
@ -88,7 +89,7 @@ gluster_add_bricks() {
|
|||
do
|
||||
bricks+=" $host:$gluster_brick_location"
|
||||
# probe peer
|
||||
gluster_peer_probe $host
|
||||
gluster_peer_probe "$host"
|
||||
done
|
||||
# wait for connections
|
||||
gluster_poll_for_connections
|
||||
|
@ -107,9 +108,9 @@ gluster_add_bricks() {
|
|||
echo "Adding bricks to gluster volume $gluster_volname $volarg ($bricks)"
|
||||
if [[ "$volume_type" == stripe* ]]; then
|
||||
# this should be gated by remotefs.py
|
||||
echo -e "y\n" | gluster volume add-brick $gluster_volname $volarg $bricks
|
||||
echo -e "y\\n" | gluster volume add-brick $gluster_volname $volarg "$bricks"
|
||||
else
|
||||
gluster volume add-brick $gluster_volname $volarg $bricks $force
|
||||
gluster volume add-brick $gluster_volname $volarg "$bricks"
|
||||
fi
|
||||
# get info and status
|
||||
gluster volume info $gluster_volname
|
||||
|
@ -117,8 +118,7 @@ gluster_add_bricks() {
|
|||
# rebalance
|
||||
echo "Rebalancing gluster volume $gluster_volname"
|
||||
set +e
|
||||
gluster volume rebalance $gluster_volname start
|
||||
if [ $? -eq 0 ]; then
|
||||
if gluster volume rebalance $gluster_volname start; then
|
||||
sleep 5
|
||||
gluster volume rebalance $gluster_volname status
|
||||
fi
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
set -e
|
||||
set -o pipefail
|
||||
|
||||
DEBIAN_FRONTEND=noninteractive
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# constants
|
||||
gluster_brick_mountpath=/gluster/brick
|
||||
|
@ -28,11 +28,13 @@ mount_options=
|
|||
# functions
|
||||
wait_for_device() {
|
||||
local device=$1
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
echo "Waiting for device $device..."
|
||||
while [ ! -b $device ]; do
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
while [ ! -b "$device" ]; do
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 5 minutes of waiting
|
||||
if [ $DIFF -ge 5 ]; then
|
||||
echo "Could not find device $device"
|
||||
|
@ -46,8 +48,7 @@ setup_nfs() {
|
|||
# amend /etc/exports if needed
|
||||
add_exports=0
|
||||
set +e
|
||||
grep "^${mountpath}" /etc/exports
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! grep "^${mountpath}" /etc/exports; then
|
||||
add_exports=1
|
||||
fi
|
||||
if [ $add_exports -eq 1 ]; then
|
||||
|
@ -61,8 +62,7 @@ setup_nfs() {
|
|||
set +f
|
||||
systemctl reload nfs-kernel-server.service
|
||||
fi
|
||||
systemctl status nfs-kernel-server.service
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! systemctl status nfs-kernel-server.service; then
|
||||
set -e
|
||||
# attempt to start
|
||||
systemctl start nfs-kernel-server.service
|
||||
|
@ -75,23 +75,23 @@ setup_nfs() {
|
|||
gluster_peer_probe() {
|
||||
echo "Attempting to peer with $1"
|
||||
peered=0
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
# attempt to ping before peering
|
||||
ping -c 2 $1 > /dev/null
|
||||
if [ $? -eq 0 ]; then
|
||||
gluster peer probe $1
|
||||
if [ $? -eq 0 ]; then
|
||||
if ping -c 2 "$1" > /dev/null; then
|
||||
if gluster peer probe "$1"; then
|
||||
peered=1
|
||||
fi
|
||||
fi
|
||||
if [ $peered -eq 1 ]; then
|
||||
break
|
||||
else
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 15 minutes of attempts
|
||||
if [ $DIFF -ge 15 ]; then
|
||||
echo "Could not probe peer $1"
|
||||
|
@ -106,14 +106,15 @@ gluster_peer_probe() {
|
|||
|
||||
gluster_poll_for_connections() {
|
||||
local numnodes=$1
|
||||
local numpeers=$(($numnodes - 1))
|
||||
local numpeers=$((numnodes - 1))
|
||||
echo "Waiting for $numpeers peers to reach connected state..."
|
||||
# get peer info
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
local numready=$(gluster peer status | grep -e '^State: Peer in Cluster' | wc -l)
|
||||
if [ $numready == $numpeers ]; then
|
||||
local numready
|
||||
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
|
||||
if [ "$numready" == "$numpeers" ]; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
|
@ -126,19 +127,19 @@ gluster_poll_for_connections() {
|
|||
|
||||
gluster_poll_for_volume() {
|
||||
echo "Waiting for gluster volume $1"
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
gluster volume info $1
|
||||
if [ $? -eq 0 ]; then
|
||||
echo $gv_info
|
||||
if gluster volume info "$1"; then
|
||||
# delay to wait for subvolumes
|
||||
sleep 5
|
||||
break
|
||||
else
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 15 minutes of attempts
|
||||
if [ $DIFF -ge 15 ]; then
|
||||
echo "Could not connect to gluster volume $1"
|
||||
|
@ -169,36 +170,36 @@ setup_glusterfs() {
|
|||
local i=0
|
||||
declare -a hosts
|
||||
set +e
|
||||
for ip in "${peers[@]}"; do
|
||||
local host=${hostname_prefix}-vm$(printf %03d $i)
|
||||
while [ $i -lt ${#peers[@]} ]; do
|
||||
local host
|
||||
host=${hostname_prefix}-vm$(printf %03d $i)
|
||||
hosts=("${hosts[@]}" "$host")
|
||||
if [ ${peers[$i]} == $ipaddress ]; then
|
||||
if [ "${peers[$i]}" == "$ipaddress" ]; then
|
||||
myhostname=$host
|
||||
fi
|
||||
i=$(($i + 1))
|
||||
i=$((i + 1))
|
||||
done
|
||||
set -e
|
||||
if [ -z $myhostname ]; then
|
||||
if [ -z "$myhostname" ]; then
|
||||
echo "Could not determine own hostname from prefix"
|
||||
exit 1
|
||||
fi
|
||||
# master (first host) performs peering
|
||||
if [ ${peers[0]} == $ipaddress ]; then
|
||||
if [ "${peers[0]}" == "$ipaddress" ]; then
|
||||
# construct brick locations
|
||||
local bricks=
|
||||
for host in "${hosts[@]}"
|
||||
do
|
||||
local bricks
|
||||
for host in "${hosts[@]}"; do
|
||||
bricks+=" $host:$gluster_brick_location"
|
||||
# probe peer
|
||||
if [ $host != $myhostname ]; then
|
||||
gluster_peer_probe $host
|
||||
if [ "$host" != "$myhostname" ]; then
|
||||
gluster_peer_probe "$host"
|
||||
fi
|
||||
done
|
||||
# wait for connections
|
||||
local numnodes=${#peers[@]}
|
||||
gluster_poll_for_connections $numnodes
|
||||
gluster_poll_for_connections "$numnodes"
|
||||
local voltype=${so[1],,}
|
||||
local volarg=
|
||||
local volarg
|
||||
if [ "$voltype" == "replica" ] || [ "$voltype" == "stripe" ]; then
|
||||
volarg="$voltype $numnodes"
|
||||
elif [ "$voltype" != "distributed" ]; then
|
||||
|
@ -206,17 +207,15 @@ setup_glusterfs() {
|
|||
volarg=$voltype
|
||||
fi
|
||||
local transport=${so[2],,}
|
||||
if [ -z $transport ]; then
|
||||
if [ -z "$transport" ]; then
|
||||
transport="tcp"
|
||||
fi
|
||||
# check if volume exists
|
||||
local start_only=0
|
||||
local force=
|
||||
local force
|
||||
set +e
|
||||
gluster volume info $gluster_volname 2>&1 | grep "does not exist"
|
||||
if [ $? -ne 0 ]; then
|
||||
gluster volume info $gluster_volname 2>&1 | grep "Volume Name: $gluster_volname"
|
||||
if [ $? -eq 0 ]; then
|
||||
if ! gluster volume info "$gluster_volname" 2>&1 | grep "does not exist"; then
|
||||
if gluster volume info "$gluster_volname" 2>&1 | grep "Volume Name: $gluster_volname"; then
|
||||
start_only=1
|
||||
else
|
||||
force="force"
|
||||
|
@ -226,29 +225,28 @@ setup_glusterfs() {
|
|||
# create volume
|
||||
if [ $start_only -eq 0 ]; then
|
||||
echo "Creating gluster volume $gluster_volname $volarg ($force$bricks)"
|
||||
gluster volume create $gluster_volname $volarg transport $transport$bricks $force
|
||||
gluster volume create "$gluster_volname" "$volarg" transport "${transport}""${bricks}" $force
|
||||
# modify volume properties as per input
|
||||
for e in "${so[@]:3}"; do
|
||||
IFS=':' read -ra kv <<< "$e"
|
||||
echo "Setting volume option ${kv[@]}"
|
||||
gluster volume set $gluster_volname "${kv[0]}" "${kv[1]}"
|
||||
echo "Setting volume option ${kv[*]}"
|
||||
gluster volume set "$gluster_volname" "${kv[0]}" "${kv[1]}"
|
||||
done
|
||||
fi
|
||||
# start volume
|
||||
echo "Starting gluster volume $gluster_volname"
|
||||
gluster volume start $gluster_volname
|
||||
gluster volume start "$gluster_volname"
|
||||
# heal volume if force created with certain volume types
|
||||
if [ ! -z $force ]; then
|
||||
if [[ "$voltype" == replica* ]] || [[ "$voltype" == disperse* ]]; then
|
||||
echo "Checking if gluster volume $gluster_volname needs healing"
|
||||
set +e
|
||||
gluster volume heal $gluster_volname info
|
||||
if [ $? -eq 0 ]; then
|
||||
gluster volume heal $gluster_volname
|
||||
if gluster volume heal "$gluster_volname" info; then
|
||||
gluster volume heal "$gluster_volname"
|
||||
# print status after heal
|
||||
gluster volume heal $gluster_volname info healed
|
||||
gluster volume heal $gluster_volname info heal-failed
|
||||
gluster volume heal $gluster_volname info split-brain
|
||||
gluster volume heal "$gluster_volname" info healed
|
||||
gluster volume heal "$gluster_volname" info heal-failed
|
||||
gluster volume heal "$gluster_volname" info split-brain
|
||||
fi
|
||||
set -e
|
||||
fi
|
||||
|
@ -256,13 +254,12 @@ setup_glusterfs() {
|
|||
fi
|
||||
|
||||
# poll for volume created
|
||||
gluster_poll_for_volume $gluster_volname
|
||||
gluster_poll_for_volume "$gluster_volname"
|
||||
|
||||
# check if volume is mounted
|
||||
local mounted=0
|
||||
set +e
|
||||
mountpoint -q $mountpath
|
||||
if [ $? -eq 0 ]; then
|
||||
if mountpoint -q "$mountpath"; then
|
||||
mounted=1
|
||||
fi
|
||||
set -e
|
||||
|
@ -271,8 +268,7 @@ setup_glusterfs() {
|
|||
# check if fstab entry exists
|
||||
add_fstab=0
|
||||
set +e
|
||||
grep "$mountpath glusterfs" /etc/fstab
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! grep "$mountpath glusterfs" /etc/fstab; then
|
||||
add_fstab=1
|
||||
fi
|
||||
set -e
|
||||
|
@ -287,19 +283,20 @@ setup_glusterfs() {
|
|||
mkdir -p $mountpath
|
||||
# mount it
|
||||
echo "Mounting gluster volume $gluster_volname locally to $mountpath"
|
||||
local START=$(date -u +"%s")
|
||||
local START
|
||||
START=$(date -u +"%s")
|
||||
set +e
|
||||
while :
|
||||
do
|
||||
mount $mountpath
|
||||
if [ $? -eq 0 ]; then
|
||||
if mount "$mountpath"; then
|
||||
break
|
||||
else
|
||||
local NOW=$(date -u +"%s")
|
||||
local DIFF=$((($NOW-$START)/60))
|
||||
local NOW
|
||||
NOW=$(date -u +"%s")
|
||||
local DIFF=$(((NOW-START)/60))
|
||||
# fail after 5 minutes of attempts
|
||||
if [ $DIFF -ge 5 ]; then
|
||||
echo "Could not mount gluster volume $gluster_volume to $mountpath"
|
||||
echo "Could not mount gluster volume $gluster_volname to $mountpath"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
|
@ -307,7 +304,7 @@ setup_glusterfs() {
|
|||
done
|
||||
set -e
|
||||
# ensure proper permissions on mounted directory
|
||||
chmod 1777 $mountpath
|
||||
chmod 1777 "$mountpath"
|
||||
fi
|
||||
}
|
||||
|
||||
|
@ -381,7 +378,7 @@ shift $((OPTIND-1))
|
|||
|
||||
echo "Parameters:"
|
||||
echo " Attach mode: $attach_disks"
|
||||
echo " Samba options: ${samba_options[@]}"
|
||||
echo " Samba options: ${samba_options[*]}"
|
||||
echo " Rebalance filesystem: $rebalance"
|
||||
echo " Filesystem: $filesystem"
|
||||
echo " Mountpath: $mountpath"
|
||||
|
@ -424,13 +421,12 @@ EOF
|
|||
fi
|
||||
# install required server_type software
|
||||
apt-get update
|
||||
if [ $server_type == "nfs" ]; then
|
||||
if [ "$server_type" == "nfs" ]; then
|
||||
apt-get install -y --no-install-recommends nfs-kernel-server nfs4-acl-tools
|
||||
# patch buggy nfs-mountd.service unit file
|
||||
# https://bugs.launchpad.net/ubuntu/+source/nfs-utils/+bug/1590799
|
||||
set +e
|
||||
grep "^After=network.target local-fs.target" /lib/systemd/system/nfs-mountd.service
|
||||
if [ $? -eq 0 ]; then
|
||||
if grep "^After=network.target local-fs.target" /lib/systemd/system/nfs-mountd.service; then
|
||||
set -e
|
||||
sed -i -e "s/^After=network.target local-fs.target/After=rpcbind.target/g" /lib/systemd/system/nfs-mountd.service
|
||||
fi
|
||||
|
@ -441,14 +437,13 @@ EOF
|
|||
systemctl enable nfs-kernel-server.service
|
||||
# start service if not started
|
||||
set +e
|
||||
systemctl status nfs-kernel-server.service
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! systemctl status nfs-kernel-server.service; then
|
||||
set -e
|
||||
systemctl start nfs-kernel-server.service
|
||||
systemctl status nfs-kernel-server.service
|
||||
fi
|
||||
set -e
|
||||
elif [ $server_type == "glusterfs" ]; then
|
||||
elif [ "$server_type" == "glusterfs" ]; then
|
||||
# to prevent a race where the master (aka prober) script execution
|
||||
# runs well before the child, we should block all gluster connection
|
||||
# requests with iptables. we should not remove the filter rules
|
||||
|
@ -461,8 +456,7 @@ EOF
|
|||
systemctl enable glusterfs-server
|
||||
# start service if not started
|
||||
set +e
|
||||
systemctl status glusterfs-server
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! systemctl status glusterfs-server; then
|
||||
set -e
|
||||
systemctl start glusterfs-server
|
||||
systemctl status glusterfs-server
|
||||
|
@ -477,31 +471,31 @@ fi

# get all data disks
declare -a data_disks
all_disks=($(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME))
mapfile -t all_disks < <(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME)
for disk in "${all_disks[@]}"; do
# ignore os and ephemeral disks
if [ $disk != "/dev/sda" ] && [ $disk != "/dev/sdb" ]; then
if [ "$disk" != "/dev/sda" ] && [ "$disk" != "/dev/sdb" ]; then
data_disks=("${data_disks[@]}" "$disk")
fi
done
unset all_disks
numdisks=${#data_disks[@]}
echo "found $numdisks data disks: ${data_disks[@]}"
echo "found $numdisks data disks: ${data_disks[*]}"

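The mapfile change above replaces the arr=($(cmd)) idiom, which word-splits and glob-expands unquoted command output (ShellCheck SC2207). A small illustrative sketch of the difference (hypothetical array name; assumes GNU lsblk is available):

# unsafe: relies on word splitting/globbing of the unquoted substitution
# disks=($(lsblk -l -d -n -p -o NAME))
# shellcheck-clean: read one array element per output line; -t strips the trailing newline
mapfile -t disks < <(lsblk -l -d -n -p -o NAME)
printf 'found %d disks\n' "${#disks[@]}"
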
# check if data disks are already partitioned
|
||||
declare -a skipped_part
|
||||
for disk in "${data_disks[@]}"; do
|
||||
part1=$(partprobe -d -s $disk | cut -d' ' -f4)
|
||||
if [ -z $part1 ]; then
|
||||
part1=$(partprobe -d -s "$disk" | cut -d' ' -f4)
|
||||
if [ -z "$part1" ]; then
|
||||
echo "$disk: partition 1 not found. Partitioning $disk."
|
||||
parted -a opt -s $disk mklabel gpt mkpart primary 0% 100%
|
||||
part1=$(partprobe -d -s $disk | cut -d' ' -f4)
|
||||
if [ -z $part1 ]; then
|
||||
parted -a opt -s "$disk" mklabel gpt mkpart primary 0% 100%
|
||||
part1=$(partprobe -d -s "$disk" | cut -d' ' -f4)
|
||||
if [ -z "$part1" ]; then
|
||||
echo "$disk: partition 1 not found after partitioning."
|
||||
exit 1
|
||||
fi
|
||||
# wait for block device
|
||||
wait_for_device $disk$part1
|
||||
wait_for_device "${disk}""${part1}"
|
||||
else
|
||||
echo "$disk: partition 1 found. Skipping partitioning."
|
||||
skipped_part=("${skipped_part[@]}" "$disk")
|
||||
|
@ -509,65 +503,66 @@ for disk in "${data_disks[@]}"; do
|
|||
done
|
||||
|
||||
# set format target
|
||||
target=
|
||||
target_md=
|
||||
target_uuid=
|
||||
format_target=1
|
||||
# check if there was only one skipped disk during partitioning
|
||||
if [ ${#skipped_part[@]} -eq $numdisks ] && [ $numdisks -eq 1 ]; then
|
||||
target=${skipped_part[0]}
|
||||
read target_uuid target_fs < <(blkid -u filesystem $target | awk -F "[= ]" '{print $3" "$5}'|tr -d "\"")
|
||||
if [ ! -z $target_fs ]; then
|
||||
if [ ${#skipped_part[@]} -eq "$numdisks" ] && [ "$numdisks" -eq 1 ]; then
|
||||
target_md=${skipped_part[0]}
|
||||
read -r target_uuid target_fs < <(blkid -u filesystem "$target_md" | awk -F "[= ]" '{print $3" "$5}'|tr -d "\"")
|
||||
if [ ! -z "$target_fs" ]; then
|
||||
format_target=0
|
||||
fi
|
||||
fi
# check if disks are already in raid set
raid_resized=0
if [ $raid_level -ge 0 ]; then
if [ "$raid_level" -ge 0 ]; then
# redirect mountpath if gluster for bricks
saved_mp=$mountpath
if [ $server_type == "glusterfs" ]; then
if [ "$server_type" == "glusterfs" ]; then
mountpath=$gluster_brick_mountpath
fi
format_target=0
md_preexist=0
if [ $filesystem == "btrfs" ]; then
if [ $raid_level -ne 0 ]; then
if [ "$filesystem" == "btrfs" ]; then
if [ "$raid_level" -ne 0 ]; then
echo "btrfs with non-RAID 0 is not supported."
exit 1
fi
else
# find any pre-existing targets
set +e
mdadm --detail --scan
if [ $? -eq 0 ]; then
target=($(find /dev/md* -maxdepth 0 -type b))
if mdadm --detail --scan; then
mapfile -t target < <(find /dev/md* -maxdepth 0 -type b)
if [ ${#target[@]} -ne 0 ]; then
target=${target[0]}
md_preexist=1
echo "Existing array found: $target"
target_md=${target[0]}
echo "Existing array found: $target_md"
# refresh target uuid to md target
read target_uuid < <(blkid ${target} | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
read -r target_uuid < <(blkid "$target_md" | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
else
echo "No pre-existing md target could be found"
fi
fi
set -e
if [ -z $target ]; then
target=/dev/md0
echo "Setting default target: $target"
if [ -z "$target_md" ]; then
target_md=/dev/md0
echo "Setting default target: $target_md"
fi
fi
declare -a raid_array
declare -a all_raid_disks
set +e
for disk in "${data_disks[@]}"; do
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
btrfs device scan "${disk}1"
rc=$?
else
mdadm --examine "${disk}1"
rc=$?
fi
if [ $? -ne 0 ]; then
if [ $rc -ne 0 ]; then
raid_array=("${raid_array[@]}" "${disk}1")
fi
all_raid_disks=("${all_raid_disks[@]}" "${disk}1")
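Swapping target=($(find ...)) for mapfile -t target < <(find ...) is the usual SC2207 fix: the array gets one element per output line, with no word splitting or glob expansion of the command's output. A sketch against a throwaway directory (the file names are made up):

#!/usr/bin/env bash
# Sketch only: demonstrate line-per-element array capture.
tmpdir=$(mktemp -d)
touch "$tmpdir/md0" "$tmpdir/md with space"
mapfile -t found < <(find "$tmpdir" -mindepth 1 -maxdepth 1)
echo "elements: ${#found[@]}"      # 2, even with a space in one name
printf '%s\n' "${found[@]}"
rm -rf "$tmpdir"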
@@ -575,64 +570,64 @@ if [ $raid_level -ge 0 ]; then
set -e
no_raid_count=${#raid_array[@]}
# take action depending upon no raid count
if [ $no_raid_count -eq 0 ]; then
if [ "$no_raid_count" -eq 0 ]; then
echo "No disks require RAID setup"
elif [ $no_raid_count -eq $numdisks ]; then
echo "$numdisks data disks require RAID setup: ${raid_array[@]}"
if [ $filesystem == "btrfs" ]; then
if [ $raid_level -eq 0 ]; then
mkfs.btrfs -d raid0 ${raid_array[@]}
elif [ "$no_raid_count" -eq "$numdisks" ]; then
echo "$numdisks data disks require RAID setup: ${raid_array[*]}"
if [ "$filesystem" == "btrfs" ]; then
if [ "$raid_level" -eq 0 ]; then
mkfs.btrfs -d raid0 "${raid_array[@]}"
else
mkfs.btrfs -m raid${raid_level} ${raid_array[@]}
mkfs.btrfs -m raid"${raid_level}" "${raid_array[@]}"
fi
else
set +e
# first check if this is a pre-existing array
mdadm_detail=$(mdadm --detail --scan)
if [ -z $mdadm_detail ]; then
if [ -z "$mdadm_detail" ]; then
set -e
mdadm --create --verbose $target --level=$raid_level --raid-devices=$numdisks ${raid_array[@]}
mdadm --create --verbose $target_md --level="$raid_level" --raid-devices="$numdisks" "${raid_array[@]}"
format_target=1
else
if [ $md_preexist -eq 0 ]; then
echo "Could not determine pre-existing md target"
exit 1
fi
echo "Not creating a new array since pre-exsting md target found: $target"
echo "Not creating a new array since pre-exsting md target found: $target_md"
fi
set -e
fi
else
echo "Mismatch of non-RAID disks $no_raid_count to total disks $numdisks."
if [ $raid_level -ne 0 ]; then
if [ "$raid_level" -ne 0 ]; then
echo "Cannot resize with RAID level of $raid_level."
exit 1
fi
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
# add new block devices first
echo "Adding devices ${raid_array[@]} to $mountpath"
btrfs device add ${raid_array[@]} $mountpath
echo "Adding devices ${raid_array[*]} to $mountpath"
btrfs device add "${raid_array[@]}" $mountpath
raid_resized=1
else
# add new block device first
echo "Adding devices ${raid_array[@]} to $target"
mdadm --add $target ${raid_array[@]}
echo "Adding devices ${raid_array[*]} to $target_md"
mdadm --add $target_md "${raid_array[@]}"
# grow the array
echo "Growing array $target to a total of $numdisks devices"
mdadm --grow --raid-devices=$numdisks $target
echo "Growing array $target_md to a total of $numdisks devices"
mdadm --grow --raid-devices="$numdisks" "$target_md"
raid_resized=1
fi
fi
# dump diagnostic info
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
btrfs filesystem show
else
cat /proc/mdstat
mdadm --detail $target
mdadm --detail $target_md
fi
# get uuid of first disk as target uuid if not populated
if [ -z $target_uuid ]; then
read target_uuid < <(blkid ${all_raid_disks[0]} | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
if [ -z "$target_uuid" ]; then
read -r target_uuid < <(blkid "${all_raid_disks[0]}" | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
fi
# restore mountpath
mountpath=$saved_mp
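These hunks also pick the right array expansion for each use: quoted "${raid_array[@]}" hands mdadm and mkfs one argument per device, while "${raid_array[*]}" joins the elements into a single word, which is what a log message wants and what silences SC2145 for echo. A short sketch with hypothetical device names:

#!/usr/bin/env bash
# Sketch only: the device paths are placeholders, nothing is formatted.
raid_array=("/dev/sdc1" "/dev/sdd1")
echo "as one message: ${raid_array[*]}"          # joined into a single word
printf 'separate arg: %s\n' "${raid_array[@]}"   # one argument per element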
@@ -641,49 +636,47 @@ fi

# create filesystem on target device
if [ $format_target -eq 1 ]; then
if [ -z $target ]; then
if [ -z "$target_md" ]; then
echo "Target not specified for format"
exit 1
fi
echo "Creating filesystem on $target."
if [ $filesystem == "btrfs" ]; then
mkfs.btrfs $target
echo "Creating filesystem on $target_md"
if [ "$filesystem" == "btrfs" ]; then
mkfs.btrfs "$target_md"
elif [[ $filesystem == ext* ]]; then
mkfs.${filesystem} -m 0 $target
mkfs."${filesystem}" -m 0 "$target_md"
else
echo "Unknown filesystem: $filesystem"
exit 1
fi
# refresh target uuid
read target_uuid < <(blkid ${target} | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
read -r target_uuid < <(blkid "${target_md}" | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
fi

# mount filesystem
if [ $attach_disks -eq 0 ]; then
# redirect mountpath if gluster for bricks
saved_mp=$mountpath
if [ $server_type == "glusterfs" ]; then
if [ "$server_type" == "glusterfs" ]; then
mountpath=$gluster_brick_mountpath
fi
# check if filesystem is mounted (active array)
mounted=0
set +e
mountpoint -q $mountpath
if [ $? -eq 0 ]; then
if mountpoint -q $mountpath; then
mounted=1
fi
set -e
# add fstab entry and mount
if [ $mounted -eq 0 ]; then
if [ -z $target_uuid ]; then
if [ -z "$target_uuid" ]; then
echo "Target UUID not populated!"
exit 1
fi
# check if fstab entry exists
add_fstab=0
set +e
grep "^UUID=${target_uuid}" /etc/fstab
if [ $? -ne 0 ]; then
if ! grep "^UUID=${target_uuid}" /etc/fstab; then
add_fstab=1
fi
set -e
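Testing the command itself (if mountpoint -q ..., if ! grep ...) instead of inspecting $? afterwards is the SC2181 fix, and it also reads better under set -e because the non-zero status is consumed as the if condition rather than needing a set +e/set -e bracket. A sketch with a hypothetical path:

#!/usr/bin/env bash
set -e
# Sketch only: /mnt/example is a hypothetical mount point.
if mountpoint -q /mnt/example; then
    echo "already mounted"
else
    # still reached under set -e: the failing command is the if condition,
    # not a bare statement
    echo "not mounted"
fi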
@@ -691,14 +684,14 @@ if [ $attach_disks -eq 0 ]; then
if [ $add_fstab -eq 1 ]; then
echo "Adding $target_uuid to mountpoint $mountpath to /etc/fstab"
# construct mount options
if [ -z $mount_options ]; then
if [ -z "$mount_options" ]; then
mount_options="defaults"
else
mount_options="defaults,$mount_options"
fi
if [ $premium_storage -eq 1 ]; then
# disable barriers due to cache
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
# also enable ssd optimizations on btrfs
mount_options+=",nobarrier,ssd"
else
@@ -714,10 +707,10 @@ if [ $attach_disks -eq 0 ]; then
mkdir -p $mountpath
# mount
mount $mountpath
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
# ensure proper permissions
chmod 1777 $mountpath
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
# create the brick location
mkdir -p $gluster_brick_location
fi
@@ -734,11 +727,11 @@ fi
if [ $raid_resized -eq 1 ]; then
# redirect mountpath if gluster for bricks
saved_mp=$mountpath
if [ $server_type == "glusterfs" ]; then
if [ "$server_type" == "glusterfs" ]; then
mountpath=$gluster_brick_mountpath
fi
echo "Resizing filesystem at $mountpath."
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
btrfs filesystem resize max $mountpath
# rebalance data and metadata across all devices
if [ $rebalance -eq 1 ]; then
@@ -759,9 +752,9 @@ fi

# set up server_type software
if [ $attach_disks -eq 0 ]; then
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
setup_nfs
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
flush_glusterfs_firewall_rules
setup_glusterfs
else
@@ -769,7 +762,7 @@ if [ $attach_disks -eq 0 ]; then
exit 1
fi
# setup samba server if specified
if [ ! -z $samba_options ]; then
if [ ! -z "$samba_options" ]; then
# install samba
apt-get install -y -q --no-install-recommends samba
# parse options
@@ -791,16 +784,16 @@ cat >> /etc/samba/smb.conf << EOF
create mask = $smb_create_mask
directory mask = $smb_directory_mask
EOF
if [ $smb_username != "nobody" ]; then
if [ "$smb_username" != "nobody" ]; then
# create group
groupadd -o -g $smb_gid $smb_username
groupadd -o -g "$smb_gid" "$smb_username"
# create user (disable login)
useradd -N -g $smb_gid -p '!' -o -u $smb_uid -s /bin/bash -m -d /home/$smb_username $smb_username
useradd -N -g "$smb_gid" -p '!' -o -u "$smb_uid" -s /bin/bash -m -d /home/"${smb_username}" "$smb_username"
# add user to smb tdbsam
echo -ne "${smb_password}\n${smb_password}\n" | smbpasswd -a -s $smb_username
smbpasswd -e $smb_username
echo -ne "${smb_password}\\n${smb_password}\\n" | smbpasswd -a -s "$smb_username"
smbpasswd -e "$smb_username"
# modify smb.conf global
sed -i "/^\[global\]/a load printers = no\nprinting = bsd\nprintcap name = /dev/null\ndisable spoolss = yes\nsecurity = user\nserver signing = auto\nsmb encrypt = auto" /etc/samba/smb.conf
sed -i "/^\\[global\\]/a load printers = no\\nprinting = bsd\\nprintcap name = /dev/null\\ndisable spoolss = yes\\nsecurity = user\\nserver signing = auto\\nsmb encrypt = auto" /etc/samba/smb.conf
# modify smb.conf share
cat >> /etc/samba/smb.conf << EOF
guest ok = no
@@ -809,7 +802,7 @@ cat >> /etc/samba/smb.conf << EOF
EOF
else
# modify smb.conf global
sed -i "/^\[global\]/a load printers = no\nprinting = bsd\nprintcap name = /dev/null\ndisable spoolss = yes\nsecurity = user\nserver signing = auto\nsmb encrypt = auto\nguest account = $smb_username" /etc/samba/smb.conf
sed -i "/^\\[global\\]/a load printers = no\\nprinting = bsd\\nprintcap name = /dev/null\\ndisable spoolss = yes\\nsecurity = user\\nserver signing = auto\\nsmb encrypt = auto\\nguest account = $smb_username" /etc/samba/smb.conf
# modify smb.conf share
cat >> /etc/samba/smb.conf << EOF
guest ok = yes
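The doubled backslashes (\\n, \\[global\\]) address the literal-backslash warning ShellCheck raised on these lines (SC1117): inside double quotes, "\\n" unambiguously produces the two characters backslash and n, which echo -e and sed's append command then interpret, instead of relying on "\n" passing through unchanged. Where the escape only exists to feed echo -e, printf sidesteps the question entirely; a sketch with a placeholder password and no smbpasswd call:

#!/usr/bin/env bash
# Sketch only: 'secret' is a placeholder; output goes to cat -A, not smbpasswd.
smb_password='secret'
echo -ne "${smb_password}\\n${smb_password}\\n" | cat -A
# printf keeps the escape out of the data: %s prints the value verbatim and
# \n is printf's own newline escape, producing the same two lines.
printf '%s\n%s\n' "$smb_password" "$smb_password" | cat -A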
@@ -2,7 +2,7 @@

set -o pipefail

DEBIAN_FRONTEND=noninteractive
export DEBIAN_FRONTEND=noninteractive

# constants
gluster_brick_mountpath=/gluster/brick
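The export here matters beyond ShellCheck's unused-variable warning (SC2034): a bare DEBIAN_FRONTEND=noninteractive assignment only sets a shell variable, so child processes such as apt-get never see it. A sketch using bash -c as a stand-in child process:

#!/usr/bin/env bash
# Sketch only: bash -c plays the role of apt-get or any other child process.
DEBIAN_FRONTEND=noninteractive
bash -c 'echo "without export: [${DEBIAN_FRONTEND:-unset}]"'
export DEBIAN_FRONTEND
bash -c 'echo "with export:    [${DEBIAN_FRONTEND:-unset}]"'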
@@ -54,22 +54,21 @@ shift $((OPTIND-1))
[ "$1" = "--" ] && shift

# get all data disks
declare -a data_disks
all_disks=($(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME))
mapfile -t all_disks < <(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME)
for disk in "${all_disks[@]}"; do
# ignore os and ephemeral disks
if [ $disk != "/dev/sda" ] && [ $disk != "/dev/sdb" ]; then
if [ "$disk" != "/dev/sda" ] && [ "$disk" != "/dev/sdb" ]; then
data_disks=("${data_disks[@]}" "$disk")
fi
done
unset all_disks
numdisks=${#data_disks[@]}

echo "Detected $numdisks data disks: ${data_disks[@]}"
echo "Detected $numdisks data disks: ${data_disks[*]}"
echo ""

# check server_type software
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
echo "NFS service status:"
systemctl status nfs-kernel-server.service
echo ""
@@ -81,7 +80,7 @@ if [ $server_type == "nfs" ]; then
echo ""
echo "connected clients:"
netstat -tn | grep :2049
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
echo "glusterfs service status:"
systemctl status glusterfs-server
echo ""
@@ -92,12 +91,11 @@ elif [ $server_type == "glusterfs" ]; then
gluster volume status all clients
echo ""
set +e
gluster volume rebalance $gluster_volname status 2>&1
gluster volume heal $gluster_volname info 2>&1
if [ $? -eq 0 ]; then
gluster volume heal $gluster_volname info healed 2>&1
gluster volume heal $gluster_volname info heal-failed 2>&1
gluster volume heal $gluster_volname info split-brain 2>&1
gluster volume rebalance "$gluster_volname" status 2>&1
if gluster volume heal "$gluster_volname" info 2>&1; then
gluster volume heal "$gluster_volname" info healed 2>&1
gluster volume heal "$gluster_volname" info heal-failed 2>&1
gluster volume heal "$gluster_volname" info split-brain 2>&1
fi
set -e
echo ""
@@ -110,11 +108,8 @@ fi
echo ""

# check if mount is active
mount=$(mount | grep $mountpath)
if [ $? -eq 0 ]; then
echo "Mount information:"
echo $mount
else
echo "Mount information:"
if ! mount | grep $mountpath; then
echo "$mountpath not mounted"
exit 1
fi
@@ -123,12 +118,12 @@ fi
df -h

# get raid status
if [ $raid_level -ge 0 ]; then
if [ "$raid_level" -ge 0 ]; then
echo ""
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
echo "btrfs device status:"
for disk in "${data_disks[@]}"; do
btrfs device stats ${disk}1
btrfs device stats "${disk}"1
done
echo ""
echo "btrfs filesystem:"
@@ -139,14 +134,13 @@ if [ $raid_level -ge 0 ]; then
cat /proc/mdstat
echo ""
# find md target
target=($(find /dev/md* -maxdepth 0 -type b))
mapfile -t target < <(find /dev/md* -maxdepth 0 -type b)
if [ ${#target[@]} -ne 1 ]; then
echo "Could not determine md target"
exit 1
fi
target=${target[0]}
echo "mdadm detail:"
mdadm --detail $target
mdadm --detail "${target[0]}"
fi
fi
@@ -15,14 +15,14 @@ block_singularity=${cip[1]}
log DEBUG "Block for Docker images: $block_docker"
log DEBUG "Block for Singularity images: $block_singularity"

if [ ! -z $block_docker ]; then
if [ ! -z "$block_docker" ]; then
log INFO "blocking until Docker images ready: $block_docker"
IFS=',' read -ra RES <<< "$block_docker"
declare -a missing
while :
do
for image in "${RES[@]}"; do
if [ -z "$(docker images -q $image 2>/dev/null)" ]; then
if [ -z "$(docker images -q "$image" 2>/dev/null)" ]; then
missing=("${missing[@]}" "$image")
fi
done
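One adjacent style point the diff leaves alone: newer ShellCheck releases also suggest (SC2236) writing [ -n "$var" ] instead of [ ! -z "$var" ]. The two forms are equivalent, so keeping ! -z here is purely a readability choice; a sketch with a placeholder value:

#!/usr/bin/env bash
# Sketch only: block_docker is a placeholder, not read from the environment.
block_docker="alpine:3.7"
[ ! -z "$block_docker" ] && echo "non-empty via ! -z"
[ -n "$block_docker" ] && echo "non-empty via -n"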
@@ -36,7 +36,7 @@ if [ ! -z $block_docker ]; then
done
fi

if [ ! -z $block_singularity ]; then
if [ ! -z "$block_singularity" ]; then
log INFO "blocking until Singularity images ready: $block_singularity"
log DEBUG "Singularity cache dir: ${SINGULARITY_CACHEDIR}"
IFS=',' read -ra RES <<< "$block_singularity"
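All of the edits in this diff follow directly from ShellCheck findings, so the result can be re-checked locally by running the linter over the shell scripts. An illustrative invocation (the file path is a placeholder, not a name taken from this diff):

# Lint one script; -x follows files pulled in via source/".".
shellcheck -x path/to/script.sh
# Or lint every tracked *.sh file in one pass (GNU xargs: -r skips empty input).
git ls-files '*.sh' | xargs -r shellcheck -x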