Fix scripts to be Shellcheck clean (#178)

This commit is contained in:
Fred Park 2018-03-29 12:47:37 -07:00
Родитель a98bbb5242
Коммит c1a92e4138
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 3C4D545F457737EB
19 изменённых файлов: 673 добавления и 639 удалений

Просмотреть файл

@ -386,7 +386,7 @@ SOFTWARE.
pykwalify (https://github.com/Grokzen/pykwalify)
Copyright (c) 2013-2015 Johan Andersson
Copyright (c) 2013-2018 Johan Andersson
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
@ -753,7 +753,7 @@ ruamel.yaml (https://bitbucket.org/ruamel/yaml)
The MIT License (MIT)
Copyright (c) 2014-2017 Anthon van der Neut, Ruamel bvba
Copyright (c) 2014-2018 Anthon van der Neut, Ruamel bvba
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

Просмотреть файл

@ -3,4 +3,4 @@
set -e
set -o pipefail
python3 /opt/batch-shipyard/recurrent_job_manager.py $*
python3 /opt/batch-shipyard/recurrent_job_manager.py "$@"

Просмотреть файл

@ -10,8 +10,8 @@ for spec in "$@"; do
IFS=',' read -ra parts <<< "$spec"
# encrypt,creds,jobid,taskid,include,exclude,dst
encrypt=${parts[0],,}
if [ $encrypt == "true" ]; then
SHIPYARD_BATCH_ENV=`echo ${parts[1]} | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
if [ "$encrypt" == "true" ]; then
SHIPYARD_BATCH_ENV=$(echo "${parts[1]}" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
else
SHIPYARD_BATCH_ENV=${parts[1]}
fi
@ -23,20 +23,20 @@ for spec in "$@"; do
dst=${parts[6]}
include=
if [ ! -z $incl ]; then
if [ ! -z "$incl" ]; then
include="--include $incl"
fi
exclude=
if [ ! -z $excl ]; then
if [ ! -z "$excl" ]; then
exclude="--exclude $excl"
fi
# create destination directory
dest=
if [ ! -z $dst ]; then
if [ ! -z "$dst" ]; then
dest="--dst $dst"
mkdir -p $dst
mkdir -p "$dst"
fi
# ingress data from batch task
export SHIPYARD_BATCH_ENV=$SHIPYARD_BATCH_ENV
python3 /opt/batch-shipyard/task_file_mover.py $jobid $taskid $include $exclude $dest
# $include, $exclude and $dest are each either empty or an "--option value"
# pair assembled above. They must undergo word splitting: quoting them would
# hand the Python script an empty argument (when unset) or a single fused
# "--option value" word (when set). Job and task ids stay quoted.
# shellcheck disable=SC2086
python3 /opt/batch-shipyard/task_file_mover.py "$jobid" "$taskid" $include $exclude $dest
done

Просмотреть файл

@ -1,5 +1,9 @@
#!/usr/bin/env bash
# this script runs in the context of env vars imported inside of a
# Docker run env, thus disable ref but not assigned shellcheck warnings.
# shellcheck disable=SC2154
set -e
set -o pipefail
@ -13,16 +17,16 @@ cd /opt/batch-shipyard
# add timing markers
if [ ! -z ${SHIPYARD_TIMING+x} ]; then
# backfill node prep start
python3 perf.py nodeprep start $prefix --ts $npstart --message "offer=$offer,sku=$sku"
python3 perf.py nodeprep start "$prefix" --ts "$npstart" --message "offer=$offer,sku=$sku"
# backfill docker run pull start
python3 perf.py shipyard pull-start $prefix --ts $drpstart
python3 perf.py shipyard pull-start "$prefix" --ts "$drpstart"
# mark docker run pull end
python3 perf.py shipyard pull-end $prefix
python3 perf.py shipyard pull-end "$prefix"
# mark node prep finished
python3 perf.py nodeprep end $prefix
python3 perf.py nodeprep end "$prefix"
# mark cascade start time
python3 perf.py cascade start $prefix
python3 perf.py cascade start "$prefix"
fi
# execute cascade
python3 cascade.py $p2p --ipaddress $ipaddress $prefix
python3 cascade.py "$p2p" --ipaddress "$ipaddress" "$prefix"

Просмотреть файл

@ -50,21 +50,17 @@ DEPENDENCIES=(
https://github.com/requests/requests/raw/master/LICENSE
ruamel.yaml
https://bitbucket.org/ruamel/yaml
https://bitbucket.org/ruamel/yaml/raw/ef15acf88b039656570f9b1f45b5e7394c154997/LICENSE
https://bitbucket.org/ruamel/yaml/raw/8d3f84d78aff534cbc881fa509ade31a5edc451d/LICENSE
)
DEPLEN=${#DEPENDENCIES[@]}
add_attribution() {
name=$1
url=$2
license=$(curl -fSsL $3)
license=$(curl -fSsL "$3")
echo "" >> $TPNFILE
echo "-------------------------------------------------------------------------------" >> $TPNFILE
echo "" >> $TPNFILE
echo "$name ($url)" >> $TPNFILE
echo "" >> $TPNFILE
echo "$license" >> $TPNFILE
{ echo ""; echo "-------------------------------------------------------------------------------"; \
echo ""; echo "$name ($url)"; echo ""; echo "$license"; } >> $TPNFILE
}
cat << 'EOF' > $TPNFILE
@ -89,13 +85,12 @@ Redmond, WA 98052 USA
Please write "source for [Third Party IP]" in the memo line of your payment.
EOF
echo -n "Generating $(($DEPLEN / 3)) attributions: ["
echo -n "Generating $((DEPLEN / 3)) attributions: ["
i=0
while [ $i -lt $DEPLEN ]; do
add_attribution ${DEPENDENCIES[$i]} ${DEPENDENCIES[$(($i+1))]} ${DEPENDENCIES[$(($i+2))]}
i=$(($i + 3))
while [ $i -lt "$DEPLEN" ]; do
add_attribution "${DEPENDENCIES[$i]}" "${DEPENDENCIES[$((i+1))]}" "${DEPENDENCIES[$((i+2))]}"
i=$((i + 3))
echo -n "."
done
echo "" >> $TPNFILE
echo "-------------------------------------------------------------------------------" >> $TPNFILE
{ echo ""; echo "-------------------------------------------------------------------------------"; } >> $TPNFILE
echo "] done."

Просмотреть файл

@ -1,4 +1,4 @@
#!/usr/bin/env sh
printenv
docker build --build-arg GIT_BRANCH=$SOURCE_BRANCH --build-arg GIT_COMMIT=$GIT_SHA1 -t $IMAGE_NAME .
docker build --build-arg GIT_BRANCH="$SOURCE_BRANCH" --build-arg GIT_COMMIT="$GIT_SHA1" -t "$IMAGE_NAME" .

Просмотреть файл

@ -1,5 +1,8 @@
#!/usr/bin/env bash
# shellcheck disable=SC1090
# shellcheck disable=SC1091
set -e
set -o pipefail
@ -47,7 +50,7 @@ shift $((OPTIND-1))
# non-cloud shell environment checks
if [ ! -z $SUDO ]; then
# check to ensure this is not being run directly as root
if [ $(id -u) -eq 0 ]; then
if [ "$(id -u)" -eq 0 ]; then
echo "Installation cannot be performed as root or via sudo."
echo "Please install as a regular user."
exit 1
@ -62,7 +65,7 @@ if [ ! -z $SUDO ]; then
fi
# check that shipyard.py is in cwd
if [ ! -f $PWD/shipyard.py ]; then
if [ ! -f "${PWD}"/shipyard.py ]; then
echo "shipyard.py not found in $PWD."
echo "Please run install.sh from the same directory as shipyard.py."
exit 1
@ -79,8 +82,7 @@ fi
# check for anaconda
set +e
ANACONDA=0
$PYTHON -c "from __future__ import print_function; import sys; print(sys.version)" | grep -Ei 'anaconda|continuum|conda-forge'
if [ $? -eq 0 ]; then
if $PYTHON -c "from __future__ import print_function; import sys; print(sys.version)" | grep -Ei 'anaconda|continuum|conda-forge'; then
# check for conda
if hash conda 2> /dev/null; then
echo "Anaconda environment detected."
@ -88,7 +90,7 @@ if [ $? -eq 0 ]; then
echo "Anaconda environment detected, but conda command not found."
exit 1
fi
if [ -z $VENV_NAME ]; then
if [ -z "$VENV_NAME" ]; then
echo "Virtual environment name must be supplied for Anaconda installations."
exit 1
fi
@ -99,7 +101,7 @@ set -e
# perform some virtual env parameter checks
INSTALL_VENV_BIN=0
if [ ! -z $VENV_NAME ]; then
if [ ! -z "$VENV_NAME" ]; then
# check if virtual env, env is not named shipyard
if [ "$VENV_NAME" == "shipyard" ]; then
echo "Virtual environment name cannot be shipyard. Please use a different virtual environment name."
@ -139,7 +141,7 @@ if [ -z ${DISTRIB_ID+x} ] || [ -z ${DISTRIB_RELEASE+x} ]; then
fi
# lowercase vars
if [ $DISTRIB_ID != "Darwin" ]; then
if [ "$DISTRIB_ID" != "Darwin" ]; then
DISTRIB_ID=${DISTRIB_ID,,}
DISTRIB_RELEASE=${DISTRIB_RELEASE,,}
fi
@ -147,8 +149,8 @@ fi
echo "Detected OS: $DISTRIB_ID $DISTRIB_RELEASE"
# install requisite packages from distro repo
if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
if [ $DISTRIB_ID == "ubuntu" ] || [ $DISTRIB_ID == "debian" ]; then
if [ ! -z $SUDO ] || [ "$(id -u)" -eq 0 ]; then
if [ "$DISTRIB_ID" == "ubuntu" ] || [ "$DISTRIB_ID" == "debian" ]; then
$SUDO apt-get update
if [ $ANACONDA -eq 1 ]; then
PYTHON_PKGS=
@ -165,10 +167,11 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
fi
fi
fi
# shellcheck disable=SC2086
$SUDO apt-get install -y --no-install-recommends \
build-essential libssl-dev libffi-dev openssl \
openssh-client rsync $PYTHON_PKGS
elif [ $DISTRIB_ID == "centos" ] || [ $DISTRIB_ID == "rhel" ]; then
elif [ "$DISTRIB_ID" == "centos" ] || [ "$DISTRIB_ID" == "rhel" ]; then
$SUDO yum makecache fast
if [ $ANACONDA -eq 1 ]; then
PYTHON_PKGS=
@ -176,14 +179,12 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
if [ $PYTHON == "python" ]; then
PYTHON_PKGS="python-devel"
else
yum list installed epel-release
if [ $? -ne 0 ]; then
if ! yum list installed epel-release; then
echo "epel-release package not installed."
echo "Please install the epel-release package or refer to the Installation documentation for manual installation steps".
exit 1
fi
yum list installed python34
if [ $? -ne 0 ]; then
if ! yum list installed python34; then
echo "python34 epel package not installed."
echo "Please install the python34 epel package or refer to the Installation documentation for manual installation steps."
exit 1
@ -191,12 +192,13 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
PYTHON_PKGS="python34-devel"
fi
fi
# shellcheck disable=SC2086
$SUDO yum install -y gcc openssl-devel libffi-devel openssl \
openssh-clients rsync $PYTHON_PKGS
if [ $ANACONDA -eq 0 ]; then
curl -fSsL --tlsv1 https://bootstrap.pypa.io/get-pip.py | $SUDO $PYTHON
fi
elif [ $DISTRIB_ID == "opensuse" ] || [ $DISTRIB_ID == "sles" ]; then
elif [ "$DISTRIB_ID" == "opensuse" ] || [ "$DISTRIB_ID" == "sles" ]; then
$SUDO zypper ref
if [ $ANACONDA -eq 1 ]; then
PYTHON_PKGS=
@ -207,12 +209,13 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
PYTHON_PKGS="python3-devel"
fi
fi
# shellcheck disable=SC2086
$SUDO zypper -n in gcc libopenssl-devel libffi48-devel openssl \
openssh rsync $PYTHON_PKGS
if [ $ANACONDA -eq 0 ]; then
curl -fSsL --tlsv1 https://bootstrap.pypa.io/get-pip.py | $SUDO $PYTHON
fi
elif [ $DISTRIB_ID == "Darwin" ]; then
elif [ "$DISTRIB_ID" == "Darwin" ]; then
# check for pip, otherwise install it
if hash $PIP 2> /dev/null; then
echo "$PIP detected."
@ -228,10 +231,10 @@ if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
fi
# create virtual env if required and install required python packages
if [ ! -z $VENV_NAME ]; then
if [ ! -z "$VENV_NAME" ]; then
# install virtual env if required
if [ $INSTALL_VENV_BIN -eq 1 ]; then
if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
if [ ! -z $SUDO ] || [ "$(id -u)" -eq 0 ]; then
$SUDO $PIP install virtualenv
else
$PIP install --user virtualenv
@ -239,12 +242,12 @@ if [ ! -z $VENV_NAME ]; then
fi
if [ $ANACONDA -eq 0 ]; then
# create venv if it doesn't exist
if [ ! -z $SUDO ] || [ $(id -u) -eq 0 ]; then
virtualenv -p $PYTHON $VENV_NAME
if [ ! -z $SUDO ] || [ "$(id -u)" -eq 0 ]; then
virtualenv -p $PYTHON "$VENV_NAME"
else
$HOME/.local/bin/virtualenv -p $PYTHON $VENV_NAME
"${HOME}"/.local/bin/virtualenv -p $PYTHON "$VENV_NAME"
fi
source $VENV_NAME/bin/activate
source "${VENV_NAME}"/bin/activate
$PIP install --upgrade pip setuptools
set +e
$PIP uninstall -y azure-storage
@ -258,9 +261,9 @@ if [ ! -z $VENV_NAME ]; then
echo "Creating conda env for Python $pyver"
# create conda env
set +e
conda create --yes --name $VENV_NAME python=$pyver
conda create --yes --name "$VENV_NAME" python="${pyver}"
set -e
source activate $VENV_NAME
source activate "$VENV_NAME"
conda install --yes pip
# temporary workaround with pip requirements upgrading setuptools and
# conda pip failing to reference the old setuptools version
@ -270,7 +273,7 @@ if [ ! -z $VENV_NAME ]; then
set -e
$PIP install --upgrade -r requirements.txt
$PIP install --upgrade --no-deps -r req_nodeps.txt
source deactivate $VENV_NAME
source deactivate "$VENV_NAME"
fi
else
$SUDO $PIP install --upgrade pip
@ -302,7 +305,7 @@ fi
EOF
if [ ! -z $VENV_NAME ]; then
if [ ! -z "$VENV_NAME" ]; then
if [ $ANACONDA -eq 0 ]; then
cat >> shipyard << 'EOF'
source $BATCH_SHIPYARD_ROOT_DIR/$VENV_NAME/bin/activate
@ -324,7 +327,7 @@ python3 $BATCH_SHIPYARD_ROOT_DIR/shipyard.py $*
EOF
fi
if [ ! -z $VENV_NAME ]; then
if [ ! -z "$VENV_NAME" ]; then
if [ $ANACONDA -eq 0 ]; then
cat >> shipyard << 'EOF'
deactivate
@ -339,7 +342,8 @@ fi
chmod 755 shipyard
echo ""
if [ -z $VENV_NAME ]; then
if [ -z "$VENV_NAME" ]; then
# shellcheck disable=SC2016
echo '>> Please add $HOME/.local/bin to your $PATH. You can do this '
echo '>> permanently in your shell rc script, e.g., .bashrc for bash shells.'
echo ""

Просмотреть файл

@ -11,26 +11,26 @@ log() {
# decrypt passwords if necessary
if [ "$1" == "-e" ]; then
if [ ! -z $DOCKER_LOGIN_PASSWORD ]; then
DOCKER_LOGIN_PASSWORD=$(echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
if [ ! -z "$DOCKER_LOGIN_PASSWORD" ]; then
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
fi
if [ ! -z $SINGULARITY_LOGIN_PASSWORD ]; then
SINGULARITY_LOGIN_PASSWORD=$(echo $SINGULARITY_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
if [ ! -z "$SINGULARITY_LOGIN_PASSWORD" ]; then
SINGULARITY_LOGIN_PASSWORD=$(echo "$SINGULARITY_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey ../certs/key.pem)
fi
fi
# login to Docker registries
if [ ! -z $DOCKER_LOGIN_PASSWORD ]; then
if [ ! -z "$DOCKER_LOGIN_PASSWORD" ]; then
# parse env vars
IFS=',' read -ra servers <<< "${DOCKER_LOGIN_SERVER}"
IFS=',' read -ra users <<< "${DOCKER_LOGIN_USERNAME}"
IFS=',' read -ra passwords <<< "${DOCKER_LOGIN_PASSWORD}"
# loop through each server and login
nservers=${#servers[@]}
if [ $nservers -ge 1 ]; then
if [ "$nservers" -ge 1 ]; then
log DEBUG "Logging into $nservers Docker registry servers..."
for i in $(seq 0 $((nservers-1))); do
docker login --username ${users[$i]} --password ${passwords[$i]} ${servers[$i]}
docker login --username "${users[$i]}" --password "${passwords[$i]}" "${servers[$i]}"
done
log INFO "Docker registry logins completed."
fi
@ -39,14 +39,14 @@ else
fi
# "login" to Singularity registries
if [ ! -z $SINGULARITY_LOGIN_PASSWORD ]; then
if [ ! -z "$SINGULARITY_LOGIN_PASSWORD" ]; then
# parse env vars
IFS=',' read -ra servers <<< "${SINGULARITY_LOGIN_SERVER}"
IFS=',' read -ra users <<< "${SINGULARITY_LOGIN_USERNAME}"
IFS=',' read -ra passwords <<< "${SINGULARITY_LOGIN_PASSWORD}"
# loop through each server and login
nservers=${#servers[@]}
if [ $nservers -ge 1 ]; then
if [ "$nservers" -ge 1 ]; then
log DEBUG "Creating export script into $nservers Singularity registry servers..."
touch singularity-registry-login
for i in $(seq 0 $((nservers-1))); do

Просмотреть файл

@ -12,13 +12,13 @@ for spec in "$@"; do
kind=${parts[1]}
encrypted=${parts[2],,}
if [ $encrypted == "true" ]; then
if [ "$encrypted" == "true" ]; then
cipher=${parts[3]}
local_path=${parts[4]}
eo=${parts[5]}
# decrypt ciphertext
privatekey=$AZ_BATCH_NODE_STARTUP_DIR/certs/key.pem
cipher=`echo $cipher | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
cipher=$(echo "$cipher" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
IFS=',' read -ra storage <<< "$cipher"
sa=${storage[0]}
ep=${storage[1]}
@ -36,12 +36,12 @@ for spec in "$@"; do
fi
wd=$(dirname "$local_path")
if [ $kind == "i" ]; then
if [ "$kind" == "i" ]; then
# create destination working directory
mkdir -p $wd
mkdir -p "$wd"
# ingress data from storage
action=download
elif [ $kind == "e" ]; then
elif [ "$kind" == "e" ]; then
# egress from compute node to storage
action=upload
else
@ -50,8 +50,8 @@ for spec in "$@"; do
fi
# execute blobxfer
docker run --rm -t -v $wd:$wd -w $wd alfpark/blobxfer:$bxver \
$action --storage-account $sa --sas $saskey --endpoint $ep \
--remote-path $remote_path --local-path $local_path \
--no-progress-bar $eo
# Run blobxfer in a container with the working directory bind-mounted at the
# same path inside the container.
# NOTE(review): $eo is presumably a string of extra blobxfer options taken
# from the spec - confirm against the caller. It is deliberately left
# unquoted: when it is empty, "$eo" would pass a stray empty argument, and
# when it holds several options they would be fused into one word.
# shellcheck disable=SC2086
docker run --rm -t -v "$wd":"$wd" -w "$wd" alfpark/blobxfer:"$bxver" \
    "$action" --storage-account "$sa" --sas "$saskey" --endpoint "$ep" \
    --remote-path "$remote_path" --local-path "$local_path" \
    --no-progress-bar $eo
done

Просмотреть файл

@ -7,10 +7,10 @@ voltype=$1
mntpath=$2
# get my ip address
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
# if master, peer and create volume
if [ $AZ_BATCH_IS_CURRENT_NODE_MASTER == "true" ]; then
if [ "$AZ_BATCH_IS_CURRENT_NODE_MASTER" == "true" ]; then
# construct brick locations
IFS=',' read -ra HOSTS <<< "$AZ_BATCH_HOST_LIST"
bricks=
@ -18,20 +18,20 @@ if [ $AZ_BATCH_IS_CURRENT_NODE_MASTER == "true" ]; then
do
bricks+=" $node:$mntpath/gluster/brick"
# probe peer
if [ $node != $ipaddress ]; then
if [ "$node" != "$ipaddress" ]; then
echo "probing $node"
gluster peer probe $node
gluster peer probe "$node"
fi
done
numnodes=${#HOSTS[@]}
numpeers=$(($numnodes - 1))
numpeers=$((numnodes - 1))
echo "waiting for $numpeers peers to reach connected state..."
# get peer info
set +e
while :
do
numready=`gluster peer status | grep -e '^State: Peer in Cluster' | wc -l`
if [ $numready == $numpeers ]; then
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
if [ "$numready" == "$numpeers" ]; then
break
fi
sleep 1
@ -42,10 +42,10 @@ if [ $AZ_BATCH_IS_CURRENT_NODE_MASTER == "true" ]; then
sleep 5
# create volume
echo "creating gv0 ($bricks)"
gluster volume create gv0 $voltype $numnodes transport tcp$bricks
# $bricks is a space-separated list of " host:path" entries (each one is
# appended with a leading space in the loop above). It has to be word-split
# so that "tcp" and every brick arrive as separate arguments; quoting it
# would collapse them into the single word "tcp host:path host:path ...".
# shellcheck disable=SC2086
gluster volume create gv0 "$voltype" "$numnodes" transport tcp $bricks
# modify volume properties: the uid/gid mapping is UNDOCUMENTED behavior
gluster volume set gv0 storage.owner-uid `id -u _azbatch`
gluster volume set gv0 storage.owner-gid `id -g _azbatch`
gluster volume set gv0 storage.owner-uid "$(id -u _azbatch)"
gluster volume set gv0 storage.owner-gid "$(id -g _azbatch)"
# start volume
echo "starting gv0"
gluster volume start gv0
@ -56,8 +56,7 @@ echo "waiting for gv0 volume..."
set +e
while :
do
gluster volume info gv0
if [ $? -eq 0 ]; then
if gluster volume info gv0; then
# delay to wait for subvolumes
sleep 5
break
@ -68,8 +67,8 @@ set -e
# add gv0 to /etc/fstab for auto-mount on reboot
mountpoint=$AZ_BATCH_NODE_SHARED_DIR/.gluster/gv0
mkdir -p $mountpoint
chmod 775 $mountpoint
mkdir -p "$mountpoint"
chmod 775 "$mountpoint"
echo "adding $mountpoint to fstab"
echo "$ipaddress:/gv0 $mountpoint glusterfs defaults,_netdev 0 0" >> /etc/fstab
@ -79,12 +78,11 @@ START=$(date -u +"%s")
set +e
while :
do
mount $mountpoint
if [ $? -eq 0 ]; then
if mount "$mountpoint"; then
break
else
NOW=$(date -u +"%s")
DIFF=$((($NOW-$START)/60))
DIFF=$(((NOW-START)/60))
# fail after 5 minutes of attempts
if [ $DIFF -ge 5 ]; then
echo "could not mount gluster volume: $mountpoint"
@ -94,7 +92,7 @@ do
fi
done
set -e
chmod 775 $mountpoint
chmod 775 "$mountpoint"
# touch file noting success
touch .glusterfs_success

Просмотреть файл

@ -8,7 +8,7 @@ shift
mntpath=$1
shift
numnodes=$1
numpeers=$(($numnodes - 1))
numpeers=$((numnodes - 1))
shift
masterip=$1
shift
@ -18,14 +18,14 @@ echo "temp disk mountpoint: $mntpath"
echo "master ip: $masterip"
# get my ip address
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
echo "ip address: $ipaddress"
# check if my ip address is a new node
domount=0
for i in "$@"
do
if [ $i == $ipaddress ]; then
if [ "$i" == "$ipaddress" ]; then
domount=1
break
fi
@ -33,22 +33,22 @@ done
echo "mount: $domount"
# master peers and adds the bricks
if [ $masterip == $ipaddress ]; then
if [ "$masterip" == "$ipaddress" ]; then
# probe new nodes
bricks=
for node in "$@"
do
bricks+=" $node:$mntpath/gluster/brick"
echo "probing $node"
gluster peer probe $node
gluster peer probe "$node"
done
# get peer info
set +e
while :
do
numready=`gluster peer status | grep -e '^State: Peer in Cluster' | wc -l`
if [ $numready == $numpeers ]; then
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
if [ "$numready" == "$numpeers" ]; then
break
fi
sleep 1
@ -60,7 +60,7 @@ if [ $masterip == $ipaddress ]; then
sleep 5
# add bricks to volume
gluster volume add-brick $voltype $numnodes gv0$bricks
# $bricks is a space-separated list of " host:path" entries (each one is
# appended with a leading space in the probe loop above). It has to be
# word-split so that "gv0" and every brick arrive as separate arguments;
# quoting it would collapse them into the single word "gv0 host:path ...".
# shellcheck disable=SC2086
gluster volume add-brick "$voltype" "$numnodes" gv0 $bricks
# get volume info
gluster volume info
@ -71,7 +71,7 @@ echo "waiting for gv0 volume..."
set +e
while :
do
numbricks=`gluster volume info gv0 | grep -e '^Number of Bricks:' | cut -d' ' -f4`
numbricks=$(gluster volume info gv0 | grep -e '^Number of Bricks:' | cut -d' ' -f4)
if [ "$numbricks" == "$numnodes" ]; then
# delay to wait for subvolumes
sleep 5
@ -85,8 +85,8 @@ set -e
if [ $domount -eq 1 ]; then
# add gv0 to /etc/fstab for auto-mount on reboot
mountpoint=$AZ_BATCH_NODE_SHARED_DIR/.gluster/gv0
mkdir -p $mountpoint
chmod 775 $mountpoint
mkdir -p "$mountpoint"
chmod 775 "$mountpoint"
echo "adding $mountpoint to fstab"
echo "$ipaddress:/gv0 $mountpoint glusterfs defaults,_netdev 0 0" >> /etc/fstab
@ -96,12 +96,11 @@ if [ $domount -eq 1 ]; then
set +e
while :
do
mount $mountpoint
if [ $? -eq 0 ]; then
if mount "$mountpoint"; then
break
else
NOW=$(date -u +"%s")
DIFF=$((($NOW-$START)/60))
DIFF=$(((NOW-START)/60))
# fail after 5 minutes of attempts
if [ $DIFF -ge 5 ]; then
echo "could not mount gluster volume: $mountpoint"
@ -111,7 +110,7 @@ if [ $domount -eq 1 ]; then
fi
done
set -e
chmod 775 $mountpoint
chmod 775 "$mountpoint"
fi
# touch file noting success

Просмотреть файл

@ -6,7 +6,7 @@ set -o pipefail
offer=$1
sku=$2
if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
if [ "$offer" == "ubuntuserver" ] || [ "$offer" == "debian" ]; then
if [[ $sku == 14.04* ]]; then
srvrestart="initctl restart sshd"
mkdir /tmp/hpnssh
@ -32,9 +32,7 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
exit 1
fi
# modify sshd config settings
echo "HPNDisabled=no" >> /etc/ssh/sshd_config
echo "TcpRcvBufPoll=yes" >> /etc/ssh/sshd_config
echo "NoneEnabled=yes" >> /etc/ssh/sshd_config
{ echo "HPNDisabled=no"; echo "TcpRcvBufPoll=yes"; echo "NoneEnabled=yes"; } >> /etc/ssh/sshd_config
# restart sshd
$srvrestart
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then

Просмотреть файл

@ -1,5 +1,7 @@
#!/usr/bin/env bash
# shellcheck disable=SC1091
set -e
set -o pipefail
@ -99,7 +101,7 @@ while getopts "h?abcde:fg:m:no:p:s:t:v:wx:" opt; do
t)
p2p=${OPTARG,,}
IFS=':' read -ra p2pflags <<< "$p2p"
if [ ${p2pflags[0]} == "true" ]; then
if [ "${p2pflags[0]}" == "true" ]; then
p2penabled=1
else
p2penabled=0
@ -119,15 +121,15 @@ done
shift $((OPTIND-1))
[ "$1" = "--" ] && shift
# check args
if [ -z $offer ]; then
if [ -z "$offer" ]; then
log ERROR "vm offer not specified"
exit 1
fi
if [ -z $sku ]; then
if [ -z "$sku" ]; then
log ERROR "vm sku not specified"
exit 1
fi
if [ -z $version ]; then
if [ -z "$version" ]; then
log ERROR "batch-shipyard version not specified"
exit 1
fi
@ -168,7 +170,7 @@ check_for_buggy_ntfs_mount() {
save_startup_to_volatile() {
set +e
touch $AZ_BATCH_NODE_ROOT_DIR/volatile/startup/.save
touch "${AZ_BATCH_NODE_ROOT_DIR}"/volatile/startup/.save
set -e
}
@ -176,14 +178,15 @@ ensure_nvidia_driver_installed() {
check_for_nvidia_card
# ensure that nvidia drivers are loaded
set +e
local out=$(lsmod)
local out
out=$(lsmod)
echo "$out" | grep -i nvidia > /dev/null
local rc=$?
set -e
echo "$out"
if [ $rc -ne 0 ]; then
log WARNING "Nvidia driver not present!"
install_nvidia_software $1 $2
install_nvidia_software "$1" "$2"
else
log INFO "Nvidia driver detected"
nvidia-smi
@ -192,7 +195,8 @@ ensure_nvidia_driver_installed() {
check_for_nvidia_card() {
set +e
local out=$(lspci)
local out
out=$(lspci)
echo "$out" | grep -i nvidia > /dev/null
local rc=$?
set -e
@ -220,7 +224,7 @@ install_nvidia_software() {
rmmod nouveau
set -e
# purge nouveau off system
if [ $offer == "ubuntuserver" ]; then
if [ "$offer" == "ubuntuserver" ]; then
apt-get --purge remove xserver-xorg-video-nouveau xserver-xorg-video-nouveau-hwe-16.04
elif [[ $offer == centos* ]]; then
yum erase -y xorg-x11-drv-nouveau
@ -237,33 +241,34 @@ alias nouveau off
alias lbm-nouveau off
EOF
# get development essentials for nvidia driver
if [ $offer == "ubuntuserver" ]; then
install_packages $offer build-essential
if [ "$offer" == "ubuntuserver" ]; then
install_packages "$offer" build-essential
elif [[ $offer == centos* ]]; then
local kernel_devel_package="kernel-devel-$(uname -r)"
if [[ $offer == "centos-hpc" ]] || [[ $sku == "7.4" ]]; then
install_packages $offer $kernel_devel_package
elif [ $sku == "7.3" ]; then
download_file http://vault.centos.org/7.3.1611/updates/x86_64/Packages/${kernel_devel_package}.rpm
install_local_packages $offer ${kernel_devel_package}.rpm
local kernel_devel_package
kernel_devel_package="kernel-devel-$(uname -r)"
if [[ $offer == "centos-hpc" ]] || [[ "$sku" == "7.4" ]]; then
install_packages "$offer" "$kernel_devel_package"
elif [ "$sku" == "7.3" ]; then
download_file http://vault.centos.org/7.3.1611/updates/x86_64/Packages/"${kernel_devel_package}".rpm
install_local_packages "$offer" "${kernel_devel_package}".rpm
else
log ERROR "CentOS $sku not supported for GPU"
exit 1
fi
install_packages $offer gcc binutils make
install_packages "$offer" gcc binutils make
fi
# get additional dependency if NV-series VMs
if [ $is_viz == "True" ]; then
if [ $offer == "ubuntuserver" ]; then
install_packages $offer xserver-xorg-dev
if [ "$is_viz" == "True" ]; then
if [ "$offer" == "ubuntuserver" ]; then
install_packages "$offer" xserver-xorg-dev
elif [[ $offer == centos* ]]; then
install_packages $offer xorg-x11-server-devel
install_packages "$offer" xorg-x11-server-devel
fi
fi
# install driver
./$nvdriver -s
./"${nvdriver}" -s
# add flag to config for GRID driver
if [ $is_viz == "True" ]; then
if [ "$is_viz" == "True" ]; then
cp /etc/nvidia/gridd.conf.template /etc/nvidia/gridd.conf
echo "IgnoreSP=TRUE" >> /etc/nvidia/gridd.conf
fi
@ -271,15 +276,15 @@ EOF
nvidia-persistenced --user root
nvidia-smi -pm 1
# install nvidia-docker
if [ $offer == "ubuntuserver" ]; then
add_repo $offer https://nvidia.github.io/nvidia-docker/gpgkey
if [ "$offer" == "ubuntuserver" ]; then
add_repo "$offer" https://nvidia.github.io/nvidia-docker/gpgkey
curl -fSsL https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64/nvidia-docker.list | \
tee /etc/apt/sources.list.d/nvidia-docker.list
elif [[ $offer == centos* ]]; then
add_repo $offer https://nvidia.github.io/nvidia-docker/centos7/x86_64/nvidia-docker.repo
add_repo "$offer" https://nvidia.github.io/nvidia-docker/centos7/x86_64/nvidia-docker.repo
fi
refresh_package_index $offer
install_packages $offer nvidia-docker2
refresh_package_index "$offer"
install_packages "$offer" nvidia-docker2
# merge daemon configs if necessary
set +e
grep \"graph\" /etc/docker/daemon.json
@ -287,7 +292,7 @@ EOF
set -e
if [ $rc -ne 0 ]; then
log DEBUG "Graph root not detected in Docker daemon.json"
if [ $offer == "ubuntuserver" ]; then
if [ "$offer" == "ubuntuserver" ]; then
python -c "import json;a=json.load(open('/etc/docker/daemon.json.dpkg-old'));b=json.load(open('/etc/docker/daemon.json'));a.update(b);f=open('/etc/docker/daemon.json','w');json.dump(a,f);f.close();"
rm -f /etc/docker/daemon.json.dpkg-old
elif [[ $offer == centos* ]]; then
@ -298,7 +303,8 @@ EOF
fi
pkill -SIGHUP dockerd
nvidia-docker version
local rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
local rootdir
rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
log DEBUG "Graph root: $rootdir"
nvidia-smi
}
@ -315,21 +321,21 @@ mount_azureblob_container() {
log INFO "Mounting Azure Blob Containers"
local offer=$1
local sku=$2
if [ $offer == "ubuntuserver" ]; then
if [ "$offer" == "ubuntuserver" ]; then
debfile=packages-microsoft-prod.deb
if [ ! -f ${debfile} ]; then
download_file https://packages.microsoft.com/config/ubuntu/16.04/${debfile}
install_local_packages $offer ${debfile}
refresh_package_index $offer
install_packages $offer blobfuse
install_local_packages "$offer" ${debfile}
refresh_package_index "$offer"
install_packages "$offer" blobfuse
fi
elif [[ $offer == "rhel" ]] || [[ $offer == centos* ]]; then
elif [[ "$offer" == "rhel" ]] || [[ $offer == centos* ]]; then
rpmfile=packages-microsoft-prod.rpm
if [ ! -f ${rpmfile} ]; then
download_file https://packages.microsoft.com/config/rhel/7/${rpmfile}
install_local_packages $offer ${rpmfile}
refresh_package_index $offer
install_packages $offer blobfuse
install_local_packages "$offer" ${rpmfile}
refresh_package_index "$offer"
install_packages "$offer" blobfuse
fi
else
echo "ERROR: unsupported distribution for Azure blob: $offer $sku"
@ -339,8 +345,8 @@ mount_azureblob_container() {
./azureblob-mount.sh
chmod 700 azureblob-mount.sh
chown root:root azureblob-mount.sh
chmod 600 *.cfg
chown root:root *.cfg
chmod 600 ./*.cfg
chown root:root ./*.cfg
}
download_file() {
@ -348,8 +354,7 @@ download_file() {
local retries=10
set +e
while [ $retries -gt 0 ]; do
curl -fSsLO $1
if [ $? -eq 0 ]; then
if curl -fSsLO "$1"; then
break
fi
retries=$((retries-1))
@ -367,15 +372,19 @@ add_repo() {
local url=$2
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
curl -fSsL $url | apt-key add -
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then
yum-config-manager --add-repo $url
if [[ "$offer" == "ubuntuserver" ]] || [[ "$offer" == "debian" ]]; then
curl -fSsL "$url" | apt-key add -
rc=$?
elif [[ $offer == centos* ]] || [[ "$offer" == "rhel" ]] || [[ "$offer" == "oracle-linux" ]]; then
yum-config-manager --add-repo "$url"
rc=$?
elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
zypper addrepo $url
zypper addrepo "$url"
rc=$?
fi
if [ $? -eq 0 ]; then
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
@ -392,15 +401,19 @@ refresh_package_index() {
local offer=$1
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
if [[ "$offer" == "ubuntuserver" ]] || [[ "$offer" == "debian" ]]; then
apt-get update
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then
rc=$?
elif [[ $offer == centos* ]] || [[ "$offer" == "rhel" ]] || [[ "$offer" == "oracle-linux" ]]; then
yum makecache -y fast
rc=$?
elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
zypper -n --gpg-auto-import-keys ref
rc=$?
fi
if [ $? -eq 0 ]; then
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
@ -418,15 +431,19 @@ install_packages() {
shift
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends $*
elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then
yum install -y $*
if [[ "$offer" == "ubuntuserver" ]] || [[ "$offer" == "debian" ]]; then
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends "$@"
rc=$?
elif [[ $offer == centos* ]] || [[ "$offer" == "rhel" ]] || [[ "$offer" == "oracle-linux" ]]; then
yum install -y "$@"
rc=$?
elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
zypper -n in $*
zypper -n in "$@"
rc=$?
fi
if [ $? -eq 0 ]; then
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
@ -444,16 +461,19 @@ install_local_packages() {
shift
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [[ $offer == "ubuntuserver" ]] || [[ $offer == "debian" ]]; then
dpkg -i $*
dpkg -i "$@"
rc=$?
else
rpm -Uvh --nodeps $*
rpm -Uvh --nodeps "$@"
rc=$?
fi
if [ $? -eq 0 ]; then
if [ $rc -eq 0 ]; then
break
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not install local packages: $*"
exit 1
@ -469,7 +489,8 @@ docker_pull_image() {
set +e
local retries=60
while [ $retries -gt 0 ]; do
local pull_out=$(docker pull $image 2>&1)
local pull_out
pull_out=$(docker pull "$image" 2>&1)
local rc=$?
if [ $rc -eq 0 ]; then
echo "$pull_out"
@ -477,18 +498,24 @@ docker_pull_image() {
fi
# non-zero exit code: check if pull output has toomanyrequests,
# connection resets, or image config error
if [[ ! -z "$(grep 'toomanyrequests' <<<$pull_out)" ]] || [[ ! -z "$(grep 'connection reset by peer' <<<$pull_out)" ]] || [[ ! -z "$(grep 'error pulling image configuration' <<<$pull_out)" ]]; then
local tmr
tmr=$(grep 'toomanyrequests' <<<"$pull_out")
local crbp
crbp=$(grep 'connection reset by peer' <<<"$pull_out")
local epic
epic=$(grep 'error pulling image configuration' <<<"$pull_out")
if [[ ! -z "$tmr" ]] || [[ ! -z "$crbp" ]] || [[ ! -z "$epic" ]]; then
log WARNING "will retry: $pull_out"
else
log ERROR "$pull_out"
exit $rc
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -le 0 ]; then
log ERROR "Could not pull docker image: $image"
exit $rc
fi
sleep $[($RANDOM % 5) + 1]s
sleep $((RANDOM % 5 + 1))s
done
set -e
}
@ -499,12 +526,12 @@ singularity_setup() {
shift
local sku=$1
shift
if [ $offer == "ubuntu" ]; then
if [ "$offer" == "ubuntu" ]; then
if [[ $sku != 16.04* ]]; then
log WARNING "Singularity not supported on $offer $sku"
fi
singularity_basedir=/mnt/singularity
elif [[ $offer == "centos" ]] || [[ $offer == "rhel" ]]; then
elif [[ "$offer" == "centos" ]] || [[ "$offer" == "rhel" ]]; then
if [[ $sku != 7* ]]; then
log WARNING "Singularity not supported on $offer $sku"
return
@ -561,22 +588,23 @@ process_fstab_entry() {
local mountpoint=$2
local fstab_entry=$3
log INFO "Creating host directory for $desc at $mountpoint"
mkdir -p $mountpoint
chmod 777 $mountpoint
mkdir -p "$mountpoint"
chmod 777 "$mountpoint"
echo "INFO: Adding $mountpoint to fstab"
echo $fstab_entry >> /etc/fstab
echo "$fstab_entry" >> /etc/fstab
tail -n1 /etc/fstab
echo "INFO: Mounting $mountpoint"
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
set +e
while :
do
mount $mountpoint
if [ $? -eq 0 ]; then
if mount "$mountpoint"; then
break
else
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 5 minutes of attempts
if [ $DIFF -ge 5 ]; then
echo "ERROR: Could not mount $desc on $mountpoint"
@ -594,8 +622,7 @@ check_for_docker_host_engine() {
# start docker service
systemctl start docker.service
systemctl status docker.service
docker version
if [ $? -ne 0 ]; then
if ! docker version; then
log ERROR "Docker not installed"
exit 1
fi
@ -629,14 +656,14 @@ check_for_buggy_ntfs_mount
save_startup_to_volatile
# set python env vars
LC_ALL=en_US.UTF-8
PYTHONASYNCIODEBUG=1
export LC_ALL=en_US.UTF-8
export PYTHONASYNCIODEBUG=1
# store node prep start
if command -v python3 > /dev/null 2>&1; then
npstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
npstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
else
npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
npstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
fi
# set node prep status files
@ -644,17 +671,17 @@ nodeprepfinished=$AZ_BATCH_NODE_SHARED_DIR/.node_prep_finished
cascadefailed=$AZ_BATCH_NODE_SHARED_DIR/.cascade_failed
# decrypt encrypted creds
if [ ! -z $encrypted ]; then
if [ ! -z "$encrypted" ]; then
# convert pfx to pem
pfxfile=$AZ_BATCH_CERTIFICATES_DIR/sha1-$encrypted.pfx
privatekey=$AZ_BATCH_CERTIFICATES_DIR/key.pem
openssl pkcs12 -in $pfxfile -out $privatekey -nodes -password file:$pfxfile.pw
openssl pkcs12 -in "$pfxfile" -out "$privatekey" -nodes -password file:"${pfxfile}".pw
# remove pfx-related files
rm -f $pfxfile $pfxfile.pw
rm -f "$pfxfile" "${pfxfile}".pw
# decrypt creds
SHIPYARD_STORAGE_ENV=`echo $SHIPYARD_STORAGE_ENV | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
SHIPYARD_STORAGE_ENV=$(echo "$SHIPYARD_STORAGE_ENV" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
if [ ! -z ${DOCKER_LOGIN_USERNAME+x} ]; then
DOCKER_LOGIN_PASSWORD=`echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
fi
fi
@ -666,7 +693,7 @@ if [ $p2penabled -eq 1 ]; then
fi
# create shared mount points
mkdir -p $MOUNTS_PATH
mkdir -p "$MOUNTS_PATH"
# mount azure resources (this must be done every boot)
if [ $azurefile -eq 1 ]; then
@ -677,19 +704,19 @@ if [ $azureblob -eq 1 ]; then
fi
# check if we're coming up from a reboot
if [ -f $cascadefailed ]; then
if [ -f "$cascadefailed" ]; then
log ERROR "$cascadefailed file exists, assuming cascade failure during node prep"
exit 1
elif [ -f $nodeprepfinished ]; then
elif [ -f "$nodeprepfinished" ]; then
# mount any storage clusters
if [ ! -z $sc_args ]; then
if [ ! -z "$sc_args" ]; then
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
IFS='#' read -ra fstabs <<< "$fstab_mounts"
i=0
for sc_arg in ${sc_args[@]}; do
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
mount $MOUNTS_PATH/${sc[1]}
mount "${MOUNTS_PATH}"/"${sc[1]}"
done
fi
# mount any custom mounts
@ -699,13 +726,13 @@ elif [ -f $nodeprepfinished ]; then
# eval and split fstab var to expand vars
fstab_entry=$(eval echo "$fstab")
IFS=' ' read -ra parts <<< "$fstab_entry"
mount ${parts[1]}
mount "${parts[1]}"
done
fi
# start docker engine
check_for_docker_host_engine
# ensure nvidia software has been installed
if [ ! -z $gpu ]; then
if [ ! -z "$gpu" ]; then
ensure_nvidia_driver_installed $offer $sku
fi
log INFO "$nodeprepfinished file exists, assuming successful completion of node prep"
@ -713,10 +740,10 @@ elif [ -f $nodeprepfinished ]; then
fi
# get ip address of eth0
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
# one-time setup
if [ ! -f $nodeprepfinished ]; then
if [ ! -f "$nodeprepfinished" ]; then
# set up hpn-ssh
if [ $hpnssh -eq 1 ]; then
./shipyard_hpnssh.sh $offer $sku
@ -747,8 +774,8 @@ EOF
fi
# install docker host engine
if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
DEBIAN_FRONTEND=noninteractive
if [ "$offer" == "ubuntuserver" ] || [ "$offer" == "debian" ]; then
export DEBIAN_FRONTEND=noninteractive
# name will be appended to dockerversion
dockerversion=18.03.0~ce-0~
name=
@ -802,7 +829,7 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
log ERROR "unsupported sku: $sku for offer: $offer"
exit 1
fi
if [ ! -z $gpu ] && [ $name != "ubuntu-xenial" ]; then
if [ ! -z "$gpu" ] && [ "$name" != "ubuntu-xenial" ]; then
log ERROR "gpu unsupported on this sku: $sku for offer $offer"
exit 1
fi
@ -818,8 +845,8 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
refresh_package_index $offer
# install required software first
install_packages $offer apt-transport-https ca-certificates curl gnupg2 software-properties-common
if [ $name == "ubuntu-trusty" ]; then
install_packages $offer linux-image-extra-$(uname -r) linux-image-extra-virtual
if [ "$name" == "ubuntu-trusty" ]; then
install_packages $offer linux-image-extra-"$(uname -r)" linux-image-extra-virtual
fi
# add gpgkey for repo
add_repo $offer $gpgkey
@ -851,11 +878,11 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
$srvstatus
docker info
# install gpu related items
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
if [ ! -z "$gpu" ] && [ ! -f "$nodeprepfinished" ]; then
install_nvidia_software $offer $sku
fi
# set up glusterfs
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
if [ $gluster_on_compute -eq 1 ] && [ ! -f "$nodeprepfinished" ]; then
install_packages $offer glusterfs-server
if [[ ! -z $gfsenable ]]; then
$gfsenable
@ -865,13 +892,13 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
mkdir -p /mnt/gluster
fi
# install dependencies for storage cluster mount
if [ ! -z $sc_args ]; then
for sc_arg in ${sc_args[@]}; do
if [ ! -z "$sc_args" ]; then
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
server_type=${sc[0]}
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
install_packages $offer nfs-common nfs4-acl-tools
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
install_packages $offer glusterfs-client acl
else
log ERROR "Unknown file server type ${sc[0]} for ${sc[1]}"
@ -954,11 +981,11 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
$srvstatus
docker info
# install gpu related items
if [ ! -z $gpu ] && [ ! -f $nodeprepfinished ]; then
if [ ! -z "$gpu" ] && [ ! -f "$nodeprepfinished" ]; then
install_nvidia_software $offer $sku
fi
# set up glusterfs
if [ $gluster_on_compute -eq 1 ] && [ ! -f $nodeprepfinished ]; then
if [ $gluster_on_compute -eq 1 ] && [ ! -f "$nodeprepfinished" ]; then
install_packages $offer epel-release centos-release-gluster38
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
install_packages $offer --enablerepo=centos-gluster38,epel glusterfs-server
@ -969,16 +996,16 @@ elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-l
mkdir -p /mnt/resource/gluster
fi
# install dependencies for storage cluster mount
if [ ! -z $sc_args ]; then
for sc_arg in ${sc_args[@]}; do
if [ ! -z "$sc_args" ]; then
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
server_type=${sc[0]}
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
install_packages $offer nfs-utils nfs4-acl-tools
systemctl daemon-reload
$rpcbindenable
systemctl start rpcbind
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
install_packages $offer epel-release centos-release-gluster38
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
install_packages $offer --enablerepo=centos-gluster38,epel glusterfs-server acl
@ -996,7 +1023,7 @@ elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
exit 1
fi
# gpu is not supported on these offers
if [ ! -z $gpu ]; then
if [ ! -z "$gpu" ]; then
log ERROR "gpu unsupported on this sku: $sku for offer $offer"
exit 1
fi
@ -1004,7 +1031,7 @@ elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
if [ $networkopt -eq 1 ]; then
sysctl -p
fi
if [ ! -f $nodeprepfinished ]; then
if [ ! -f "$nodeprepfinished" ]; then
# add Virtualization:containers repo for recent docker builds
repodir=
if [[ $offer == opensuse* ]]; then
@ -1062,16 +1089,16 @@ elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then
mkdir -p /mnt/resource/gluster
fi
# install dependencies for storage cluster mount
if [ ! -z $sc_args ]; then
for sc_arg in ${sc_args[@]}; do
if [ ! -z "$sc_args" ]; then
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
server_type=${sc[0]}
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
install_packages $offer nfs-client nfs4-acl-tools
systemctl daemon-reload
systemctl enable rpcbind
systemctl start rpcbind
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
add_repo $offer http://download.opensuse.org/repositories/filesystems/$repodir/filesystems.repo
zypper -n --gpg-auto-import-keys ref
install_packages $offer glusterfs acl
@ -1099,11 +1126,11 @@ else
fi
# retrieve docker images related to data movement
docker_pull_image alfpark/blobxfer:$blobxferversion
docker_pull_image alfpark/batch-shipyard:${version}-cargo
docker_pull_image alfpark/blobxfer:"${blobxferversion}"
docker_pull_image alfpark/batch-shipyard:"${version}"-cargo
# set up singularity
singularity_setup $DISTRIB_ID $DISTRIB_RELEASE
singularity_setup "$DISTRIB_ID" "$DISTRIB_RELEASE"
# login to registry servers (do not specify -e as creds have been decrypted)
./registry_login.sh
@ -1112,16 +1139,16 @@ if [ -f singularity-registry-login ]; then
fi
# mount any storage clusters
if [ ! -z $sc_args ]; then
if [ ! -z "$sc_args" ]; then
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
IFS='#' read -ra fstabs <<< "$fstab_mounts"
i=0
for sc_arg in ${sc_args[@]}; do
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
fstab_entry="${fstabs[$i]}"
process_fstab_entry "$sc_arg" "$MOUNTS_PATH/${sc[1]}" "$fstab_entry"
i=$(($i + 1))
i=$((i + 1))
done
fi
@ -1137,9 +1164,9 @@ if [ ! -z "$SHIPYARD_CUSTOM_MOUNTS_FSTAB" ]; then
fi
# touch node prep finished file to preserve idempotency
touch $nodeprepfinished
touch "$nodeprepfinished"
# touch cascade failed file, this will be removed once cascade is successful
touch $cascadefailed
touch "$cascadefailed"
# execute cascade
set +e
@ -1154,9 +1181,9 @@ if [ $cascadecontainer -eq 1 ]; then
fi
# store docker cascade start
if command -v python3 > /dev/null 2>&1; then
drpstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
drpstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
else
drpstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
drpstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
fi
# create env file
envfile=.cascade_envfile
@ -1168,14 +1195,14 @@ sku=$sku
npstart=$npstart
drpstart=$drpstart
p2p=$p2p
`env | grep SHIPYARD_`
`env | grep AZ_BATCH_`
`env | grep DOCKER_LOGIN_`
`env | grep SINGULARITY_`
$(env | grep SHIPYARD_)
$(env | grep AZ_BATCH_)
$(env | grep DOCKER_LOGIN_)
$(env | grep SINGULARITY_)
EOF
chmod 600 $envfile
# pull image
docker_pull_image alfpark/batch-shipyard:${version}-cascade
docker_pull_image alfpark/batch-shipyard:"${version}"-cascade
# set singularity options
singularity_binds=
if [ ! -z $singularity_basedir ]; then
@ -1183,30 +1210,31 @@ EOF
-v $singularity_basedir:$singularity_basedir \
-v $singularity_basedir/mnt:/var/lib/singularity/mnt"
fi
log DEBUG "Starting Cascade"
# launch container
log DEBUG "Starting Cascade"
# shellcheck disable=SC2086
docker run $detached --net=host --env-file $envfile \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /etc/passwd:/etc/passwd:ro \
-v /etc/group:/etc/group:ro \
$singularity_binds \
-v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR \
-w $AZ_BATCH_TASK_WORKING_DIR \
${singularity_binds} \
-v "$AZ_BATCH_NODE_ROOT_DIR":"$AZ_BATCH_NODE_ROOT_DIR" \
-w "$AZ_BATCH_TASK_WORKING_DIR" \
-p 6881-6891:6881-6891 -p 6881-6891:6881-6891/udp \
alfpark/batch-shipyard:${version}-cascade &
alfpark/batch-shipyard:"${version}"-cascade &
cascadepid=$!
else
# add timings
if [ ! -z ${SHIPYARD_TIMING+x} ]; then
# backfill node prep start
./perf.py nodeprep start $prefix --ts $npstart --message "offer=$offer,sku=$sku"
./perf.py nodeprep start "$prefix" --ts "$npstart" --message "offer=$offer,sku=$sku"
# mark node prep finished
./perf.py nodeprep end $prefix
./perf.py nodeprep end "$prefix"
# mark start cascade
./perf.py cascade start $prefix
./perf.py cascade start "$prefix"
fi
log DEBUG "Starting Cascade"
./cascade.py $p2p --ipaddress $ipaddress $prefix &
./cascade.py "$p2p" --ipaddress "$ipaddress" "$prefix" &
cascadepid=$!
fi
@ -1216,20 +1244,20 @@ if [ $p2penabled -eq 0 ]; then
rc=$?
if [ $rc -ne 0 ]; then
log ERROR "cascade exited with non-zero exit code: $rc"
rm -f $nodeprepfinished
rm -f "$nodeprepfinished"
exit $rc
fi
fi
set -e
# remove cascade failed file
rm -f $cascadefailed
rm -f "$cascadefailed"
# block for images if necessary
$AZ_BATCH_TASK_WORKING_DIR/wait_for_images.sh $block
"${AZ_BATCH_TASK_WORKING_DIR}"/wait_for_images.sh "$block"
# clean up cascade env file if block
if [ ! -z $block ]; then
if [ ! -z "$block" ]; then
if [ $cascadecontainer -eq 1 ]; then
rm -f $envfile
fi

Просмотреть файл

@ -1,5 +1,7 @@
#!/usr/bin/env bash
# shellcheck disable=SC1091
set -e
set -o pipefail
@ -74,7 +76,7 @@ while getopts "h?abcef:m:np:t:v:x:" opt; do
t)
p2p=${OPTARG,,}
IFS=':' read -ra p2pflags <<< "$p2p"
if [ ${p2pflags[0]} == "true" ]; then
if [ "${p2pflags[0]}" == "true" ]; then
p2penabled=1
else
p2penabled=0
@ -94,8 +96,7 @@ shift $((OPTIND-1))
check_for_buggy_ntfs_mount() {
# Check to ensure sdb1 mount is not mounted as ntfs
set +e
mount | grep /dev/sdb1 | grep fuseblk
if [ $? -eq 0 ]; then
if mount | grep /dev/sdb1 | grep fuseblk; then
log ERROR "/dev/sdb1 temp disk is mounted as fuseblk/ntfs"
exit 1
fi
@ -104,7 +105,7 @@ check_for_buggy_ntfs_mount() {
# Best-effort creation of the .save marker file under
# $AZ_BATCH_NODE_ROOT_DIR/volatile/startup. Takes no arguments.
save_startup_to_volatile() {
# turn errexit off so a failing touch does not abort the script
set +e
# NOTE(review): the two touch lines below look like the before/after pair of
# this diff hunk (unquoted vs. quoted expansion of the same path) - confirm
# which one the actual script keeps
touch $AZ_BATCH_NODE_ROOT_DIR/volatile/startup/.save
touch "${AZ_BATCH_NODE_ROOT_DIR}"/volatile/startup/.save
# turn errexit back on for the code that follows
set -e
}
@ -127,7 +128,7 @@ net.ipv4.tcp_abort_on_overflow=1
net.ipv4.route.flush=1
EOF
fi
if [ "$1" == "ubuntu" ] && [ "$2" == 14.04* ]; then
if [[ "$1" == "ubuntu" ]] && [[ "$2" == 14.04* ]]; then
service procps start
else
service procps reload
@ -139,7 +140,7 @@ blacklist_kernel_upgrade() {
shift
local sku=$1
shift
if [ $offer != "ubuntu" ]; then
if [ "$offer" != "ubuntu" ]; then
log DEBUG "No kernel upgrade blacklist required on $offer $sku"
return
fi
@ -148,15 +149,14 @@ blacklist_kernel_upgrade() {
local rc=$?
set -e
if [ $rc -ne 0 ]; then
sed -i "/^Unattended-Upgrade::Package-Blacklist {/alinux-azure\nlinux-cloud-tools-azure\nlinux-headers-azure\nlinux-image-azure\nlinux-tools-azure" /etc/apt/apt.conf.d/50unattended-upgrades
sed -i "/^Unattended-Upgrade::Package-Blacklist {/a\"linux-azure\";\\n\"linux-cloud-tools-azure\";\\n\"linux-headers-azure\";\\n\"linux-image-azure\";\\n\"linux-tools-azure\";" /etc/apt/apt.conf.d/50unattended-upgrades
log INFO "Added linux-azure to package blacklist for unattended upgrades"
fi
}
check_for_nvidia_docker() {
set +e
nvidia-docker version
if [ $? -ne 0 ]; then
if ! nvidia-docker version; then
log ERROR "nvidia-docker2 not installed"
exit 1
fi
@ -165,7 +165,8 @@ check_for_nvidia_docker() {
check_for_nvidia_driver() {
set +e
local out=$(lsmod)
local out
out=$(lsmod)
echo "$out" | grep -i nvidia > /dev/null
local rc=$?
set -e
@ -182,7 +183,8 @@ check_for_nvidia() {
log INFO "Checking for Nvidia Hardware"
# first check for card
set +e
local out=$(lspci)
local out
out=$(lspci)
echo "$out" | grep -i nvidia > /dev/null
local rc=$?
set -e
@ -190,7 +192,7 @@ check_for_nvidia() {
if [ $rc -ne 0 ]; then
log INFO "No Nvidia card(s) detected!"
else
blacklist_kernel_upgrade $1 $2
blacklist_kernel_upgrade "$1" "$2"
check_for_nvidia_driver
# enable persistence mode
nvidia-smi -pm 1
@ -200,7 +202,8 @@ check_for_nvidia() {
check_docker_root_dir() {
set +e
local rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
local rootdir
rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
set -e
log DEBUG "Graph root: $rootdir"
if [ -z "$rootdir" ]; then
@ -217,8 +220,7 @@ check_for_docker_host_engine() {
# start docker service
systemctl start docker.service
systemctl status docker.service
docker version
if [ $? -ne 0 ]; then
if ! docker version; then
log ERROR "Docker not installed"
exit 1
fi
@ -241,16 +243,16 @@ check_for_glusterfs_on_compute() {
check_for_storage_cluster_software() {
local rc=0
if [ ! -z $sc_args ]; then
for sc_arg in ${sc_args[@]}; do
if [ ! -z "$sc_args" ]; then
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
local server_type=${sc[0]}
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
set +e
mount.nfs4 -V
local rc=$?
set -e
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
set +e
glusterfs -V
local rc=$?
@ -281,8 +283,8 @@ mount_azureblob_container() {
./azureblob-mount.sh
chmod 700 azureblob-mount.sh
chown root:root azureblob-mount.sh
chmod 600 *.cfg
chown root:root *.cfg
chmod 600 ./*.cfg
chown root:root ./*.cfg
}
docker_pull_image() {
@ -291,7 +293,8 @@ docker_pull_image() {
set +e
local retries=60
while [ $retries -gt 0 ]; do
local pull_out=$(docker pull $image 2>&1)
local pull_out
pull_out=$(docker pull "$image" 2>&1)
local rc=$?
if [ $rc -eq 0 ]; then
echo "$pull_out"
@ -299,18 +302,24 @@ docker_pull_image() {
fi
# non-zero exit code: check if pull output has toomanyrequests,
# connection resets, or image config error
if [[ ! -z "$(grep 'toomanyrequests' <<<$pull_out)" ]] || [[ ! -z "$(grep 'connection reset by peer' <<<$pull_out)" ]] || [[ ! -z "$(grep 'error pulling image configuration' <<<$pull_out)" ]]; then
local tmr
tmr=$(grep 'toomanyrequests' <<<"$pull_out")
local crbp
crbp=$(grep 'connection reset by peer' <<<"$pull_out")
local epic
epic=$(grep 'error pulling image configuration' <<<"$pull_out")
if [[ ! -z "$tmr" ]] || [[ ! -z "$crbp" ]] || [[ ! -z "$epic" ]]; then
log WARNING "will retry: $pull_out"
else
log ERROR "$pull_out"
exit $rc
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -le 0 ]; then
log ERROR "Could not pull docker image: $image"
exit $rc
fi
sleep $[($RANDOM % 5) + 1]s
sleep $((RANDOM % 5 + 1))s
done
set -e
}
@ -321,7 +330,7 @@ singularity_setup() {
shift
local sku=$1
shift
if [ $offer == "ubuntu" ]; then
if [ "$offer" == "ubuntu" ]; then
if [[ $sku != 16.04* ]]; then
log WARNING "Singularity not supported on $offer $sku"
return
@ -384,22 +393,23 @@ process_fstab_entry() {
local mountpoint=$2
local fstab_entry=$3
log INFO "Creating host directory for $desc at $mountpoint"
mkdir -p $mountpoint
chmod 777 $mountpoint
mkdir -p "$mountpoint"
chmod 777 "$mountpoint"
log INFO "Adding $mountpoint to fstab"
echo $fstab_entry >> /etc/fstab
echo "$fstab_entry" >> /etc/fstab
tail -n1 /etc/fstab
log INFO "Mounting $mountpoint"
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
set +e
while :
do
mount $mountpoint
if [ $? -eq 0 ]; then
if mount "$mountpoint"; then
break
else
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 5 minutes of attempts
if [ $DIFF -ge 5 ]; then
log ERROR "Could not mount $desc on $mountpoint"
@ -458,14 +468,14 @@ check_for_buggy_ntfs_mount
save_startup_to_volatile
# set python env vars
LC_ALL=en_US.UTF-8
PYTHONASYNCIODEBUG=1
export LC_ALL=en_US.UTF-8
export PYTHONASYNCIODEBUG=1
# store node prep start
if command -v python3 > /dev/null 2>&1; then
npstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
npstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
else
npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
npstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
fi
# set node prep status files
@ -473,20 +483,20 @@ nodeprepfinished=$AZ_BATCH_NODE_SHARED_DIR/.node_prep_finished
cascadefailed=$AZ_BATCH_NODE_SHARED_DIR/.cascade_failed
# create shared mount points
mkdir -p $MOUNTS_PATH
mkdir -p "$MOUNTS_PATH"
# decrypt encrypted creds
if [ ! -z $encrypted ]; then
if [ ! -z "$encrypted" ]; then
# convert pfx to pem
pfxfile=$AZ_BATCH_CERTIFICATES_DIR/sha1-$encrypted.pfx
privatekey=$AZ_BATCH_CERTIFICATES_DIR/key.pem
openssl pkcs12 -in $pfxfile -out $privatekey -nodes -password file:$pfxfile.pw
openssl pkcs12 -in "$pfxfile" -out "$privatekey" -nodes -password file:"${pfxfile}".pw
# remove pfx-related files
rm -f $pfxfile $pfxfile.pw
rm -f "$pfxfile" "${pfxfile}".pw
# decrypt creds
SHIPYARD_STORAGE_ENV=`echo $SHIPYARD_STORAGE_ENV | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
SHIPYARD_STORAGE_ENV=$(echo "$SHIPYARD_STORAGE_ENV" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
if [ ! -z ${DOCKER_LOGIN_USERNAME+x} ]; then
DOCKER_LOGIN_PASSWORD=`echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
fi
fi
@ -499,33 +509,33 @@ fi
# check for docker host engine
check_for_docker_host_engine
check_docker_root_dir $DISTRIB_ID
check_docker_root_dir "$DISTRIB_ID"
# check for nvidia card/driver/docker
check_for_nvidia $DISTRIB_ID $DISTRIB_RELEASE
check_for_nvidia "$DISTRIB_ID" "$DISTRIB_RELEASE"
# mount azure resources (this must be done every boot)
if [ $azurefile -eq 1 ]; then
mount_azurefile_share $DISTRIB_ID $DISTRIB_RELEASE
mount_azurefile_share "$DISTRIB_ID" "$DISTRIB_RELEASE"
fi
if [ $azureblob -eq 1 ]; then
mount_azureblob_container $DISTRIB_ID $DISTRIB_RELEASE
mount_azureblob_container "$DISTRIB_ID" "$DISTRIB_RELEASE"
fi
# check if we're coming up from a reboot
if [ -f $cascadefailed ]; then
if [ -f "$cascadefailed" ]; then
log ERROR "$cascadefailed file exists, assuming cascade failure during node prep"
exit 1
elif [ -f $nodeprepfinished ]; then
elif [ -f "$nodeprepfinished" ]; then
# mount any storage clusters
if [ ! -z $sc_args ]; then
if [ ! -z "$sc_args" ]; then
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
IFS='#' read -ra fstabs <<< "$fstab_mounts"
i=0
for sc_arg in ${sc_args[@]}; do
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
mount $MOUNTS_PATH/${sc[1]}
mount "${MOUNTS_PATH}"/"${sc[1]}"
done
fi
# mount any custom mounts
@ -535,7 +545,7 @@ elif [ -f $nodeprepfinished ]; then
# eval and split fstab var to expand vars
fstab_entry=$(eval echo "$fstab")
IFS=' ' read -ra parts <<< "$fstab_entry"
mount ${parts[1]}
mount "${parts[1]}"
done
fi
log INFO "$nodeprepfinished file exists, assuming successful completion of node prep"
@ -543,13 +553,13 @@ elif [ -f $nodeprepfinished ]; then
fi
# get ip address of eth0
ipaddress=`ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1`
ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
# one-time setup
if [ $networkopt -eq 1 ]; then
# do not fail script if this function fails
set +e
optimize_tcp_network_settings $DISTRIB_ID $DISTRIB_RELEASE
optimize_tcp_network_settings "$DISTRIB_ID" "$DISTRIB_RELEASE"
set -e
# set sudoers to not require tty
sed -i 's/^Defaults[ ]*requiretty/# Defaults requiretty/g' /etc/sudoers
@ -564,16 +574,16 @@ fi
check_for_storage_cluster_software
# mount any storage clusters
if [ ! -z $sc_args ]; then
if [ ! -z "$sc_args" ]; then
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
IFS='#' read -ra fstabs <<< "$fstab_mounts"
i=0
for sc_arg in ${sc_args[@]}; do
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
fstab_entry="${fstabs[$i]}"
process_fstab_entry "$sc_arg" "$MOUNTS_PATH/${sc[1]}" "$fstab_entry"
i=$(($i + 1))
i=$((i + 1))
done
fi
@ -589,11 +599,11 @@ if [ ! -z "$SHIPYARD_CUSTOM_MOUNTS_FSTAB" ]; then
fi
# retrieve docker images related to data movement
docker_pull_image alfpark/blobxfer:$blobxferversion
docker_pull_image alfpark/batch-shipyard:${version}-cargo
docker_pull_image alfpark/blobxfer:"${blobxferversion}"
docker_pull_image alfpark/batch-shipyard:"${version}"-cargo
# set up singularity
singularity_setup $DISTRIB_ID $DISTRIB_RELEASE
singularity_setup "$DISTRIB_ID" "$DISTRIB_RELEASE"
# login to registry servers (do not specify -e as creds have been decrypted)
./registry_login.sh
@ -602,9 +612,9 @@ if [ -f singularity-registry-login ]; then
fi
# touch node prep finished file to preserve idempotency
touch $nodeprepfinished
touch "$nodeprepfinished"
# touch cascade failed file, this will be removed once cascade is successful
touch $cascadefailed
touch "$cascadefailed"
# execute cascade
set +e
@ -618,9 +628,9 @@ else
fi
# store docker cascade start
if command -v python3 > /dev/null 2>&1; then
drpstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
drpstart=$(python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())')
else
drpstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
drpstart=$(python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))')
fi
# create env file
envfile=.cascade_envfile
@ -632,14 +642,14 @@ sku=$sku
npstart=$npstart
drpstart=$drpstart
p2p=$p2p
`env | grep SHIPYARD_`
`env | grep AZ_BATCH_`
`env | grep DOCKER_LOGIN_`
`env | grep SINGULARITY_`
$(env | grep SHIPYARD_)
$(env | grep AZ_BATCH_)
$(env | grep DOCKER_LOGIN_)
$(env | grep SINGULARITY_)
EOF
chmod 600 $envfile
# pull image
docker_pull_image alfpark/batch-shipyard:${version}-cascade
docker_pull_image alfpark/batch-shipyard:"${version}"-cascade
# set singularity options
singularity_binds=
if [ ! -z $singularity_basedir ]; then
@ -649,15 +659,16 @@ if [ ! -z $singularity_basedir ]; then
fi
# launch container
log DEBUG "Starting Cascade"
# shellcheck disable=SC2086
docker run $detached --net=host --env-file $envfile \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /etc/passwd:/etc/passwd:ro \
-v /etc/group:/etc/group:ro \
$singularity_binds \
-v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR \
-w $AZ_BATCH_TASK_WORKING_DIR \
${singularity_binds} \
-v "$AZ_BATCH_NODE_ROOT_DIR":"$AZ_BATCH_NODE_ROOT_DIR" \
-w "$AZ_BATCH_TASK_WORKING_DIR" \
-p 6881-6891:6881-6891 -p 6881-6891:6881-6891/udp \
alfpark/batch-shipyard:${version}-cascade &
alfpark/batch-shipyard:"${version}"-cascade &
cascadepid=$!
# if not in p2p mode, then wait for cascade exit
@ -666,19 +677,19 @@ if [ $p2penabled -eq 0 ]; then
rc=$?
if [ $rc -ne 0 ]; then
log ERROR "cascade exited with non-zero exit code: $rc"
rm -f $nodeprepfinished
rm -f "$nodeprepfinished"
exit $rc
fi
fi
set -e
# remove cascade failed file
rm -f $cascadefailed
rm -f "$cascadefailed"
# block for images if necessary
$AZ_BATCH_TASK_WORKING_DIR/wait_for_images.sh $block
"${AZ_BATCH_TASK_WORKING_DIR}"/wait_for_images.sh "$block"
# clean up cascade env file if block
if [ ! -z $block ]; then
if [ ! -z "$block" ]; then
rm -f $envfile
fi

Просмотреть файл

@ -1,5 +1,7 @@
#!/usr/bin/env bash
# shellcheck disable=SC1091
set -e
set -o pipefail
@ -76,8 +78,7 @@ shift $((OPTIND-1))
check_for_buggy_ntfs_mount() {
# Check to ensure sdb1 mount is not mounted as ntfs
set +e
mount | grep /dev/sdb1 | grep fuseblk
if [ $? -eq 0 ]; then
if mount | grep /dev/sdb1 | grep fuseblk; then
log ERROR "/dev/sdb1 temp disk is mounted as fuseblk/ntfs"
exit 1
fi
@ -86,7 +87,7 @@ check_for_buggy_ntfs_mount() {
# Best-effort creation of the .save marker file under
# $AZ_BATCH_NODE_ROOT_DIR/volatile/startup. Takes no arguments.
save_startup_to_volatile() {
# turn errexit off so a failing touch does not abort the script
set +e
# NOTE(review): the two touch lines below look like the before/after pair of
# this diff hunk (unquoted vs. quoted expansion of the same path) - confirm
# which one the actual script keeps
touch $AZ_BATCH_NODE_ROOT_DIR/volatile/startup/.save
touch "${AZ_BATCH_NODE_ROOT_DIR}"/volatile/startup/.save
# turn errexit back on for the code that follows
set -e
}
@ -109,7 +110,7 @@ net.ipv4.tcp_abort_on_overflow=1
net.ipv4.route.flush=1
EOF
fi
if [ "$1" == "ubuntu" ] && [ "$2" == 14.04* ]; then
if [[ "$1" == "ubuntu" ]] && [[ "$2" == 14.04* ]]; then
service procps start
else
service procps reload
@ -121,7 +122,7 @@ blacklist_kernel_upgrade() {
shift
local sku=$1
shift
if [ $offer != "ubuntu" ]; then
if [ "$offer" != "ubuntu" ]; then
log DEBUG "No kernel upgrade blacklist required on $offer $sku"
return
fi
@ -130,15 +131,14 @@ blacklist_kernel_upgrade() {
local rc=$?
set -e
if [ $rc -ne 0 ]; then
sed -i "/^Unattended-Upgrade::Package-Blacklist {/alinux-azure\nlinux-cloud-tools-azure\nlinux-headers-azure\nlinux-image-azure\nlinux-tools-azure" /etc/apt/apt.conf.d/50unattended-upgrades
sed -i "/^Unattended-Upgrade::Package-Blacklist {/a\"linux-azure\";\\n\"linux-cloud-tools-azure\";\\n\"linux-headers-azure\";\\n\"linux-image-azure\";\\n\"linux-tools-azure\";" /etc/apt/apt.conf.d/50unattended-upgrades
log INFO "Added linux-azure to package blacklist for unattended upgrades"
fi
}
check_for_nvidia_docker() {
set +e
nvidia-docker version
if [ $? -ne 0 ]; then
if ! nvidia-docker version; then
log ERROR "nvidia-docker2 not installed"
exit 1
fi
@ -147,7 +147,8 @@ check_for_nvidia_docker() {
check_for_nvidia_driver() {
set +e
local out=$(lsmod)
local out
out=$(lsmod)
echo "$out" | grep -i nvidia > /dev/null
local rc=$?
set -e
@ -164,7 +165,8 @@ check_for_nvidia() {
log INFO "Checking for Nvidia Hardware"
# first check for card
set +e
local out=$(lspci)
local out
out=$(lspci)
echo "$out" | grep -i nvidia > /dev/null
local rc=$?
set -e
@ -172,7 +174,7 @@ check_for_nvidia() {
if [ $rc -ne 0 ]; then
log INFO "No Nvidia card(s) detected!"
else
blacklist_kernel_upgrade $1 $2
blacklist_kernel_upgrade "$1" "$2"
check_for_nvidia_driver
# enable persistence mode
nvidia-smi -pm 1
@ -182,7 +184,8 @@ check_for_nvidia() {
check_docker_root_dir() {
set +e
local rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
local rootdir
rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
set -e
log DEBUG "Graph root: $rootdir"
if [ -z "$rootdir" ]; then
@ -198,14 +201,12 @@ check_for_docker_host_engine() {
set +e
# enable and start docker service if custom image
if [ $custom_image -eq 1 ]; then
docker version --format '{{.Server.Version}}'
if [ $? -ne 0 ]; then
if ! docker version --format '{{.Server.Version}}'; then
systemctl start docker.service
fi
fi
systemctl status docker.service
docker version --format '{{.Server.Version}}'
if [ $? -ne 0 ]; then
if ! docker version --format '{{.Server.Version}}'; then
log ERROR "Docker not installed"
exit 1
fi
@ -227,7 +228,8 @@ docker_pull_image() {
set +e
local retries=60
while [ $retries -gt 0 ]; do
local pull_out=$(docker pull $image 2>&1)
local pull_out
pull_out=$(docker pull "$image" 2>&1)
local rc=$?
if [ $rc -eq 0 ]; then
echo "$pull_out"
@ -235,18 +237,24 @@ docker_pull_image() {
fi
# non-zero exit code: check if pull output has toomanyrequests,
# connection resets, or image config error
if [[ ! -z "$(grep 'toomanyrequests' <<<$pull_out)" ]] || [[ ! -z "$(grep 'connection reset by peer' <<<$pull_out)" ]] || [[ ! -z "$(grep 'error pulling image configuration' <<<$pull_out)" ]]; then
local tmr
tmr=$(grep 'toomanyrequests' <<<"$pull_out")
local crbp
crbp=$(grep 'connection reset by peer' <<<"$pull_out")
local epic
epic=$(grep 'error pulling image configuration' <<<"$pull_out")
if [[ ! -z "$tmr" ]] || [[ ! -z "$crbp" ]] || [[ ! -z "$epic" ]]; then
log WARNING "will retry: $pull_out"
else
log ERROR "$pull_out"
exit $rc
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -le 0 ]; then
log ERROR "Could not pull docker image: $image"
exit $rc
fi
sleep $[($RANDOM % 5) + 1]s
sleep $((RANDOM % 5 + 1))s
done
set -e
}
@ -256,16 +264,19 @@ install_local_packages() {
shift
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [[ $distrib == "ubuntu" ]]; then
dpkg -i $*
dpkg -i "$@"
rc=$?
else
rpm -Uvh --nodeps $*
rpm -Uvh --nodeps "$@"
rc=$?
fi
if [ $? -eq 0 ]; then
if [ $rc -eq 0 ]; then
break
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not install local packages: $*"
exit 1
@ -280,16 +291,19 @@ install_packages() {
shift
set +e
local retries=30
local rc
while [ $retries -gt 0 ]; do
if [[ $distrib == "ubuntu" ]]; then
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends $*
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends "$@"
rc=$?
elif [[ $distrib == centos* ]]; then
yum install -y $*
yum install -y "$@"
rc=$?
fi
if [ $? -eq 0 ]; then
if [ $rc -eq 0 ]; then
break
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not install packages: $*"
exit 1
@ -303,19 +317,22 @@ refresh_package_index() {
local distrib=$1
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [[ $distrib == "ubuntu" ]]; then
apt-get update
rc=$?
elif [[ $distrib == centos* ]]; then
yum makecache -y fast
rc=$?
else
log ERROR "Unknown distribution for refresh: $distrib"
exit 1
fi
if [ $? -eq 0 ]; then
if [ $rc -eq 0 ]; then
break
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not update package index"
exit 1
@ -329,21 +346,21 @@ mount_azureblob_container() {
log INFO "Mounting Azure Blob Containers"
local distrib=$1
local release=$2
if [ $distrib == "ubuntu" ]; then
if [ "$distrib" == "ubuntu" ]; then
local debfile=packages-microsoft-prod.deb
if [ ! -f ${debfile} ]; then
download_file https://packages.microsoft.com/config/ubuntu/16.04/${debfile}
install_local_packages $distrib ${debfile}
refresh_package_index $distrib
install_packages $distrib blobfuse
download_file https://packages.microsoft.com/config/ubuntu/16.04/"${debfile}"
install_local_packages "$distrib" "${debfile}"
refresh_package_index "$distrib"
install_packages "$distrib" blobfuse
fi
elif [[ $distrib == centos* ]]; then
local rpmfile=packages-microsoft-prod.rpm
if [ ! -f ${rpmfile} ]; then
download_file https://packages.microsoft.com/config/rhel/7/${rpmfile}
install_local_packages $distrib ${rpmfile}
refresh_package_index $distrib
install_packages $distrib blobfuse
download_file https://packages.microsoft.com/config/rhel/7/"${rpmfile}"
install_local_packages "$distrib" "${rpmfile}"
refresh_package_index "$distrib"
install_packages "$distrib" blobfuse
fi
else
log ERROR "unsupported distribution for Azure blob: $distrib $release"
@ -353,8 +370,8 @@ mount_azureblob_container() {
./azureblob-mount.sh
chmod 700 azureblob-mount.sh
chown root:root azureblob-mount.sh
chmod 600 *.cfg
chown root:root *.cfg
chmod 600 ./*.cfg
chown root:root ./*.cfg
}
download_file() {
@ -362,11 +379,10 @@ download_file() {
local retries=10
set +e
while [ $retries -gt 0 ]; do
curl -fSsLO $1
if [ $? -eq 0 ]; then
if curl -fSsLO "$1"; then
break
fi
retries=retries-1
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not download: $1"
exit 1
@ -381,22 +397,23 @@ process_fstab_entry() {
local mountpoint=$2
local fstab_entry=$3
log INFO "Creating host directory for $desc at $mountpoint"
mkdir -p $mountpoint
chmod 777 $mountpoint
mkdir -p "$mountpoint"
chmod 777 "$mountpoint"
log INFO "Adding $mountpoint to fstab"
echo $fstab_entry >> /etc/fstab
echo "$fstab_entry" >> /etc/fstab
tail -n1 /etc/fstab
log INFO "Mounting $mountpoint"
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
set +e
while :
do
mount $mountpoint
if [ $? -eq 0 ]; then
if mount "$mountpoint"; then
break
else
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 5 minutes of attempts
if [ $DIFF -ge 5 ]; then
log ERROR "Could not mount $desc on $mountpoint"
@ -454,62 +471,55 @@ check_for_buggy_ntfs_mount
save_startup_to_volatile
# set python env vars
LC_ALL=en_US.UTF-8
PYTHONASYNCIODEBUG=1
# store node prep start
if command -v python3 > /dev/null 2>&1; then
npstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'`
else
npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'`
fi
export LC_ALL=en_US.UTF-8
export PYTHONASYNCIODEBUG=1
# set node prep status files
nodeprepfinished=$AZ_BATCH_NODE_SHARED_DIR/.node_prep_finished
# create shared mount points
mkdir -p $MOUNTS_PATH
mkdir -p "$MOUNTS_PATH"
# decrypt encrypted creds
if [ ! -z $encrypted ]; then
if [ ! -z "$encrypted" ]; then
# convert pfx to pem
pfxfile=$AZ_BATCH_CERTIFICATES_DIR/sha1-$encrypted.pfx
privatekey=$AZ_BATCH_CERTIFICATES_DIR/key.pem
openssl pkcs12 -in $pfxfile -out $privatekey -nodes -password file:$pfxfile.pw
openssl pkcs12 -in "$pfxfile" -out "$privatekey" -nodes -password file:"${pfxfile}".pw
# remove pfx-related files
rm -f $pfxfile $pfxfile.pw
rm -f "$pfxfile" "${pfxfile}".pw
# decrypt creds
if [ ! -z ${DOCKER_LOGIN_USERNAME+x} ]; then
DOCKER_LOGIN_PASSWORD=`echo $DOCKER_LOGIN_PASSWORD | base64 -d | openssl rsautl -decrypt -inkey $privatekey`
DOCKER_LOGIN_PASSWORD=$(echo "$DOCKER_LOGIN_PASSWORD" | base64 -d | openssl rsautl -decrypt -inkey "$privatekey")
fi
fi
# check for docker host engine
check_for_docker_host_engine
check_docker_root_dir $DISTRIB_ID
check_docker_root_dir "$DISTRIB_ID"
# check for nvidia card/driver/docker
check_for_nvidia $DISTRIB_ID $DISTRIB_RELEASE
check_for_nvidia "$DISTRIB_ID" "$DISTRIB_RELEASE"
# mount azure resources (this must be done every boot)
if [ $azurefile -eq 1 ]; then
mount_azurefile_share $DISTRIB_ID $DISTRIB_RELEASE
mount_azurefile_share "$DISTRIB_ID" "$DISTRIB_RELEASE"
fi
if [ $azureblob -eq 1 ]; then
mount_azureblob_container $DISTRIB_ID $DISTRIB_RELEASE
mount_azureblob_container "$DISTRIB_ID" "$DISTRIB_RELEASE"
fi
# check if we're coming up from a reboot
if [ -f $nodeprepfinished ]; then
if [ -f "$nodeprepfinished" ]; then
# mount any storage clusters
if [ ! -z $sc_args ]; then
if [ ! -z "$sc_args" ]; then
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
IFS='#' read -ra fstabs <<< "$fstab_mounts"
i=0
for sc_arg in ${sc_args[@]}; do
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
mount $MOUNTS_PATH/${sc[1]}
mount "${MOUNTS_PATH}"/"${sc[1]}"
done
fi
# mount any custom mounts
@ -519,7 +529,7 @@ if [ -f $nodeprepfinished ]; then
# eval and split fstab var to expand vars
fstab_entry=$(eval echo "$fstab")
IFS=' ' read -ra parts <<< "$fstab_entry"
mount ${parts[1]}
mount "${parts[1]}"
done
fi
log INFO "$nodeprepfinished file exists, assuming successful completion of node prep"
@ -530,7 +540,7 @@ fi
if [ $networkopt -eq 1 ]; then
# do not fail script if this function fails
set +e
optimize_tcp_network_settings $DISTRIB_ID $DISTRIB_RELEASE
optimize_tcp_network_settings "$DISTRIB_ID" "$DISTRIB_RELEASE"
set -e
# set sudoers to not require tty
sed -i 's/^Defaults[ ]*requiretty/# Defaults requiretty/g' /etc/sudoers
@ -539,16 +549,16 @@ fi
# install gluster on compute software
if [ $custom_image -eq 0 ]; then
if [ $gluster_on_compute -eq 1 ]; then
if [ $DISTRIB_ID == "ubuntu" ]; then
install_packages $DISTRIB_ID glusterfs-server
if [ "$DISTRIB_ID" == "ubuntu" ]; then
install_packages "$DISTRIB_ID" glusterfs-server
systemctl enable glusterfs-server
systemctl start glusterfs-server
# create brick directory
mkdir -p /mnt/gluster
elif [[ $DISTRIB_ID == centos* ]]; then
install_packages $DISTRIB_ID epel-release centos-release-gluster38
install_packages "$DISTRIB_ID" epel-release centos-release-gluster38
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
install_packages $DISTRIB_ID --enablerepo=centos-gluster38,epel glusterfs-server
install_packages "$DISTRIB_ID" --enablerepo=centos-gluster38,epel glusterfs-server
systemctl daemon-reload
chkconfig glusterd on
systemctl start glusterd
@ -560,33 +570,33 @@ fi
# install storage cluster software
if [ $custom_image -eq 0 ]; then
if [ ! -z $sc_args ]; then
if [ $DISTRIB_ID == "ubuntu" ]; then
for sc_arg in ${sc_args[@]}; do
if [ ! -z "$sc_args" ]; then
if [ "$DISTRIB_ID" == "ubuntu" ]; then
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
server_type=${sc[0]}
if [ $server_type == "nfs" ]; then
install_packages $DISTRIB_ID nfs-common nfs4-acl-tools
elif [ $server_type == "glusterfs" ]; then
install_packages $DISTRIB_ID glusterfs-client acl
if [ "$server_type" == "nfs" ]; then
install_packages "$DISTRIB_ID" nfs-common nfs4-acl-tools
elif [ "$server_type" == "glusterfs" ]; then
install_packages "$DISTRIB_ID" glusterfs-client acl
else
log ERROR "Unknown file server type ${sc[0]} for ${sc[1]}"
exit 1
fi
done
elif [[ $DISTRIB_ID == centos* ]]; then
for sc_arg in ${sc_args[@]}; do
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
server_type=${sc[0]}
if [ $server_type == "nfs" ]; then
install_packages $DISTRIB_ID nfs-utils nfs4-acl-tools
if [ "$server_type" == "nfs" ]; then
install_packages "$DISTRIB_ID" nfs-utils nfs4-acl-tools
systemctl daemon-reload
systemctl enable rpcbind
systemctl start rpcbind
elif [ $server_type == "glusterfs" ]; then
install_packages $DISTRIB_ID epel-release centos-release-gluster38
elif [ "$server_type" == "glusterfs" ]; then
install_packages "$DISTRIB_ID" epel-release centos-release-gluster38
sed -i -e "s/enabled=1/enabled=0/g" /etc/yum.repos.d/CentOS-Gluster-3.8.repo
install_packages $DISTRIB_ID --enablerepo=centos-gluster38,epel glusterfs-server acl
install_packages "$DISTRIB_ID" --enablerepo=centos-gluster38,epel glusterfs-server acl
else
log ERROR "Unknown file server type ${sc[0]} for ${sc[1]}"
exit 1
@ -597,16 +607,16 @@ if [ $custom_image -eq 0 ]; then
fi
# mount any storage clusters
if [ ! -z $sc_args ]; then
if [ ! -z "$sc_args" ]; then
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
IFS='#' read -ra fstabs <<< "$fstab_mounts"
i=0
for sc_arg in ${sc_args[@]}; do
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
fstab_entry="${fstabs[$i]}"
process_fstab_entry "$sc_arg" "$MOUNTS_PATH/${sc[1]}" "$fstab_entry"
i=$(($i + 1))
i=$((i + 1))
done
fi
@ -622,8 +632,8 @@ if [ ! -z "$SHIPYARD_CUSTOM_MOUNTS_FSTAB" ]; then
fi
# retrieve docker images related to data movement
docker_pull_image alfpark/blobxfer:$blobxferversion
docker_pull_image alfpark/batch-shipyard:${version}-cargo
docker_pull_image alfpark/blobxfer:"${blobxferversion}"
docker_pull_image alfpark/batch-shipyard:"${version}"-cargo
# login to registry servers (do not specify -e as creds have been decrypted)
./registry_login.sh
@ -633,4 +643,4 @@ if [ -f singularity-registry-login ]; then
fi
# touch node prep finished file to preserve idempotency
touch $nodeprepfinished
touch "$nodeprepfinished"

Просмотреть файл

@ -3,7 +3,7 @@
set -e
set -o pipefail
DEBIAN_FRONTEND=noninteractive
export DEBIAN_FRONTEND=noninteractive
# constants
gluster_brick_mountpath=/gluster/brick
@ -20,27 +20,27 @@ volume_type=
gluster_peer_probe() {
# detach peer if it was connected already
set +e
gluster peer detach $1 2>&1
gluster peer detach "$1" 2>&1
set -e
echo "Attempting to peer with $1"
peered=0
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
set +e
while :
do
# attempt to ping before peering
ping -c 2 $1 > /dev/null
if [ $? -eq 0 ]; then
gluster peer probe $1 2>&1
if [ $? -eq 0 ]; then
if ping -c 2 "$1" > /dev/null; then
if gluster peer probe "$1" 2>&1; then
peered=1
fi
fi
if [ $peered -eq 1 ]; then
break
else
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 15 minutes of attempts
if [ $DIFF -ge 15 ]; then
echo "Could not probe peer $1"
@ -54,14 +54,15 @@ gluster_peer_probe() {
}
gluster_poll_for_connections() {
local numpeers=$(($vm_count - 1))
local numpeers=$((vm_count - 1))
echo "Waiting for $numpeers peers to reach connected state..."
# get peer info
set +e
while :
do
local numready=$(gluster peer status | grep -e '^State: Peer in Cluster' | wc -l)
if [ $numready == $numpeers ]; then
local numready
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
if [ "$numready" == "$numpeers" ]; then
break
fi
sleep 1
@ -79,7 +80,7 @@ gluster_add_bricks() {
IFS=',' read -ra hosts <<< "$hostnames"
# cross-validate length
if [ ${#peers[@]} -ne ${#hosts[@]} ]; then
echo "${peers[@]} length does not match ${hosts[@]} length"
echo "${peers[*]} length does not match ${hosts[*]} length"
exit 1
fi
# construct brick locations
@ -88,7 +89,7 @@ gluster_add_bricks() {
do
bricks+=" $host:$gluster_brick_location"
# probe peer
gluster_peer_probe $host
gluster_peer_probe "$host"
done
# wait for connections
gluster_poll_for_connections
@ -107,9 +108,9 @@ gluster_add_bricks() {
echo "Adding bricks to gluster volume $gluster_volname $volarg ($bricks)"
if [[ "$volume_type" == stripe* ]]; then
# this should be gated by remotefs.py
echo -e "y\n" | gluster volume add-brick $gluster_volname $volarg $bricks
echo -e "y\\n" | gluster volume add-brick $gluster_volname $volarg "$bricks"
else
gluster volume add-brick $gluster_volname $volarg $bricks $force
gluster volume add-brick $gluster_volname $volarg "$bricks"
fi
# get info and status
gluster volume info $gluster_volname
@ -117,8 +118,7 @@ gluster_add_bricks() {
# rebalance
echo "Rebalancing gluster volume $gluster_volname"
set +e
gluster volume rebalance $gluster_volname start
if [ $? -eq 0 ]; then
if gluster volume rebalance $gluster_volname start; then
sleep 5
gluster volume rebalance $gluster_volname status
fi

Просмотреть файл

@ -3,7 +3,7 @@
set -e
set -o pipefail
DEBIAN_FRONTEND=noninteractive
export DEBIAN_FRONTEND=noninteractive
# constants
gluster_brick_mountpath=/gluster/brick
@ -28,11 +28,13 @@ mount_options=
# functions
wait_for_device() {
local device=$1
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
echo "Waiting for device $device..."
while [ ! -b $device ]; do
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
while [ ! -b "$device" ]; do
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 5 minutes of waiting
if [ $DIFF -ge 5 ]; then
echo "Could not find device $device"
@ -46,8 +48,7 @@ setup_nfs() {
# amend /etc/exports if needed
add_exports=0
set +e
grep "^${mountpath}" /etc/exports
if [ $? -ne 0 ]; then
if ! grep "^${mountpath}" /etc/exports; then
add_exports=1
fi
if [ $add_exports -eq 1 ]; then
@ -61,8 +62,7 @@ setup_nfs() {
set +f
systemctl reload nfs-kernel-server.service
fi
systemctl status nfs-kernel-server.service
if [ $? -ne 0 ]; then
if ! systemctl status nfs-kernel-server.service; then
set -e
# attempt to start
systemctl start nfs-kernel-server.service
@ -75,23 +75,23 @@ setup_nfs() {
gluster_peer_probe() {
echo "Attempting to peer with $1"
peered=0
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
set +e
while :
do
# attempt to ping before peering
ping -c 2 $1 > /dev/null
if [ $? -eq 0 ]; then
gluster peer probe $1
if [ $? -eq 0 ]; then
if ping -c 2 "$1" > /dev/null; then
if gluster peer probe "$1"; then
peered=1
fi
fi
if [ $peered -eq 1 ]; then
break
else
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 15 minutes of attempts
if [ $DIFF -ge 15 ]; then
echo "Could not probe peer $1"
@ -106,14 +106,15 @@ gluster_peer_probe() {
gluster_poll_for_connections() {
local numnodes=$1
local numpeers=$(($numnodes - 1))
local numpeers=$((numnodes - 1))
echo "Waiting for $numpeers peers to reach connected state..."
# get peer info
set +e
while :
do
local numready=$(gluster peer status | grep -e '^State: Peer in Cluster' | wc -l)
if [ $numready == $numpeers ]; then
local numready
numready=$(gluster peer status | grep -c '^State: Peer in Cluster')
if [ "$numready" == "$numpeers" ]; then
break
fi
sleep 1
@ -126,19 +127,19 @@ gluster_poll_for_connections() {
gluster_poll_for_volume() {
echo "Waiting for gluster volume $1"
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
set +e
while :
do
gluster volume info $1
if [ $? -eq 0 ]; then
echo $gv_info
if gluster volume info "$1"; then
# delay to wait for subvolumes
sleep 5
break
else
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 15 minutes of attempts
if [ $DIFF -ge 15 ]; then
echo "Could not connect to gluster volume $1"
@ -169,36 +170,36 @@ setup_glusterfs() {
local i=0
declare -a hosts
set +e
for ip in "${peers[@]}"; do
local host=${hostname_prefix}-vm$(printf %03d $i)
while [ $i -lt ${#peers[@]} ]; do
local host
host=${hostname_prefix}-vm$(printf %03d $i)
hosts=("${hosts[@]}" "$host")
if [ ${peers[$i]} == $ipaddress ]; then
if [ "${peers[$i]}" == "$ipaddress" ]; then
myhostname=$host
fi
i=$(($i + 1))
i=$((i + 1))
done
set -e
if [ -z $myhostname ]; then
if [ -z "$myhostname" ]; then
echo "Could not determine own hostname from prefix"
exit 1
fi
# master (first host) performs peering
if [ ${peers[0]} == $ipaddress ]; then
if [ "${peers[0]}" == "$ipaddress" ]; then
# construct brick locations
local bricks=
for host in "${hosts[@]}"
do
local bricks
for host in "${hosts[@]}"; do
bricks+=" $host:$gluster_brick_location"
# probe peer
if [ $host != $myhostname ]; then
gluster_peer_probe $host
if [ "$host" != "$myhostname" ]; then
gluster_peer_probe "$host"
fi
done
# wait for connections
local numnodes=${#peers[@]}
gluster_poll_for_connections $numnodes
gluster_poll_for_connections "$numnodes"
local voltype=${so[1],,}
local volarg=
local volarg
if [ "$voltype" == "replica" ] || [ "$voltype" == "stripe" ]; then
volarg="$voltype $numnodes"
elif [ "$voltype" != "distributed" ]; then
@ -206,17 +207,15 @@ setup_glusterfs() {
volarg=$voltype
fi
local transport=${so[2],,}
if [ -z $transport ]; then
if [ -z "$transport" ]; then
transport="tcp"
fi
# check if volume exists
local start_only=0
local force=
local force
set +e
gluster volume info $gluster_volname 2>&1 | grep "does not exist"
if [ $? -ne 0 ]; then
gluster volume info $gluster_volname 2>&1 | grep "Volume Name: $gluster_volname"
if [ $? -eq 0 ]; then
if ! gluster volume info "$gluster_volname" 2>&1 | grep "does not exist"; then
if gluster volume info "$gluster_volname" 2>&1 | grep "Volume Name: $gluster_volname"; then
start_only=1
else
force="force"
@ -226,29 +225,28 @@ setup_glusterfs() {
# create volume
if [ $start_only -eq 0 ]; then
echo "Creating gluster volume $gluster_volname $volarg ($force$bricks)"
gluster volume create $gluster_volname $volarg transport $transport$bricks $force
gluster volume create "$gluster_volname" "$volarg" transport "${transport}""${bricks}" $force
# modify volume properties as per input
for e in "${so[@]:3}"; do
IFS=':' read -ra kv <<< "$e"
echo "Setting volume option ${kv[@]}"
gluster volume set $gluster_volname "${kv[0]}" "${kv[1]}"
echo "Setting volume option ${kv[*]}"
gluster volume set "$gluster_volname" "${kv[0]}" "${kv[1]}"
done
fi
# start volume
echo "Starting gluster volume $gluster_volname"
gluster volume start $gluster_volname
gluster volume start "$gluster_volname"
# heal volume if force created with certain volume types
if [ ! -z $force ]; then
if [[ "$voltype" == replica* ]] || [[ "$voltype" == disperse* ]]; then
echo "Checking if gluster volume $gluster_volname needs healing"
set +e
gluster volume heal $gluster_volname info
if [ $? -eq 0 ]; then
gluster volume heal $gluster_volname
if gluster volume heal "$gluster_volname" info; then
gluster volume heal "$gluster_volname"
# print status after heal
gluster volume heal $gluster_volname info healed
gluster volume heal $gluster_volname info heal-failed
gluster volume heal $gluster_volname info split-brain
gluster volume heal "$gluster_volname" info healed
gluster volume heal "$gluster_volname" info heal-failed
gluster volume heal "$gluster_volname" info split-brain
fi
set -e
fi
@ -256,13 +254,12 @@ setup_glusterfs() {
fi
# poll for volume created
gluster_poll_for_volume $gluster_volname
gluster_poll_for_volume "$gluster_volname"
# check if volume is mounted
local mounted=0
set +e
mountpoint -q $mountpath
if [ $? -eq 0 ]; then
if mountpoint -q "$mountpath"; then
mounted=1
fi
set -e
@ -271,8 +268,7 @@ setup_glusterfs() {
# check if fstab entry exists
add_fstab=0
set +e
grep "$mountpath glusterfs" /etc/fstab
if [ $? -ne 0 ]; then
if ! grep "$mountpath glusterfs" /etc/fstab; then
add_fstab=1
fi
set -e
@ -287,19 +283,20 @@ setup_glusterfs() {
mkdir -p $mountpath
# mount it
echo "Mounting gluster volume $gluster_volname locally to $mountpath"
local START=$(date -u +"%s")
local START
START=$(date -u +"%s")
set +e
while :
do
mount $mountpath
if [ $? -eq 0 ]; then
if mount "$mountpath"; then
break
else
local NOW=$(date -u +"%s")
local DIFF=$((($NOW-$START)/60))
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 5 minutes of attempts
if [ $DIFF -ge 5 ]; then
echo "Could not mount gluster volume $gluster_volume to $mountpath"
echo "Could not mount gluster volume $gluster_volname to $mountpath"
exit 1
fi
sleep 1
@ -307,7 +304,7 @@ setup_glusterfs() {
done
set -e
# ensure proper permissions on mounted directory
chmod 1777 $mountpath
chmod 1777 "$mountpath"
fi
}
@ -381,7 +378,7 @@ shift $((OPTIND-1))
echo "Parameters:"
echo " Attach mode: $attach_disks"
echo " Samba options: ${samba_options[@]}"
echo " Samba options: ${samba_options[*]}"
echo " Rebalance filesystem: $rebalance"
echo " Filesystem: $filesystem"
echo " Mountpath: $mountpath"
@ -424,13 +421,12 @@ EOF
fi
# install required server_type software
apt-get update
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
apt-get install -y --no-install-recommends nfs-kernel-server nfs4-acl-tools
# patch buggy nfs-mountd.service unit file
# https://bugs.launchpad.net/ubuntu/+source/nfs-utils/+bug/1590799
set +e
grep "^After=network.target local-fs.target" /lib/systemd/system/nfs-mountd.service
if [ $? -eq 0 ]; then
if grep "^After=network.target local-fs.target" /lib/systemd/system/nfs-mountd.service; then
set -e
sed -i -e "s/^After=network.target local-fs.target/After=rpcbind.target/g" /lib/systemd/system/nfs-mountd.service
fi
@ -441,14 +437,13 @@ EOF
systemctl enable nfs-kernel-server.service
# start service if not started
set +e
systemctl status nfs-kernel-server.service
if [ $? -ne 0 ]; then
if ! systemctl status nfs-kernel-server.service; then
set -e
systemctl start nfs-kernel-server.service
systemctl status nfs-kernel-server.service
fi
set -e
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
# to prevent a race where the master (aka prober) script execution
# runs well before the child, we should block all gluster connection
# requests with iptables. we should not remove the filter rules
@ -461,8 +456,7 @@ EOF
systemctl enable glusterfs-server
# start service if not started
set +e
systemctl status glusterfs-server
if [ $? -ne 0 ]; then
if ! systemctl status glusterfs-server; then
set -e
systemctl start glusterfs-server
systemctl status glusterfs-server
@ -477,31 +471,31 @@ fi
# get all data disks
declare -a data_disks
all_disks=($(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME))
mapfile -t all_disks < <(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME)
for disk in "${all_disks[@]}"; do
# ignore os and ephemeral disks
if [ $disk != "/dev/sda" ] && [ $disk != "/dev/sdb" ]; then
if [ "$disk" != "/dev/sda" ] && [ "$disk" != "/dev/sdb" ]; then
data_disks=("${data_disks[@]}" "$disk")
fi
done
unset all_disks
numdisks=${#data_disks[@]}
echo "found $numdisks data disks: ${data_disks[@]}"
echo "found $numdisks data disks: ${data_disks[*]}"
# check if data disks are already partitioned
declare -a skipped_part
for disk in "${data_disks[@]}"; do
part1=$(partprobe -d -s $disk | cut -d' ' -f4)
if [ -z $part1 ]; then
part1=$(partprobe -d -s "$disk" | cut -d' ' -f4)
if [ -z "$part1" ]; then
echo "$disk: partition 1 not found. Partitioning $disk."
parted -a opt -s $disk mklabel gpt mkpart primary 0% 100%
part1=$(partprobe -d -s $disk | cut -d' ' -f4)
if [ -z $part1 ]; then
parted -a opt -s "$disk" mklabel gpt mkpart primary 0% 100%
part1=$(partprobe -d -s "$disk" | cut -d' ' -f4)
if [ -z "$part1" ]; then
echo "$disk: partition 1 not found after partitioning."
exit 1
fi
# wait for block device
wait_for_device $disk$part1
wait_for_device "${disk}""${part1}"
else
echo "$disk: partition 1 found. Skipping partitioning."
skipped_part=("${skipped_part[@]}" "$disk")
@ -509,65 +503,66 @@ for disk in "${data_disks[@]}"; do
done
# set format target
target=
target_md=
target_uuid=
format_target=1
# check if there was only one skipped disk during partitioning
if [ ${#skipped_part[@]} -eq $numdisks ] && [ $numdisks -eq 1 ]; then
target=${skipped_part[0]}
read target_uuid target_fs < <(blkid -u filesystem $target | awk -F "[= ]" '{print $3" "$5}'|tr -d "\"")
if [ ! -z $target_fs ]; then
if [ ${#skipped_part[@]} -eq "$numdisks" ] && [ "$numdisks" -eq 1 ]; then
target_md=${skipped_part[0]}
read -r target_uuid target_fs < <(blkid -u filesystem "$target_md" | awk -F "[= ]" '{print $3" "$5}'|tr -d "\"")
if [ ! -z "$target_fs" ]; then
format_target=0
fi
fi
# check if disks are already in raid set
raid_resized=0
if [ $raid_level -ge 0 ]; then
if [ "$raid_level" -ge 0 ]; then
# redirect mountpath if gluster for bricks
saved_mp=$mountpath
if [ $server_type == "glusterfs" ]; then
if [ "$server_type" == "glusterfs" ]; then
mountpath=$gluster_brick_mountpath
fi
format_target=0
md_preexist=0
if [ $filesystem == "btrfs" ]; then
if [ $raid_level -ne 0 ]; then
if [ "$filesystem" == "btrfs" ]; then
if [ "$raid_level" -ne 0 ]; then
echo "btrfs with non-RAID 0 is not supported."
exit 1
fi
else
# find any pre-existing targets
set +e
mdadm --detail --scan
if [ $? -eq 0 ]; then
target=($(find /dev/md* -maxdepth 0 -type b))
if mdadm --detail --scan; then
mapfile -t target < <(find /dev/md* -maxdepth 0 -type b)
if [ ${#target[@]} -ne 0 ]; then
target=${target[0]}
md_preexist=1
echo "Existing array found: $target"
target_md=${target[0]}
echo "Existing array found: $target_md"
# refresh target uuid to md target
read target_uuid < <(blkid ${target} | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
read -r target_uuid < <(blkid "$target_md" | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
else
echo "No pre-existing md target could be found"
fi
fi
set -e
if [ -z $target ]; then
target=/dev/md0
echo "Setting default target: $target"
if [ -z "$target_md" ]; then
target_md=/dev/md0
echo "Setting default target: $target_md"
fi
fi
declare -a raid_array
declare -a all_raid_disks
set +e
for disk in "${data_disks[@]}"; do
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
btrfs device scan "${disk}1"
rc=$?
else
mdadm --examine "${disk}1"
rc=$?
fi
if [ $? -ne 0 ]; then
if [ $rc -ne 0 ]; then
raid_array=("${raid_array[@]}" "${disk}1")
fi
all_raid_disks=("${all_raid_disks[@]}" "${disk}1")
@ -575,64 +570,64 @@ if [ $raid_level -ge 0 ]; then
set -e
no_raid_count=${#raid_array[@]}
# take action depending upon no raid count
if [ $no_raid_count -eq 0 ]; then
if [ "$no_raid_count" -eq 0 ]; then
echo "No disks require RAID setup"
elif [ $no_raid_count -eq $numdisks ]; then
echo "$numdisks data disks require RAID setup: ${raid_array[@]}"
if [ $filesystem == "btrfs" ]; then
if [ $raid_level -eq 0 ]; then
mkfs.btrfs -d raid0 ${raid_array[@]}
elif [ "$no_raid_count" -eq "$numdisks" ]; then
echo "$numdisks data disks require RAID setup: ${raid_array[*]}"
if [ "$filesystem" == "btrfs" ]; then
if [ "$raid_level" -eq 0 ]; then
mkfs.btrfs -d raid0 "${raid_array[@]}"
else
mkfs.btrfs -m raid${raid_level} ${raid_array[@]}
mkfs.btrfs -m raid"${raid_level}" "${raid_array[@]}"
fi
else
set +e
# first check if this is a pre-existing array
mdadm_detail=$(mdadm --detail --scan)
if [ -z $mdadm_detail ]; then
if [ -z "$mdadm_detail" ]; then
set -e
mdadm --create --verbose $target --level=$raid_level --raid-devices=$numdisks ${raid_array[@]}
mdadm --create --verbose $target_md --level="$raid_level" --raid-devices="$numdisks" "${raid_array[@]}"
format_target=1
else
if [ $md_preexist -eq 0 ]; then
echo "Could not determine pre-existing md target"
exit 1
fi
echo "Not creating a new array since pre-exsting md target found: $target"
echo "Not creating a new array since pre-exsting md target found: $target_md"
fi
set -e
fi
else
echo "Mismatch of non-RAID disks $no_raid_count to total disks $numdisks."
if [ $raid_level -ne 0 ]; then
if [ "$raid_level" -ne 0 ]; then
echo "Cannot resize with RAID level of $raid_level."
exit 1
fi
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
# add new block devices first
echo "Adding devices ${raid_array[@]} to $mountpath"
btrfs device add ${raid_array[@]} $mountpath
echo "Adding devices ${raid_array[*]} to $mountpath"
btrfs device add "${raid_array[@]}" $mountpath
raid_resized=1
else
# add new block device first
echo "Adding devices ${raid_array[@]} to $target"
mdadm --add $target ${raid_array[@]}
echo "Adding devices ${raid_array[*]} to $target_md"
mdadm --add $target_md "${raid_array[@]}"
# grow the array
echo "Growing array $target to a total of $numdisks devices"
mdadm --grow --raid-devices=$numdisks $target
echo "Growing array $target_md to a total of $numdisks devices"
mdadm --grow --raid-devices="$numdisks" "$target_md"
raid_resized=1
fi
fi
# dump diagnostic info
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
btrfs filesystem show
else
cat /proc/mdstat
mdadm --detail $target
mdadm --detail $target_md
fi
# get uuid of first disk as target uuid if not populated
if [ -z $target_uuid ]; then
read target_uuid < <(blkid ${all_raid_disks[0]} | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
if [ -z "$target_uuid" ]; then
read -r target_uuid < <(blkid "${all_raid_disks[0]}" | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
fi
# restore mountpath
mountpath=$saved_mp
@ -641,49 +636,47 @@ fi
# create filesystem on target device
if [ $format_target -eq 1 ]; then
if [ -z $target ]; then
if [ -z "$target_md" ]; then
echo "Target not specified for format"
exit 1
fi
echo "Creating filesystem on $target."
if [ $filesystem == "btrfs" ]; then
mkfs.btrfs $target
echo "Creating filesystem on $target_md"
if [ "$filesystem" == "btrfs" ]; then
mkfs.btrfs "$target_md"
elif [[ $filesystem == ext* ]]; then
mkfs.${filesystem} -m 0 $target
mkfs."${filesystem}" -m 0 "$target_md"
else
echo "Unknown filesystem: $filesystem"
exit 1
fi
# refresh target uuid
read target_uuid < <(blkid ${target} | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
read -r target_uuid < <(blkid "${target_md}" | awk -F "[= ]" '{print $3}' | sed 's/\"//g')
fi
# mount filesystem
if [ $attach_disks -eq 0 ]; then
# redirect mountpath if gluster for bricks
saved_mp=$mountpath
if [ $server_type == "glusterfs" ]; then
if [ "$server_type" == "glusterfs" ]; then
mountpath=$gluster_brick_mountpath
fi
# check if filesystem is mounted (active array)
mounted=0
set +e
mountpoint -q $mountpath
if [ $? -eq 0 ]; then
if mountpoint -q $mountpath; then
mounted=1
fi
set -e
# add fstab entry and mount
if [ $mounted -eq 0 ]; then
if [ -z $target_uuid ]; then
if [ -z "$target_uuid" ]; then
echo "Target UUID not populated!"
exit 1
fi
# check if fstab entry exists
add_fstab=0
set +e
grep "^UUID=${target_uuid}" /etc/fstab
if [ $? -ne 0 ]; then
if ! grep "^UUID=${target_uuid}" /etc/fstab; then
add_fstab=1
fi
set -e
@ -691,14 +684,14 @@ if [ $attach_disks -eq 0 ]; then
if [ $add_fstab -eq 1 ]; then
echo "Adding $target_uuid to mountpoint $mountpath to /etc/fstab"
# construct mount options
if [ -z $mount_options ]; then
if [ -z "$mount_options" ]; then
mount_options="defaults"
else
mount_options="defaults,$mount_options"
fi
if [ $premium_storage -eq 1 ]; then
# disable barriers due to cache
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
# also enable ssd optimizations on btrfs
mount_options+=",nobarrier,ssd"
else
@ -714,10 +707,10 @@ if [ $attach_disks -eq 0 ]; then
mkdir -p $mountpath
# mount
mount $mountpath
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
# ensure proper permissions
chmod 1777 $mountpath
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
# create the brick location
mkdir -p $gluster_brick_location
fi
@ -734,11 +727,11 @@ fi
if [ $raid_resized -eq 1 ]; then
# redirect mountpath if gluster for bricks
saved_mp=$mountpath
if [ $server_type == "glusterfs" ]; then
if [ "$server_type" == "glusterfs" ]; then
mountpath=$gluster_brick_mountpath
fi
echo "Resizing filesystem at $mountpath."
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
btrfs filesystem resize max $mountpath
# rebalance data and metadata across all devices
if [ $rebalance -eq 1 ]; then
@ -759,9 +752,9 @@ fi
# set up server_type software
if [ $attach_disks -eq 0 ]; then
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
setup_nfs
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
flush_glusterfs_firewall_rules
setup_glusterfs
else
@ -769,7 +762,7 @@ if [ $attach_disks -eq 0 ]; then
exit 1
fi
# setup samba server if specified
if [ ! -z $samba_options ]; then
if [ ! -z "$samba_options" ]; then
# install samba
apt-get install -y -q --no-install-recommends samba
# parse options
@ -791,16 +784,16 @@ cat >> /etc/samba/smb.conf << EOF
create mask = $smb_create_mask
directory mask = $smb_directory_mask
EOF
if [ $smb_username != "nobody" ]; then
if [ "$smb_username" != "nobody" ]; then
# create group
groupadd -o -g $smb_gid $smb_username
groupadd -o -g "$smb_gid" "$smb_username"
# create user (disable login)
useradd -N -g $smb_gid -p '!' -o -u $smb_uid -s /bin/bash -m -d /home/$smb_username $smb_username
useradd -N -g "$smb_gid" -p '!' -o -u "$smb_uid" -s /bin/bash -m -d /home/"${smb_username}" "$smb_username"
# add user to smb tdbsam
echo -ne "${smb_password}\n${smb_password}\n" | smbpasswd -a -s $smb_username
smbpasswd -e $smb_username
echo -ne "${smb_password}\\n${smb_password}\\n" | smbpasswd -a -s "$smb_username"
smbpasswd -e "$smb_username"
# modify smb.conf global
sed -i "/^\[global\]/a load printers = no\nprinting = bsd\nprintcap name = /dev/null\ndisable spoolss = yes\nsecurity = user\nserver signing = auto\nsmb encrypt = auto" /etc/samba/smb.conf
sed -i "/^\\[global\\]/a load printers = no\\nprinting = bsd\\nprintcap name = /dev/null\\ndisable spoolss = yes\\nsecurity = user\\nserver signing = auto\\nsmb encrypt = auto" /etc/samba/smb.conf
# modify smb.conf share
cat >> /etc/samba/smb.conf << EOF
guest ok = no
@ -809,7 +802,7 @@ cat >> /etc/samba/smb.conf << EOF
EOF
else
# modify smb.conf global
sed -i "/^\[global\]/a load printers = no\nprinting = bsd\nprintcap name = /dev/null\ndisable spoolss = yes\nsecurity = user\nserver signing = auto\nsmb encrypt = auto\nguest account = $smb_username" /etc/samba/smb.conf
sed -i "/^\\[global\\]/a load printers = no\\nprinting = bsd\\nprintcap name = /dev/null\\ndisable spoolss = yes\\nsecurity = user\\nserver signing = auto\\nsmb encrypt = auto\\nguest account = $smb_username" /etc/samba/smb.conf
# modify smb.conf share
cat >> /etc/samba/smb.conf << EOF
guest ok = yes

Просмотреть файл

@ -2,7 +2,7 @@
set -o pipefail
DEBIAN_FRONTEND=noninteractive
export DEBIAN_FRONTEND=noninteractive
# constants
gluster_brick_mountpath=/gluster/brick
@ -54,22 +54,21 @@ shift $((OPTIND-1))
[ "$1" = "--" ] && shift
# get all data disks
declare -a data_disks
all_disks=($(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME))
mapfile -t all_disks < <(lsblk -l -d -n -p -I 8,65,66,67,68 -o NAME)
for disk in "${all_disks[@]}"; do
# ignore os and ephemeral disks
if [ $disk != "/dev/sda" ] && [ $disk != "/dev/sdb" ]; then
if [ "$disk" != "/dev/sda" ] && [ "$disk" != "/dev/sdb" ]; then
data_disks=("${data_disks[@]}" "$disk")
fi
done
unset all_disks
numdisks=${#data_disks[@]}
echo "Detected $numdisks data disks: ${data_disks[@]}"
echo "Detected $numdisks data disks: ${data_disks[*]}"
echo ""
# check server_type software
if [ $server_type == "nfs" ]; then
if [ "$server_type" == "nfs" ]; then
echo "NFS service status:"
systemctl status nfs-kernel-server.service
echo ""
@ -81,7 +80,7 @@ if [ $server_type == "nfs" ]; then
echo ""
echo "connected clients:"
netstat -tn | grep :2049
elif [ $server_type == "glusterfs" ]; then
elif [ "$server_type" == "glusterfs" ]; then
echo "glusterfs service status:"
systemctl status glusterfs-server
echo ""
@ -92,12 +91,11 @@ elif [ $server_type == "glusterfs" ]; then
gluster volume status all clients
echo ""
set +e
gluster volume rebalance $gluster_volname status 2>&1
gluster volume heal $gluster_volname info 2>&1
if [ $? -eq 0 ]; then
gluster volume heal $gluster_volname info healed 2>&1
gluster volume heal $gluster_volname info heal-failed 2>&1
gluster volume heal $gluster_volname info split-brain 2>&1
gluster volume rebalance "$gluster_volname" status 2>&1
if gluster volume heal "$gluster_volname" info 2>&1; then
gluster volume heal "$gluster_volname" info healed 2>&1
gluster volume heal "$gluster_volname" info heal-failed 2>&1
gluster volume heal "$gluster_volname" info split-brain 2>&1
fi
set -e
echo ""
@ -110,11 +108,8 @@ fi
echo ""
# check if mount is active
mount=$(mount | grep $mountpath)
if [ $? -eq 0 ]; then
echo "Mount information:"
echo $mount
else
echo "Mount information:"
if ! mount | grep $mountpath; then
echo "$mountpath not mounted"
exit 1
fi
@ -123,12 +118,12 @@ fi
df -h
# get raid status
if [ $raid_level -ge 0 ]; then
if [ "$raid_level" -ge 0 ]; then
echo ""
if [ $filesystem == "btrfs" ]; then
if [ "$filesystem" == "btrfs" ]; then
echo "btrfs device status:"
for disk in "${data_disks[@]}"; do
btrfs device stats ${disk}1
btrfs device stats "${disk}"1
done
echo ""
echo "btrfs filesystem:"
@ -139,14 +134,13 @@ if [ $raid_level -ge 0 ]; then
cat /proc/mdstat
echo ""
# find md target
target=($(find /dev/md* -maxdepth 0 -type b))
mapfile -t target < <(find /dev/md* -maxdepth 0 -type b)
if [ ${#target[@]} -ne 1 ]; then
echo "Could not determine md target"
exit 1
fi
target=${target[0]}
echo "mdadm detail:"
mdadm --detail $target
mdadm --detail "${target[0]}"
fi
fi

Просмотреть файл

@ -15,14 +15,14 @@ block_singularity=${cip[1]}
log DEBUG "Block for Docker images: $block_docker"
log DEBUG "Block for Singularity images: $block_singularity"
if [ ! -z $block_docker ]; then
if [ ! -z "$block_docker" ]; then
log INFO "blocking until Docker images ready: $block_docker"
IFS=',' read -ra RES <<< "$block_docker"
declare -a missing
while :
do
for image in "${RES[@]}"; do
if [ -z "$(docker images -q $image 2>/dev/null)" ]; then
if [ -z "$(docker images -q "$image" 2>/dev/null)" ]; then
missing=("${missing[@]}" "$image")
fi
done
@ -36,7 +36,7 @@ if [ ! -z $block_docker ]; then
done
fi
if [ ! -z $block_singularity ]; then
if [ ! -z "$block_singularity" ]; then
log INFO "blocking until Singularity images ready: $block_singularity"
log DEBUG "Singularity cache dir: ${SINGULARITY_CACHEDIR}"
IFS=',' read -ra RES <<< "$block_singularity"