azurehpc/libexec/install_helper.sh

393 строки
12 KiB
Bash

#!/bin/bash
# This file only provides function definitions and one constant, so it has to
# be sourced. -ef is true when both paths name the same file, which is the case
# exactly when the file is being executed directly.
if [[ "${BASH_SOURCE[0]}" -ef "$0" ]]; then
    echo "Error: this script should be sourced and not executed"
    exit 1
fi

# constants
# Passed as the -p option to every generated pssh / prsync / pscp.pssh command.
pssh_parallelism=50
# Emit the step-0 bootstrap script <tmp_dir>/install/00_install_node_setup.sh.
#   $1 - staging directory; its name is also written into the generated script
#   $2 - public key path, copied to ~/.ssh/id_rsa.pub by the generated script
#   $3 - private key path, copied to ~/.ssh/id_rsa by the generated script
# Reads the global pssh_parallelism.
#
# The generated script accepts an optional tag as its own $1. With a tag it
# targets hostlists/tags/<tag> and skips the package and key setup; without one
# it performs that setup and targets hostlists/linux. In both cases it then
# pushes the staging directory and ~/.ssh to the listed hosts and appends the
# PSSH_NODENUM / PSSH_HOST pass-through settings to sshd_config and sudoers.
create_jumpbox_setup_script() {
    local stage_dir="$1"
    local pub_key="$2"
    local priv_key="$3"
    local step_log=install/00_install_node_setup.log
    local target="$stage_dir/install/00_install_node_setup.sh"

    # The delimiter is unquoted: plain $names are substituted right now, while
    # the backslash-escaped \$ forms survive into the generated script.
    cat > "$target" <<OUTER_EOF
#!/bin/bash
# expecting to be in $stage_dir
cd "\$( dirname "\${BASH_SOURCE[0]}" )/.."
tag=linux
if [ "\$1" != "" ]; then
tag=tags/\$1
else
sudo yum install -y epel-release > $step_log 2>&1
sudo yum install -y pssh nc >> $step_log 2>&1
# setting up keys
cat <<EOF > ~/.ssh/config
Host *
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
LogLevel ERROR
EOF
cp $pub_key ~/.ssh/id_rsa.pub
cp $priv_key ~/.ssh/id_rsa
chmod 600 ~/.ssh/id_rsa
chmod 644 ~/.ssh/config
chmod 644 ~/.ssh/id_rsa.pub
fi
prsync -p $pssh_parallelism -a -h hostlists/\$tag ~/$stage_dir ~ >> $step_log 2>&1
prsync -p $pssh_parallelism -a -h hostlists/\$tag ~/.ssh ~ >> $step_log 2>&1
pssh -p $pssh_parallelism -t 0 -i -h hostlists/\$tag 'echo "AcceptEnv PSSH_NODENUM PSSH_HOST" | sudo tee -a /etc/ssh/sshd_config' >> $step_log 2>&1
pssh -p $pssh_parallelism -t 0 -i -h hostlists/\$tag 'sudo systemctl restart sshd' >> $step_log 2>&1
pssh -p $pssh_parallelism -t 0 -i -h hostlists/\$tag "echo 'Defaults env_keep += \"PSSH_NODENUM PSSH_HOST\"' | sudo tee -a /etc/sudoers" >> $step_log 2>&1
OUTER_EOF
}
#######################################
# Generate the jumpbox-side wrapper for one install step of type
# "jumpbox_script" as <tmp_dir>/install/<NN>_<script>.
# Globals:
#   pssh_parallelism (read)
#   install_script, install_tag, install_reboot, install_sudo, arg
#     (assigned through read_value, left global as before)
# Arguments:
#   $1 - JSON config file, queried with jq
#   $2 - staging directory; its name is also written into the generated script
#   $3 - 1-based step number; the step is described by .install[$3 - 1]
# Outputs:
#   Writes the generated script. Nothing is executed here.
#
# The generated script changes to the parent of its own directory, uses its
# own $1 (or the configured tag) to pick hostlists/tags/<tag>, copies the
# files listed under .copy with pscp.pssh, runs scripts/<script> with the
# configured args through pssh (prefixed with sudo when .sudo is true) and,
# when .reboot is true, reboots the hosts and waits for port 22 to answer.
#######################################
function create_jumpbox_script()
{
    # NOTE(review): read_value is called without a config path, so it
    # presumably picks config_file up from this scope - keep the local's name
    # unless read_value is confirmed not to need it.
    local config_file=$1
    local tmp_dir=$2
    local step=$3
    local idx=$((step - 1))
    local step_prefix
    printf -v step_prefix '%02d' "$step"

    read_value install_script ".install[$idx].script"
    local install_sh="$tmp_dir/install/${step_prefix}_${install_script}"
    local log_file="install/${step_prefix}_${install_script%.sh}.log"
    read_value install_tag ".install[$idx].tag"

    # Unquoted delimiter: $tmp_dir and $install_tag are filled in now, the
    # \$-escaped expansions are evaluated when the generated script runs.
    cat > "$install_sh" <<OUTER_EOF
#!/bin/bash
# expecting to be in $tmp_dir
cd "\$( dirname "\${BASH_SOURCE[0]}" )/.."
tag=\${1:-$install_tag}
OUTER_EOF

    read_value install_reboot ".install[$idx].reboot" false
    read_value install_sudo ".install[$idx].sudo" false

    local install_nfiles install_script_arg_count
    install_nfiles=$(jq -r ".install[$idx].copy | length" "$config_file")
    install_script_arg_count=$(jq -r ".install[$idx].args | length" "$config_file")

    # Each argument is wrapped in single quotes; an embedded single quote is
    # rewritten as '\'' so it cannot terminate the quoting early.
    local install_command_line=$install_script
    local single_quote="'"
    local escaped_quote="'\\''"
    local escaped_arg n
    for ((n = 0; n < ${install_script_arg_count:-0}; n++)); do
        read_value arg ".install[$idx].args[$n]"
        escaped_arg=${arg//$single_quote/$escaped_quote}
        install_command_line+=" '$escaped_arg'"
    done

    if [[ "$install_nfiles" != "0" ]]; then
        printf '%s\n' "## copying files" >> "$install_sh"
        local f
        # Word splitting is intentional: jq emits the list tab-separated.
        for f in $(jq -r ".install[$idx].copy | @tsv" "$config_file"); do
            printf '%s\n' "pscp.pssh -p $pssh_parallelism -h hostlists/tags/\$tag $f \$(pwd) >> $log_file 2>&1" >> "$install_sh"
        done
    fi

    local sudo_prefix=
    if [[ "$install_sudo" == "true" ]]; then
        sudo_prefix=sudo
    fi

    # can run in parallel with pssh
    printf '%s\n' "pssh -p $pssh_parallelism -t 0 -i -h hostlists/tags/\$tag \"cd $tmp_dir; $sudo_prefix scripts/$install_command_line\" >> $log_file 2>&1" >> "$install_sh"

    if [[ "$install_reboot" == "true" ]]; then
        # The wait loop retries the port probe instead of trusting a single
        # attempt, and fails the step when a host never answers, so later
        # steps do not run against nodes that are still down.
        cat >> "$install_sh" <<EOF
pssh -p $pssh_parallelism -t 0 -i -h hostlists/tags/\$tag "sudo reboot" >> $log_file 2>&1
echo " Waiting for nodes to come back"
sleep 10
for h in \$(<hostlists/tags/\$tag); do
    tries=0
    until nc -z -w 10 \$h 22 >> $log_file 2>&1; do
        tries=\$((tries + 1))
        if [ "\$tries" -ge 60 ]; then
            echo " Error: \$h did not come back after reboot"
            exit 1
        fi
        sleep 10
    done
    echo " \$h rebooted"
done
sleep 10
EOF
    fi
}
#######################################
# Generate the wrapper for one install step of type "local_script" as
# <tmp_dir>/install/<NN>_<script>.
# Globals:
#   install_script, arg (assigned through read_value, left global as before)
# Arguments:
#   $1 - JSON config file, queried with jq
#   $2 - staging directory (may contain whitespace)
#   $3 - 1-based step number; the step is described by .install[$3 - 1]
# Outputs:
#   Writes the generated script. Nothing is executed here.
#
# The generated script changes to the parent of its own directory and runs
# scripts/<script> with the configured arguments, appending stdout and stderr
# to install/<NN>_<script without .sh>.log.
#######################################
function create_local_script()
{
    # NOTE(review): read_value is called without a config path, so it
    # presumably picks config_file up from this scope - keep the local's name
    # unless read_value is confirmed not to need it.
    local config_file=$1
    local tmp_dir=$2
    local step=$3
    local idx=$((step - 1))
    local step_prefix
    printf -v step_prefix '%02d' "$step"

    read_value install_script ".install[$idx].script"
    local install_sh="$tmp_dir/install/${step_prefix}_${install_script}"
    local log_file="install/${step_prefix}_${install_script%.sh}.log"

    # Unquoted delimiter: $tmp_dir is filled in now, the \$-escaped expansions
    # are evaluated when the generated script runs.
    cat > "$install_sh" <<OUTER_EOF
#!/bin/bash
# expecting to be in $tmp_dir
cd "\$( dirname "\${BASH_SOURCE[0]}" )/.."
OUTER_EOF

    local install_script_arg_count
    install_script_arg_count=$(jq -r ".install[$idx].args | length" "$config_file")

    # Each argument is wrapped in single quotes; an embedded single quote is
    # rewritten as '\'' so the generated command line stays well-formed.
    local install_command_line=$install_script
    local single_quote="'"
    local escaped_quote="'\\''"
    local escaped_arg n
    for ((n = 0; n < ${install_script_arg_count:-0}; n++)); do
        read_value arg ".install[$idx].args[$n]"
        escaped_arg=${arg//$single_quote/$escaped_quote}
        install_command_line+=" '$escaped_arg'"
    done

    printf '%s\n' "scripts/$install_command_line >> $log_file 2>&1" >> "$install_sh"
}
#######################################
# Build the whole staging directory: one generated script per install step
# (step 0 is the jumpbox setup script), the ssh key pair and the script
# trees, then push the directory to the jumpbox if any step needs it.
# Globals:
#   azhpc_dir (read) - its scripts/ directory is copied into the staging dir
#   install_script_type (assigned through read_value, left global as before)
# Arguments:
#   $1 - JSON config file, queried with jq
#   $2 - staging directory
#   $3 - ssh public key path
#   $4 - ssh private key path
#   $5 - extra ssh options, spliced into the rsync transport command
#   $6 - admin user on the jumpbox
#   $7 - directory with user scripts, merged over <staging>/scripts
#   $8 - jumpbox host name
# Outputs:
#   Prints a message for unrecognised step types and before the rsync.
#######################################
function create_install_scripts()
{
    # function args
    # NOTE(review): read_value is called without a config path, so it
    # presumably picks config_file up from this scope - keep the local's name.
    local config_file="$1"
    local tmp_dir="$2"
    local ssh_public_key="$3"
    local ssh_private_key="$4"
    local ssh_args="$5"
    local admin_user="$6"
    local local_script_dir="$7"
    local fqdn="$8"

    local is_jumpbox_required=0
    local nsteps
    nsteps=$(jq -r ".install | length" "$config_file")

    mkdir -p "$tmp_dir/install"

    local step idx
    # $nsteps stays unquoted so an empty value yields no iterations at all.
    for step in $(seq 0 $nsteps); do
        if [[ "$step" == "0" ]]; then
            create_jumpbox_setup_script "$tmp_dir" "$ssh_public_key" "$ssh_private_key"
            continue
        fi
        idx=$((step - 1))
        # steps without an explicit type are jumpbox scripts
        read_value install_script_type ".install[$idx].type" jumpbox_script
        case "$install_script_type" in
            jumpbox_script)
                is_jumpbox_required=1
                create_jumpbox_script "$config_file" "$tmp_dir" "$step"
                ;;
            local_script)
                create_local_script "$config_file" "$tmp_dir" "$step"
                ;;
            *)
                echo "Error: unrecognised script type - $install_script_type"
                ;;
        esac
    done

    chmod +x "$tmp_dir"/install/*.sh
    cp "$ssh_private_key" "$tmp_dir"
    cp "$ssh_public_key" "$tmp_dir"
    cp -r "$azhpc_dir/scripts" "$tmp_dir"
    # User scripts are optional. The directory test also keeps an empty
    # local_script_dir from turning the glob into /* (the filesystem root).
    if [[ -d "$local_script_dir" ]]; then
        # best effort: an empty directory leaves the glob unmatched
        cp -r "$local_script_dir"/* "$tmp_dir/scripts/." 2>/dev/null
    fi

    if [[ "$is_jumpbox_required" == "1" ]]; then
        echo "rsync $tmp_dir to $fqdn"
        rsync -a -e "ssh $ssh_args -i $ssh_private_key" "$tmp_dir" "$admin_user@$fqdn:."
    fi
}
# Run the generated install scripts in step order.
#   $1 - JSON config file, queried with jq
#   $2 - staging directory holding install/ and hostlists/
#   $3 - ssh private key path
#   $4 - extra ssh options (deliberately word-split on use)
#   $5 - admin user on the jumpbox
#   $6 - user script directory (accepted but not used in this function)
#   $7 - jumpbox host name
#   $8 - name of a resized vmss, or empty for a full run
# Jumpbox steps are started over ssh on the jumpbox, local steps are executed
# directly. The first failing step stops the loop; logs are still fetched from
# the jumpbox before error is called. With a resized vmss only jumpbox steps
# whose tag is listed for that resource are run, against "<vmss>.added".
# idx, step, install_script, install_sh, start_time, host_tag, nhosts,
# exit_code, resource_has_tag and the variables assigned through read_value
# are left global, as before.
run_install_scripts() {
    # function args
    # NOTE(review): read_value is called without a config path, so it
    # presumably picks config_file up from this scope - keep the local's name.
    local config_file="$1"
    local tmp_dir="$2"
    local ssh_private_key="$3"
    local ssh_args="$4"
    local admin_user="$5"
    local local_script_dir="$6"
    local fqdn="$7"
    local vmss_resized="$8"

    local run_tag=
    if [[ -n "$vmss_resized" ]]; then
        run_tag=$vmss_resized.added
    fi

    local nsteps
    nsteps=$(jq -r ".install | length" "$config_file")

    # First pass: does any step have to run on the jumpbox?
    local is_jumpbox_required=0
    for idx in $(seq 0 $((nsteps - 1))); do
        read_value install_script_type ".install[$idx].type" jumpbox_script
        if [[ "$install_script_type" == "jumpbox_script" ]]; then
            is_jumpbox_required=1
        fi
    done

    local script_error=0
    local attempted
    for step in $(seq 0 $nsteps); do
        # skip jumpbox setup if no jumpbox scripts are required
        if [[ "$step" == "0" && "$is_jumpbox_required" == "0" ]]; then
            continue
        fi

        idx=$((step - 1))
        if [[ "$step" == "0" ]]; then
            install_script=install_node_setup.sh
            install_script_type=jumpbox_script
        else
            read_value install_script ".install[$idx].script"
            read_value install_script_type ".install[$idx].type" jumpbox_script
        fi

        # Resize run: step 0 always applies, other steps only when they are
        # jumpbox steps tagged for the resized resource.
        if [[ -n "$vmss_resized" && "$idx" != "-1" ]]; then
            if [[ "$install_script_type" != "jumpbox_script" ]]; then
                status "skipping step $step as it doesn't apply to $vmss_resized"
                continue
            fi
            read_value install_tag ".install[$idx].tag"
            resource_has_tag=$(jq ".resources.$vmss_resized.tags | index(\"$install_tag\")" "$config_file")
            if [[ "$resource_has_tag" == "null" ]]; then
                status "skipping step $step as it doesn't apply to $vmss_resized"
                continue
            fi
        fi

        install_sh=$tmp_dir/install/$(printf %02d "$step")_$install_script
        echo "Step $step : $install_script ($install_script_type)"
        start_time=$SECONDS

        # attempted records whether a script was actually started, so the
        # shared failure check below only looks at a fresh exit_code.
        attempted=0
        case "$install_script_type" in
            jumpbox_script)
                host_tag=$run_tag
                if [[ -z "$host_tag" ]]; then
                    if [[ "$idx" == "-1" ]]; then
                        # step 0 uses hostlists/linux, reached from tags/
                        host_tag=../linux
                    else
                        read_value host_tag ".install[$idx].tag"
                    fi
                fi
                nhosts=$(wc -l < "$tmp_dir/hostlists/tags/$host_tag")
                if [[ "$nhosts" == "0" ]]; then
                    status "skipping step $step as hostlist is empty ($host_tag)"
                else
                    # $ssh_args and $run_tag stay unquoted on purpose: the
                    # first must split into options, the second must vanish
                    # when empty.
                    ssh $ssh_args -i "$ssh_private_key" "$admin_user@$fqdn" "$install_sh" $run_tag
                    exit_code=$?
                    attempted=1
                fi
                ;;
            local_script)
                "$install_sh"
                exit_code=$?
                attempted=1
                ;;
            *)
                echo "Error: unrecognised script type - $install_script_type"
                ;;
        esac

        if (( attempted && exit_code != 0 )); then
            echo "Error: ($exit_code) Errors while running $install_sh"
            script_error=1
            break
        fi
        echo " duration: $((SECONDS - start_time)) seconds"
    done

    if [[ "$is_jumpbox_required" == "1" ]]; then
        # the *.log pattern is meant for the remote side
        rsync -a -e "ssh $ssh_args -i $ssh_private_key" "$admin_user@$fqdn:$tmp_dir/install/*.log" "$tmp_dir/install/."
    fi
    if (( script_error != 0 )); then
        error "There were errors while running scripts, exiting"
    fi
}
#######################################
# Rebuild <tmp_dir>/hostlists from the resources in the config file.
# For every "vmss" or "vm" resource a file hostlists/<resource> is written,
# and its content is appended to hostlists/tags/<tag> for each of its tags.
# hostlists/linux collects every vmss plus the vm resources that have no
# password configured. When .vnet.dns_domain is set, that domain is appended
# to every line of every hostlist file.
# Globals:
#   resource_group (read) - passed to "az vmss list-instances"
#   resource_type, resource_password, resource_instances, dns_prefix
#     (assigned through read_value, left global as before)
# Arguments:
#   $1 - JSON config file, queried with jq
#   $2 - staging directory; must not be empty
# Returns:
#   1 when the staging directory argument is empty.
#######################################
function build_hostlists
{
    # function args
    # NOTE(review): read_value is called without a config path, so it
    # presumably picks config_file up from this scope - keep the local's name.
    local config_file="$1"
    local tmp_dir="$2"

    # An empty tmp_dir would point the rm -rf below at /hostlists.
    if [[ -z "$tmp_dir" ]]; then
        echo "Error: build_hostlists needs a staging directory" >&2
        return 1
    fi

    local hostlist_dir="$tmp_dir/hostlists"
    rm -rf -- "$hostlist_dir"
    mkdir -p "$hostlist_dir/tags"

    local resource_name tag i
    # Word splitting is intentional: jq emits the lists tab-separated.
    for resource_name in $(jq -r ".resources | keys | @tsv" "$config_file"); do
        read_value resource_type ".resources.$resource_name.type"
        if [[ "$resource_type" == "vmss" ]]; then
            az vmss list-instances \
                --resource-group "$resource_group" \
                --name "$resource_name" \
                --query '[].osProfile.computerName' \
                --output tsv \
                > "$hostlist_dir/$resource_name"
            for tag in $(jq -r ".resources.$resource_name.tags | @tsv" "$config_file"); do
                cat "$hostlist_dir/$resource_name" >> "$hostlist_dir/tags/$tag"
            done
            cat "$hostlist_dir/$resource_name" >> "$hostlist_dir/linux"
        elif [[ "$resource_type" == "vm" ]]; then
            read_value resource_password ".resources.$resource_name.password" "<no-password>"
            read_value resource_instances ".resources.$resource_name.instances" "1"
            if [[ "$resource_instances" == "1" ]]; then
                printf '%s\n' "$resource_name" > "$hostlist_dir/$resource_name"
            else
                # seq -w pads the instance number to equal width
                for i in $(seq -w "$resource_instances"); do
                    printf '%s\n' "${resource_name}${i}"
                done > "$hostlist_dir/$resource_name"
            fi
            for tag in $(jq -r ".resources.$resource_name.tags | @tsv" "$config_file"); do
                cat "$hostlist_dir/$resource_name" >> "$hostlist_dir/tags/$tag"
            done
            # only VMs without a configured password join the linux list
            if [[ "$resource_password" == "<no-password>" ]]; then
                cat "$hostlist_dir/$resource_name" >> "$hostlist_dir/linux"
            fi
        fi
    done

    read_value dns_prefix ".vnet.dns_domain" NOT-SET
    if [[ "$dns_prefix" != "NOT-SET" ]]; then
        # -exec ... {} + runs sed only when at least one file exists and
        # hands over the paths unsplit, unlike piping find into xargs.
        find "$hostlist_dir" -type f -exec sed -i "s/\$/.$dns_prefix/g" {} +
    fi
}