azurehpc/pyazhpc/azhpc.py

640 строки
20 KiB
Python

import argparse
import datetime
import json
import os
import shlex
import shutil
import sys
import textwrap
import time

from cryptography.hazmat.backends import default_backend as crypto_default_backend
from cryptography.hazmat.primitives import serialization as crypto_serialization
from cryptography.hazmat.primitives.asymmetric import rsa

import arm
import azconfig
import azinstall
import azlog
import azutil
# Module-level logger used by the sub-command handlers defined in this file.
log = azlog.getLogger(__name__)
def do_preprocess(args):
    """Print the preprocessed config as indented JSON.

    Opens the file named by ``args.config_file`` with
    ``azconfig.ConfigFile`` and writes the result of its ``preprocess()``
    call to stdout, formatted with a 4-space indent.
    """
    log.debug("reading config file ({})".format(args.config_file))

    cfg = azconfig.ConfigFile()
    cfg.open(args.config_file)

    expanded = cfg.preprocess()
    rendered = json.dumps(expanded, indent=4)
    print(rendered)
def do_get(args):
    """Print ``<path> = <value>`` for the config path given in ``args.path``.

    The value is obtained from ``read_value()`` on the config file named by
    ``args.config_file``.
    """
    cfg = azconfig.ConfigFile()
    cfg.open(args.config_file)

    value = cfg.read_value(args.path)
    print(f"{args.path} = {value}")
def __add_unset_vars(vset, config_file):
    """Add the unset variables of one config file to ``vset`` (in place).

    ``vset`` is updated with whatever ``get_unset_vars()`` returns for the
    config opened from ``config_file``.
    """
    log.debug(f"looking for vars in {config_file}")

    cfg = azconfig.ConfigFile()
    cfg.open(config_file)

    unset = cfg.get_unset_vars()
    vset.update(unset)
def do_init(args):
    """Initialise a project from a config file or a directory of configs.

    With ``args.show`` set, the unset variables found in the config file (or
    in every ``.json`` file below the config directory) are printed together
    with an example ``--vars`` string, and nothing is copied.

    Otherwise the config file/directory is copied into ``args.dir`` and the
    ``VAR=VAL`` pairs from ``args.vars`` are substituted into every ``.json``
    file found below ``args.dir``.

    Exits with status 1 when the config path does not exist, when no output
    directory was given, or when a ``--vars`` entry is not of the form
    ``VAR=VAL``.
    """
    if not os.path.exists(args.config_file):
        log.error("config file/dir does not exist")
        sys.exit(1)

    if args.show:
        vlist = set()
        if os.path.isfile(args.config_file):
            __add_unset_vars(vlist, args.config_file)
        else:
            for root, _dirs, files in os.walk(args.config_file):
                for name in files:
                    if os.path.splitext(name)[1] == ".json":
                        __add_unset_vars(vlist, os.path.join(root, name))
        # Sort so the output is stable between runs (set order is arbitrary).
        names = sorted(vlist)
        print("Variables to set: " + ",".join(names))
        print()
        print("Example string for '--vars' argument (add values):")
        print(" --vars " + ",".join([x + "=" for x in names]))
        return

    if args.dir is None:
        # os.makedirs(None) would otherwise fail with an opaque TypeError.
        log.error("missing output directory (use --dir)")
        sys.exit(1)

    # Parse the replacements before touching the filesystem so that a
    # malformed --vars string does not leave a half-initialised directory.
    vset = {}
    if args.vars:
        for vp in args.vars.split(","):
            if not vp:
                continue
            # Split on the first '=' only: values may themselves contain '='.
            vk, sep, vv = vp.partition("=")
            if not sep or not vk:
                log.error(f"invalid variable assignment '{vp}' (expected VAR=VAL)")
                sys.exit(1)
            vset[vk] = vv

    log.debug("creating directory")
    os.makedirs(args.dir, exist_ok=True)

    if os.path.isfile(args.config_file):
        shutil.copy(args.config_file, args.dir)
    elif os.path.isdir(args.config_file):
        for root, dirs, files in os.walk(args.config_file):
            # Path of the directory being visited, relative to the source.
            rel_root = os.path.relpath(root, args.config_file)
            for d in dirs:
                newdir = os.path.normpath(os.path.join(args.dir, rel_root, d))
                log.debug("creating directory: " + newdir)
                os.makedirs(newdir, exist_ok=True)
            for f in files:
                oldfile = os.path.join(root, f)
                newfile = os.path.normpath(os.path.join(args.dir, rel_root, f))
                log.debug(f"copying file: {oldfile} -> {newfile}")
                shutil.copy(oldfile, newfile)

    # Substitute the variables in every copied JSON config.
    for root, _dirs, files in os.walk(args.dir):
        for name in files:
            if os.path.splitext(name)[1] == ".json":
                path = os.path.join(root, name)
                config = azconfig.ConfigFile()
                config.open(path)
                config.replace_vars(vset)
                config.save(path)
def do_scp(args):
    """Replace the current process with ``scp`` proxied through the jumpbox.

    The admin user and the ``install_from`` resource are read from the config
    file; the key file ``<admin_user>_id_rsa`` is used both for ``scp`` and
    for the ``ssh`` ProxyCommand that tunnels through the address returned by
    ``get_install_from_destination()``. The remaining command-line arguments
    (``args.args``, with an optional leading ``--`` removed) are passed to
    ``scp`` unchanged.

    Exits with status 1 when ``install_from`` is not set. Does not return on
    success because ``os.execvp`` replaces the process.
    """
    log.debug("reading config file ({})".format(args.config_file))
    c = azconfig.ConfigFile()
    c.open(args.config_file)

    adminuser = c.read_value("admin_user")
    sshkey = "{}_id_rsa".format(adminuser)
    # TODO: check ssh key exists

    jumpbox = c.read_value("install_from")
    if jumpbox is None:
        log.error("Missing 'install_from' property")
        sys.exit(1)
    fqdn = c.get_install_from_destination()

    # A leading "--" only separates azhpc options from the scp arguments.
    scp_args = list(args.args or [])
    if scp_args and scp_args[0] == "--":
        scp_args = scp_args[1:]

    scp_exe = "scp"
    proxy_command = (
        "ProxyCommand=ssh -q -o StrictHostKeyChecking=no "
        f"-o UserKnownHostsFile=/dev/null -i {sshkey} -W %h:%p {adminuser}@{fqdn}"
    )
    scp_cmd = [
        scp_exe, "-q",
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        "-i", sshkey,
        "-o", proxy_command,
    ] + scp_args

    log.debug(" ".join([f"'{a}'" for a in scp_cmd]))
    os.execvp(scp_exe, scp_cmd)
def do_connect(args):
    """Replace the current process with an ``ssh`` session to a resource.

    The target is resolved from ``args.resource``:

    * type ``vm`` with more than one instance: the first instance
      (``<resource>0001``);
    * type ``vmss``: the first entry returned by
      ``azutil.get_vmss_instances``;
    * no type in the config: ``args.resource`` is used as a plain hostname.

    When the resource is the ``install_from`` jumpbox, the connection goes
    straight to the address from ``get_install_from_destination()``;
    otherwise it is tunnelled through that address with an ssh ProxyCommand.
    Any extra arguments (``args.args``) are joined with spaces and passed as
    the remote command.

    Exits with status 1 when ``install_from`` is not set, the vmss has no
    instances, or the resource type is unknown. Does not return on success
    because ``os.execvp`` replaces the process.
    """
    log.debug("reading config file ({})".format(args.config_file))
    c = azconfig.ConfigFile()
    c.open(args.config_file)

    adminuser = c.read_value("admin_user")
    ssh_private_key = "{}_id_rsa".format(adminuser)
    # TODO: check ssh key exists

    sshuser = adminuser if args.user is None else args.user

    jumpbox = c.read_value("install_from")
    if jumpbox is None:
        log.error("Missing 'install_from' property")
        sys.exit(1)

    resource_group = c.read_value("resource_group")
    fqdn = c.get_install_from_destination()

    log.debug("Getting resource name")
    rtype = c.read_value(f"resources.{args.resource}.type", "hostname")
    target = args.resource

    if rtype == "vm":
        instances = c.read_value(f"resources.{args.resource}.instances", 1)
        if instances > 1:
            target = f"{args.resource}{1:04}"
            log.info(f"Multiple instances of {args.resource}, connecting to {target}")
    elif rtype == "vmss":
        vmssnodes = azutil.get_vmss_instances(resource_group, args.resource)
        if not vmssnodes:
            log.error("There are no instances in the vmss")
            sys.exit(1)
        target = vmssnodes[0]
        if len(vmssnodes) > 1:
            log.info(f"Multiple instances of {args.resource}, connecting to {target}")
    elif rtype != "hostname":
        # Logged as an error (not debug) so the reason for the non-zero exit
        # is visible without --debug.
        log.error(f"Unknown resource type - {rtype}")
        sys.exit(1)

    ssh_exe = "ssh"
    # Options shared by the direct and the proxied connection.
    ssh_args = [
        ssh_exe, "-t", "-q",
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        "-i", ssh_private_key,
    ]

    if args.resource == jumpbox:
        log.info("logging directly into {}".format(fqdn))
        ssh_args.append(f"{sshuser}@{fqdn}")
    else:
        log.info("logging in to {} (via {})".format(target, fqdn))
        ssh_args += [
            "-o", f"ProxyCommand=ssh -i {ssh_private_key} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -W %h:%p {sshuser}@{fqdn}",
            f"{sshuser}@{target}",
        ]

    # The remote command, if any, is passed to ssh as one argument.
    if args.args:
        ssh_args.append(" ".join(args.args))

    log.debug(" ".join(ssh_args))
    os.execvp(ssh_exe, ssh_args)
def _exec_command(fqdn, sshuser, sshkey, cmdline):
    """Replace the current process with ``ssh`` running ``cmdline`` on ``fqdn``.

    Connects as ``sshuser`` with the identity file ``sshkey``; host key
    checking is disabled. ``cmdline`` is handed to ssh as a single argument.
    Does not return on success because ``os.execvp`` replaces the process.
    """
    program = "ssh"

    argv = [program, "-q"]
    argv += ["-o", "StrictHostKeyChecking=no"]
    argv += ["-o", "UserKnownHostsFile=/dev/null"]
    argv += ["-i", sshkey]
    argv.append(f"{sshuser}@{fqdn}")
    argv.append(cmdline)

    log.debug(" ".join(argv))
    os.execvp(program, argv)
def _install_tmpdir(config_file):
    """Return the install directory name derived from the config file name.

    Only a trailing ``.json`` suffix is removed. ``str.strip(".json")`` must
    not be used here: it strips *characters* from both ends, so
    ``"nfs.json"`` would become ``"f"``. ``do_build`` and ``do_status`` both
    use this helper so that they always agree on the directory name.

    NOTE(review): a deployment built before this fix may have its install
    directory under the old (character-stripped) name; rebuilding recreates
    it under the new name.
    """
    name = os.path.basename(config_file)
    suffix = ".json"
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return "azhpc_install_" + name


def do_status(args):
    """Show hostname and uptime of the hosts in the linux hostlist.

    Runs ``pssh`` on the ``install_from`` host against the
    ``<tmpdir>/hostlists/linux`` host file and filters out the lines
    containing ``SUCCESS``.

    Exits with status 1 when ``install_from`` is not set. Does not return on
    success because ``_exec_command`` replaces the process.
    """
    log.debug("reading config file ({})".format(args.config_file))
    c = azconfig.ConfigFile()
    c.open(args.config_file)

    adminuser = c.read_value("admin_user")
    ssh_private_key = "{}_id_rsa".format(adminuser)

    jumpbox = c.read_value("install_from")
    if jumpbox is None:
        log.error("Missing 'install_from' property")
        sys.exit(1)
    fqdn = c.get_install_from_destination()

    tmpdir = _install_tmpdir(args.config_file)
    # Quote the path so a config name with spaces or shell metacharacters
    # survives the remote shell; safe names are left unchanged.
    hostfile = shlex.quote(f"{tmpdir}/hostlists/linux")
    status_cmd = (
        f"pssh -h {hostfile} -i -t 0 "
        "'printf \"%-20s%s\n\" \"$(hostname)\" \"$(uptime)\"'"
        " | grep -v SUCCESS"
    )
    _exec_command(fqdn, adminuser, ssh_private_key, status_cmd)


def do_run(args):
    """Run a command on the selected resources with ``pssh`` via the jumpbox.

    ``args.nodes`` is a whitespace-separated list of resource names. A ``vm``
    resource expands to its instance names (``<name>0001`` ... when it has
    more than one instance), a ``vmss`` resource to the names returned by
    ``azutil.get_vmss_instances``. When no host is selected the command runs
    on the ``install_from`` resource itself.

    Exits with status 1 when ``install_from`` is not set or a named resource
    has no type in the config. Does not return on success because
    ``_exec_command`` replaces the process.
    """
    log.debug("reading config file ({})".format(args.config_file))
    c = azconfig.ConfigFile()
    c.open(args.config_file)

    adminuser = c.read_value("admin_user")
    ssh_private_key = "{}_id_rsa".format(adminuser)
    # TODO: check ssh key exists

    sshuser = adminuser if args.user is None else args.user

    jumpbox = c.read_value("install_from")
    if jumpbox is None:
        log.error("Missing 'install_from' property")
        sys.exit(1)
    fqdn = c.get_install_from_destination()

    hosts = []
    if args.nodes:
        # split() without an argument ignores repeated/leading/trailing
        # spaces, which split(" ") would turn into empty resource names.
        for r in args.nodes.split():
            rtype = c.read_value(f"resources.{r}.type")
            if not rtype:
                log.error(f"resource {r} does not exist in config")
                sys.exit(1)
            if rtype == "vm":
                instances = c.read_value(f"resources.{r}.instances", 1)
                if instances == 1:
                    hosts.append(r)
                else:
                    hosts += [f"{r}{n:04}" for n in range(1, instances + 1)]
            elif rtype == "vmss":
                hosts += azutil.get_vmss_instances(c.read_value("resource_group"), r)

    if not hosts:
        hosts.append(jumpbox)

    hostlist = " ".join(hosts)
    cmd = " ".join(args.args)
    # shlex.quote keeps commands containing single quotes intact; wrapping
    # the text in bare '...' would terminate the quoting early.
    _exec_command(
        fqdn, sshuser, ssh_private_key,
        f"pssh -H {shlex.quote(hostlist)} -i -t 0 {shlex.quote(cmd)}"
    )


def _ensure_ssh_keypair(private_key_file, public_key_file):
    """Create a 2048-bit RSA key pair unless both key files already exist."""
    if os.path.exists(private_key_file) and os.path.exists(public_key_file):
        return

    key = rsa.generate_private_key(
        backend=crypto_default_backend(),
        public_exponent=65537,
        key_size=2048
    )
    private_key = key.private_bytes(
        crypto_serialization.Encoding.PEM,
        crypto_serialization.PrivateFormat.TraditionalOpenSSL,
        crypto_serialization.NoEncryption())
    public_key = key.public_key().public_bytes(
        crypto_serialization.Encoding.OpenSSH,
        crypto_serialization.PublicFormat.OpenSSH
    )

    # Create the private key file with mode 0600 from the start so it is
    # never readable by others, not even briefly before the chmod.
    fd = os.open(private_key_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "wb") as f:
        # The mode given to os.open() is ignored for a pre-existing file.
        os.chmod(private_key_file, 0o600)
        f.write(private_key)
    with open(public_key_file, "wb") as f:
        os.chmod(public_key_file, 0o644)
        f.write(public_key + b'\n')


def _wait_for_deployment(resource_group, deployname):
    """Poll the deployment every 5s, printing per-resource status.

    Returns ``(success, res)`` where ``res`` is the last status list returned
    by ``azutil.get_deployment_status``. Polling stops once the list contains
    an entry without a ``targetResource``; ``success`` is False if such an
    entry has a ``provisioningState`` other than ``"Succeeded"``.
    """
    building = True
    success = True
    del_lines = 1
    res = []
    while building:
        time.sleep(5)
        res = azutil.get_deployment_status(resource_group, deployname)
        log.debug(res)

        # Move the cursor back up over the lines printed on the last pass.
        print("\033[F" * del_lines)
        del_lines = 1

        for i in res:
            props = i["properties"]
            status_code = props["statusCode"]
            target = props.get("targetResource", None)
            if target:
                resource_name = target["resourceName"]
                resource_type = target["resourceType"]
                del_lines += 1
                print(f"{resource_name:15} {resource_type:47} {status_code:15}")
            else:
                provisioning_state = props["provisioningState"]
                del_lines += 1
                building = False
                if provisioning_state != "Succeeded":
                    success = False
    return success, res


def _print_deployment_errors(res):
    """Print the error code, message and details of each failed resource."""
    wrapper = textwrap.TextWrapper(width=60)
    for i in res:
        props = i["properties"]
        target = props.get("targetResource", None)
        status_message = props.get("statusMessage", None)
        if not target or not isinstance(status_message, dict):
            continue
        error = status_message.get("error")
        if not isinstance(error, dict):
            continue

        resource_name = target["resourceName"]
        error_code = error.get("code", "")
        error_message = wrapper.wrap(text=str(error.get("message", "")))
        error_target = error.get("target", None)
        error_target_str = f"({error_target})" if error_target else ""

        print(f" Resource : {resource_name} - {error_code} {error_target_str}")
        # wrap() returns an empty list for an empty message.
        first_line = error_message[0] if error_message else ""
        print(f" Message : {first_line}")
        for line in error_message[1:]:
            print(f" {line}")

        # ARM error responses carry "details" as an array of error objects;
        # a single object is accepted as well.
        details = error.get("details")
        if isinstance(details, dict):
            details = [details]
        for detail in details or []:
            if not isinstance(detail, dict):
                continue
            details_code = detail.get("code", "")
            details_message = wrapper.wrap(text=str(detail.get("message", "")))
            print(f" Details : {details_code}")
            for line in details_message:
                print(f" {line}")


def do_build(args):
    """Deploy the config: generate the ARM template, deploy it and install.

    Steps:

    1. remove any existing local install directory for this config;
    2. create the ``<admin_user>_id_rsa`` key pair if it is missing;
    3. write the ARM template to ``args.output_template``;
    4. create the resource group (tagged with ``CreatedBy``, ``CreatedOn``
       and the config's ``resource_tags``) and deploy the template;
    5. poll the deployment until it finishes; on failure print the errors
       and exit with status 1;
    6. generate host lists and install scripts and, when ``install_from`` is
       set, run the install from that host.
    """
    log.debug(f"reading config file ({args.config_file})")

    tmpdir = _install_tmpdir(args.config_file)
    log.debug(f"tmpdir = {tmpdir}")
    if os.path.isdir(tmpdir):
        log.debug("removing existing tmp directory")
        shutil.rmtree(tmpdir)

    c = azconfig.ConfigFile()
    c.open(args.config_file)
    config = c.preprocess()

    adminuser = config["admin_user"]
    private_key_file = adminuser + "_id_rsa"
    public_key_file = adminuser + "_id_rsa.pub"
    _ensure_ssh_keypair(private_key_file, public_key_file)

    tpl = arm.ArmTemplate()
    tpl.read(config)

    log.info("writing out arm template to " + args.output_template)
    with open(args.output_template, "w") as f:
        f.write(tpl.to_json())

    log.info("creating resource group " + config["resource_group"])
    resource_tags = config.get("resource_tags", {})
    tags = [
        {
            "key": "CreatedBy",
            "value": os.getenv("USER")
        },
        {
            "key": "CreatedOn",
            "value": datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        }
    ] + [{"key": name, "value": value} for name, value in resource_tags.items()]
    azutil.create_resource_group(
        config["resource_group"],
        config["location"],
        tags
    )

    log.info("deploying arm template")
    deployname = azutil.deploy(
        config["resource_group"],
        args.output_template
    )
    log.debug(f"deployment name: {deployname}")

    success, res = _wait_for_deployment(config["resource_group"], deployname)
    if success:
        log.info("Provisioning succeeded")
    else:
        log.error("Provisioning failed")
        _print_deployment_errors(res)
        sys.exit(1)

    log.info("building host lists")
    azinstall.generate_hostlists(config, tmpdir)
    log.info("building install scripts")
    azinstall.generate_install(config, tmpdir, adminuser, private_key_file, public_key_file)

    jumpbox = c.read_value("install_from")
    if jumpbox is None:
        log.info("nothing to install ('install_from' is not set)")
    else:
        fqdn = c.get_install_from_destination()
        log.debug(f"running script from : {fqdn}")
        azinstall.run(config, tmpdir, adminuser, private_key_file, public_key_file, fqdn)
def do_destroy(args):
    """Delete the resource group named in the config file.

    Unless ``args.force`` is set, waits 10 seconds before deleting so the
    user can abort with ctrl-c. ``args.no_wait`` is passed through to
    ``azutil.delete_resource_group``.
    """
    log.info("reading config file ({})".format(args.config_file))
    cfg = azconfig.ConfigFile()
    cfg.open(args.config_file)

    warning = "deleting entire resource group ({})".format(
        cfg.read_value("resource_group")
    )
    log.warning(warning)

    grace_period = not args.force
    if grace_period:
        log.info("you have 10s to change your mind and ctrl-c!")
        time.sleep(10)
        log.info("too late!")

    group = cfg.read_value("resource_group")
    azutil.delete_resource_group(group, args.no_wait)
if __name__ == "__main__":
    # Command-line entry point: one sub-command per do_* handler. Each
    # sub-parser inherits the global options below and stores its handler in
    # ``func``, which is called with the parsed arguments.
    azhpc_parser = argparse.ArgumentParser(prog="azhpc")

    # Options shared by every sub-command (used as a parent parser; it also
    # provides -h/--help, which is why the sub-parsers set add_help=False).
    gopt_parser = argparse.ArgumentParser()
    gopt_parser.add_argument(
        "--config-file", "-c", type=str,
        default="config.json", help="config file"
    )
    gopt_parser.add_argument(
        "--debug",
        help="increase output verbosity",
        action="store_true"
    )
    gopt_parser.add_argument(
        "--no-color",
        help="turn off color in output",
        action="store_true"
    )

    subparsers = azhpc_parser.add_subparsers(help="actions", dest="command")
    # Sub-commands are optional by default; without this, running "azhpc"
    # with no action crashes with AttributeError on args.debug/args.func
    # instead of printing a usage error.
    subparsers.required = True

    # build
    build_parser = subparsers.add_parser(
        "build",
        parents=[gopt_parser],
        add_help=False,
        description="deploy the config",
        help="create an arm template and deploy"
    )
    build_parser.set_defaults(func=do_build)
    build_parser.add_argument(
        "--output-template",
        "-o",
        type=str,
        default="deploy.json",
        help="filename for the arm template",
    )

    # connect
    connect_parser = subparsers.add_parser(
        "connect",
        parents=[gopt_parser],
        add_help=False,
        description="connect to a resource",
        help="connect to a resource with 'ssh'"
    )
    connect_parser.set_defaults(func=do_connect)
    connect_parser.add_argument(
        "--user",
        "-u",
        type=str,
        help="the user to connect as",
    )
    connect_parser.add_argument(
        "resource",
        type=str,
        help="the resource to connect to"
    )
    connect_parser.add_argument(
        'args',
        nargs=argparse.REMAINDER,
        help="additional arguments will be passed to the ssh command"
    )

    # destroy
    destroy_parser = subparsers.add_parser(
        "destroy",
        parents=[gopt_parser],
        add_help=False,
        description="delete the resource group",
        help="delete entire resource group"
    )
    destroy_parser.set_defaults(func=do_destroy)
    destroy_parser.add_argument(
        "--force",
        action="store_true",
        default=False,
        help="delete resource group immediately"
    )
    destroy_parser.add_argument(
        "--no-wait",
        action="store_true",
        default=False,
        help="do not wait for resources to be deleted"
    )

    # get
    get_parser = subparsers.add_parser(
        "get",
        parents=[gopt_parser],
        add_help=False,
        description="get a config value",
        help="evaluate the value at the json path specified"
    )
    get_parser.set_defaults(func=do_get)
    get_parser.add_argument(
        "path",
        type=str,
        help="the json path to evaluate"
    )

    # init
    init_parser = subparsers.add_parser(
        "init",
        parents=[gopt_parser],
        add_help=False,
        description="initialise a project",
        help="copy a file or directory with config files"
    )
    init_parser.set_defaults(func=do_init)
    init_parser.add_argument(
        "--show",
        "-s",
        action="store_true",
        default=False,
        help="display all vars that are <NOT-SET>"
    )
    init_parser.add_argument(
        "--dir",
        "-d",
        type=str,
        help="output directory",
    )
    init_parser.add_argument(
        "--vars",
        "-v",
        type=str,
        help="variables to replace in format VAR=VAL(,VAR=VAL)*",
    )

    # preprocess
    preprocess_parser = subparsers.add_parser(
        "preprocess",
        parents=[gopt_parser],
        add_help=False,
        description="preprocess the config file",
        help="expand all the config macros"
    )
    preprocess_parser.set_defaults(func=do_preprocess)

    # run
    run_parser = subparsers.add_parser(
        "run",
        parents=[gopt_parser],
        add_help=False,
        description="run a command on the specified resources",
        help="run command on resources"
    )
    run_parser.set_defaults(func=do_run)
    run_parser.add_argument(
        "--user",
        "-u",
        type=str,
        help="the user to run as"
    )
    run_parser.add_argument(
        "--nodes",
        "-n",
        type=str,
        help="the resources to run on (space separated for multiple)"
    )
    run_parser.add_argument(
        'args',
        nargs=argparse.REMAINDER,
        help="the command to run"
    )

    # scp
    scp_parser = subparsers.add_parser(
        "scp",
        parents=[gopt_parser],
        add_help=False,
        description="secure copy",
        help="copy files to a resource with 'scp'"
    )
    scp_parser.set_defaults(func=do_scp)
    scp_parser.add_argument(
        'args',
        nargs=argparse.REMAINDER,
        help="the arguments passed to scp (use '--' to separate scp arguments)"
    )

    # status
    status_parser = subparsers.add_parser(
        "status",
        parents=[gopt_parser],
        add_help=False,
        description="show status of all the resources",
        help="displays the resource uptime"
    )
    status_parser.set_defaults(func=do_status)

    args = azhpc_parser.parse_args()

    if args.debug:
        azlog.setDebug(True)
    if args.no_color:
        azlog.setColor(False)

    log.debug(args)

    # Dispatch to the handler registered by the chosen sub-command.
    args.func(args)