azurehpc/scripts/cyclecloud8_install.py

542 строки
20 KiB
Python

#!/usr/bin/python3
# Prepare an Azure provider account for CycleCloud usage.
import os
import argparse
import json
import re
import random
import platform
from string import ascii_uppercase, ascii_lowercase, digits
from subprocess import CalledProcessError, check_output
from os import path, listdir, chdir, fdopen, remove
from urllib.request import urlopen, Request
from shutil import rmtree, copy2, move
from tempfile import mkstemp, mkdtemp
from time import sleep
# Scratch directory for the JSON records this script generates and for the
# unpacked CLI installer. It is created as soon as the module is loaded.
tmpdir = mkdtemp()
print("Creating temp directory {} for installing CycleCloud".format(tmpdir))

# Installation root of the CycleCloud server and its control command.
cycle_root = "/opt/cycle_server"
cs_cmd = os.path.join(cycle_root, "cycle_server")
def clean_up():
    """Recursively delete the module-level temporary directory."""
    scratch_dir = tmpdir
    rmtree(scratch_dir)
def _catch_sys_error(cmd_list):
try:
output = check_output(cmd_list)
print(cmd_list)
print(output)
return output
except CalledProcessError as e:
print("Error with cmd: %s" % e.cmd)
print("Output: %s" % e.output)
raise
def create_user(username):
    """Make sure a local account exists for *username* and owns its home.

    The account is looked up in the password database; when it is missing it
    is created with ``useradd``. Ownership of ``/home/<username>`` is then
    (re)assigned to ``<username>:<username>``.
    """
    import pwd

    home_dir = "/home/{}".format(username)
    try:
        pwd.getpwnam(username)
    except KeyError:
        # getpwnam raises KeyError when the account does not exist yet.
        print('Creating user {}'.format(username))
        _catch_sys_error(["useradd", "-m", "-d", home_dir, username])

    owner_spec = username + ":" + username
    _catch_sys_error(["chown", "-R", owner_spec, home_dir])
def create_keypair(username, public_key=None):
    """Ensure *username* has an SSH public key and that it is authorised.

    Args:
        username: Local account whose ``/home/<username>/.ssh`` is managed.
        public_key: Optional public key text. When given and no
            ``id_rsa.pub`` exists yet, it is written there. When omitted, a
            key pair is generated with ``ssh-keygen`` if none exists, and
            the public half is read back from ``id_rsa.pub``.

    Returns:
        The public key text (as passed in, or as read from ``id_rsa.pub``).

    Raises:
        CalledProcessError: If ``mkdir``, ``ssh-keygen`` or ``chown`` fails.
    """
    ssh_dir = "/home/{}/.ssh".format(username)
    if not os.path.isdir(ssh_dir):
        _catch_sys_error(["mkdir", "-p", ssh_dir])

    public_key_file = os.path.join(ssh_dir, "id_rsa.pub")
    if not os.path.exists(public_key_file):
        if public_key:
            with open(public_key_file, 'w') as pubkeyfile:
                pubkeyfile.write(public_key)
                pubkeyfile.write("\n")
        else:
            _catch_sys_error(["ssh-keygen", "-f", os.path.join(ssh_dir, "id_rsa"), "-N", ""])

    if not public_key:
        # No key was supplied: use the one on disk, whether it was generated
        # just now or already existed. Previously a pre-existing key file
        # left public_key as None and the membership test below raised
        # TypeError.
        with open(public_key_file, 'r') as pubkeyfile:
            public_key = pubkeyfile.read()

    authorized_key_file = os.path.join(ssh_dir, "authorized_keys")
    authorized_keys = ""
    if os.path.exists(authorized_key_file):
        with open(authorized_key_file, 'r') as authkeyfile:
            authorized_keys = authkeyfile.read()

    key_line = public_key.strip()
    if key_line not in authorized_keys:
        # Append rather than truncate so keys that are already authorised
        # for this account are preserved.
        with open(authorized_key_file, 'a') as authkeyfile:
            if authorized_keys and not authorized_keys.endswith("\n"):
                authkeyfile.write("\n")
            authkeyfile.write(key_line)
            authkeyfile.write("\n")

    _catch_sys_error(["chown", "-R", username + ":" + username, "/home/{}".format(username)])
    return public_key
def create_user_credential(username, public_key=None):
    """Create the local user, its key pair, and a CycleCloud credential record.

    The record is written as ``credential.json`` in the temporary directory
    and then copied into ``<cycle_root>/config/data/``.

    Args:
        username: Local account name; also used to name the credential.
        public_key: Optional public key text forwarded to ``create_keypair``.
    """
    create_user(username)
    resolved_key = create_keypair(username, public_key)

    record = {
        "PublicKey": resolved_key,
        "AdType": "Credential",
        "CredentialType": "PublicKey",
        "Name": username + "/public"
    }

    record_path = os.path.join(tmpdir, "credential.json")
    print("Creating cred file: {}".format(record_path))
    with open(record_path, 'w') as out:
        json.dump(record, out)

    destination = os.path.join(cycle_root, "config/data/")
    print("Copying config to {}".format(destination))
    copy2(record_path, destination)
def generate_password_string():
    """Return a random 50-character password.

    The password always contains exactly 20 lowercase letters, 20 uppercase
    letters and 10 digits, in shuffled order.

    The characters are drawn from ``random.SystemRandom`` (OS entropy)
    because the result is used as an administrator password; the default
    ``random`` generator is not suitable for secrets.
    """
    rng = random.SystemRandom()
    random_pw_chars = ([rng.choice(ascii_lowercase) for _ in range(20)] +
                       [rng.choice(ascii_uppercase) for _ in range(20)] +
                       [rng.choice(digits) for _ in range(10)])
    rng.shuffle(random_pw_chars)
    return ''.join(random_pw_chars)
def cyclecloud_account_setup(vm_metadata, use_managed_identity, tenant_id, application_id, application_secret,
                             admin_user, azure_cloud, accept_terms, password, storageAccount):
    """Seed CycleCloud with an admin user and register the Azure account.

    Writes ``account_data.json`` (installation settings plus the login user)
    and ``azure_data.json`` (the provider account) into the temporary
    directory, copies the former into ``<cycle_root>/config/data/``,
    initialises the ``cyclecloud`` CLI, and creates the ``azure`` account
    unless ``cyclecloud account show azure`` reports it already exists.

    Args:
        vm_metadata: Dict with a ``"compute"`` entry providing
            ``subscriptionId``, ``location`` and ``resourceGroupName``.
        use_managed_identity: Use the VM's managed identity for the account.
        tenant_id: Azure tenant ID stored in the account record.
        application_id: Service principal application ID.
        application_secret: Service principal secret.
        admin_user: Name of the initial CycleCloud user.
        azure_cloud: Value stored as the account's ``Environment``.
        accept_terms: Currently forced to True (see below).
        password: Admin password; a random one is generated when falsy.
        storageAccount: Storage account name for the locker; a random
            ``cyclecloud<suffix>`` name is used when falsy.

    Raises:
        CalledProcessError: If any ``cycle_server`` / ``cyclecloud`` command
            fails.
    """
    print("Setting up azure account in CycleCloud and initializing cyclecloud CLI")

    if not accept_terms:
        # Temporary override kept from the original script: terms are always
        # treated as accepted, so the "reset installation" branch further
        # down is currently never taken.
        print("Accept terms was FALSE !!!!! Over-riding for now...")
        accept_terms = True

    # NOTE: an early return when config/data/account_data.json.imported
    # exists used to live here; it is disabled.

    subscription_id = vm_metadata["compute"]["subscriptionId"]
    location = vm_metadata["compute"]["location"]
    resource_group = vm_metadata["compute"]["resourceGroupName"]

    if password:
        print('Password specified, using it as the admin password')
        cyclecloud_admin_pw = password
    else:
        cyclecloud_admin_pw = generate_password_string()

    if storageAccount:
        print('Storage account specified, using it as the default locker')
        storage_account_name = storageAccount
    else:
        random_suffix = ''.join(random.SystemRandom().choice(
            ascii_lowercase) for _ in range(14))
        storage_account_name = 'cyclecloud{}'.format(random_suffix)

    azure_data = {
        "Environment": azure_cloud,
        "AzureRMUseManagedIdentity": use_managed_identity,
        "AzureResourceGroup": resource_group,
        "AzureRMApplicationId": application_id,
        "AzureRMApplicationSecret": application_secret,
        "AzureRMSubscriptionId": subscription_id,
        "AzureRMTenantId": tenant_id,
        "DefaultAccount": True,
        "Location": location,
        "Name": "azure",
        "Provider": "azure",
        "ProviderId": subscription_id,
        "RMStorageAccount": storage_account_name,
        "RMStorageContainer": "cyclecloud"
    }
    if use_managed_identity:
        azure_data["AzureRMUseManagedIdentity"] = True

    app_setting_installation = {
        "AdType": "Application.Setting",
        "Name": "cycleserver.installation.complete",
        "Value": True
    }
    initial_user = {
        "AdType": "Application.Setting",
        "Name": "cycleserver.installation.initial_user",
        "Value": admin_user
    }
    account_data = [
        initial_user,
        app_setting_installation
    ]

    if accept_terms:
        # Terms accepted, auto-create login user account as well
        login_user = {
            "AdType": "AuthenticatedUser",
            "Name": admin_user,
            "RawPassword": cyclecloud_admin_pw,
            "Superuser": True
        }
        account_data.append(login_user)

    account_data_file = tmpdir + "/account_data.json"
    azure_data_file = tmpdir + "/azure_data.json"

    with open(account_data_file, 'w') as fp:
        json.dump(account_data, fp)

    with open(azure_data_file, 'w') as fp:
        json.dump(azure_data, fp)

    # Log the account record without the service principal secret; the full
    # record is only written to azure_data_file.
    printable_azure_data = dict(azure_data)
    if printable_azure_data.get("AzureRMApplicationSecret"):
        printable_azure_data["AzureRMApplicationSecret"] = "<redacted>"
    print("CycleCloud account data:")
    print(json.dumps(printable_azure_data))

    copy2(account_data_file, cycle_root + "/config/data/")

    if not accept_terms:
        # reset the installation status so the splash screen re-appears
        print("Resetting installation")
        sql_statement = 'update Application.Setting set Value = false where name ==\"cycleserver.installation.complete\"'
        _catch_sys_error([cs_cmd, "execute", sql_statement])

    # set the permissions so that the first login works.
    perms_sql_statement = 'update Application.Setting set Value = false where Name == \"authorization.check_datastore_permissions\"'
    _catch_sys_error([cs_cmd, "execute", perms_sql_statement])

    initialize_cyclecloud_cli(admin_user, cyclecloud_admin_pw)

    output = _catch_sys_error(["/usr/local/bin/cyclecloud", "account", "show", "azure"])
    if 'Credentials: azure' in str(output):
        print("Account \"azure\" already exists. Skipping account setup...")
    else:
        # wait until Managed Identity is ready for use before creating the Account
        if use_managed_identity:
            get_vm_managed_identity()

        # create the cloud provider account
        print("Registering Azure subscription in CycleCloud")
        _catch_sys_error(["/usr/local/bin/cyclecloud", "account",
                          "create", "-f", azure_data_file])
def initialize_cyclecloud_cli(admin_user, cyclecloud_admin_pw):
    """Run ``cyclecloud initialize`` in batch mode against https://localhost.

    Args:
        admin_user: CycleCloud user name passed as ``--username``.
        cyclecloud_admin_pw: Password passed as ``--password``.
    """
    print("Setting up azure account in CycleCloud and initializing cyclecloud CLI")
    password_flag = ("--password=%s" % cyclecloud_admin_pw)

    # Pause briefly so the data dropped into config/data can be imported.
    sleep(5)

    print("Initializing cylcecloud CLI")
    init_cmd = [
        "/usr/local/bin/cyclecloud",
        "initialize",
        "--loglevel=debug",
        "--batch",
        "--url=https://localhost",
        "--verify-ssl=false",
        "--username=%s" % admin_user,
        password_flag,
    ]
    _catch_sys_error(init_cmd)
def letsEncrypt(fqdn, location):
    """Try to obtain a certificate for *fqdn* via ``cycle_server keystore automatic``.

    A failure of the command is reported and otherwise ignored.

    Args:
        fqdn: Host name handed to the keystore command. The original comment
            assumes the form ``hostname.location.cloudapp.azure.com``.
        location: Accepted but not used by this function.
    """
    sleep(60)
    keystore_cmd = [cs_cmd, "keystore", "automatic", "--accept-terms", fqdn]
    try:
        result = check_output(keystore_cmd)
    except CalledProcessError:
        print("Error getting SSL cert from Lets Encrypt")
        print("Proceeding with self-signed cert")
    else:
        print(keystore_cmd)
        print(result)
def get_vm_metadata():
    """Fetch this VM's instance metadata from the Azure metadata endpoint.

    The request is attempted up to 30 times with a 2 second pause between
    attempts. Both transport errors (raised by ``urlopen``) and unparsable
    responses are retried; previously only parse errors were, because the
    request itself was issued outside the ``try`` block.

    Returns:
        The decoded JSON document returned by the endpoint.

    Raises:
        RuntimeError: If no attempt succeeded; the last underlying error is
            attached as the cause.
    """
    metadata_url = "http://169.254.169.254/metadata/instance?api-version=2017-08-01"
    metadata_req = Request(metadata_url, headers={"Metadata": "true"})
    max_tries = 30
    last_error = None

    for attempt in range(1, max_tries + 1):
        print("Fetching metadata")
        try:
            # The context manager closes the HTTP response on every path.
            with urlopen(metadata_req, timeout=2) as metadata_response:
                return json.load(metadata_response)
        except (ValueError, OSError) as e:
            # URLError, HTTPError and socket timeouts are all OSError
            # subclasses; JSON decoding problems are ValueError.
            last_error = e
            print("Failed to get metadata %s" % e)
            if attempt < max_tries:
                print(" Retrying")
                sleep(2)

    print("Unable to obtain metadata after 30 tries")
    raise RuntimeError("Unable to obtain metadata after %d tries" % max_tries) from last_error
def get_vm_managed_identity():
    """Wait until a managed-identity token can be fetched for this VM.

    Managed Identity may not be available immediately at VM startup, so the
    token endpoint is polled up to 30 times with a 10 second pause between
    attempts. Request failures raised by ``urlopen`` are retried as well as
    unparsable responses; previously the request was issued outside the
    ``try`` block, so the first failed request aborted the wait.

    Returns:
        The decoded JSON document returned by the token endpoint.

    Raises:
        RuntimeError: If no attempt succeeded; the last underlying error is
            attached as the cause.
    """
    metadata_url = 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/'
    metadata_req = Request(metadata_url, headers={"Metadata": "true"})
    max_tries = 30
    last_error = None

    for attempt in range(1, max_tries + 1):
        print("Fetching managed identity")
        try:
            # The context manager closes the HTTP response on every path.
            with urlopen(metadata_req, timeout=2) as metadata_response:
                return json.load(metadata_response)
        except (ValueError, OSError) as e:
            # HTTPError / URLError / timeouts are OSError subclasses.
            last_error = e
            print("Failed to get managed identity %s" % e)
            if attempt < max_tries:
                print(" Retrying")
                sleep(10)

    print("Unable to obtain managed identity after 30 tries")
    raise RuntimeError("Unable to obtain managed identity after %d tries" % max_tries) from last_error
def start_cc():
    """Stop and restart the CycleCloud server, then wait for it to come up.

    If marker files under ``/opt/cycle_server/data/ads`` indicate a corrupt
    datastore, ``restore.sh`` is run (answering "yes" on stdin) before the
    server is started again. ``await_startup`` is attempted up to three
    times with a 30 second pause between attempts.

    Raises:
        Exception: If the datastore looks corrupt and no backup exists.
        CalledProcessError: If stop/start/restore fails, or if
            ``await_startup`` still fails on the last attempt.
    """
    import glob
    import subprocess

    print("(Re-)Starting CycleCloud server")
    _catch_sys_error([cs_cmd, "stop"])

    datastore_corrupt = (glob.glob("/opt/cycle_server/data/ads/corrupt*")
                         or glob.glob("/opt/cycle_server/data/ads/*logfile_failure"))
    if datastore_corrupt:
        print("WARNING: Corrupted datastore masterlog detected. Restoring from last backup...")
        if not glob.glob("/opt/cycle_server/data/backups/backup-*"):
            raise Exception("ERROR: No backups found, but master.logfile is corrupt!")
        try:
            # Feed the confirmation directly on stdin instead of spawning
            # `echo yes` and leaving its pipe open.
            output = subprocess.check_output(['/opt/cycle_server/util/restore.sh'], input=b"yes\n")
            print(output)
        except CalledProcessError as e:
            print("Error with cmd: %s" % e.cmd)
            print("Output: %s" % e.output)
            raise

    _catch_sys_error([cs_cmd, "start"])

    # Add 30s wait for cycle to start. This must call time.sleep directly:
    # passing the function object in an argv list raises TypeError.
    sleep(30)

    # Retry await_startup in case it takes much longer than expected
    # (this is common in local testing with limited compute resources)
    max_tries = 3
    while True:
        max_tries -= 1
        try:
            _catch_sys_error([cs_cmd, "await_startup"])
            break
        except Exception:
            if max_tries <= 0:
                raise
            # Wait 30 seconds before retrying
            sleep(30)
            print("Retrying...")
def modify_cs_config():
    """Rewrite selected settings in ``cycle_server.properties``.

    Lines starting with one of the known web-server keys are replaced with
    fixed values (4096M heap, ports 80/443, HTTPS enabled); all other lines
    are copied unchanged. The rewritten file replaces the original and the
    whole installation tree is then chown'ed to the ``cycle_server`` user.
    """
    print("Editing CycleCloud server system properties file")
    cs_config_file = cycle_root + "/config/cycle_server.properties"

    # (line prefix, full replacement line), checked in this order.
    overrides = (
        ('webServerMaxHeapSize=', 'webServerMaxHeapSize=4096M\n'),
        ('webServerPort=', 'webServerPort=80\n'),
        ('webServerSslPort=', 'webServerSslPort=443\n'),
        ('webServerEnableHttps=', 'webServerEnableHttps=true\n'),
    )

    fh, tmp_cs_config_file = mkstemp()
    with fdopen(fh, 'w') as new_config:
        with open(cs_config_file) as cs_config:
            for line in cs_config:
                rewritten = line
                for prefix, replacement in overrides:
                    if line.startswith(prefix):
                        rewritten = replacement
                        break
                new_config.write(rewritten)

    # Swap the rewritten copy into place.
    remove(cs_config_file)
    move(tmp_cs_config_file, cs_config_file)

    # Ensure that the files are owned by the cycle_server service user
    _catch_sys_error(["chown", "-R", "cycle_server.", cycle_root])
def install_cc_cli():
    """Install the CycleCloud CLI system-wide from the bundled zip archive.

    Does nothing when ``/usr/local/bin/cyclecloud`` already exists.
    Otherwise the archive shipped under ``/opt/cycle_server/tools`` is
    unpacked in the temporary directory and ``./install.sh --system`` is run
    inside each unpacked directory whose name starts with
    ``cyclecloud-cli-installer``.
    """
    # The CLI's own install script defaults to a per-user install; the
    # --system flag is used to make it available to everyone.
    if os.path.exists("/usr/local/bin/cyclecloud"):
        print("CycleCloud CLI already installed.")
        return

    print("Unzip and install CLI")
    chdir(tmpdir)
    _catch_sys_error(["unzip", "/opt/cycle_server/tools/cyclecloud-cli.zip"])

    for entry in listdir("."):
        if not path.isdir(entry):
            continue
        if not re.match("cyclecloud-cli-installer", entry):
            continue
        print("Found CLI install DIR %s" % entry)
        chdir(entry)
        _catch_sys_error(["./install.sh", "--system"])
def already_installed():
    """Return True when the CycleCloud server control script is present."""
    print("Checking for existing Azure CycleCloud install")
    server_script = "/opt/cycle_server/cycle_server"
    return os.path.exists(server_script)
def download_install_cc():
    """Install the ``cyclecloud8`` package with apt (Ubuntu) or yum (otherwise)."""
    print("Installing Azure CycleCloud server")
    on_ubuntu = "ubuntu" in str(platform.platform()).lower()
    package_manager = "apt" if on_ubuntu else "yum"
    _catch_sys_error([package_manager, "install", "-y", "cyclecloud8"])
def configure_msft_repos():
    """Configure the Microsoft package repositories for this platform.

    Ubuntu hosts get the apt configuration; everything else gets yum.
    """
    on_ubuntu = "ubuntu" in str(platform.platform()).lower()
    configure = configure_msft_apt_repos if on_ubuntu else configure_msft_yum_repos
    configure()
def configure_msft_apt_repos():
    """Register Microsoft's signing key and apt repositories, then refresh apt.

    Writes ``azure-cli.list`` and ``cyclecloud.list`` under
    ``/etc/apt/sources.list.d`` for the release codename reported by
    ``lsb_release -cs``.

    Raises:
        CalledProcessError: If downloading the key, importing it, querying
            the release, or ``apt update`` fails.
    """
    print("Configuring Microsoft apt repository for CycleCloud install")

    # No shell is involved here, so a literal "|" argument would be handed
    # to wget rather than creating a pipe. Download the key first, then feed
    # it to apt-key on stdin.
    signing_key = _catch_sys_error(
        ["wget", "-qO", "-", "https://packages.microsoft.com/keys/microsoft.asc"])
    try:
        print(check_output(["apt-key", "add", "-"], input=signing_key))
    except CalledProcessError as e:
        print("Error with cmd: %s" % e.cmd)
        print("Output: %s" % e.output)
        raise

    # The command output is raw bytes with a trailing newline; decode and
    # strip it so the codename is written as plain text (e.g. "focal").
    lsb_release = _catch_sys_error(["lsb_release", "-cs"])
    if isinstance(lsb_release, bytes):
        lsb_release = lsb_release.decode("utf-8")
    lsb_release = lsb_release.strip()

    with open('/etc/apt/sources.list.d/azure-cli.list', 'w') as f:
        f.write("deb [arch=amd64] https://packages.microsoft.com/repos/azure-cli/ {} main\n".format(lsb_release))

    with open('/etc/apt/sources.list.d/cyclecloud.list', 'w') as f:
        f.write("deb [arch=amd64] https://packages.microsoft.com/repos/cyclecloud {} main\n".format(lsb_release))

    _catch_sys_error(["apt", "-y", "update"])
def configure_msft_yum_repos():
    """Import Microsoft's RPM signing key and write the yum repo definitions.

    Creates ``cyclecloud.repo`` and ``azure-cli.repo`` under
    ``/etc/yum.repos.d``.
    """
    print("Configuring Microsoft yum repository for CycleCloud install")
    key_url = "https://packages.microsoft.com/keys/microsoft.asc"
    _catch_sys_error(["rpm", "--import", key_url])

    # (repo file path, file contents), written in this order.
    repo_definitions = (
        ('/etc/yum.repos.d/cyclecloud.repo', """\
[cyclecloud]
name=cyclecloud
baseurl=https://packages.microsoft.com/yumrepos/cyclecloud
gpgcheck=1
gpgkey=https://packages.microsoft.com/keys/microsoft.asc
"""),
        ('/etc/yum.repos.d/azure-cli.repo', """\
[azure-cli]
name=Azure CLI
baseurl=https://packages.microsoft.com/yumrepos/azure-cli
enabled=1
gpgcheck=1
gpgkey=https://packages.microsoft.com/keys/microsoft.asc
"""),
    )
    for repo_path, repo_body in repo_definitions:
        with open(repo_path, 'w') as repo_file:
            repo_file.write(repo_body)
def install_pre_req():
    """Install a headless Java 8 runtime and the Azure CLI.

    Uses apt on Ubuntu and yum on every other platform.
    """
    print("Installing pre-requisites for CycleCloud server")
    # The AZ CLI is not strictly needed, but it is useful to have.
    # Taken from https://docs.microsoft.com/en-us/cli/azure/install-azure-cli-yum?view=azure-cli-latest
    if "ubuntu" in str(platform.platform()).lower():
        installer, java_package = "apt", "openjdk-8-jre-headless"
    else:
        installer, java_package = "yum", "java-1.8.0-openjdk-headless"

    for package in (java_package, "azure-cli"):
        _catch_sys_error([installer, "install", "-y", package])
def main():
    """Parse the command line and configure the CycleCloud Azure account.

    Fetches the VM metadata, optionally overrides the resource group used
    for cluster resources, runs ``cyclecloud_account_setup``, optionally
    requests a Let's Encrypt certificate, and finally removes the temporary
    directory. The server installation steps are currently disabled (see
    the commented-out calls below).
    """
    parser = argparse.ArgumentParser(description="usage: %prog [options]")

    parser.add_argument("--azureSovereignCloud",
                        dest="azureSovereignCloud",
                        default="public",
                        help="Azure Region [china|germany|public|usgov]")

    parser.add_argument("--tenantId",
                        dest="tenantId",
                        help="Tenant ID of the Azure subscription")

    parser.add_argument("--applicationId",
                        dest="applicationId",
                        help="Application ID of the Service Principal")

    parser.add_argument("--applicationSecret",
                        dest="applicationSecret",
                        help="Application Secret of the Service Principal")

    parser.add_argument("--username",
                        dest="username",
                        help="The local admin user for the CycleCloud VM")

    parser.add_argument("--hostname",
                        dest="hostname",
                        help="The short public hostname assigned to this VM (or public IP), used for LetsEncrypt")

    parser.add_argument("--acceptTerms",
                        dest="acceptTerms",
                        action="store_true",
                        help="Accept Cyclecloud terms and do a silent install")

    parser.add_argument("--useLetsEncrypt",
                        dest="useLetsEncrypt",
                        action="store_true",
                        help="Automatically fetch certificate from Let's Encrypt. (Only suitable for installations with public IP.)")

    parser.add_argument("--useManagedIdentity",
                        dest="useManagedIdentity",
                        action="store_true",
                        help="Use the first assigned Managed Identity rather than a Service Principle for the default account")

    parser.add_argument("--password",
                        dest="password",
                        help="The password for the CycleCloud UI user")

    parser.add_argument("--publickey",
                        dest="publickey",
                        help="The public ssh key for the CycleCloud UI user")

    parser.add_argument("--storageAccount",
                        dest="storageAccount",
                        help="The storage account to use as a CycleCloud locker")

    parser.add_argument("--resourceGroup",
                        dest="resourceGroup",
                        help="The resource group for CycleCloud cluster resources. Resource Group must already exist. (Default: same RG as CycleCloud)")

    args = parser.parse_args()

    # Never write credentials to the log: mask the secret-bearing options
    # before echoing the parsed arguments.
    secret_fields = ("applicationSecret", "password")
    printable_args = {
        name: ("<redacted>" if name in secret_fields and value else value)
        for name, value in vars(args).items()
    }
    print("Debugging arguments: %s" % argparse.Namespace(**printable_args))

    # Server installation is disabled in this variant of the script:
    # if not already_installed():
    #     configure_msft_repos()
    #     install_pre_req()
    #     download_install_cc()
    #     modify_cs_config()
    # start_cc()
    # install_cc_cli()

    vm_metadata = get_vm_metadata()

    if args.resourceGroup:
        print("CycleCloud created in resource group: %s" % vm_metadata["compute"]["resourceGroupName"])
        print("Cluster resources will be created in resource group: %s" % args.resourceGroup)
        vm_metadata["compute"]["resourceGroupName"] = args.resourceGroup

    cyclecloud_account_setup(vm_metadata, args.useManagedIdentity, args.tenantId, args.applicationId,
                             args.applicationSecret, args.username, args.azureSovereignCloud,
                             args.acceptTerms, args.password, args.storageAccount)

    if args.useLetsEncrypt:
        letsEncrypt(args.hostname, vm_metadata["compute"]["location"])

    # Create user requires root privileges
    # create_user_credential(args.username, args.publickey)

    clean_up()
if __name__ == "__main__":
    import traceback

    try:
        main()
    except Exception:
        # Keep the deliberate "do not fail the deployment" behaviour, but
        # print the traceback so there is something to debug with. Catching
        # Exception (not everything) lets --help, argument errors and
        # Ctrl-C terminate the script normally.
        traceback.print_exc()
        print("Deployment failed... Staying alive for DEBUGGING")