Simplify config using Azure config VM; add ACS deployment document
This commit is contained in:
Родитель
9011474725
Коммит
6574f17822
|
@ -0,0 +1,63 @@
|
||||||
|
# Deploy DL workspace cluster on Azure Container Service (ACS)
|
||||||
|
|
||||||
|
This document describes the procedure to deploy a DL Workspace cluster on ACS. We are still improving the deployment procedure on ACS; please contact the authors if you encounter any issues.
|
||||||
|
|
||||||
|
0. Follow [this document](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest) to install the Azure CLI and log in to your Azure subscription on your dev machine.
|
||||||
|
|
||||||
|
1. Clone this repo
|
||||||
|
|
||||||
|
2. Go into directory src/ClusterBootstrap inside the repo directory
|
||||||
|
|
||||||
|
3. Create a configuration file called "config.yaml" with the following contents:
|
||||||
|
|
||||||
|
```
|
||||||
|
cluster_name: [your cluster name]
|
||||||
|
cluster_location : [your cluster location - e.g., northcentralus]
|
||||||
|
worker_node_num : [number of agent nodes for the ACS cluster]
|
||||||
|
master_node_num : [number of master nodes for the ACS cluster]
|
||||||
|
acsagentsize : [size of VM for agent nodes - e.g., Standard_NC12]
|
||||||
|
azstoragesku: [sku for Azure storage account - e.g., Standard_LRS]
|
||||||
|
azfilesharequota: [quota for fileshare in GB - e.g., 2048]
|
||||||
|
```
|
||||||
|
|
||||||
|
4. To start and deploy the cluster
|
||||||
|
```
|
||||||
|
./deploy.py acs
|
||||||
|
```
|
||||||
|
|
||||||
|
The deployment script executes the following commands (you do not need to run them yourself if you ran step 4 directly):
|
||||||
|
1. Set up a basic K8S cluster on ACS:
|
||||||
|
```
|
||||||
|
./deploy.py acs deploy
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Label nodes and deploy services:
|
||||||
|
```
|
||||||
|
./deploy.py acs postdeploy
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Mount storage on nodes:
|
||||||
|
```
|
||||||
|
./deploy.py acs storagemount
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Install GPU drivers on nodes (if needed):
|
||||||
|
```
|
||||||
|
./deploy.py acs gpudrivers
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Install network plugin
|
||||||
|
```
|
||||||
|
./deploy.py acs freeflow
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Build needed docker images and configuration files for restfulapi, jobmanager, and webui
|
||||||
|
```
|
||||||
|
./deploy.py acs bldwebui
|
||||||
|
```
|
||||||
|
|
||||||
|
7. Start DLWorkspace pods
|
||||||
|
```
|
||||||
|
./deploy.py acs restartwebui
|
||||||
|
```
|
||||||
|
|
|
@ -0,0 +1,380 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
# Tools to build ACS cluster
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import yaml
|
||||||
|
import re
|
||||||
|
import numbers
|
||||||
|
|
||||||
|
sys.path.append("../utils")
|
||||||
|
import utils
|
||||||
|
|
||||||
|
import az_tools
|
||||||
|
|
||||||
|
# AZ ACS commands
|
||||||
|
def az_cmd(cmd):
    """Run ``az <cmd>`` through the shell and return its parsed output.

    cmd is the argument string appended after "az ". The full command is
    echoed first when the module-level ``verbose`` flag is set.

    Raises subprocess.CalledProcessError when the command exits non-zero
    (behaviour of subprocess.check_output).
    """
    fullCmd = "az " + cmd
    if verbose:
        print(fullCmd)
    output = subprocess.check_output(fullCmd, shell=True)
    # safe_load only builds plain YAML/JSON types. The bare yaml.load(output)
    # used previously can construct arbitrary Python objects and is rejected
    # without an explicit Loader by recent PyYAML releases.
    return yaml.safe_load(output)
|
||||||
|
|
||||||
|
def az_sys(cmd):
    """Run ``az <cmd>`` with os.system, echoing it first when verbose is set.

    The exit status of the command is neither checked nor returned.
    """
    fullCmd = "az " + cmd
    if verbose:
        print(fullCmd)
    os.system(fullCmd)
|
||||||
|
|
||||||
|
def az_tryuntil(cmd, stopFn, waitPeriod=5):
    """Pass ``az <cmd>`` to utils.tryuntil with stopFn as the stop condition.

    The update callback handed to utils.tryuntil does nothing. The return
    value is whatever utils.tryuntil returns.
    """
    def runCmd():
        return az_sys(cmd)

    def noUpdate():
        return ()

    return utils.tryuntil(runCmd, stopFn, noUpdate, waitPeriod)
|
||||||
|
|
||||||
|
# Create SQL database
|
||||||
|
def az_create_sql_server():
    """Create the Azure SQL server named in config and add two firewall rules.

    Reads resource_group, cluster_location, azure-sqlservername,
    sqlserver-username and sqlserver-password from the module-level config.
    """
    # shell-quote the password so characters such as "$" survive the shell
    quotedPwd = utils.shellquote(config["sqlserver-password"])
    createArgs = [
        "sql server create",
        "--resource-group=%s" % config["resource_group"],
        "--location=%s" % config["cluster_location"],
        "--name=%s" % config["azure-sqlservername"],
        "--admin-user=%s" % config["sqlserver-username"],
        "--admin-password=%s" % quotedPwd,
    ]
    az_sys(" ".join(createArgs))

    # both firewall rules share the same command prefix
    firewallBase = " ".join([
        "sql server firewall-rule create",
        "--resource-group=%s" % config["resource_group"],
        "--server=%s" % config["azure-sqlservername"],
    ])
    # rule "All" covers every IPv4 address
    # NOTE(review): this leaves the server reachable from any address -- confirm intended
    az_sys(firewallBase + " --name=All --start-ip-address=0.0.0.0 --end-ip-address=255.255.255.255")
    # rule "Azure" uses the 0.0.0.0-0.0.0.0 range (the "open Azure" rule)
    az_sys(firewallBase + " --name=Azure --start-ip-address=0.0.0.0 --end-ip-address=0.0.0.0")
|
||||||
|
|
||||||
|
def az_create_sql_database(dbname):
    """Create database dbname on the SQL server named in config."""
    dbArgs = [
        "sql db create",
        "--resource-group=%s" % config["resource_group"],
        "--server=%s" % config["azure-sqlservername"],
        "--name=%s" % dbname,
    ]
    az_sys(" ".join(dbArgs))
|
||||||
|
|
||||||
|
def az_create_sql():
    """Create the SQL server, then the database named by config["sqlserver-database"]."""
    az_create_sql_server()
    dbName = config["sqlserver-database"]
    az_create_sql_database(dbName)
|
||||||
|
|
||||||
|
def az_grp_exist(grpname):
    """Return True when ``az group show`` yields data for resource group grpname.

    Returns False when the command prints nothing (parsed as None) or when
    it exits non-zero.
    """
    try:
        resgrp = az_cmd("group show --name=%s" % grpname)
    except subprocess.CalledProcessError:
        # az_cmd runs the CLI through check_output, so a CLI that signals a
        # missing group with a non-zero exit status lands here; previously
        # that exception propagated and aborted the caller.
        # NOTE(review): other CLI failures (e.g. not logged in) are also
        # reported as "does not exist" by this branch.
        return False
    return resgrp is not None
|
||||||
|
|
||||||
|
# Overwrite resource group with location where machines are located
|
||||||
|
# If no machines are found, that may be because they are not created, so leave it as it is
|
||||||
|
def acs_set_resource_grp(exitIfNotFound):
    """Point config["resource_group"] at the group that actually holds the VMs.

    config["acs_resource_group"] is always set to the original group (where
    the container service resides). If that group has no VMs, the child
    group "<group>_<cluster_name>_<cluster_location>" is tried and, when it
    has VMs, replaces config["resource_group"]. When no VMs are found in
    either group and exitIfNotFound is true, the process exits.
    """
    def groupHasMachines(grp):
        # a group counts only if it exists and "vm list" returns a non-empty list
        if not az_grp_exist(grp):
            return False
        return len(az_cmd("vm list --resource-group=%s" % grp)) > 0

    parentGroup = config["resource_group"]
    config["acs_resource_group"] = parentGroup  # where container service resides

    found = groupHasMachines(parentGroup)
    if not found:
        # try child resource group
        childGroup = "%s_%s_%s" % (parentGroup, config["cluster_name"], config["cluster_location"])
        print("Grp %s has no matchines trying %s" % (parentGroup, childGroup))
        if groupHasMachines(childGroup):
            # overwrite with group where machines are located
            config["resource_group"] = childGroup
            found = True

    if exitIfNotFound and not found:
        print("No machines found -- quitting")
        exit()
    print("Resource group = %s" % config["resource_group"])
|
||||||
|
|
||||||
|
def acs_get_id(elem):
    """Return the text after the last "/" in elem["id"].

    elem is a mapping with an "id" string. When the id contains no "/",
    the whole id is returned; the previous regex-based version raised
    AttributeError in that case because re.match returned None.
    """
    return elem["id"].rsplit("/", 1)[-1]
|
||||||
|
|
||||||
|
def acs_get_ip(ipaddrName):
    """Return the "ipAddress" field of public IP resource ipaddrName.

    The resource is looked up in config["resource_group"] via
    ``az network public-ip show``.
    """
    showCmd = "".join([
        "network public-ip show --resource-group=",
        config["resource_group"],
        " --name=",
        ipaddrName,
    ])
    return az_cmd(showCmd)["ipAddress"]
|
||||||
|
|
||||||
|
def acs_attach_dns_to_node(node, dnsName=None):
    """Set the DNS name on the public IP resource of a node.

    node is a key of config["nodenames_from_ip"]; the mapped node name is
    used as the DNS name when dnsName is None. The public IP resource name
    comes from config["acsnodes"][<node name>]["publicipname"].
    """
    nodeName = config["nodenames_from_ip"][node]
    label = nodeName if dnsName is None else dnsName
    ipName = config["acsnodes"][nodeName]["publicipname"]
    updateArgs = [
        "network public-ip update",
        "--resource-group=%s" % config["resource_group"],
        "--name=%s" % ipName,
        "--dns-name=%s" % label,
    ]
    az_sys(" ".join(updateArgs))
|
||||||
|
|
||||||
|
def acs_get_machineIP(machineName):
    """Find the NIC / IP configuration / public IP of VM machineName.

    Walks every NIC of the VM and every IP configuration of each NIC, and
    returns for the first IP configuration that has a public IP:
        {"nic", "ipconfig", "publicipname", "publicip"}
    If none has a public IP, "nic" and "ipconfig" name the first NIC and its
    first IP configuration, and "publicipname" / "publicip" are None.
    "nic" / "ipconfig" are None when the VM has no NIC or the first NIC has
    no IP configuration; previously that case raised a NameError because
    the default variables were never assigned.
    """
    print("Machine: " + machineName)
    vmInfo = az_cmd("vm show --name="+machineName+" --resource-group="+config["resource_group"])
    nics = vmInfo["networkProfile"]["networkInterfaces"]

    # defaults used when no public IP is found; bound up front so the final
    # return is always valid
    nicDefault = None
    ipConfigDefault = None

    for i, nic in enumerate(nics):
        nicName = acs_get_id(nic)
        print("Nic Name: " + nicName)
        if i == 0:
            nicDefault = nicName
        nicInfo = az_cmd("network nic show --resource-group="+config["resource_group"]+" --name="+nicName)
        for j, ipConfig in enumerate(nicInfo["ipConfigurations"]):
            ipConfigName = acs_get_id(ipConfig)
            print("IP Config Name: " + ipConfigName)
            if i == 0 and j == 0:
                ipConfigDefault = ipConfigName
            configInfo = az_cmd("network nic ip-config show --resource-group="+config["resource_group"]+
                                " --nic-name="+nicName+" --name="+ipConfigName)
            publicIP = configInfo["publicIpAddress"]
            if publicIP is not None:
                ipName = acs_get_id(publicIP)
                print("IP Name: " + ipName)
                return {"nic": nicName, "ipconfig": ipConfigName,
                        "publicipname": ipName, "publicip": acs_get_ip(ipName)}

    return {"nic": nicDefault, "ipconfig": ipConfigDefault, "publicipname": None, "publicip": None}
|
||||||
|
|
||||||
|
def acs_get_nodes():
    """Return the "items" list from ``kubectl get nodes -o=json``.

    Uses the kubectl binary at ./deploy/bin/kubectl and the kubeconfig at
    ./deploy/<config["acskubeconfig"]>, run through utils.subproc_runonce.
    """
    binary = os.path.abspath('./deploy/bin/kubectl')
    kubeconfig = os.path.abspath('./deploy/'+config["acskubeconfig"])
    cmd = binary + ' -o=json --kubeconfig=' + kubeconfig + ' get nodes'
    nodeInfo = utils.subproc_runonce(cmd)
    # The output is JSON, which safe_load parses into plain dicts/lists.
    # The bare yaml.load(nodeInfo) used before is unsafe and is rejected
    # without an explicit Loader by recent PyYAML releases.
    return yaml.safe_load(nodeInfo)["items"]
|
||||||
|
|
||||||
|
def acs_get_machinesAndIPs(bCreateIP):
    """Collect NIC / public-IP info for every Kubernetes node.

    Returns {machineName: info} where info is the dict produced by
    acs_get_machineIP. When bCreateIP is true and a machine has no public
    IP, a dynamic public IP "<machineName>-public-ip-0" is created and
    attached to the machine's NIC IP configuration. Also rebuilds
    config["nodenames_from_ip"] as a public-IP -> machine-name map.
    """
    nodes = acs_get_nodes()
    ipInfo = {}
    nameByIp = {}
    config["nodenames_from_ip"] = nameByIp

    for node in nodes:
        machineName = node["metadata"]["name"]
        info = acs_get_machineIP(machineName)
        ipInfo[machineName] = info

        if bCreateIP and info["publicip"] is None:
            # Create IP
            ipName = machineName + "-public-ip-0"
            print("Creating public-IP: " + ipName)
            az_sys(" ".join([
                "network public-ip create --allocation-method=Dynamic",
                "--resource-group=%s" % config["resource_group"],
                "--name=%s" % ipName,
                "--location=%s" % config["cluster_location"],
            ]))
            # Add to NIC of machine
            az_sys(" ".join([
                "network nic ip-config update",
                "--resource-group=%s" % config["resource_group"],
                "--nic-name=%s" % info["nic"],
                "--name=%s" % info["ipconfig"],
                "--public-ip-address=%s" % ipName,
            ]))
            # record the newly attached address
            info["publicipname"] = ipName
            info["publicip"] = acs_get_ip(ipName)

        nameByIp[info["publicip"]] = machineName

    return ipInfo
|
||||||
|
|
||||||
|
def acs_get_machinesAndIPsFast():
    """Look up each node's public IP by its conventional resource name.

    Assumes every node's public IP resource is named
    "<machineName>-public-ip-0" (the name acs_get_machinesAndIPs creates)
    instead of walking the NICs. Returns
    {machineName: {"publicipname", "publicip"}} and rebuilds
    config["nodenames_from_ip"] as a public-IP -> machine-name map.
    """
    nodes = acs_get_nodes()
    ipInfo = {}
    nameByIp = {}
    config["nodenames_from_ip"] = nameByIp

    for node in nodes:
        machineName = node["metadata"]["name"]
        ipName = machineName + "-public-ip-0"
        if verbose:
            print("PublicIP: " + ipName)
        entry = {}
        ipInfo[machineName] = entry
        entry["publicipname"] = ipName
        entry["publicip"] = acs_get_ip(ipName)
        nameByIp[entry["publicip"]] = machineName

    return ipInfo
|
||||||
|
|
||||||
|
def acs_is_valid_nsg_rule(rule):
    """Return True for an inbound allow rule from any source over TCP or any protocol.

    rule is a mapping with "access", "direction", "sourceAddressPrefix" and
    "protocol" keys. "access", "direction" and the "tcp" protocol check are
    case-insensitive; the "*" checks are exact.
    """
    if rule["access"].lower() != "allow":
        return False
    if rule["direction"].lower() != "inbound":
        return False
    if rule["sourceAddressPrefix"] != '*':
        return False
    protocol = rule["protocol"]
    return protocol.lower() == "tcp" or protocol == '*'
|
||||||
|
|
||||||
|
def _acs_rule_port_bounds(rule):
    """Return (minPort, maxPort) for rule's destinationPortRange.

    A "*" (or missing/empty) range yields (-1, -1) so that it never matches
    a concrete port, which is the sentinel the original code intended.
    """
    portRange = rule.get("destinationPortRange")
    if not portRange or portRange == "*":
        return (-1, -1)
    match = re.match('(.*)-(.*)', portRange)
    if match is None:
        # single port
        port = int(portRange)
        return (port, port)
    return (int(match.group(1)), int(match.group(2)))


def acs_add_nsg_rules(ports_to_add):
    """Add inbound allow rules to the first NSG of config["resource_group"].

    ports_to_add maps rule name -> port. A port may be a number, a digit
    string, or any other string (e.g. a range such as "30000-32767"), which
    is passed to the CLI unchanged. For numeric ports, no rule is created
    when an existing rule accepted by acs_is_valid_nsg_rule already covers
    the port. New rules get priorities in steps of 10 above the highest
    existing priority below 1000 among such rules (starting from 100).

    Fix over the previous version: a rule whose destinationPortRange is "*"
    used to reach int("*") and raise ValueError, because the wildcard check
    sat in a branch that could only run when the range contained "-".
    """
    nsgs = az_cmd("network nsg list --resource-group={0}".format(config["resource_group"]))
    nsg_name = acs_get_id(nsgs[0])
    cmd = "network nsg show --resource-group="+config["resource_group"]+" --name="+nsg_name
    rulesInfo = az_cmd(cmd)
    rules = rulesInfo["defaultSecurityRules"] + rulesInfo["securityRules"]

    # highest priority below 1000 already used by a matching allow rule
    maxThreeDigitRule = 100
    for rule in rules:
        if acs_is_valid_nsg_rule(rule) and rule["priority"] < 1000:
            maxThreeDigitRule = max(maxThreeDigitRule, rule["priority"])

    if verbose:
        print("Existing max three digit rule for NSG: %s is %d" % (nsg_name, maxThreeDigitRule))

    for port_rule in ports_to_add:
        port_num = ports_to_add[port_rule]
        createRule = True
        isNum = isinstance(port_num, numbers.Number)
        if (not isNum) and port_num.isdigit():
            port_num = int(port_num)
            isNum = True

        if isNum:
            # check for existing rules
            found_port = None
            for rule in rules:
                if not acs_is_valid_nsg_rule(rule):
                    continue
                minPort, maxPort = _acs_rule_port_bounds(rule)
                if minPort <= port_num <= maxPort:
                    found_port = rule["name"]
                    break
            if found_port is not None:
                print("Rule for %s : %d -- already satisfied by %s" % (port_rule, port_num, found_port))
                createRule = False

        if createRule:
            maxThreeDigitRule = maxThreeDigitRule + 10
            cmd = "network nsg rule create"
            cmd += " --resource-group=%s" % config["resource_group"]
            cmd += " --nsg-name=%s" % nsg_name
            cmd += " --name=%s" % port_rule
            cmd += " --access=Allow"
            if isNum:
                cmd += " --destination-port-range=%d" % port_num
            else:
                cmd += " --destination-port-range=%s" % port_num
            cmd += " --direction=Inbound"
            cmd += " --priority=%d" % maxThreeDigitRule
            az_cmd(cmd)
|
||||||
|
|
||||||
|
def acs_get_config():
    """Install kubectl and fetch the cluster credentials if they are missing.

    kubectl goes to ./deploy/bin/kubectl and the kubeconfig to
    ./deploy/<config["acskubeconfig"]>. Each az command is retried through
    az_tryuntil until the corresponding file exists.
    """
    kubectlPath = './deploy/bin/kubectl'
    if not os.path.exists(kubectlPath):
        os.system("mkdir -p ./deploy/bin")
        az_tryuntil("acs kubernetes install-cli --install-location ./deploy/bin/kubectl",
                    lambda: os.path.exists(kubectlPath))

    if not os.path.exists('./deploy/' + config["acskubeconfig"]):
        credArgs = [
            "acs kubernetes get-credentials",
            "--resource-group=%s" % config["acs_resource_group"],
            "--name=%s" % config["cluster_name"],
            "--file=./deploy/%s" % config["acskubeconfig"],
            "--ssh-key-file=%s" % "./deploy/sshkey/id_rsa",
        ]
        az_tryuntil(" ".join(credArgs),
                    lambda: os.path.exists("./deploy/%s" % config["acskubeconfig"]))
|
||||||
|
|
||||||
|
def acs_get_storage_key():
    """Return the "value" of the first key listed for the rootshare storage account."""
    listArgs = [
        "storage account keys list",
        "--account-name=%s" % config["mountpoints"]["rootshare"]["accountname"],
        "--resource-group=%s" % config["resource_group"],
    ]
    accountKeys = az_cmd(" ".join(listArgs))
    return accountKeys[0]["value"]
|
||||||
|
|
||||||
|
def acs_create_storage():
    """Create the rootshare storage account and its file share.

    All names, SKU and quota come from config["mountpoints"]["rootshare"].
    The account key is fetched after account creation and stored back into
    that dict under "accesskey".
    """
    share = config["mountpoints"]["rootshare"]

    # Create storage account
    az_sys(" ".join([
        "storage account create",
        "--name=%s" % share["accountname"],
        "--resource-group=%s" % config["resource_group"],
        "--sku=%s" % share["azstoragesku"],
    ]))

    # Create file share, authenticated with the account key
    azureKey = acs_get_storage_key()
    share["accesskey"] = azureKey
    az_sys(" ".join([
        "storage share create",
        "--name=%s" % share["filesharename"],
        "--quota=%s" % share["azfilesharequota"],
        "--account-name=%s" % share["accountname"],
        "--account-key=%s" % azureKey,
    ]))
|
||||||
|
|
||||||
|
def acs_load_azconfig():
    """Return the parsed contents of azConfigFile, or None if it does not exist."""
    if not os.path.exists(azConfigFile):
        return None
    with open(azConfigFile, "r") as cfgFile:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects. It is kept here because the file is
        # written by yaml.dump and may carry Python-specific tags that
        # safe_load would reject -- confirm before switching.
        return yaml.load(cfgFile)
|
||||||
|
|
||||||
|
def acs_write_azconfig(configToWrite):
    """Write configToWrite to azConfigFile as block-style YAML, replacing the file."""
    with open(azConfigFile, "w") as cfgFile:
        yaml.dump(configToWrite, cfgFile, default_flow_style=False)
|
||||||
|
|
||||||
|
def acs_generate_azconfig():
    """Build a cluster config through az_tools from the cluster name and location.

    Resets az_tools.config via az_tools.init_config(), copies cluster_name
    and cluster_location from this module's config, runs
    az_tools.update_config with genSSH=False, and returns the result of
    az_tools.gen_cluster_config("", False) (no output file is written).
    """
    azConfig = az_tools.init_config()
    az_tools.config = azConfig
    cluster = azConfig["azure_cluster"]
    cluster["cluster_name"] = config["cluster_name"]
    cluster["azure_location"] = config["cluster_location"]
    az_tools.config = az_tools.update_config(azConfig, False)
    return az_tools.gen_cluster_config("", False)
|
||||||
|
|
||||||
|
def acs_update_azconfig(gen_cluster_config):
    """Load, create or refresh the Azure config file and return its contents.

    gen_cluster_config false: return the stored config; if none is stored,
    write the freshly generated one and return it.
    gen_cluster_config true: merge the freshly generated config into the
    stored one (or an empty dict) with utils.mergeDict(..., False), write
    the result and return it.
    """
    stored = acs_load_azconfig()
    generated = acs_generate_azconfig()

    if gen_cluster_config:
        if stored is None:
            stored = {}
        utils.mergeDict(stored, generated, False)
        acs_write_azconfig(stored)
    elif stored is None:
        stored = generated
        acs_write_azconfig(stored)

    return stored
|
||||||
|
|
||||||
|
def acs_deploy():
    """Create the resource group and ACS Kubernetes cluster, then provision it.

    After the cluster is created this resolves the resource group holding
    the VMs, creates storage and the SQL server/database, refreshes the
    Azure config file, fetches kubectl and credentials, ensures every node
    has a public IP, and adds the NSG rules for HTTP, the RESTful API and
    the Kubernetes service port range.

    Returns the per-node IP info from acs_get_machinesAndIPs(True).
    """
    # let the CLI generate SSH keys only if no key directory exists yet
    generate_key = not os.path.exists("./deploy/sshkey")

    az_sys(" ".join([
        "group create",
        "--location=%s" % config["cluster_location"],
        "--name=%s" % config["resource_group"],
    ]))

    createArgs = [
        "acs create --orchestrator-type=kubernetes",
        "--resource-group=%s" % config["resource_group"],
        "--name=%s" % config["cluster_name"],
        "--agent-count=%d" % config["worker_node_num"],
        "--master-count=%d" % config["master_node_num"],
        "--location=%s" % config["cluster_location"],
        "--agent-vm-size=%s" % config["acsagentsize"],
        "--admin-username=%s" % config["admin_username"],
        "--ssh-key-value=%s" % "./deploy/sshkey/id_rsa.pub",
    ]
    if generate_key:
        os.system("rm -r ./deploy/sshkey || true")
        createArgs.append("--generate-ssh-keys")
    az_sys(" ".join(createArgs))

    acs_set_resource_grp(True)  # overwrite resource group if machines are elsewhere

    acs_create_storage()
    az_create_sql()

    acs_update_azconfig(True)

    acs_get_config()

    # Get/create public IP addresses for all machines
    nodeInfo = acs_get_machinesAndIPs(True)

    # Add rules for NSG
    acs_add_nsg_rules({"HTTPAllow" : 80, "RestfulAPIAllow" : 5000, "AllowKubernetesServicePorts" : "30000-32767"})

    return nodeInfo
|
||||||
|
|
||||||
|
# Main / Globals
|
||||||
|
# File the generated Azure cluster configuration is stored in.
azConfigFile = "azure_cluster_config.yaml"

# Module-level state read by every function above. These used to be assigned
# only under the __main__ guard, so importing the module and calling a
# function without first injecting acs_tools.config / acs_tools.verbose
# raised NameError. Importers can still overwrite both.
verbose = False
config = {}

if __name__ == '__main__':
    # nothing for now
    pass
|
||||||
|
|
|
@ -64,7 +64,7 @@ def merge_config( config1, config2 ):
|
||||||
else:
|
else:
|
||||||
config1[entry] = config2[entry]
|
config1[entry] = config2[entry]
|
||||||
|
|
||||||
def update_config(config):
|
def update_config(config, genSSH=True):
|
||||||
config["azure_cluster"]["resource_group_name"] = config["azure_cluster"]["cluster_name"]+"ResGrp"
|
config["azure_cluster"]["resource_group_name"] = config["azure_cluster"]["cluster_name"]+"ResGrp"
|
||||||
config["azure_cluster"]["vnet_name"] = config["azure_cluster"]["cluster_name"]+"-VNet"
|
config["azure_cluster"]["vnet_name"] = config["azure_cluster"]["cluster_name"]+"-VNet"
|
||||||
config["azure_cluster"]["storage_account_name"] = config["azure_cluster"]["cluster_name"]+"storage"
|
config["azure_cluster"]["storage_account_name"] = config["azure_cluster"]["cluster_name"]+"storage"
|
||||||
|
@ -78,6 +78,7 @@ def update_config(config):
|
||||||
if "sql_admin_password" not in config["azure_cluster"]:
|
if "sql_admin_password" not in config["azure_cluster"]:
|
||||||
config["azure_cluster"]["sql_admin_password"] = uuid.uuid4().hex+"12!AB"
|
config["azure_cluster"]["sql_admin_password"] = uuid.uuid4().hex+"12!AB"
|
||||||
|
|
||||||
|
if (genSSH):
|
||||||
if (os.path.exists('./deploy/sshkey/id_rsa.pub')):
|
if (os.path.exists('./deploy/sshkey/id_rsa.pub')):
|
||||||
f = open('./deploy/sshkey/id_rsa.pub')
|
f = open('./deploy/sshkey/id_rsa.pub')
|
||||||
config["azure_cluster"]["sshkey"] = f.read()
|
config["azure_cluster"]["sshkey"] = f.read()
|
||||||
|
@ -89,6 +90,7 @@ def update_config(config):
|
||||||
f = open('./deploy/sshkey/azure_id_rsa.pub')
|
f = open('./deploy/sshkey/azure_id_rsa.pub')
|
||||||
config["azure_cluster"]["sshkey"] = f.read()
|
config["azure_cluster"]["sshkey"] = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
@ -282,7 +284,7 @@ def create_cluster():
|
||||||
print "creating VM %s..." % vmname
|
print "creating VM %s..." % vmname
|
||||||
create_vm(vmname, True)
|
create_vm(vmname, True)
|
||||||
|
|
||||||
def gen_cluster_config(output_file_name):
|
def gen_cluster_config(output_file_name, output_file=True):
|
||||||
|
|
||||||
cmd = """
|
cmd = """
|
||||||
az storage account show-connection-string \
|
az storage account show-connection-string \
|
||||||
|
@ -337,10 +339,13 @@ def gen_cluster_config(output_file_name):
|
||||||
if file_share_key is not None:
|
if file_share_key is not None:
|
||||||
cc["mountpoints"]["rootshare"]["accesskey"] = file_share_key
|
cc["mountpoints"]["rootshare"]["accesskey"] = file_share_key
|
||||||
|
|
||||||
|
if output_file:
|
||||||
print yaml.dump(cc, default_flow_style=False)
|
print yaml.dump(cc, default_flow_style=False)
|
||||||
with open(output_file_name, 'w') as outfile:
|
with open(output_file_name, 'w') as outfile:
|
||||||
yaml.dump(cc, outfile, default_flow_style=False)
|
yaml.dump(cc, outfile, default_flow_style=False)
|
||||||
|
|
||||||
|
return cc
|
||||||
|
|
||||||
def run_command( args, command, nargs, parser ):
|
def run_command( args, command, nargs, parser ):
|
||||||
if command =="create":
|
if command =="create":
|
||||||
create_cluster()
|
create_cluster()
|
||||||
|
|
|
@ -39,6 +39,8 @@ from config import config as k8sconfig
|
||||||
|
|
||||||
sys.path.append("../docker-images/glusterfs")
|
sys.path.append("../docker-images/glusterfs")
|
||||||
import launch_glusterfs
|
import launch_glusterfs
|
||||||
|
import az_tools
|
||||||
|
import acs_tools
|
||||||
|
|
||||||
capacityMatch = re.compile("\d+[M|G]B")
|
capacityMatch = re.compile("\d+[M|G]B")
|
||||||
digitsMatch = re.compile("\d+")
|
digitsMatch = re.compile("\d+")
|
||||||
|
@ -810,15 +812,35 @@ def create_cluster_id():
|
||||||
config["clusterId"] = utils.get_cluster_ID_from_file()
|
config["clusterId"] = utils.get_cluster_ID_from_file()
|
||||||
print "Cluster ID is " + config["clusterId"]
|
print "Cluster ID is " + config["clusterId"]
|
||||||
|
|
||||||
def add_acs_config():
|
def add_acs_config(command):
|
||||||
if (os.path.exists("./deploy/"+config["acskubeconfig"])):
|
if (command=="acs" or os.path.exists("./deploy/"+config["acskubeconfig"])):
|
||||||
config["isacs"] = True
|
config["isacs"] = True
|
||||||
create_cluster_id()
|
create_cluster_id()
|
||||||
|
|
||||||
|
#print "Config:{0}".format(config)
|
||||||
|
#print "Dockerprefix:{0}".format(config["dockerprefix"])
|
||||||
|
|
||||||
|
# Set ACS params to match
|
||||||
|
acs_tools.config = config
|
||||||
|
acs_tools.verbose = verbose
|
||||||
|
|
||||||
|
# Use az tools to generate default config params and overwrite if they don't exist
|
||||||
|
configAzure = acs_tools.acs_update_azconfig(False)
|
||||||
|
if verbose:
|
||||||
|
print "AzureConfig:\n{0}".format(configAzure)
|
||||||
|
utils.mergeDict(config, configAzure, True) # ovewrites defaults with Azure defaults
|
||||||
|
if verbose:
|
||||||
|
print "Config:\n{0}".format(config)
|
||||||
|
|
||||||
|
config["master_dns_name"] = config["cluster_name"]
|
||||||
|
config["resource_group"] = az_tools.config["azure_cluster"]["resource_group_name"]
|
||||||
config["platform-scripts"] = "acs"
|
config["platform-scripts"] = "acs"
|
||||||
config["WinbindServers"] = []
|
config["WinbindServers"] = []
|
||||||
config["etcd_node_num"] = config["master_node_num"]
|
config["etcd_node_num"] = config["master_node_num"]
|
||||||
config["kube_addons"] = [] # no addons
|
config["kube_addons"] = [] # no addons
|
||||||
|
config["mountpoints"]["rootshare"]["azstoragesku"] = config["azstoragesku"]
|
||||||
|
config["mountpoints"]["rootshare"]["azfilesharequota"] = config["azfilesharequota"]
|
||||||
|
config["freeflow"] = True
|
||||||
|
|
||||||
if ("azure-sqlservername" in config) and (not "sqlserver-hostname" in config):
|
if ("azure-sqlservername" in config) and (not "sqlserver-hostname" in config):
|
||||||
config["sqlserver-hostname"] = ("tcp:%s.database.windows.net" % config["azure-sqlservername"])
|
config["sqlserver-hostname"] = ("tcp:%s.database.windows.net" % config["azure-sqlservername"])
|
||||||
|
@ -828,7 +850,7 @@ def add_acs_config():
|
||||||
config["azure-sqlservername"] = match.group(1)
|
config["azure-sqlservername"] = match.group(1)
|
||||||
|
|
||||||
# Some locations put VMs in child resource groups
|
# Some locations put VMs in child resource groups
|
||||||
acs_set_resource_grp()
|
acs_tools.acs_set_resource_grp(False)
|
||||||
|
|
||||||
# check for GPU sku
|
# check for GPU sku
|
||||||
match = re.match('.*\_N.*', config["acsagentsize"])
|
match = re.match('.*\_N.*', config["acsagentsize"])
|
||||||
|
@ -855,6 +877,9 @@ def add_acs_config():
|
||||||
except:
|
except:
|
||||||
()
|
()
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print "Config:{0}".format(config)
|
||||||
|
|
||||||
# Render scripts for kubenete nodes
|
# Render scripts for kubenete nodes
|
||||||
def add_kubelet_config():
|
def add_kubelet_config():
|
||||||
renderfiles = []
|
renderfiles = []
|
||||||
|
@ -1065,7 +1090,7 @@ def get_ETCD_master_nodes_from_config(clusterId):
|
||||||
def get_nodes_from_acs(tomatch=""):
|
def get_nodes_from_acs(tomatch=""):
|
||||||
bFindNodes = True
|
bFindNodes = True
|
||||||
if not ("acsnodes" in config):
|
if not ("acsnodes" in config):
|
||||||
machines = acs_get_machinesAndIPsFast()
|
machines = acs_tools.acs_get_machinesAndIPsFast()
|
||||||
config["acsnodes"] = machines
|
config["acsnodes"] = machines
|
||||||
else:
|
else:
|
||||||
bFindNodes = not (tomatch == "" or tomatch == "master" or tomatch == "agent")
|
bFindNodes = not (tomatch == "" or tomatch == "master" or tomatch == "agent")
|
||||||
|
@ -1838,63 +1863,12 @@ def pick_server( nodelists, curNode ):
|
||||||
return curNode
|
return curNode
|
||||||
|
|
||||||
# simple utils
|
# simple utils
|
||||||
class ValClass:
|
|
||||||
def __init__(self, initVal):
|
|
||||||
self.val = initVal
|
|
||||||
def set(self, newVal):
|
|
||||||
self.val = newVal
|
|
||||||
|
|
||||||
def shellquote(s):
|
|
||||||
return "'" + s.replace("'", "'\\''") + "'"
|
|
||||||
|
|
||||||
def exec_rmt_cmd(node, cmd):
|
def exec_rmt_cmd(node, cmd):
|
||||||
utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], node, cmd)
|
utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], node, cmd)
|
||||||
|
|
||||||
def rmt_cp(node, source, target):
|
def rmt_cp(node, source, target):
|
||||||
utils.sudo_scp(config["ssh_cert"], source, target, config["admin_username"], node)
|
utils.sudo_scp(config["ssh_cert"], source, target, config["admin_username"], node)
|
||||||
|
|
||||||
def tryuntil(cmdLambda, stopFn, updateFn, waitPeriod=5):
|
|
||||||
while not stopFn():
|
|
||||||
try:
|
|
||||||
output = cmdLambda() # if exception occurs here, update does not occur
|
|
||||||
#print "Output: {0}".format(output)
|
|
||||||
updateFn()
|
|
||||||
toStop = False
|
|
||||||
try:
|
|
||||||
toStop = stopFn()
|
|
||||||
except Exception as e:
|
|
||||||
print "Exception {0} -- stopping anyways".format(e)
|
|
||||||
toStop = True
|
|
||||||
if toStop:
|
|
||||||
#print "Returning {0}".format(output)
|
|
||||||
return output
|
|
||||||
except Exception as e:
|
|
||||||
print "Exception in command {0}".format(e)
|
|
||||||
if not stopFn():
|
|
||||||
print "Not done yet - Sleep for 5 seconds and continue"
|
|
||||||
time.sleep(waitPeriod)
|
|
||||||
|
|
||||||
# Run until stop condition and success
|
|
||||||
def subproc_tryuntil(cmd, stopFn, shell=True, waitPeriod=5):
|
|
||||||
bFirst = ValClass(True)
|
|
||||||
return tryuntil(lambda : subprocess.check_output(cmd, shell), lambda : not bFirst.val and stopFn(), lambda : bFirst.set(False), waitPeriod)
|
|
||||||
|
|
||||||
def subprocrun(cmd, shellArg):
|
|
||||||
#print "Running Cmd: {0} Shell: {1}".format(cmd, shellArg)
|
|
||||||
#embed()
|
|
||||||
return subprocess.check_output(cmd, shell=shellArg)
|
|
||||||
|
|
||||||
# Run once until success (no exception)
|
|
||||||
def subproc_runonce(cmd, shell=True, waitPeriod=5):
|
|
||||||
bFirst = ValClass(True)
|
|
||||||
#print "Running cmd:{0} Shell:{1}".format(cmd, shell)
|
|
||||||
return tryuntil(lambda : subprocrun(cmd, shell), lambda : not bFirst.val, lambda : bFirst.set(False), waitPeriod)
|
|
||||||
|
|
||||||
# Run for N success
|
|
||||||
def subproc_runN(cmd, n, shell=True, waitPeriod=5):
|
|
||||||
bCnt = ValClass(0)
|
|
||||||
return tryuntil(lambda : subprocess.check_output(cmd, shell), lambda : (bCnt.val < n), lambda : bCnt.set(bCnt.val+1), waitPeriod)
|
|
||||||
|
|
||||||
# copy list of files to a node
|
# copy list of files to a node
|
||||||
def copy_list_of_files(listOfFiles, node):
|
def copy_list_of_files(listOfFiles, node):
|
||||||
with open(listOfFiles, "r") as f:
|
with open(listOfFiles, "r") as f:
|
||||||
|
@ -1943,188 +1917,9 @@ def kube_dpeloy_configchanges():
|
||||||
for configChange in config["kube_configchanges"]:
|
for configChange in config["kube_configchanges"]:
|
||||||
exec_rmt_cmd(node, "sudo kubectl apply -f "+configChange)
|
exec_rmt_cmd(node, "sudo kubectl apply -f "+configChange)
|
||||||
|
|
||||||
# AZ ACS commands
|
def acs_deploy_addons():
|
||||||
def az_cmd(cmd):
|
kube_dpeloy_configchanges()
|
||||||
if verbose:
|
kube_deploy_addons()
|
||||||
print "az "+cmd
|
|
||||||
output = subprocess.check_output("az "+cmd, shell=True)
|
|
||||||
return yaml.load(output)
|
|
||||||
|
|
||||||
def az_sys(cmd):
|
|
||||||
if verbose:
|
|
||||||
print "az "+cmd
|
|
||||||
os.system("az "+cmd)
|
|
||||||
|
|
||||||
def az_tryuntil(cmd, stopFn, waitPeriod=5):
|
|
||||||
return tryuntil(lambda : az_sys(cmd), stopFn, lambda : (), waitPeriod)
|
|
||||||
|
|
||||||
# Create SQL database
|
|
||||||
def az_create_sql_server():
|
|
||||||
# escape the password in case it has characters such as "$"
|
|
||||||
pwd = shellquote(config["sqlserver-password"])
|
|
||||||
cmd = "sql server create"
|
|
||||||
cmd += " --resource-group=%s" % config["resource_group"]
|
|
||||||
cmd += " --location=%s" % config["cluster_location"]
|
|
||||||
cmd += " --name=%s" % config["azure-sqlservername"]
|
|
||||||
cmd += " --admin-user=%s" % config["sqlserver-username"]
|
|
||||||
cmd += " --admin-password=%s" % pwd
|
|
||||||
az_sys(cmd)
|
|
||||||
# now open firewall
|
|
||||||
cmd = "sql server firewall-rule create"
|
|
||||||
cmd += " --resource-group=%s" % config["resource_group"]
|
|
||||||
cmd += " --server=%s" % config["azure-sqlservername"]
|
|
||||||
# first open all IPs
|
|
||||||
cmd2 = cmd + " --name=All --start-ip-address=0.0.0.0 --end-ip-address=255.255.255.255"
|
|
||||||
az_sys(cmd2)
|
|
||||||
# now open Azure
|
|
||||||
cmd2 = cmd + " --name=Azure --start-ip-address=0.0.0.0 --end-ip-address=0.0.0.0"
|
|
||||||
az_sys(cmd2)
|
|
||||||
|
|
||||||
def az_create_sql_database(dbname):
    """Create the SQL database `dbname` on the SQL server named in config."""
    pieces = [
        "sql db create",
        "--resource-group=%s" % config["resource_group"],
        "--server=%s" % config["azure-sqlservername"],
        "--name=%s" % dbname,
    ]
    az_sys(" ".join(pieces))
|
|
||||||
|
|
||||||
def az_create_sql():
    """Create the SQL server, then the database named by config["sqlserver-database"]."""
    az_create_sql_server()
    database_name = config["sqlserver-database"]
    az_create_sql_database(database_name)
|
|
||||||
|
|
||||||
def az_grp_exist(grpname):
    """Return True when `az group show` yields a non-None result for `grpname`."""
    group_info = az_cmd("group show --name=%s" % grpname)
    return group_info is not None
|
|
||||||
|
|
||||||
# Overwrite resource group with location where machines are located
|
|
||||||
# If no machines are found, that may be because they are not created, so leave it as it is
|
|
||||||
def acs_set_resource_grp():
    """Point config["resource_group"] at the group that holds the VMs.

    If the configured group exists but lists no VMs, the child group
    "<group>_<cluster_name>_<cluster_location>" is tried and, when it has
    VMs, replaces the configured group. The resulting group is printed.
    """
    base_group = config["resource_group"]
    if az_grp_exist(base_group):
        vms = az_cmd("vm list --resource-group=%s" % base_group)
        if len(vms) == 0:
            # try child resource group
            candidate = "%s_%s_%s" % (base_group, config["cluster_name"], config["cluster_location"])
            print("Grp %s has no matchines trying %s" % (base_group, candidate))
            if az_grp_exist(candidate):
                vms = az_cmd("vm list --resource-group=%s" % candidate)
                if len(vms) > 0:
                    # overwrite with group where machines are located
                    config["resource_group"] = candidate
    print("Resource group = %s" % config["resource_group"])
|
|
||||||
|
|
||||||
def acs_get_id(elem):
    """Return the text after the last "/" of elem["id"].

    Raises AttributeError when the id contains no "/".
    """
    full_id = elem["id"]
    _prefix, last_segment = re.match('(.*)/(.*)', full_id).groups()
    return last_segment
|
|
||||||
|
|
||||||
def acs_get_ip(ipaddrName):
    """Return the "ipAddress" field of the named public IP in the configured resource group."""
    group_arg = " --resource-group=" + config["resource_group"]
    name_arg = " --name=" + ipaddrName
    details = az_cmd("network public-ip show" + group_arg + name_arg)
    return details["ipAddress"]
|
|
||||||
|
|
||||||
def acs_attach_dns_to_node(node, dnsName=None):
    """Set the DNS name on the public IP of `node`.

    `node` is a key of config["nodenames_from_ip"]; when `dnsName` is None
    the node's machine name is used as the DNS name.
    """
    machine = config["nodenames_from_ip"][node]
    if dnsName is None:
        dnsName = machine
    public_ip_name = config["acsnodes"][machine]["publicipname"]
    pieces = [
        "network public-ip update",
        "--resource-group=%s" % config["resource_group"],
        "--name=%s" % public_ip_name,
        "--dns-name=%s" % dnsName,
    ]
    az_sys(" ".join(pieces))
|
|
||||||
|
|
||||||
def acs_attach_dns_name():
    """Attach DNS names to every master and worker node.

    The first master gets config["master_dns_name"]; all other nodes get
    the default name chosen by acs_attach_dns_to_node.
    """
    get_nodes_from_acs()
    first_master = config["kubernetes_master_node"][0]
    acs_attach_dns_to_node(first_master, config["master_dns_name"])
    for other_master in config["kubernetes_master_node"][1:]:
        acs_attach_dns_to_node(other_master)
    for worker in config["worker_node"]:
        acs_attach_dns_to_node(worker)
|
|
||||||
|
|
||||||
def acs_get_machineIP(machineName):
    """Look up NIC, ip-config and public IP information for a VM.

    Walks the VM's NICs and their ip-configs and returns the first one that
    has a public IP as {"nic", "ipconfig", "publicipname", "publicip"}.
    When none has a public IP, the first NIC / first ip-config names are
    returned with "publicipname" and "publicip" set to None.
    NOTE(review): a VM with no NIC, or whose first NIC has no ip-config,
    reaches the final return with unbound defaults.
    """
    print("Machine: " + machineName)
    group = config["resource_group"]
    vm_info = az_cmd("vm show --name=" + machineName + " --resource-group=" + group)
    interfaces = vm_info["networkProfile"]["networkInterfaces"]
    for nic_idx, nic in enumerate(interfaces):
        nicName = acs_get_id(nic)
        print("Nic Name: " + nicName)
        if nic_idx == 0:
            nicDefault = nicName
        nic_info = az_cmd("network nic show --resource-group=" + config["resource_group"] + " --name=" + nicName)
        for cfg_idx, ipConfig in enumerate(nic_info["ipConfigurations"]):
            ipConfigName = acs_get_id(ipConfig)
            print("IP Config Name: " + ipConfigName)
            if nic_idx == 0 and cfg_idx == 0:
                ipConfigDefault = ipConfigName
            configInfo = az_cmd("network nic ip-config show --resource-group=" + config["resource_group"] +
                                " --nic-name=" + nicName + " --name=" + ipConfigName)
            publicIP = configInfo["publicIpAddress"]
            if publicIP is None:
                continue
            ipName = acs_get_id(publicIP)
            print("IP Name: " + ipName)
            return {"nic": nicName, "ipconfig": ipConfigName, "publicipname": ipName, "publicip": acs_get_ip(ipName)}
    return {"nic": nicDefault, "ipconfig": ipConfigDefault, "publicipname": None, "publicip": None}
|
|
||||||
|
|
||||||
def acs_get_nodes():
    """Return the "items" list from `kubectl get nodes -o=json`.

    Uses the kubectl binary under ./deploy/bin and the kubeconfig named by
    config["acskubeconfig"] under ./deploy.
    """
    kubectl_path = os.path.abspath('./deploy/bin/kubectl')
    kubeconfig_path = os.path.abspath('./deploy/' + config["acskubeconfig"])
    raw_output = subproc_runonce(kubectl_path + ' -o=json --kubeconfig=' + kubeconfig_path + ' get nodes')
    parsed = yaml.load(raw_output)
    return parsed["items"]
|
|
||||||
|
|
||||||
def acs_get_machinesAndIPs(bCreateIP):
    """Collect NIC / public-IP info for every Kubernetes node.

    When `bCreateIP` is true, a dynamic public IP named
    "<machine>-public-ip-0" is created and attached to each node that has
    none. Also rebuilds config["nodenames_from_ip"] (public IP -> machine
    name). Returns a dict keyed by machine name.
    """
    # Public IP on worker nodes
    nodes = acs_get_nodes()
    ipInfo = {}
    config["nodenames_from_ip"] = {}
    for node in nodes:
        machineName = node["metadata"]["name"]
        info = acs_get_machineIP(machineName)
        ipInfo[machineName] = info
        if bCreateIP and (info["publicip"] is None):
            # Create IP
            ipName = machineName + "-public-ip-0"
            print("Creating public-IP: " + ipName)
            az_sys(" ".join([
                "network public-ip create --allocation-method=Dynamic",
                "--resource-group=%s" % config["resource_group"],
                "--name=%s" % ipName,
                "--location=%s" % config["cluster_location"],
            ]))
            # Add to NIC of machine
            az_sys(" ".join([
                "network nic ip-config update",
                "--resource-group=%s" % config["resource_group"],
                "--nic-name=%s" % info["nic"],
                "--name=%s" % info["ipconfig"],
                "--public-ip-address=%s" % ipName,
            ]))
            # now update
            info["publicipname"] = ipName
            info["publicip"] = acs_get_ip(ipName)
        config["nodenames_from_ip"][info["publicip"]] = machineName
    return ipInfo
|
|
||||||
|
|
||||||
def acs_get_machinesAndIPsFast():
    """Look up each node's public IP by its conventional name.

    Assumes every node's public IP is called "<machine>-public-ip-0".
    Rebuilds config["nodenames_from_ip"] and returns a dict keyed by
    machine name with "publicipname" and "publicip".
    """
    nodes = acs_get_nodes()
    ipInfo = {}
    config["nodenames_from_ip"] = {}
    for node in nodes:
        machineName = node["metadata"]["name"]
        ipName = machineName + "-public-ip-0"
        if verbose:
            print("PublicIP: " + ipName)
        entry = {}
        ipInfo[machineName] = entry
        entry["publicipname"] = ipName
        entry["publicip"] = acs_get_ip(ipName)
        config["nodenames_from_ip"][entry["publicip"]] = machineName
    return ipInfo
|
|
||||||
|
|
||||||
def acs_label_webui():
|
def acs_label_webui():
|
||||||
for n in config["kubernetes_master_node"]:
|
for n in config["kubernetes_master_node"]:
|
||||||
|
@ -2133,96 +1928,18 @@ def acs_label_webui():
|
||||||
print "Label node: "+nodeName
|
print "Label node: "+nodeName
|
||||||
label_webUI(nodeName)
|
label_webUI(nodeName)
|
||||||
|
|
||||||
def acs_is_valid_nsg_rule(rule):
    """Return True for an inbound allow rule, from any source, for TCP or any protocol."""
    if rule["access"].lower() != "allow":
        return False
    if rule["direction"].lower() != "inbound":
        return False
    if rule["sourceAddressPrefix"] != '*':
        return False
    return rule["protocol"].lower() == "tcp" or rule["protocol"] == '*'
|
|
||||||
|
|
||||||
def _nsg_rule_port_bounds(rule):
    """Return (minPort, maxPort) for an NSG rule, or (-1, -1) if it names no concrete range."""
    portRange = rule["destinationPortRange"]
    # "*" never matches the "a-b" regex, so it must be tested before int();
    # wildcard / absent ranges are not counted as satisfying a specific port.
    if portRange is None or portRange == "*":
        return (-1, -1)
    match = re.match('(.*)-(.*)', portRange)
    if match is None:
        singlePort = int(portRange)
        return (singlePort, singlePort)
    return (int(match.group(1)), int(match.group(2)))


def acs_add_nsg_rules(ports_to_add):
    """Add inbound allow rules to the cluster's network security group.

    `ports_to_add` maps rule name -> port (number, digit string, or a
    range string such as "30000-32767"). The NSG name is derived from the
    first master's machine name ("<prefix>-0" -> "<prefix>-nsg"). A single
    numeric port already covered by an existing allow rule is skipped; range
    strings are always created. New rules get priorities 10 above the
    highest existing three-digit allow rule, increasing by 10 per rule.
    """
    get_nodes_from_acs("")  # called as before; its return value is not used here
    match = re.match('(.*)-0', config["nodenames_from_ip"][config["kubernetes_master_node"][0]])
    nsg_name = match.group(1) + "-nsg"
    rulesInfo = az_cmd("network nsg show --resource-group=" + config["resource_group"] + " --name=" + nsg_name)
    rules = rulesInfo["defaultSecurityRules"] + rulesInfo["securityRules"]

    maxThreeDigitRule = 100
    for rule in rules:
        if acs_is_valid_nsg_rule(rule) and rule["priority"] < 1000:
            maxThreeDigitRule = max(maxThreeDigitRule, rule["priority"])

    if verbose:
        print("Existing max three digit rule for NSG: %s is %d" % (nsg_name, maxThreeDigitRule))

    for port_rule in ports_to_add:
        port_num = ports_to_add[port_rule]
        createRule = True
        isNum = isinstance(port_num, numbers.Number)
        if (not isNum) and port_num.isdigit():
            port_num = int(port_num)
            isNum = True
        if isNum:
            # check for existing rules
            found_port = None
            for rule in rules:
                if not acs_is_valid_nsg_rule(rule):
                    continue
                minPort, maxPort = _nsg_rule_port_bounds(rule)
                if minPort <= port_num <= maxPort:
                    found_port = rule["name"]
                    break
            if found_port is not None:
                print("Rule for %s : %d -- already satisfied by %s" % (port_rule, port_num, found_port))
                createRule = False
        if createRule:
            maxThreeDigitRule = maxThreeDigitRule + 10
            cmd = "network nsg rule create"
            cmd += " --resource-group=%s" % config["resource_group"]
            cmd += " --nsg-name=%s" % nsg_name
            cmd += " --name=%s" % port_rule
            cmd += " --access=Allow"
            if isNum:
                cmd += " --destination-port-range=%d" % port_num
            else:
                cmd += " --destination-port-range=%s" % port_num
            cmd += " --direction=Inbound"
            cmd += " --priority=%d" % maxThreeDigitRule
            az_cmd(cmd)
|
|
||||||
|
|
||||||
def acs_get_config():
    """Fetch kubectl and the cluster kubeconfig into ./deploy if missing.

    Each step is retried through az_tryuntil until the expected file exists.
    """
    # Install kubectl / get credentials
    kubectl_path = './deploy/bin/kubectl'
    if not os.path.exists(kubectl_path):
        os.system("mkdir -p ./deploy/bin")
        az_tryuntil("acs kubernetes install-cli --install-location ./deploy/bin/kubectl",
                    lambda: os.path.exists('./deploy/bin/kubectl'))
    if not os.path.exists('./deploy/' + config["acskubeconfig"]):
        credentials_cmd = " ".join([
            "acs kubernetes get-credentials",
            "--resource-group=%s" % config["resource_group"],
            "--name=%s" % config["cluster_name"],
            "--file=./deploy/%s" % config["acskubeconfig"],
            "--ssh-key-file=%s" % "./deploy/sshkey/id_rsa",
        ])
        az_tryuntil(credentials_cmd, lambda: os.path.exists("./deploy/%s" % config["acskubeconfig"]))
|
|
||||||
|
|
||||||
def acs_deploy_addons():
    # Apply the kube config changes first, then deploy the addons.
    kube_dpeloy_configchanges()
    kube_deploy_addons()
|
|
||||||
|
|
||||||
# other config post deploy -- ACS cluster is complete
|
# other config post deploy -- ACS cluster is complete
|
||||||
# Run prescript, copyfiles, postscript
|
# Run prescript, copyfiles, postscript
|
||||||
def acs_post_deploy():
|
def acs_post_deploy():
|
||||||
|
# Attach DNS name to nodes
|
||||||
|
acs_attach_dns_name()
|
||||||
|
|
||||||
|
# Label nodes
|
||||||
|
ip = get_nodes_from_acs("")
|
||||||
|
acs_label_webui()
|
||||||
|
kubernetes_label_nodes("active", [], args.yes )
|
||||||
|
|
||||||
|
# Copy files, etc.
|
||||||
get_nodes_from_acs()
|
get_nodes_from_acs()
|
||||||
gen_configs()
|
gen_configs()
|
||||||
utils.render_template_directory("./template/kubelet", "./deploy/kubelet", config)
|
utils.render_template_directory("./template/kubelet", "./deploy/kubelet", config)
|
||||||
|
@ -2238,77 +1955,15 @@ def acs_post_deploy():
|
||||||
deploy_on_nodes(config["worker_predeploy"], config["worker_filesdeploy"], config["worker_postdeploy"],
|
deploy_on_nodes(config["worker_predeploy"], config["worker_filesdeploy"], config["worker_postdeploy"],
|
||||||
config["worker_node"])
|
config["worker_node"])
|
||||||
|
|
||||||
def acs_deploy():
|
def acs_attach_dns_name():
|
||||||
config["isacs"] = True
|
get_nodes_from_acs()
|
||||||
create_cluster_id()
|
firstMasterNode = config["kubernetes_master_node"][0]
|
||||||
|
acs_tools.acs_attach_dns_to_node(firstMasterNode, config["master_dns_name"])
|
||||||
generate_key = not os.path.exists("./deploy/sshkey")
|
for i in range(len(config["kubernetes_master_node"])):
|
||||||
|
if (i != 0):
|
||||||
cmd = "group create"
|
acs_tools.acs_attach_dns_to_node(config["kubernetes_master_node"][i])
|
||||||
cmd += " --location=%s" % config["cluster_location"]
|
for node in config["worker_node"]:
|
||||||
cmd += " --name=%s" % config["resource_group"]
|
acs_tools.acs_attach_dns_to_node(node)
|
||||||
az_sys(cmd)
|
|
||||||
|
|
||||||
acs_create_storage()
|
|
||||||
az_create_sql()
|
|
||||||
|
|
||||||
cmd = "acs create --orchestrator-type=kubernetes"
|
|
||||||
cmd += " --resource-group=%s" % config["resource_group"]
|
|
||||||
cmd += " --name=%s" % config["cluster_name"]
|
|
||||||
cmd += " --agent-count=%d" % config["worker_node_num"]
|
|
||||||
cmd += " --master-count=%d" % config["master_node_num"]
|
|
||||||
cmd += " --location=%s" % config["cluster_location"]
|
|
||||||
cmd += " --agent-vm-size=%s" % config["acsagentsize"]
|
|
||||||
cmd += " --admin-username=%s" % config["admin_username"]
|
|
||||||
cmd += " --ssh-key-value=%s" % "./deploy/sshkey/id_rsa.pub"
|
|
||||||
if (generate_key):
|
|
||||||
os.system("rm -r ./deploy/sshkey || true")
|
|
||||||
cmd += " --generate-ssh-keys"
|
|
||||||
az_sys(cmd)
|
|
||||||
|
|
||||||
acs_set_resource_grp() # overwrite resource group if machines are elsewhere
|
|
||||||
|
|
||||||
acs_get_config()
|
|
||||||
|
|
||||||
# Get/create public IP addresses for all machines
|
|
||||||
Nodes = acs_get_machinesAndIPs(True)
|
|
||||||
|
|
||||||
# Label nodes
|
|
||||||
ip = get_nodes_from_acs("")
|
|
||||||
acs_label_webui()
|
|
||||||
kubernetes_label_nodes("active", [], args.yes )
|
|
||||||
|
|
||||||
# Add rules for NSG
|
|
||||||
acs_add_nsg_rules({"HTTPAllow" : 80, "RestfulAPIAllow" : 5000, "AllowKubernetesServicePorts" : "30000-32767"})
|
|
||||||
|
|
||||||
# Attach DNS name to master
|
|
||||||
acs_attach_dns_name()
|
|
||||||
|
|
||||||
return Nodes
|
|
||||||
|
|
||||||
def acs_get_storage_key():
    """Return the "value" of the first key listed for the rootshare storage account."""
    list_cmd = " ".join([
        "storage account keys list",
        "--account-name=%s" % config["mountpoints"]["rootshare"]["accountname"],
        "--resource-group=%s" % config["resource_group"],
    ])
    account_keys = az_cmd(list_cmd)
    return account_keys[0]["value"]
|
|
||||||
|
|
||||||
def acs_create_storage():
    """Create the rootshare storage account and its file share.

    The account key is fetched after the account is created and stored in
    config["mountpoints"]["rootshare"]["accesskey"].
    """
    # Create storage account
    account_cmd = " ".join([
        "storage account create",
        "--name=%s" % config["mountpoints"]["rootshare"]["accountname"],
        "--resource-group=%s" % config["resource_group"],
        "--sku=%s" % config["mountpoints"]["rootshare"]["azstoragesku"],
    ])
    az_sys(account_cmd)

    # Create file share
    azureKey = acs_get_storage_key()
    config["mountpoints"]["rootshare"]["accesskey"] = azureKey
    share_cmd = " ".join([
        "storage share create",
        "--name=%s" % config["mountpoints"]["rootshare"]["filesharename"],
        "--quota=%s" % config["mountpoints"]["rootshare"]["filesharequota"],
        "--account-name=%s" % config["mountpoints"]["rootshare"]["accountname"],
        "--account-key=%s" % azureKey,
    ])
    az_sys(share_cmd)
|
|
||||||
|
|
||||||
def acs_install_gpu():
|
def acs_install_gpu():
|
||||||
nodes = get_worker_nodes(config["clusterId"])
|
nodes = get_worker_nodes(config["clusterId"])
|
||||||
|
@ -3634,7 +3289,7 @@ def run_command( args, command, nargs, parser ):
|
||||||
if "clusterId" in tmp:
|
if "clusterId" in tmp:
|
||||||
config["clusterId"] = tmp["clusterId"]
|
config["clusterId"] = tmp["clusterId"]
|
||||||
|
|
||||||
add_acs_config()
|
add_acs_config(command)
|
||||||
if verbose and config["isacs"]:
|
if verbose and config["isacs"]:
|
||||||
print "Using Azure Container Services"
|
print "Using Azure Container Services"
|
||||||
|
|
||||||
|
@ -3968,28 +3623,28 @@ def run_command( args, command, nargs, parser ):
|
||||||
run_script_blocks(scriptblocks["acs"])
|
run_script_blocks(scriptblocks["acs"])
|
||||||
elif (len(nargs) >= 1):
|
elif (len(nargs) >= 1):
|
||||||
if nargs[0]=="deploy":
|
if nargs[0]=="deploy":
|
||||||
acs_deploy()
|
acs_tools.acs_deploy() # Core K8s cluster deployment
|
||||||
elif nargs[0]=="getconfig":
|
elif nargs[0]=="getconfig":
|
||||||
acs_get_config()
|
acs_tools.acs_get_config()
|
||||||
elif nargs[0]=="getip":
|
elif nargs[0]=="getip":
|
||||||
ip = acs_get_machinesAndIPsFast()
|
ip = acs_tools.acs_get_machinesAndIPsFast()
|
||||||
print ip
|
print ip
|
||||||
elif nargs[0]=="createip":
|
elif nargs[0]=="createip":
|
||||||
ip = acs_get_machinesAndIPs(True)
|
ip = acs_tools.acs_get_machinesAndIPs(True)
|
||||||
print ip
|
print ip
|
||||||
elif nargs[0]=="label":
|
elif nargs[0]=="label":
|
||||||
ip = get_nodes_from_acs("")
|
ip = get_nodes_from_acs("")
|
||||||
acs_label_webui()
|
acs_label_webui()
|
||||||
elif nargs[0]=="openports":
|
elif nargs[0]=="openports":
|
||||||
acs_add_nsg_rules({"HTTPAllow" : 80, "RestfulAPIAllow" : 5000, "AllowKubernetesServicePorts" : "30000-32767"})
|
acs_tools.acs_add_nsg_rules({"HTTPAllow" : 80, "RestfulAPIAllow" : 5000, "AllowKubernetesServicePorts" : "30000-32767"})
|
||||||
elif nargs[0]=="restartwebui":
|
elif nargs[0]=="restartwebui":
|
||||||
run_script_blocks(scriptblocks["restartwebui"])
|
run_script_blocks(scriptblocks["restartwebui"])
|
||||||
elif nargs[0]=="getserviceaddr":
|
elif nargs[0]=="getserviceaddr":
|
||||||
print "Address: =" + json.dumps(k8sUtils.GetServiceAddress(nargs[1]))
|
print "Address: =" + json.dumps(k8sUtils.GetServiceAddress(nargs[1]))
|
||||||
elif nargs[0]=="storage":
|
elif nargs[0]=="storage":
|
||||||
acs_create_storage()
|
acs_tools.acs_create_storage()
|
||||||
elif nargs[0]=="storagemount":
|
elif nargs[0]=="storagemount":
|
||||||
acs_create_storage()
|
acs_tools.acs_create_storage()
|
||||||
fileshare_install()
|
fileshare_install()
|
||||||
allmountpoints = mount_fileshares_by_service(True)
|
allmountpoints = mount_fileshares_by_service(True)
|
||||||
del_fileshare_links()
|
del_fileshare_links()
|
||||||
|
|
|
@ -366,3 +366,68 @@ def addressInNetwork(ip,net):
|
||||||
ret = False
|
ret = False
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
class ValClass:
    """Mutable single-value holder, so lambdas can update shared state."""

    def __init__(self, initVal):
        """Store `initVal` as the current value."""
        self.val = initVal

    def set(self, newVal):
        """Replace the current value with `newVal`."""
        self.val = newVal
|
||||||
|
|
||||||
|
def shellquote(s):
    """Wrap `s` in single quotes for a POSIX shell, escaping embedded single quotes."""
    escaped = s.replace("'", "'\\''")
    return "".join(["'", escaped, "'"])
|
||||||
|
|
||||||
|
def tryuntil(cmdLambda, stopFn, updateFn, waitPeriod=5):
    """Run `cmdLambda` repeatedly until `stopFn()` reports completion.

    Each round calls cmdLambda(), then updateFn(), then stopFn(); when
    stopFn() is true (or raises) the command's output is returned. If
    cmdLambda or updateFn raises, the exception is printed and the round
    is retried after `waitPeriod` seconds. Returns None without running
    anything when stopFn() is already true on entry.
    """
    while not stopFn():
        try:
            output = cmdLambda()  # if exception occurs here, update does not occur
            updateFn()
            try:
                toStop = stopFn()
            except Exception as e:
                print("Exception {0} -- stopping anyways".format(e))
                toStop = True
            if toStop:
                return output
        except Exception as e:
            print("Exception in command {0}".format(e))
        if not stopFn():
            # report the actual wait, which callers may override
            print("Not done yet - Sleep for {0} seconds and continue".format(waitPeriod))
            time.sleep(waitPeriod)
|
||||||
|
|
||||||
|
# Run until stop condition and success
|
||||||
|
def subproc_tryuntil(cmd, stopFn, shell=True, waitPeriod=5):
    """Run `cmd` at least once, retrying until it succeeds and `stopFn()` is true.

    Returns the output of the last successful run.
    """
    bFirst = ValClass(True)  # stays True until the command has succeeded once
    # `shell` must be passed by keyword: positionally it lands in Popen's bufsize
    return tryuntil(lambda: subprocess.check_output(cmd, shell=shell),
                    lambda: not bFirst.val and stopFn(),
                    lambda: bFirst.set(False),
                    waitPeriod)
|
||||||
|
|
||||||
|
def subprocrun(cmd, shellArg):
    """Run `cmd` and return its captured stdout; `shellArg` selects shell execution.

    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    captured = subprocess.check_output(cmd, shell=shellArg)
    return captured
|
||||||
|
|
||||||
|
# Run once until success (no exception)
|
||||||
|
def subproc_runonce(cmd, shell=True, waitPeriod=5):
    """Run `cmd` until it succeeds once (no exception) and return its output."""
    pending = ValClass(True)

    def run():
        return subprocrun(cmd, shell)

    def succeeded():
        return not pending.val

    def mark_done():
        return pending.set(False)

    return tryuntil(run, succeeded, mark_done, waitPeriod)
|
||||||
|
|
||||||
|
# Run for N success
|
||||||
|
def subproc_runN(cmd, n, shell=True, waitPeriod=5):
    """Run `cmd` until it has succeeded `n` times; return the last output.

    Returns None without running anything when n <= 0.
    """
    bCnt = ValClass(0)  # number of successful runs so far
    # Stop once n successes are reached. The previous predicate (count < n) was
    # already true on entry, so the command never ran.
    # `shell` must be passed by keyword: positionally it lands in Popen's bufsize.
    return tryuntil(lambda: subprocess.check_output(cmd, shell=shell),
                    lambda: bCnt.val >= n,
                    lambda: bCnt.set(bCnt.val + 1),
                    waitPeriod)
|
||||||
|
|
||||||
|
def mergeDict(configDst, configSrc, bOverwrite):
    """Copy entries of `configSrc` into `configDst` in place.

    With `bOverwrite` every source entry replaces the destination entry.
    Otherwise an entry is copied only when the destination lacks it, holds
    None, or holds the string "null" (case-insensitive). Dict values are
    copied recursively into a fresh dict rather than shared.
    """
    for key in configSrc:
        if bOverwrite:
            configDst.pop(key, None)
        needs_value = (key not in configDst) or (configDst[key] is None) or \
            (isinstance(configDst[key], basestring) and configDst[key].lower() == "null")
        if not needs_value:
            continue
        src_value = configSrc[key]
        if isinstance(src_value, dict):
            configDst[key] = {}
            mergeDict(configDst[key], src_value, bOverwrite)
        else:
            configDst[key] = src_value
|
||||||
|
|
Загрузка…
Ссылка в новой задаче