- Package and use Slurm 18.08 instead of the default version from the distro repo
- Slurm "master" is split into separate controller and login nodes
- Integrate the RemoteFS shared file system into the Slurm cluster
- Automatic feature tagging on Slurm nodes
- Support CentOS 7, Ubuntu 16.04, and Ubuntu 18.04 Batch pools as Slurm node targets
- Unify the cluster user across login nodes and Batch pools, based on the login user
- Automatically provision a passwordless SSH user on compute nodes with the login user context
- Add slurm cluster commands, including an orchestrate command
- Add separate SSH access for controller, login, and compute nodes
- Add Slurm configuration doc
- Add Slurm guide
- Add Slurm recipe
- Update usage doc
- Remove deprecated MSI VM extension from monitoring and federation
- Fix pool node count query on a non-existent pool
- Refactor SSH info retrieval to allow offsets
- Add fs cluster orchestrate command
This commit is contained in:
Fred Park 2019-01-15 09:56:03 -08:00
Parent d33f06457d
Commit 314037f76f
No key found matching this signature
GPG key ID: 3C4D545F457737EB
63 changed files with 7358 additions and 430 deletions


@ -108,3 +108,7 @@ credentials:
admin:
username: grafana_username
password: grafana_user_password
# slurm credentials
slurm:
db_password: db_password
# TODO munge key

config_templates/slurm.yaml (new file, 111 lines added)

@ -0,0 +1,111 @@
slurm:
  storage_account_settings: mystorageaccount
  location: <Azure region, e.g., eastus>
  resource_group: my-slurm-rg
  cluster_id: slurm
  controller:
    ssh:
      username: shipyard
      ssh_public_key: /path/to/rsa/publickey.pub
      ssh_public_key_data: ssh-rsa ...
      ssh_private_key: /path/to/rsa/privatekey
      generated_file_export_path: null
    public_ip:
      enabled: true
      static: false
    virtual_network:
      name: myvnet
      resource_group: my-vnet-resource-group
      existing_ok: false
      address_space: 10.0.0.0/16
      subnet:
        name: my-slurm-controller-subnet
        address_prefix: 10.0.1.0/24
    network_security:
      ssh:
      - '*'
      custom_inbound_rules:
        myrule:
          destination_port_range: 5000-5001
          protocol: '*'
          source_address_prefix:
          - 1.2.3.4
          - 5.6.7.0/24
    vm_size: STANDARD_D2_V2
    vm_count: 2
    accelerated_networking: false
    additional_prep_script: /path/to/some/script-controller.sh
  login:
    ssh:
      username: shipyard
      ssh_public_key: /path/to/rsa/publickey.pub
      ssh_public_key_data: ssh-rsa ...
      ssh_private_key: /path/to/rsa/privatekey
      generated_file_export_path: null
    public_ip:
      enabled: true
      static: false
    virtual_network:
      name: myvnet
      resource_group: my-vnet-resource-group
      existing_ok: false
      address_space: 10.0.0.0/16
      subnet:
        name: my-slurm-login-subnet
        address_prefix: 10.0.2.0/24
    network_security:
      ssh:
      - '*'
      custom_inbound_rules:
        myrule:
          destination_port_range: 5000-5001
          protocol: '*'
          source_address_prefix:
          - 1.2.3.4
          - 5.6.7.0/24
    vm_size: STANDARD_D4_V2
    vm_count: 1
    accelerated_networking: false
    additional_prep_script: /path/to/some/script-login.sh
  shared_data_volumes:
    nfs_server:
      mount_path: /shared
      store_slurmctld_state: true
  slurm_options:
    idle_reclaim_time: 00:15:00
    elastic_partitions:
      partition_1:
        batch_pools:
          mypool1:
            account_service_url: https://...
            compute_node_type: dedicated
            max_compute_nodes: 32
            weight: 0
            features:
            - arbitrary_constraint_1
            reclaim_exclude_num_nodes: 8
          mypool2:
            account_service_url: https://...
            compute_node_type: low_priority
            max_compute_nodes: 128
            weight: 1
            features:
            - arbitrary_constraint_2
            reclaim_exclude_num_nodes: 0
        max_runtime_limit: null
        default: true
      partition_2:
        batch_pools:
          mypool3:
            account_service_url: https://...
            compute_node_type: low_priority
            max_compute_nodes: 256
            weight: 2
            features: []
            reclaim_exclude_num_nodes: 0
        max_runtime_limit: 1.12:00:00
        default: false
    unmanaged_partitions:
    - partition: 'PartitionName=onprem Nodes=onprem-[0-31] Default=No MaxTime=INFINITE State=UP'
      nodes:
      - 'NodeName=onprem-[0-31] CPUs=512 Sockets=1 CoresPerSocket=8 ThreadsPerCore=2 RealMemory=512128 State=UNKNOWN'
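As a quick sanity check (not part of the commit), the template above can be parsed and its elastic partitions listed with PyYAML; this sketch assumes the file is saved as config_templates/slurm.yaml and that the PyYAML package is installed:

import yaml

# load the slurm configuration template
with open('config_templates/slurm.yaml') as f:
    cfg = yaml.safe_load(f)

slurm = cfg['slurm']
print('cluster_id:', slurm['cluster_id'])
# each elastic partition maps one or more Batch pools to a Slurm partition
for name, part in slurm['slurm_options']['elastic_partitions'].items():
    print('partition {} (default={}): pools={}'.format(
        name, part['default'], ', '.join(part['batch_pools'])))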


@ -2798,7 +2798,10 @@ def get_node_counts(batch_client, config, pool_id=None):
account_list_pool_node_counts_options=batchmodels.
AccountListPoolNodeCountsOptions(
filter='poolId eq \'{}\''.format(pool_id)))
pc = list(pc)[0]
try:
pc = list(pc)[0]
except IndexError:
raise RuntimeError('pool {} does not exist'.format(pool_id))
except batchmodels.BatchErrorException as ex:
if 'pool does not exist' in ex.message.value:
logger.error('{} pool does not exist'.format(pool_id))
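The hunk above guards the single-element indexing of the filtered node-count listing so that a missing pool surfaces as a descriptive error instead of an unhandled IndexError. A minimal standalone illustration of the same guard pattern (names are hypothetical, not the library API):

def first_or_raise(results, pool_id):
    # list() drains the (possibly paged) iterable; an empty result means the
    # filter matched no pool with that id
    try:
        return list(results)[0]
    except IndexError:
        raise RuntimeError('pool {} does not exist'.format(pool_id))

print(first_or_raise(iter([{'pool_id': 'mypool', 'dedicated': 2}]), 'mypool'))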


@ -55,6 +55,9 @@ _SSH_KEY_PREFIX = 'id_rsa_shipyard'
_REMOTEFS_SSH_KEY_PREFIX = '{}_remotefs'.format(_SSH_KEY_PREFIX)
_MONITORING_SSH_KEY_PREFIX = '{}_monitoring'.format(_SSH_KEY_PREFIX)
_FEDERATION_SSH_KEY_PREFIX = '{}_federation'.format(_SSH_KEY_PREFIX)
_SLURM_CONTROLLER_SSH_KEY_PREFIX = '{}_slurm_controller'.format(
_SSH_KEY_PREFIX)
_SLURM_LOGIN_SSH_KEY_PREFIX = '{}_slurm_login'.format(_SSH_KEY_PREFIX)
# named tuples
PfxSettings = collections.namedtuple(
'PfxSettings', [
@ -99,6 +102,19 @@ def get_federation_ssh_key_prefix():
return _FEDERATION_SSH_KEY_PREFIX
def get_slurm_ssh_key_prefix(kind):
# type: (str) -> str
"""Get slurm SSH key prefix
:param str kind: kind
:rtype: str
:return: ssh key prefix for slurm
"""
if kind == 'controller':
return _SLURM_CONTROLLER_SSH_KEY_PREFIX
else:
return _SLURM_LOGIN_SSH_KEY_PREFIX
def generate_rdp_password():
# type: (None) -> str
"""Generate an RDP password


@ -80,7 +80,7 @@ def _create_virtual_machine_extension(
vm_ext_name = settings.generate_virtual_machine_extension_name(
vm_resource, offset)
# try to get storage account resource group
ssel = settings.federation_storage_account_settings(config)
ssel = settings.other_storage_account_settings(config, 'federation')
rg = settings.credentials_storage(config, ssel).resource_group
# construct bootstrap command
cmd = './{bsf}{a}{log}{p}{r}{s}{v}'.format(
@ -348,18 +348,10 @@ def create_federation_proxy(
)
fqdn = pip.dns_settings.fqdn
ipinfo = 'fqdn={} public_ip_address={}'.format(fqdn, pip.ip_address)
# install msi vm extension
async_ops['vmext'] = {}
async_ops['vmext'][0] = resource.AsyncOperation(
functools.partial(
resource.create_msi_virtual_machine_extension, compute_client, fs,
vms[0].name, 0, settings.verbose(config)),
max_retries=0,
)
logger.debug('waiting for virtual machine msi extensions to provision')
for offset in async_ops['vmext']:
async_ops['vmext'][offset].result()
# install vm extension
async_ops['vmext'] = {}
async_ops['vmext'][0] = resource.AsyncOperation(
functools.partial(
_create_virtual_machine_extension, compute_client, config, fs,


@ -54,6 +54,7 @@ from . import monitor
from . import remotefs
from . import resource
from . import settings
from . import slurm
from . import storage
from . import util
from .version import __version__
@ -296,6 +297,39 @@ _FEDERATIONSERVICES_FILE = (
_ALL_FEDERATION_FILES = [
_FEDERATIONPREP_FILE, _FEDERATIONSERVICES_FILE,
]
_SLURMMASTERPREP_FILE = (
'shipyard_slurm_master_bootstrap.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_slurm_master_bootstrap.sh')
)
_SLURMCOMPUTENODEPREP_FILE = (
'shipyard_slurm_computenode_nodeprep.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_slurm_computenode_nodeprep.sh')
)
_SLURMPY_FILE = (
'slurm.py',
pathlib.Path(_ROOT_PATH, 'slurm/slurm.py')
)
_SLURMREQ_FILE = (
'requirements.txt',
pathlib.Path(_ROOT_PATH, 'slurm/requirements.txt')
)
_SLURMCONF_FILE = (
'slurm.conf',
pathlib.Path(_ROOT_PATH, 'slurm/slurm.conf')
)
_SLURMDBDCONF_FILE = (
'slurmdbd.conf',
pathlib.Path(_ROOT_PATH, 'slurm/slurmdbd.conf')
)
_SLURMDBSQL_FILE = (
'slurmdb.sql',
pathlib.Path(_ROOT_PATH, 'slurm/slurmdb.sql')
)
_CONFIGURABLE_SLURM_FILES = {
'slurm': _SLURMCONF_FILE,
'slurmdbd': _SLURMDBDCONF_FILE,
'slurmdbsql': _SLURMDBSQL_FILE,
}
def initialize_globals(verbose):
@ -771,9 +805,6 @@ def _create_storage_cluster_mount_args(
:rtype: tuple
:return: (fstab mount, storage cluster arg)
"""
fstab_mount = None
sc_arg = None
ba, _ = batch.get_batch_account(batch_mgmt_client, config)
# check for vnet/subnet presence
if util.is_none_or_empty(subnet_id):
raise RuntimeError(
@ -782,15 +813,9 @@ def _create_storage_cluster_mount_args(
# get remotefs settings
rfs = settings.remotefs_settings(config, sc_id)
sc = rfs.storage_cluster
# iterate through shared data volumes and fine storage clusters
sdv = settings.global_resources_shared_data_volumes(config)
if (sc_id not in sdv or
not settings.is_shared_data_volume_storage_cluster(
sdv, sc_id)):
raise RuntimeError(
'No storage cluster {} found in configuration'.format(sc_id))
vnet_subid, vnet_rg, _, vnet_name, subnet_name = _explode_arm_subnet_id(
subnet_id)
# perform checks
vnet_subid, vnet_rg, _, vnet_name, subnet_name = \
util.explode_arm_subnet_id(subnet_id)
# check for same vnet name
if vnet_name.lower() != sc.virtual_network.name.lower():
raise RuntimeError(
@ -804,6 +829,7 @@ def _create_storage_cluster_mount_args(
'{} with pool virtual network in resource group {}'.format(
sc_id, sc.virtual_network.resource_group, vnet_rg))
# cross check vnet subscription id
ba, _ = batch.get_batch_account(batch_mgmt_client, config)
_ba_tmp = ba.id.lower().split('/')
if vnet_subid.lower() != _ba_tmp[2]:
raise RuntimeError(
@ -811,152 +837,12 @@ def _create_storage_cluster_mount_args(
'{} with pool virtual network in subscription {}'.format(
sc_id, vnet_subid, _ba_tmp[2]))
del _ba_tmp
# construct host mount path
host_mount_path = '{}/{}'.format(
settings.get_host_mounts_path(False), sc_id)
# return fstab and sc arg
return remotefs.create_storage_cluster_mount_args(
compute_client, network_client, config, sc_id, host_mount_path)
# get vm count
if sc.vm_count < 1:
raise RuntimeError(
'storage cluster {} vm_count {} is invalid'.format(
sc_id, sc.vm_count))
# get fileserver type
if sc.file_server.type == 'nfs':
# query first vm for info
vm_name = settings.generate_virtual_machine_name(sc, 0)
vm = compute_client.virtual_machines.get(
resource_group_name=sc.resource_group,
vm_name=vm_name,
)
nic = resource.get_nic_from_virtual_machine(
network_client, sc.resource_group, vm)
# get private ip of vm
remote_ip = nic.ip_configurations[0].private_ip_address
# construct mount options
mo = '_netdev,noauto,nfsvers=4,intr'
amo = settings.shared_data_volume_mount_options(sdv, sc_id)
if util.is_not_empty(amo):
if 'udp' in mo:
raise RuntimeError(
('udp cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if 'auto' in mo:
raise RuntimeError(
('auto cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('nfsvers=') for x in amo]):
raise RuntimeError(
('nfsvers cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('port=') for x in amo]):
raise RuntimeError(
('port cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
mo = ','.join((mo, ','.join(amo)))
# construct mount string for fstab
fstab_mount = (
'{remoteip}:{srcpath} {hmp}/{scid} '
'{fstype} {mo} 0 2').format(
remoteip=remote_ip,
srcpath=sc.file_server.mountpoint,
hmp=settings.get_host_mounts_path(False),
scid=sc_id,
fstype=sc.file_server.type,
mo=mo)
elif sc.file_server.type == 'glusterfs':
# walk vms and find non-overlapping ud/fds
primary_ip = None
primary_ud = None
primary_fd = None
backup_ip = None
backup_ud = None
backup_fd = None
vms = {}
# first pass, attempt to populate all ip, ud/fd
for i in range(sc.vm_count):
vm_name = settings.generate_virtual_machine_name(sc, i)
vm = compute_client.virtual_machines.get(
resource_group_name=sc.resource_group,
vm_name=vm_name,
expand=compute_client.virtual_machines.models.
InstanceViewTypes.instance_view,
)
nic = resource.get_nic_from_virtual_machine(
network_client, sc.resource_group, vm)
vms[i] = (vm, nic)
# get private ip and ud/fd of vm
remote_ip = nic.ip_configurations[0].private_ip_address
ud = vm.instance_view.platform_update_domain
fd = vm.instance_view.platform_fault_domain
if primary_ip is None:
primary_ip = remote_ip
primary_ud = ud
primary_fd = fd
if backup_ip is None:
if (primary_ip == backup_ip or primary_ud == ud or
primary_fd == fd):
continue
backup_ip = remote_ip
backup_ud = ud
backup_fd = fd
# second pass, fill in with at least non-overlapping update domains
if backup_ip is None:
for i in range(sc.vm_count):
vm, nic = vms[i]
remote_ip = nic.ip_configurations[0].private_ip_address
ud = vm.instance_view.platform_update_domain
fd = vm.instance_view.platform_fault_domain
if primary_ud != ud:
backup_ip = remote_ip
backup_ud = ud
backup_fd = fd
break
if primary_ip is None or backup_ip is None:
raise RuntimeError(
'Could not find either a primary ip {} or backup ip {} for '
'glusterfs client mount'.format(primary_ip, backup_ip))
logger.debug('primary ip/ud/fd={} backup ip/ud/fd={}'.format(
(primary_ip, primary_ud, primary_fd),
(backup_ip, backup_ud, backup_fd)))
# construct mount options
mo = '_netdev,noauto,transport=tcp,backupvolfile-server={}'.format(
backup_ip)
amo = settings.shared_data_volume_mount_options(sdv, sc_id)
if util.is_not_empty(amo):
if 'auto' in mo:
raise RuntimeError(
('auto cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('backupvolfile-server=') for x in amo]):
raise RuntimeError(
('backupvolfile-server cannot be specified as a mount '
'option for storage cluster {}').format(sc_id))
if any([x.startswith('transport=') for x in amo]):
raise RuntimeError(
('transport cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
mo = ','.join((mo, ','.join(amo)))
# construct mount string for fstab, srcpath is the gluster volume
fstab_mount = (
'{remoteip}:/{srcpath} {hmp}/{scid} '
'{fstype} {mo} 0 2').format(
remoteip=primary_ip,
srcpath=settings.get_file_server_glusterfs_volume_name(sc),
hmp=settings.get_host_mounts_path(False),
scid=sc_id,
fstype=sc.file_server.type,
mo=mo)
else:
raise NotImplementedError(
('cannot handle file_server type {} for storage '
'cluster {}').format(sc.file_server.type, sc_id))
if util.is_none_or_empty(fstab_mount):
raise RuntimeError(
('Could not construct an fstab mount entry for storage '
'cluster {}').format(sc_id))
# construct sc_arg
sc_arg = '{}:{}'.format(sc.file_server.type, sc_id)
# log config
if settings.verbose(config):
logger.debug('storage cluster {} fstab mount: {}'.format(
sc_id, fstab_mount))
return (fstab_mount, sc_arg)
def _create_custom_linux_mount_args(config, mount_name):
@ -1054,28 +940,6 @@ def _pick_node_agent_for_vm(batch_client, config, pool_settings):
return (image_ref_to_use, sku_to_use.id)
def _explode_arm_subnet_id(arm_subnet_id):
# type: (str) -> Tuple[str, str, str, str, str]
"""Parses components from ARM subnet id
:param str arm_subnet_id: ARM subnet id
:rtype: tuple
:return: subid, rg, provider, vnet, subnet
"""
tmp = arm_subnet_id.split('/')
try:
subid = tmp[2]
rg = tmp[4]
provider = tmp[6]
vnet = tmp[8]
subnet = tmp[10]
except IndexError:
raise ValueError(
'Error parsing arm_subnet_id. Make sure the virtual network '
'resource id is correct and is postfixed with the '
'/subnets/<subnet_id> portion.')
return subid, rg, provider, vnet, subnet
def _check_for_batch_aad(bc, rmsg): def _check_for_batch_aad(bc, rmsg):
# type: (settings.BatchCredentialSettings, str) -> None # type: (settings.BatchCredentialSettings, str) -> None
"""Check for Batch AAD """Check for Batch AAD
@ -1116,7 +980,7 @@ def _pool_virtual_network_subnet_address_space_check(
# get subnet object # get subnet object
subnet_id = None subnet_id = None
if util.is_not_empty(pool_settings.virtual_network.arm_subnet_id): if util.is_not_empty(pool_settings.virtual_network.arm_subnet_id):
subnet_components = _explode_arm_subnet_id( subnet_components = util.explode_arm_subnet_id(
pool_settings.virtual_network.arm_subnet_id) pool_settings.virtual_network.arm_subnet_id)
logger.debug( logger.debug(
('arm subnet id breakdown: subid={} rg={} provider={} vnet={} ' ('arm subnet id breakdown: subid={} rg={} provider={} vnet={} '
@ -3154,7 +3018,7 @@ def action_fs_cluster_ssh(
'was specified') 'was specified')
cardinal = 0 cardinal = 0
if cardinal is not None and cardinal < 0: if cardinal is not None and cardinal < 0:
raise ValueError('invalid cardinal option value') raise ValueError('invalid cardinal option value')
remotefs.ssh_storage_cluster( remotefs.ssh_storage_cluster(
compute_client, network_client, config, storage_cluster_id, compute_client, network_client, config, storage_cluster_id,
cardinal, hostname, tty, command) cardinal, hostname, tty, command)
@ -3587,8 +3451,11 @@ def action_pool_user_del(batch_client, config):
batch.del_ssh_user(batch_client, config) batch.del_ssh_user(batch_client, config)
def action_pool_ssh(batch_client, config, cardinal, nodeid, tty, command): def action_pool_ssh(
# type: (batchsc.BatchServiceClient, dict, int, str, bool, tuple) -> None batch_client, config, cardinal, nodeid, tty, command,
ssh_username=None, ssh_private_key=None):
# type: (batchsc.BatchServiceClient, dict, int, str, bool, tuple, str,
# str) -> None
"""Action: Pool Ssh """Action: Pool Ssh
:param azure.batch.batch_service_client.BatchServiceClient batch_client: :param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client batch client
@ -3597,6 +3464,8 @@ def action_pool_ssh(batch_client, config, cardinal, nodeid, tty, command):
:param str nodeid: node id :param str nodeid: node id
:param bool tty: allocate pseudo-tty :param bool tty: allocate pseudo-tty
:param tuple command: command to execute :param tuple command: command to execute
:param str ssh_username: ssh username
:param pathlib.Path ssh_private_key: ssh private key
""" """
_check_batch_client(batch_client) _check_batch_client(batch_client)
if cardinal is not None and util.is_not_empty(nodeid): if cardinal is not None and util.is_not_empty(nodeid):
@ -3609,14 +3478,18 @@ def action_pool_ssh(batch_client, config, cardinal, nodeid, tty, command):
if cardinal is not None and cardinal < 0: if cardinal is not None and cardinal < 0:
raise ValueError('invalid cardinal option value') raise ValueError('invalid cardinal option value')
pool = settings.pool_settings(config) pool = settings.pool_settings(config)
ssh_private_key = pool.ssh.ssh_private_key
if ssh_private_key is None: if ssh_private_key is None:
ssh_private_key = pathlib.Path( ssh_private_key = pool.ssh.ssh_private_key
pool.ssh.generated_file_export_path, crypto.get_ssh_key_prefix()) if ssh_private_key is None:
ssh_private_key = pathlib.Path(
pool.ssh.generated_file_export_path,
crypto.get_ssh_key_prefix())
if util.is_none_or_empty(ssh_username):
ssh_username = pool.ssh.username
ip, port = batch.get_remote_login_setting_for_node( ip, port = batch.get_remote_login_setting_for_node(
batch_client, config, cardinal, nodeid) batch_client, config, cardinal, nodeid)
crypto.connect_or_exec_ssh_command( crypto.connect_or_exec_ssh_command(
ip, port, ssh_private_key, pool.ssh.username, tty=tty, ip, port, ssh_private_key, ssh_username, tty=tty,
command=command) command=command)
@ -3639,7 +3512,7 @@ def action_pool_rdp(batch_client, config, cardinal, nodeid, no_auto=False):
'was specified') 'was specified')
cardinal = 0 cardinal = 0
if cardinal is not None and cardinal < 0: if cardinal is not None and cardinal < 0:
raise ValueError('invalid cardinal option value') raise ValueError('invalid cardinal option value')
pool = settings.pool_settings(config) pool = settings.pool_settings(config)
ip, port = batch.get_remote_login_setting_for_node( ip, port = batch.get_remote_login_setting_for_node(
batch_client, config, cardinal, nodeid) batch_client, config, cardinal, nodeid)
@ -5051,3 +4924,167 @@ def action_fed_jobs_zap(blob_client, config, federation_id, unique_id):
return return
storage.zap_unique_id_from_federation( storage.zap_unique_id_from_federation(
blob_client, config, federation_id, unique_id) blob_client, config, federation_id, unique_id)
def action_slurm_ssh(
compute_client, network_client, table_client, batch_client, config,
tty, command, kind, offset, node_name):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict,
# bool, tuple, str, int, str) -> None
"""Action: Slurm Ssh Controller
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
:param bool tty: allocate pseudo-tty
:param tuple command: command
:param str kind: kind
:param int offset: offset
:param str node_name: node name
"""
if util.is_none_or_empty(node_name):
_check_compute_client(compute_client)
_check_network_client(network_client)
vm_res = settings.slurm_settings(config, kind)
if offset is None:
offset = 0
else:
offset = int(offset)
if kind == 'login':
cont_vm_count = settings.slurm_vm_count(config, 'controller')
offset = cont_vm_count + offset
resource.ssh_to_virtual_machine_resource(
compute_client, network_client, vm_res,
crypto.get_slurm_ssh_key_prefix(kind), tty, command, offset=offset)
else:
slurm_opts = settings.slurm_options_settings(config)
# get host name to node id mapping
node_id = storage.get_slurm_host_node_id(
table_client, slurm_opts.cluster_id, node_name)
if util.is_none_or_empty(node_id):
raise RuntimeError(
'No batch node id associated with Slurm node: {}'.format(
node_name))
ss_login = settings.slurm_settings(config, 'login')
ssh_private_key = ss_login.ssh.ssh_private_key
if ssh_private_key is None:
ssh_private_key = pathlib.Path(
ss_login.ssh.generated_file_export_path,
crypto.get_slurm_ssh_key_prefix('login'))
action_pool_ssh(
batch_client, config, None, node_id, tty, command,
ssh_username=ss_login.ssh.username,
ssh_private_key=ssh_private_key)
def action_slurm_cluster_create(
auth_client, resource_client, compute_client, network_client,
blob_client, table_client, queue_client, batch_client, config):
# type: (azure.mgmt.authorization.AuthorizationManagementClient,
# azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.storage.blob.BlockBlobService,
# azure.cosmosdb.table.TableService,
# azure.batch.batch_service_client.BatchServiceClient, dict) -> None
"""Action: Slurm Cluster Create
:param azure.mgmt.authorization.AuthorizationManagementClient auth_client:
auth client
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_resource_client(resource_client)
_check_compute_client(compute_client)
_check_network_client(network_client)
_check_batch_client(batch_client)
# ensure aad creds are populated
mgmt_aad = settings.credentials_management(config)
if (util.is_none_or_empty(mgmt_aad.subscription_id) or
util.is_none_or_empty(mgmt_aad.aad.authority_url)):
raise ValueError('management aad credentials are invalid')
slurm.create_slurm_controller(
auth_client, resource_client, compute_client, network_client,
blob_client, table_client, queue_client, batch_client, config,
_RESOURCES_PATH, _SLURMMASTERPREP_FILE, _SLURMCOMPUTENODEPREP_FILE,
_SLURMPY_FILE, _SLURMREQ_FILE, _CONFIGURABLE_SLURM_FILES)
def action_slurm_cluster_status(compute_client, network_client, config):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict) -> None
"""Action: Slurm Cluster Status
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
"""
_check_compute_client(compute_client)
vm_res = settings.slurm_settings(config, 'controller')
cont_vm_count = settings.slurm_vm_count(config, 'controller')
i = 0
while i < cont_vm_count:
resource.stat_virtual_machine_resource(
compute_client, network_client, config, vm_res, offset=i)
i += 1
vm_res = settings.slurm_settings(config, 'login')
login_vm_count = settings.slurm_vm_count(config, 'login')
i = 0
while i < login_vm_count:
resource.stat_virtual_machine_resource(
compute_client, network_client, config, vm_res,
offset=cont_vm_count + i)
i += 1
def action_slurm_cluster_destroy(
resource_client, compute_client, network_client, blob_client,
table_client, queue_client, config, delete_all_resources,
delete_virtual_network, generate_from_prefix, wait):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.storage.blob.BlockBlobService,
# azure.cosmosdb.table.TableService,
# azure.storage.queue.QueueService, dict, bool, bool,
# bool, bool) -> None
"""Action: Slurm Cluster Destroy
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param azure.storage.queue.QueueService queue_client: queue client
:param dict config: configuration dict
:param bool delete_all_resources: delete all resources
:param bool delete_virtual_network: delete virtual network
:param bool generate_from_prefix: generate resources from hostname prefix
:param bool wait: wait for deletion to complete
"""
_check_resource_client(resource_client)
_check_compute_client(compute_client)
_check_network_client(network_client)
if (generate_from_prefix and
(delete_all_resources or delete_virtual_network)):
raise ValueError(
'Cannot specify generate_from_prefix and a delete_* option')
slurm.delete_slurm_controller(
resource_client, compute_client, network_client, blob_client,
table_client, queue_client, config,
delete_virtual_network=delete_virtual_network,
delete_resource_group=delete_all_resources,
generate_from_prefix=generate_from_prefix, wait=wait)
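A note on the offset arithmetic in action_slurm_ssh above: controller and login VMs share one generated resource-name sequence, so login offsets are shifted past the controller VMs. A small illustrative sketch (counts taken from the sample slurm.yaml; not part of the commit):

controller_vm_count = 2   # slurm controller vm_count from the template
login_offset = 0          # offset requested for the login node
global_offset = controller_vm_count + login_offset
print(global_offset)      # 2: login VM 0 is the third VM in the sequence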


@ -423,21 +423,11 @@ def create_monitoring_resource(
async_ops['port80'] = resource.AsyncOperation(functools.partial( async_ops['port80'] = resource.AsyncOperation(functools.partial(
resource.add_inbound_network_security_rule, network_client, ms, resource.add_inbound_network_security_rule, network_client, ms,
'acme80', isr)) 'acme80', isr))
# install msi vm extension
async_ops['vmext'] = {}
async_ops['vmext'][0] = resource.AsyncOperation(
functools.partial(
resource.create_msi_virtual_machine_extension, compute_client, ms,
vms[0].name, 0, settings.verbose(config)),
max_retries=0,
)
logger.debug('waiting for virtual machine msi extensions to provision')
for offset in async_ops['vmext']:
async_ops['vmext'][offset].result()
# ensure port 80 rule is ready # ensure port 80 rule is ready
if servconf.lets_encrypt_enabled and ms.public_ip.enabled: if servconf.lets_encrypt_enabled and ms.public_ip.enabled:
async_ops['port80'].result() async_ops['port80'].result()
# install vm extension # install vm extension
async_ops['vmext'] = {}
async_ops['vmext'][0] = resource.AsyncOperation( async_ops['vmext'][0] = resource.AsyncOperation(
functools.partial( functools.partial(
_create_virtual_machine_extension, compute_client, config, ms, _create_virtual_machine_extension, compute_client, config, ms,


@ -52,6 +52,180 @@ logger = logging.getLogger(__name__)
util.setup_logger(logger) util.setup_logger(logger)
def create_storage_cluster_mount_args(
compute_client, network_client, config, sc_id, host_mount_path):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# dict, str, str) -> Tuple[str, str]
"""Create storage cluster mount arguments
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
:param str sc_id: storage cluster id
:param str host_mount_path: host mount path
:rtype: tuple
:return: (fstab mount, storage cluster arg)
"""
fstab_mount = None
sc_arg = None
# get remotefs settings
rfs = settings.remotefs_settings(config, sc_id)
sc = rfs.storage_cluster
# iterate through shared data volumes and find storage clusters
sdv = settings.global_resources_shared_data_volumes(config)
if (sc_id not in sdv or
not settings.is_shared_data_volume_storage_cluster(
sdv, sc_id)):
raise RuntimeError(
'No storage cluster {} found in configuration'.format(sc_id))
# get vm count
if sc.vm_count < 1:
raise RuntimeError(
'storage cluster {} vm_count {} is invalid'.format(
sc_id, sc.vm_count))
# get fileserver type
if sc.file_server.type == 'nfs':
# query first vm for info
vm_name = settings.generate_virtual_machine_name(sc, 0)
vm = compute_client.virtual_machines.get(
resource_group_name=sc.resource_group,
vm_name=vm_name,
)
nic = resource.get_nic_from_virtual_machine(
network_client, sc.resource_group, vm)
# get private ip of vm
remote_ip = nic.ip_configurations[0].private_ip_address
# construct mount options
mo = '_netdev,noauto,nfsvers=4,intr'
amo = settings.shared_data_volume_mount_options(sdv, sc_id)
if util.is_not_empty(amo):
if 'udp' in mo:
raise RuntimeError(
('udp cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if 'auto' in mo:
raise RuntimeError(
('auto cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('nfsvers=') for x in amo]):
raise RuntimeError(
('nfsvers cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('port=') for x in amo]):
raise RuntimeError(
('port cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
mo = ','.join((mo, ','.join(amo)))
# construct mount string for fstab
fstab_mount = (
'{remoteip}:{srcpath} {hmp} '
'{fstype} {mo} 0 0').format(
remoteip=remote_ip,
srcpath=sc.file_server.mountpoint,
hmp=host_mount_path,
fstype=sc.file_server.type,
mo=mo)
elif sc.file_server.type == 'glusterfs':
# walk vms and find non-overlapping ud/fds
primary_ip = None
primary_ud = None
primary_fd = None
backup_ip = None
backup_ud = None
backup_fd = None
vms = {}
# first pass, attempt to populate all ip, ud/fd
for i in range(sc.vm_count):
vm_name = settings.generate_virtual_machine_name(sc, i)
vm = compute_client.virtual_machines.get(
resource_group_name=sc.resource_group,
vm_name=vm_name,
expand=compute_client.virtual_machines.models.
InstanceViewTypes.instance_view,
)
nic = resource.get_nic_from_virtual_machine(
network_client, sc.resource_group, vm)
vms[i] = (vm, nic)
# get private ip and ud/fd of vm
remote_ip = nic.ip_configurations[0].private_ip_address
ud = vm.instance_view.platform_update_domain
fd = vm.instance_view.platform_fault_domain
if primary_ip is None:
primary_ip = remote_ip
primary_ud = ud
primary_fd = fd
if backup_ip is None:
if (primary_ip == backup_ip or primary_ud == ud or
primary_fd == fd):
continue
backup_ip = remote_ip
backup_ud = ud
backup_fd = fd
# second pass, fill in with at least non-overlapping update domains
if backup_ip is None:
for i in range(sc.vm_count):
vm, nic = vms[i]
remote_ip = nic.ip_configurations[0].private_ip_address
ud = vm.instance_view.platform_update_domain
fd = vm.instance_view.platform_fault_domain
if primary_ud != ud:
backup_ip = remote_ip
backup_ud = ud
backup_fd = fd
break
if primary_ip is None or backup_ip is None:
raise RuntimeError(
'Could not find either a primary ip {} or backup ip {} for '
'glusterfs client mount'.format(primary_ip, backup_ip))
logger.debug('primary ip/ud/fd={} backup ip/ud/fd={}'.format(
(primary_ip, primary_ud, primary_fd),
(backup_ip, backup_ud, backup_fd)))
# construct mount options
mo = '_netdev,noauto,transport=tcp,backupvolfile-server={}'.format(
backup_ip)
amo = settings.shared_data_volume_mount_options(sdv, sc_id)
if util.is_not_empty(amo):
if 'auto' in mo:
raise RuntimeError(
('auto cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('backupvolfile-server=') for x in amo]):
raise RuntimeError(
('backupvolfile-server cannot be specified as a mount '
'option for storage cluster {}').format(sc_id))
if any([x.startswith('transport=') for x in amo]):
raise RuntimeError(
('transport cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
mo = ','.join((mo, ','.join(amo)))
# construct mount string for fstab, srcpath is the gluster volume
fstab_mount = (
'{remoteip}:/{srcpath} {hmp} '
'{fstype} {mo} 0 0').format(
remoteip=primary_ip,
srcpath=settings.get_file_server_glusterfs_volume_name(sc),
hmp=host_mount_path,
fstype=sc.file_server.type,
mo=mo)
else:
raise NotImplementedError(
('cannot handle file_server type {} for storage '
'cluster {}').format(sc.file_server.type, sc_id))
if util.is_none_or_empty(fstab_mount):
raise RuntimeError(
('Could not construct an fstab mount entry for storage '
'cluster {}').format(sc_id))
# construct sc_arg
sc_arg = '{}:{}'.format(sc.file_server.type, sc_id)
# log config
if settings.verbose(config):
logger.debug('storage cluster {} fstab mount: {}'.format(
sc_id, fstab_mount))
return (fstab_mount, sc_arg)
def _create_managed_disk(compute_client, rfs, disk_name): def _create_managed_disk(compute_client, rfs, disk_name):
# type: (azure.mgmt.compute.ComputeManagementClient, # type: (azure.mgmt.compute.ComputeManagementClient,
# settings.RemoteFsSettings, str) -> # settings.RemoteFsSettings, str) ->
@ -444,52 +618,6 @@ def _create_virtual_machine_extension(
) )
def _create_availability_set(compute_client, rfs):
# type: (azure.mgmt.compute.ComputeManagementClient,
# settings.RemoteFsSettings) ->
# msrestazure.azure_operation.AzureOperationPoller
"""Create an availability set
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param settings.RemoteFsSettings rfs: remote filesystem settings
:rtype: msrestazure.azure_operation.AzureOperationPoller or None
:return: msrestazure.azure_operation.AzureOperationPoller
"""
if rfs.storage_cluster.vm_count <= 1:
logger.info('insufficient vm_count for availability set')
return None
if rfs.storage_cluster.zone is not None:
logger.info('cannot create an availability set for zonal resource')
return None
as_name = settings.generate_availability_set_name(rfs.storage_cluster)
# check and fail if as exists
try:
compute_client.availability_sets.get(
resource_group_name=rfs.storage_cluster.resource_group,
availability_set_name=as_name,
)
raise RuntimeError('availability set {} exists'.format(as_name))
except msrestazure.azure_exceptions.CloudError as e:
if e.status_code == 404:
pass
else:
raise
logger.debug('creating availability set: {}'.format(as_name))
return compute_client.availability_sets.create_or_update(
resource_group_name=rfs.storage_cluster.resource_group,
availability_set_name=as_name,
# user maximums ud, fd from settings due to region variability
parameters=compute_client.virtual_machines.models.AvailabilitySet(
location=rfs.storage_cluster.location,
platform_update_domain_count=20,
platform_fault_domain_count=rfs.storage_cluster.fault_domains,
sku=compute_client.virtual_machines.models.Sku(
name='Aligned',
),
)
)
def create_storage_cluster( def create_storage_cluster(
resource_client, compute_client, network_client, blob_client, config, resource_client, compute_client, network_client, blob_client, config,
sc_id, bootstrap_file, remotefs_files): sc_id, bootstrap_file, remotefs_files):
@ -633,7 +761,9 @@ def create_storage_cluster(
resource.create_network_interface, network_client, resource.create_network_interface, network_client,
rfs.storage_cluster, subnet, nsg, private_ips, pips, i)) rfs.storage_cluster, subnet, nsg, private_ips, pips, i))
# create availability set if vm_count > 1, this call is not async # create availability set if vm_count > 1, this call is not async
availset = _create_availability_set(compute_client, rfs) availset = resource.create_availability_set(
compute_client, rfs.storage_cluster, rfs.storage_cluster.vm_count,
fault_domains=rfs.storage_cluster.fault_domains)
# wait for nics to be created # wait for nics to be created
logger.debug('waiting for network interfaces to provision') logger.debug('waiting for network interfaces to provision')
nics = {} nics = {}
@ -1257,24 +1387,6 @@ def expand_storage_cluster(
return succeeded return succeeded
def _delete_availability_set(compute_client, rg_name, as_name):
# type: (azure.mgmt.compute.ComputeManagementClient, str, str) ->
# msrestazure.azure_operation.AzureOperationPoller
"""Delete an availability set
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param str rg_name: resource group name
:param str as_name: availability set name
:rtype: msrestazure.azure_operation.AzureOperationPoller
:return: async op poller
"""
logger.debug('deleting availability set {}'.format(as_name))
return compute_client.availability_sets.delete(
resource_group_name=rg_name,
availability_set_name=as_name,
)
def delete_storage_cluster( def delete_storage_cluster(
resource_client, compute_client, network_client, blob_client, config, resource_client, compute_client, network_client, blob_client, config,
sc_id, delete_data_disks=False, delete_virtual_network=False, sc_id, delete_data_disks=False, delete_virtual_network=False,
@ -1522,7 +1634,7 @@ def delete_storage_cluster(
if util.is_none_or_empty(as_name) or as_name in deleted: if util.is_none_or_empty(as_name) or as_name in deleted:
continue continue
deleted.add(as_name) deleted.add(as_name)
_delete_availability_set( resource.delete_availability_set(
compute_client, rfs.storage_cluster.resource_group, as_name) compute_client, rfs.storage_cluster.resource_group, as_name)
logger.info('availability set {} deleted'.format(as_name)) logger.info('availability set {} deleted'.format(as_name))
deleted.clear() deleted.clear()


@ -639,13 +639,19 @@ def create_network_interface(
logger.debug('assigning public ip {} to network interface {}'.format( logger.debug('assigning public ip {} to network interface {}'.format(
pip.name, nic_name)) pip.name, nic_name))
# create network ip config # create network ip config
if private_ips is None: if private_ips is None or private_ips[offset] is None:
logger.debug(
'assigning private ip dynamically to network interface {}'.format(
nic_name))
network_ip_config = networkmodels.NetworkInterfaceIPConfiguration( network_ip_config = networkmodels.NetworkInterfaceIPConfiguration(
name=vm_resource.hostname_prefix, name=vm_resource.hostname_prefix,
subnet=subnet, subnet=subnet,
public_ip_address=pip, public_ip_address=pip,
) )
else: else:
logger.debug(
'assigning private ip {} statically to network '
'interface {}'.format(private_ips[offset], nic_name))
network_ip_config = networkmodels.NetworkInterfaceIPConfiguration( network_ip_config = networkmodels.NetworkInterfaceIPConfiguration(
name=vm_resource.hostname_prefix, name=vm_resource.hostname_prefix,
subnet=subnet, subnet=subnet,
@ -656,7 +662,8 @@ def create_network_interface(
private_ip_address_version=networkmodels.IPVersion.ipv4, private_ip_address_version=networkmodels.IPVersion.ipv4,
) )
logger.debug('creating network interface: {}'.format(nic_name)) logger.debug('creating network interface: {} with nsg={}'.format(
nic_name, nsg.name if nsg else None))
return network_client.network_interfaces.create_or_update( return network_client.network_interfaces.create_or_update(
resource_group_name=vm_resource.resource_group, resource_group_name=vm_resource.resource_group,
network_interface_name=nic_name, network_interface_name=nic_name,
@ -671,10 +678,10 @@ def create_network_interface(
def create_virtual_machine( def create_virtual_machine(
compute_client, vm_resource, availset, nics, disks, ssh_pub_key, compute_client, vm_resource, availset, nics, disks, ssh_pub_key,
offset, enable_msi=False): offset, enable_msi=False, tags=None):
# type: (azure.mgmt.compute.ComputeManagementClient, # type: (azure.mgmt.compute.ComputeManagementClient,
# settings.VmResource, computemodels.AvailabilitySet, # settings.VmResource, computemodels.AvailabilitySet,
# dict, dict, computemodels.SshPublicKey, int, bool) -> # dict, dict, computemodels.SshPublicKey, int, bool, dict) ->
# Tuple[int, msrestazure.azure_operation.AzureOperationPoller] # Tuple[int, msrestazure.azure_operation.AzureOperationPoller]
"""Create a virtual machine """Create a virtual machine
:param azure.mgmt.compute.ComputeManagementClient compute_client: :param azure.mgmt.compute.ComputeManagementClient compute_client:
@ -686,6 +693,7 @@ def create_virtual_machine(
:param computemodels.SshPublicKey ssh_pub_key: SSH public key :param computemodels.SshPublicKey ssh_pub_key: SSH public key
:param int offset: vm number :param int offset: vm number
:param bool enable_msi: enable system MSI :param bool enable_msi: enable system MSI
:param dict tags: tags for VM
:rtype: tuple :rtype: tuple
:return: (offset int, msrestazure.azure_operation.AzureOperationPoller) :return: (offset int, msrestazure.azure_operation.AzureOperationPoller)
""" """
@ -784,43 +792,81 @@ def create_virtual_machine(
), ),
identity=identity, identity=identity,
zones=zone, zones=zone,
tags=tags,
), ),
) )
def create_msi_virtual_machine_extension( def create_availability_set(
compute_client, vm_resource, vm_name, offset, verbose=False): compute_client, vm_resource, vm_count, update_domains=None,
fault_domains=None):
# type: (azure.mgmt.compute.ComputeManagementClient, # type: (azure.mgmt.compute.ComputeManagementClient,
# settings.VmResource, str, int, # settings.VmResource, int, Optional[int], Optional[int]) ->
# bool) -> msrestazure.azure_operation.AzureOperationPoller # msrestazure.azure_operation.AzureOperationPoller
"""Create a virtual machine extension """Create an availability set
:param azure.mgmt.compute.ComputeManagementClient compute_client: :param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client compute client
:param settings.VmResource vm_resource: VM resource :param settings.VmResource vm_resource: VM Resource
:param str vm_name: vm name :param int vm_count: VM count
:param int offset: vm number :param int update_domains: update domains
:param bool verbose: verbose logging :param int fault_domains: fault domains
:rtype: msrestazure.azure_operation.AzureOperationPoller :rtype: msrestazure.azure_operation.AzureOperationPoller or None
:return: msrestazure.azure_operation.AzureOperationPoller :return: msrestazure.azure_operation.AzureOperationPoller
""" """
vm_ext_name = settings.generate_virtual_machine_msi_extension_name( if vm_count <= 1:
vm_resource, offset) logger.info('insufficient vm_count for availability set')
logger.debug('creating virtual machine extension: {}'.format(vm_ext_name)) return None
return compute_client.virtual_machine_extensions.create_or_update( if vm_resource.zone is not None:
logger.info('cannot create an availability set for zonal resource')
return None
as_name = settings.generate_availability_set_name(vm_resource)
# check and fail if as exists
try:
compute_client.availability_sets.get(
resource_group_name=vm_resource.resource_group,
availability_set_name=as_name,
)
raise RuntimeError('availability set {} exists'.format(as_name))
except msrestazure.azure_exceptions.CloudError as e:
if e.status_code == 404:
pass
else:
raise
logger.debug('creating availability set: {}'.format(as_name))
if update_domains is None:
update_domains = 20
if fault_domains is None:
fault_domains = 2
return compute_client.availability_sets.create_or_update(
resource_group_name=vm_resource.resource_group, resource_group_name=vm_resource.resource_group,
vm_name=vm_name, availability_set_name=as_name,
vm_extension_name=vm_ext_name, # user maximums ud, fd from settings due to region variability
extension_parameters=compute_client.virtual_machine_extensions.models. parameters=compute_client.virtual_machines.models.AvailabilitySet(
VirtualMachineExtension(
location=vm_resource.location, location=vm_resource.location,
publisher='Microsoft.ManagedIdentity', platform_update_domain_count=update_domains,
virtual_machine_extension_type='ManagedIdentityExtensionForLinux', platform_fault_domain_count=fault_domains,
type_handler_version='1.0', sku=compute_client.virtual_machines.models.Sku(
auto_upgrade_minor_version=True, name='Aligned',
settings={ ),
'port': 50342, )
}, )
),
def delete_availability_set(compute_client, rg_name, as_name):
# type: (azure.mgmt.compute.ComputeManagementClient, str, str) ->
# msrestazure.azure_operation.AzureOperationPoller
"""Delete an availability set
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param str rg_name: resource group name
:param str as_name: availability set name
:rtype: msrestazure.azure_operation.AzureOperationPoller
:return: async op poller
"""
logger.debug('deleting availability set {}'.format(as_name))
return compute_client.availability_sets.delete(
resource_group_name=rg_name,
availability_set_name=as_name,
) )
@ -955,11 +1001,11 @@ def deallocate_virtual_machine(compute_client, rg_name, vm_name):
def get_ssh_info( def get_ssh_info(
compute_client, network_client, vm_res, ssh_key_prefix=None, nic=None, compute_client, network_client, vm_res, ssh_key_prefix=None, nic=None,
pip=None): pip=None, offset=0):
# type: (azure.mgmt.compute.ComputeManagementClient, # type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, # azure.mgmt.network.NetworkManagementClient,
# settings.VmResource, str, networkmodes.NetworkInterface, # settings.VmResource, str, networkmodes.NetworkInterface,
# networkmodels.PublicIPAddress) -> # networkmodels.PublicIPAddress, int) ->
# Tuple[pathlib.Path, int, str, str] # Tuple[pathlib.Path, int, str, str]
"""Get SSH info to a federation proxy """Get SSH info to a federation proxy
:param azure.mgmt.compute.ComputeManagementClient compute_client: :param azure.mgmt.compute.ComputeManagementClient compute_client:
@ -970,10 +1016,11 @@ def get_ssh_info(
:param str ssh_key_prefix: ssh key prefix :param str ssh_key_prefix: ssh key prefix
:param networkmodels.NetworkInterface nic: network interface :param networkmodels.NetworkInterface nic: network interface
:param networkmodels.PublicIPAddress pip: public ip :param networkmodels.PublicIPAddress pip: public ip
:param int offset: offset
:rtype: tuple :rtype: tuple
:return (ssh private key, port, username, ip) :return (ssh private key, port, username, ip)
""" """
vm_name = settings.generate_virtual_machine_name(vm_res, 0) vm_name = settings.generate_virtual_machine_name(vm_res, offset)
try: try:
vm = compute_client.virtual_machines.get( vm = compute_client.virtual_machines.get(
resource_group_name=vm_res.resource_group, resource_group_name=vm_res.resource_group,
@ -1009,10 +1056,11 @@ def get_ssh_info(
def ssh_to_virtual_machine_resource( def ssh_to_virtual_machine_resource(
compute_client, network_client, vm_res, ssh_key_prefix, tty, command): compute_client, network_client, vm_res, ssh_key_prefix, tty, command,
offset=0):
# type: (azure.mgmt.compute.ComputeManagementClient, # type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, # azure.mgmt.network.NetworkManagementClient,
# settings.VmResource, str, bool, tuple) -> None # settings.VmResource, str, bool, tuple, int) -> None
"""SSH to a node """SSH to a node
:param azure.mgmt.compute.ComputeManagementClient compute_client: :param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client compute client
@ -1022,9 +1070,11 @@ def ssh_to_virtual_machine_resource(
:param str ssh_key_prefix: ssh key prefix :param str ssh_key_prefix: ssh key prefix
:param bool tty: allocate pseudo-tty :param bool tty: allocate pseudo-tty
:param tuple command: command to execute :param tuple command: command to execute
:param int offset: offset
""" """
ssh_priv_key, port, username, ip = get_ssh_info( ssh_priv_key, port, username, ip = get_ssh_info(
compute_client, network_client, vm_res, ssh_key_prefix=ssh_key_prefix) compute_client, network_client, vm_res, ssh_key_prefix=ssh_key_prefix,
offset=offset)
crypto.connect_or_exec_ssh_command( crypto.connect_or_exec_ssh_command(
ip, port, ssh_priv_key, username, tty=tty, command=command) ip, port, ssh_priv_key, username, tty=tty, command=command)
@ -1123,10 +1173,10 @@ def start_virtual_machine_resource(
def stat_virtual_machine_resource( def stat_virtual_machine_resource(
compute_client, network_client, config, vm_res): compute_client, network_client, config, vm_res, offset=0):
# type: (azure.mgmt.compute.ComputeManagementClient, # type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict, # azure.mgmt.network.NetworkManagementClient, dict,
# settings.VmResource) -> None # settings.VmResource, int) -> None
"""Retrieve status of a virtual machine resource """Retrieve status of a virtual machine resource
:param azure.mgmt.compute.ComputeManagementClient compute_client: :param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client compute client
@ -1134,9 +1184,10 @@ def stat_virtual_machine_resource(
network client network client
:param dict config: configuration dict :param dict config: configuration dict
:param settings.VmResource vm_res: resource :param settings.VmResource vm_res: resource
:param int offset: offset
""" """
# retrieve all vms # retrieve all vms
vm_name = settings.generate_virtual_machine_name(vm_res, 0) vm_name = settings.generate_virtual_machine_name(vm_res, offset)
try: try:
vm = compute_client.virtual_machines.get( vm = compute_client.virtual_machines.get(
resource_group_name=vm_res.resource_group, resource_group_name=vm_res.resource_group,


@ -72,8 +72,8 @@ _GPU_VISUALIZATION_INSTANCES = re.compile(
re.IGNORECASE re.IGNORECASE
) )
_RDMA_INSTANCES = re.compile( _RDMA_INSTANCES = re.compile(
# standard a8/a9, h+r, nc+r, nd+r # standard a8/a9, h+r, nc+r, nd+r, hb/hc
r'^standard_((a8|a9)|((h|nc|nd)+[\d]+m?rs?(_v[\d])?))$', r'^standard_((a8|a9)|((h|hb|hc|nc|nd)+[\d]+m?rs?(_v[\d])?))$',
re.IGNORECASE re.IGNORECASE
) )
_PREMIUM_STORAGE_INSTANCES = re.compile( _PREMIUM_STORAGE_INSTANCES = re.compile(
@ -110,6 +110,26 @@ _VM_TCP_NO_TUNE = frozenset((
'standard_b1s', 'standard_b1ms', 'standard_b2s', 'standard_b2ms', 'standard_b1s', 'standard_b1ms', 'standard_b2s', 'standard_b2ms',
'standard_b4ms', 'standard_b8ms', 'standard_b4ms', 'standard_b8ms',
)) ))
_VM_GPU_COUNT = {
1: re.compile(r'^standard_n[cdv]6r?s?(_v[\d])?$', re.IGNORECASE),
2: re.compile(r'^standard_n[cdv]12r?s?(_v[\d])?$', re.IGNORECASE),
4: re.compile(r'^standard_n[cdv]24r?s?(_v[\d])?$', re.IGNORECASE),
8: re.compile(r'^standard_nd40s_v2$', re.IGNORECASE),
}
_VM_GPU_CLASS = {
'tesla_k80': re.compile(r'^standard_n[c][\d]+r?$', re.IGNORECASE),
'tesla_p40': re.compile(r'^standard_n[d][\d]+r?s?$', re.IGNORECASE),
'tesla_p100': re.compile(r'^standard_n[c][\d]+r?s_v2$', re.IGNORECASE),
'tesla_v100': re.compile(
r'^standard_n(([c][\d]+r?s_v3)|(d40s_v2))$', re.IGNORECASE),
'tesla_m60': re.compile(r'^standard_nv[\d]+s?(_v2)?$', re.IGNORECASE),
}
_VM_IB_CLASS = {
'qdr_ib': re.compile(r'^standard_(a8|a9)$', re.IGNORECASE),
'fdr_ib': re.compile(
r'^standard_(((h|nc|nd)+[\d]+m?rs?(_v[\d])?))$', re.IGNORECASE),
'edr_ib': re.compile(r'^standard_(hc|hb)+[\d]+rs$', re.IGNORECASE),
}
_SINGULARITY_COMMANDS = frozenset(('exec', 'run'))
_FORBIDDEN_MERGE_TASK_PROPERTIES = frozenset((
'depends_on', 'depends_on_range', 'multi_instance', 'task_factory'
@ -447,6 +467,38 @@ FederationProxyOptionsSettings = collections.namedtuple(
'scheduling_after_success_evaluate_autoscale', 'scheduling_after_success_evaluate_autoscale',
] ]
) )
SlurmBatchPoolSettings = collections.namedtuple(
'SlurmBatchPoolSettings', [
'batch_service_url', 'compute_node_type', 'max_compute_nodes',
'weight', 'features', 'reclaim_exclude_num_nodes',
]
)
SlurmPartitionSettings = collections.namedtuple(
'SlurmPartitionSettings', [
'batch_pools', 'max_runtime_limit', 'default',
]
)
SlurmUnmanagedPartitionSettings = collections.namedtuple(
'SlurmUnmanagedPartitionSettings', [
'partition', 'nodes',
]
)
SlurmOptionsSettings = collections.namedtuple(
'SlurmOptionsSettings', [
'cluster_id', 'idle_reclaim_time', 'max_nodes', 'elastic_partitions',
'unmanaged_partitions',
]
)
SlurmSharedDataVolumesSettings = collections.namedtuple(
'SlurmSharedDataVolumesSettings', [
'id', 'host_mount_path', 'store_slurmctld_state',
]
)
SlurmCredentialsSettings = collections.namedtuple(
'SlurmCredentialsSettings', [
'db_password',
]
)
class VmResource(object): class VmResource(object):
@ -664,6 +716,34 @@ def get_gpu_type_from_vm_size(vm_size):
return None return None
def get_num_gpus_from_vm_size(vm_size):
# type: (str) -> int
"""Get number of GPUs from VM size
:param str vm_size: vm size
:rtype: int
:return: number of GPUs
"""
for vm in _VM_GPU_COUNT:
if _VM_GPU_COUNT[vm].match(vm_size):
return vm
raise RuntimeError('vm_size {} has no mapping to number of GPUs'.format(
vm_size))
def get_gpu_class_from_vm_size(vm_size):
# type: (str) -> str
"""Get GPU class from VM size
:param str vm_size: vm size
:rtype: str
:return: GPU class
"""
for c in _VM_GPU_CLASS:
if _VM_GPU_CLASS[c].match(vm_size):
return c
raise RuntimeError('vm_size {} has no mapping to GPU class'.format(
vm_size))
def gpu_configuration_check(config, vm_size=None):
# type: (dict, str) -> bool
"""Check if OS is allowed with a GPU VM
@ -760,6 +840,20 @@ def is_rdma_pool(vm_size):
return _RDMA_INSTANCES.match(vm_size) is not None
def get_ib_class_from_vm_size(vm_size):
# type: (str) -> str
"""Get IB class from VM size
:param str vm_size: vm size
:rtype: str
:return: IB class
"""
for c in _VM_IB_CLASS:
if _VM_IB_CLASS[c].match(vm_size):
return c
raise RuntimeError('vm_size {} has no mapping to IB class'.format(
vm_size))
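A standalone check of the lookup tables added above; the pattern is copied verbatim from the single-GPU entry of _VM_GPU_COUNT, so a Standard_NC6 resolves to one GPU while a Standard_NC24r does not match that entry:

import re

# single-GPU SKU pattern from _VM_GPU_COUNT[1]
single_gpu = re.compile(r'^standard_n[cdv]6r?s?(_v[\d])?$', re.IGNORECASE)
assert single_gpu.match('Standard_NC6') is not None
assert single_gpu.match('Standard_NC24r') is None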
def is_premium_storage_vm_size(vm_size):
# type: (str) -> bool
"""Check if vm size is premium storage compatible
@ -1932,6 +2026,23 @@ def set_credentials_registry_password(config, link, is_docker, password):
config['credentials'][kind][link]['password'] = password
def credentials_slurm(config):
# type: (dict) -> SlurmCredentialsSettings
"""Get slurm settings
:param dict config: configuration object
:rtype: SlurmCredentialsSettings
:return: Slurm settings
"""
try:
creds = config['credentials']
except (KeyError, TypeError):
creds = {}
conf = _kv_read_checked(creds, 'slurm', default={})
return SlurmCredentialsSettings(
db_password=_kv_read_checked(conf, 'db_password'),
)
# GLOBAL SETTINGS
def batch_shipyard_settings(config):
# type: (dict) -> BatchShipyardSettings
@ -4896,23 +5007,274 @@ def federation_settings(config):
)
def slurm_options_settings(config):
# type: (dict) -> SlurmOptionsSettings
"""Get slurm options settings
:param dict config: configuration dict
:rtype: SlurmOptionsSettings
:return: slurm options settings
"""
try:
conf = config['slurm']['slurm_options']
except KeyError:
conf = {}
cluster_id = config['slurm']['cluster_id']
if util.is_none_or_empty(cluster_id) or len(cluster_id) > 22:
raise ValueError(
'cluster_id is invalid. Must be between 1 and 22 '
'characters in length')
bc = credentials_batch(config)
idle_reclaim_time = _kv_read(conf, 'idle_reclaim_time', default='00:15:00')
idle_reclaim_time = util.convert_string_to_timedelta(idle_reclaim_time)
if idle_reclaim_time.total_seconds() == 0:
raise ValueError('idle_reclaim_time must be positive')
max_nodes = 0
partitions = {}
part_conf = _kv_read_checked(conf, 'elastic_partitions')
for key in part_conf:
part = _kv_read_checked(part_conf, key)
batch_pools = {}
pool_conf = _kv_read_checked(part, 'batch_pools', default={})
for pkey in pool_conf:
bpool = _kv_read_checked(pool_conf, pkey)
batch_service_url = _kv_read_checked(bpool, 'account_service_url')
if util.is_none_or_empty(batch_service_url):
batch_service_url = bc.account_service_url
max_compute_nodes = _kv_read(bpool, 'max_compute_nodes')
reclaim_exclude_num_nodes = _kv_read(
bpool, 'reclaim_exclude_num_nodes', default=0)
if reclaim_exclude_num_nodes > max_compute_nodes:
raise ValueError(
'reclaim_exclude_num_nodes {} > '
'max_compute_nodes {}'.format(
reclaim_exclude_num_nodes, max_compute_nodes))
batch_pools[pkey] = SlurmBatchPoolSettings(
batch_service_url=batch_service_url,
compute_node_type=_kv_read_checked(bpool, 'compute_node_type'),
max_compute_nodes=max_compute_nodes,
weight=_kv_read(bpool, 'weight'),
features=_kv_read_checked(bpool, 'features', default=[]),
reclaim_exclude_num_nodes=reclaim_exclude_num_nodes,
)
max_nodes = max(max_nodes, batch_pools[pkey].max_compute_nodes)
max_runtime_limit = _kv_read_checked(part, 'max_runtime_limit')
if util.is_not_empty(max_runtime_limit):
max_runtime_limit = max_runtime_limit.replace('.', '-')
else:
max_runtime_limit = 'UNLIMITED'
partition = SlurmPartitionSettings(
batch_pools=batch_pools,
max_runtime_limit=max_runtime_limit,
default=_kv_read(part, 'default'),
)
partitions[key] = partition
unmanaged_partitions = []
upart_conf = _kv_read_checked(conf, 'unmanaged_partitions', default=[])
for upart in upart_conf:
unmanaged_partitions.append(SlurmUnmanagedPartitionSettings(
partition=_kv_read_checked(upart, 'partition'),
nodes=_kv_read_checked(upart, 'nodes'),
))
return SlurmOptionsSettings(
cluster_id=cluster_id,
idle_reclaim_time=idle_reclaim_time,
max_nodes=max_nodes,
elastic_partitions=partitions,
unmanaged_partitions=unmanaged_partitions,
)
def slurm_settings(config, kind):
# type: (dict) -> VmResource
"""Get slurm settings
:param dict config: configuration dict
:rtype: VmResource
:return: VM resource settings
"""
# general settings
try:
conf = config['slurm']
if util.is_none_or_empty(conf):
raise KeyError
except KeyError:
raise ValueError('slurm settings are invalid or missing')
location = conf['location']
if util.is_none_or_empty(location):
raise ValueError('invalid location in slurm')
rg = _kv_read_checked(conf, 'resource_group')
if util.is_none_or_empty(rg):
raise ValueError('invalid resource_group in slurm')
zone = _kv_read(conf, 'zone')
hostname_prefix = '{}-{}'.format(
_kv_read_checked(conf, 'cluster_id'),
# Azure doesn't like "login" for DNS
'gateway' if kind == 'login' else kind
)
# get controller settings
try:
conf = conf[kind]
if util.is_none_or_empty(conf):
raise KeyError
except KeyError:
raise ValueError(
'slurm:{} settings are invalid or missing'.format(kind))
# vm settings
vm_size = _kv_read_checked(conf, 'vm_size')
accel_net = _kv_read(conf, 'accelerated_networking', False)
# public ip settings
pip_conf = _kv_read_checked(conf, 'public_ip', {})
pip_enabled = _kv_read(pip_conf, 'enabled', True)
pip_static = _kv_read(pip_conf, 'static', False)
# sc network security settings
ns_conf = conf['network_security']
ns_inbound = {
'ssh': InboundNetworkSecurityRule(
destination_port_range='22',
source_address_prefix=_kv_read_checked(ns_conf, 'ssh', ['*']),
protocol='tcp',
),
}
if not isinstance(ns_inbound['ssh'].source_address_prefix, list):
raise ValueError('expected list for ssh network security rule')
if 'custom_inbound_rules' in ns_conf:
for key in ns_conf['custom_inbound_rules']:
ns_inbound[key] = InboundNetworkSecurityRule(
destination_port_range=_kv_read_checked(
ns_conf['custom_inbound_rules'][key],
'destination_port_range'),
source_address_prefix=_kv_read_checked(
ns_conf['custom_inbound_rules'][key],
'source_address_prefix'),
protocol=_kv_read_checked(
ns_conf['custom_inbound_rules'][key], 'protocol'),
)
if not isinstance(ns_inbound[key].source_address_prefix, list):
raise ValueError(
'expected list for network security rule {} '
'source_address_prefix'.format(key))
# ssh settings
ssh_conf = conf['ssh']
ssh_username = _kv_read_checked(ssh_conf, 'username')
ssh_public_key = _kv_read_checked(ssh_conf, 'ssh_public_key')
if util.is_not_empty(ssh_public_key):
ssh_public_key = pathlib.Path(ssh_public_key)
ssh_public_key_data = _kv_read_checked(ssh_conf, 'ssh_public_key_data')
ssh_private_key = _kv_read_checked(ssh_conf, 'ssh_private_key')
if util.is_not_empty(ssh_private_key):
ssh_private_key = pathlib.Path(ssh_private_key)
if (ssh_public_key is not None and
util.is_not_empty(ssh_public_key_data)):
raise ValueError('cannot specify both an SSH public key file and data')
if (ssh_public_key is None and
util.is_none_or_empty(ssh_public_key_data) and
ssh_private_key is not None):
raise ValueError(
'cannot specify an SSH private key with no public key specified')
ssh_gen_file_path = _kv_read_checked(
ssh_conf, 'generated_file_export_path', '.')
return VmResource(
location=location,
resource_group=rg,
zone=zone,
hostname_prefix=hostname_prefix,
virtual_network=virtual_network_settings(
conf,
default_resource_group=rg,
default_existing_ok=False,
default_create_nonexistant=True,
),
network_security=NetworkSecuritySettings(
inbound=ns_inbound,
),
vm_size=vm_size,
accelerated_networking=accel_net,
public_ip=PublicIpSettings(
enabled=pip_enabled,
static=pip_static,
),
ssh=SSHSettings(
username=ssh_username,
expiry_days=9999,
ssh_public_key=ssh_public_key,
ssh_public_key_data=ssh_public_key_data,
ssh_private_key=ssh_private_key,
generate_docker_tunnel_script=False,
generated_file_export_path=ssh_gen_file_path,
hpn_server_swap=False,
allow_docker_access=False,
),
)
def slurm_vm_count(config, kind):
# type: (dict, str) -> int
"""Get Slurm controller vm count
:param dict config: configuration dict
:param str kind: kind
:rtype: int
:return: vm count
"""
conf = _kv_read_checked(_kv_read_checked(config, 'slurm'), kind)
return _kv_read(conf, 'vm_count')
def slurm_additional_prep_script(config, kind):
# type: (dict, str) -> str
"""Get Slurm additional prep script
:param dict config: configuration dict
:param str kind: kind
:rtype: str
:return: prep script location
"""
conf = _kv_read_checked(_kv_read_checked(config, 'slurm'), kind)
return _kv_read(conf, 'additional_prep_script')
def slurm_shared_data_volumes(config):
# type: (dict) -> List[str]
"""Get Slurm shared data volumes
:param dict config: configuration dict
:rtype: List[str]
:return: list of SlurmSharedDataVolumesSettings
"""
conf = _kv_read_checked(config, 'slurm')
sdv = _kv_read_checked(conf, 'shared_data_volumes', default={})
vols = []
state = False
for sdkey in sdv:
store_slurmctld_state = _kv_read(sdv[sdkey], 'store_slurmctld_state')
if store_slurmctld_state:
if state:
raise ValueError(
'only one shared data volume should be designated as '
'store_slurmctld_state')
state = True
vols.append(SlurmSharedDataVolumesSettings(
id=sdkey,
host_mount_path=_kv_read_checked(sdv[sdkey], 'host_mount_path'),
store_slurmctld_state=store_slurmctld_state,
))
return vols
def other_storage_account_settings(config, key):
# type: (dict, str) ->str
"""Get other storage account settings selector
:param dict config: configuration dict
:param str key: config key
:rtype: str
:return: other storage settings link
"""
try:
conf = config[key]
if util.is_none_or_empty(conf):
raise KeyError
except KeyError:
raise ValueError('{} settings are invalid or missing'.format(key))
ssel = _kv_read_checked(conf, 'storage_account_settings')
if util.is_none_or_empty(ssel):
raise ValueError(
'{} storage_account_settings are invalid or missing'.format(key))
return ssel
@ -4924,7 +5286,18 @@ def federation_credentials_storage(config):
:return: federation storage cred settings
"""
return credentials_storage(
config, other_storage_account_settings(config, 'federation'))
def slurm_credentials_storage(config):
# type: (dict) -> StorageCredentialsSettings
"""Get slurm storage account settings
:param dict config: configuration dict
:rtype: StorageCredentialsSettings
:return: slurm storage cred settings
"""
return credentials_storage(
config, other_storage_account_settings(config, 'slurm'))
def generate_availability_set_name(vr):

convoy/slurm.py (new file, 1216 lines)
File diff suppressed because it is too large


@ -31,7 +31,6 @@ from builtins import ( # noqa
next, oct, open, pow, round, super, filter, map, zip)
# stdlib imports
import datetime
import hashlib
import json
import logging
import os
@ -81,6 +80,7 @@ _STORAGE_CONTAINERS = {
'table_monitoring': None,
'table_federation_global': None,
'table_federation_jobs': None,
'table_slurm': None,
'queue_federation': None,
# TODO remove following in future release
'table_registry': None,
@ -120,6 +120,7 @@ def set_storage_configuration(sep, postfix, sa, sakey, saep, sasexpiry):
_STORAGE_CONTAINERS['table_monitoring'] = sep + 'monitor'
_STORAGE_CONTAINERS['table_federation_jobs'] = sep + 'fedjobs'
_STORAGE_CONTAINERS['table_federation_global'] = sep + 'fedglobal'
_STORAGE_CONTAINERS['table_slurm'] = sep + 'slurm'
_STORAGE_CONTAINERS['queue_federation'] = sep + 'fed'
# TODO remove following containers in future release
_STORAGE_CONTAINERS['table_registry'] = sep + 'registry'
@ -427,8 +428,7 @@ def _add_global_resource(
'global resource type: {}'.format(grtype))
for gr in resources:
resource = '{}:{}'.format(prefix, gr)
resource_sha1 = util.hash_string(resource)
logger.info('adding global resource: {} hash={}'.format(
resource, resource_sha1))
table_client.insert_or_replace_entity(
@ -619,15 +619,6 @@ def remove_resources_from_monitoring(
sc_id))
def hash_string(strdata):
"""Hash a string
:param str strdata: string data to hash
:rtype: str
:return: hexdigest
"""
return hashlib.sha1(strdata.encode('utf8')).hexdigest()
def hash_pool_and_service_url(pool_id, batch_service_url):
"""Hash a pool and service url
:param str pool_id: pool id
@ -635,7 +626,8 @@ def hash_pool_and_service_url(pool_id, batch_service_url):
:rtype: str
:return: hashed pool and service url
"""
return util.hash_string('{}${}'.format(
batch_service_url.rstrip('/'), pool_id))
def hash_federation_id(federation_id):
@ -644,7 +636,7 @@ def hash_federation_id(federation_id):
:rtype: str
:return: hashed federation id
"""
fedhash = util.hash_string(federation_id)
logger.debug('federation id {} -> {}'.format(federation_id, fedhash))
return fedhash
@ -656,7 +648,8 @@ def generate_job_id_locator_partition_key(federation_id, job_id):
:rtype: str
:return: hashed fedhash and job id
"""
return '{}${}'.format(
util.hash_string(federation_id), util.hash_string(job_id))
def create_federation_id(
@ -1185,7 +1178,7 @@ def _pack_sequences(ent, unique_id):
def _retrieve_and_merge_sequence(
table_client, pk, unique_id, kind, target, entity_must_not_exist):
rk = util.hash_string(target)
try:
ent = table_client.get_entity(
_STORAGE_CONTAINERS['table_federation_jobs'], pk, rk)
@ -1335,7 +1328,7 @@ def list_blocked_actions_in_federation(
except azure.common.AzureMissingResourceHttpError:
pass
else:
rk = util.hash_string(
job_id if util.is_not_empty(job_id) else job_schedule_id)
try:
entities = [table_client.get_entity(
@ -1399,7 +1392,7 @@ def list_queued_actions_in_federation(
except azure.common.AzureMissingResourceHttpError:
pass
else:
rk = util.hash_string(
job_id if util.is_not_empty(job_id) else job_schedule_id)
try:
entities = [table_client.get_entity(
@ -1727,6 +1720,75 @@ def zap_unique_id_from_federation(
print(json.dumps(rawout, sort_keys=True, indent=4))
def create_slurm_partition(
table_client, queue_client, config, cluster_id, partition_name,
batch_service_url, pool_id, compute_node_type, max_compute_nodes,
hostlist):
partpool_hash = util.hash_string('{}-{}-{}'.format(
partition_name, batch_service_url, pool_id))
# insert partition entity
entity = {
'PartitionKey': 'PARTITIONS${}'.format(cluster_id),
'RowKey': '{}${}'.format(partition_name, partpool_hash),
'BatchServiceUrl': batch_service_url,
'BatchPoolId': pool_id,
'ComputeNodeType': compute_node_type,
'HostList': hostlist,
'BatchShipyardSlurmVersion': 1,
}
logger.debug(
'inserting slurm partition {}:{} entity to table for '
'cluster {}'.format(partition_name, pool_id, cluster_id))
try:
table_client.insert_entity(_STORAGE_CONTAINERS['table_slurm'], entity)
except azure.common.AzureConflictHttpError:
logger.error('partition {}:{} cluster id {} already exists'.format(
partition_name, pool_id, cluster_id))
if util.confirm_action(
config, 'overwrite existing partition {}:{} for '
'cluster {}; this can result in undefined behavior'.format(
partition_name, pool_id, cluster_id)):
table_client.insert_or_replace_entity(
_STORAGE_CONTAINERS['table_slurm'], entity)
else:
raise
# create queue
qname = '{}-{}'.format(cluster_id, partpool_hash)
logger.debug('creating queue: {}'.format(qname))
queue_client.create_queue(qname)
def get_slurm_host_node_id(table_client, cluster_id, host):
node_id = None
try:
entity = table_client.get_entity(
_STORAGE_CONTAINERS['table_slurm'],
'{}${}'.format('HOSTS', cluster_id), host)
node_id = entity['BatchNodeId']
except (azure.common.AzureMissingResourceHttpError, KeyError):
pass
return node_id
def clear_slurm_table_entities(table_client, cluster_id):
logger.debug('deleting slurm cluster {} entities in table'.format(
cluster_id))
tablename = _STORAGE_CONTAINERS['table_slurm']
keys = ['HOSTS', 'PARTITIONS']
for key in keys:
try:
pk = '{}${}'.format(key, cluster_id)
entities = table_client.query_entities(
tablename,
filter='PartitionKey eq \'{}\''.format(pk))
except azure.common.AzureMissingResourceHttpError:
pass
else:
batch_delete_entities(
table_client, tablename, pk, [x['RowKey'] for x in entities]
)
def _check_file_and_upload(blob_client, file, key, container=None):
# type: (azure.storage.blob.BlockBlobService, tuple, str, str) -> None
"""Upload file to blob storage if necessary
@ -1825,6 +1887,38 @@ def upload_for_nonbatch(blob_client, files, kind):
return ret
def upload_to_container(blob_client, sa, files, container, gen_sas=True):
# type: (azure.storage.blob.BlockBlobService,
# settings.StorageCredentialsSettings, List[tuple],
# str, bool) -> dict
"""Upload files to a specific blob storage container
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param settings.StorageCredentialsSettings sa: storage account
:param list files: files to upload
:param str container: container
:param bool gen_sas: generate a SAS URL for blob
:rtype: dict
:return: sas url dict
"""
sas_urls = {}
for file in files:
_check_file_and_upload(blob_client, file, None, container=container)
sas_urls[file[0]] = 'https://{}.blob.{}/{}/{}'.format(
sa.account, sa.endpoint, container, file[0],
)
if gen_sas:
sas_urls[file[0]] = '{}?{}'.format(
sas_urls[file[0]],
blob_client.generate_blob_shared_access_signature(
container, file[0],
permission=azureblob.BlobPermissions.READ,
expiry=datetime.datetime.utcnow() +
datetime.timedelta(days=_DEFAULT_SAS_EXPIRY_DAYS)
)
)
return sas_urls
def create_global_lock_blob(blob_client, kind):
# type: (azure.storage.blob.BlockBlobService, str) -> None
"""Create a global lock blob
@ -1982,7 +2076,8 @@ def clear_storage_containers(
continue
if (key == 'table_monitoring' or
key == 'table_federation_global' or
key == 'table_federation_jobs' or
key == 'table_slurm'):
continue
try:
_clear_table(
@ -2052,7 +2147,7 @@ def create_storage_containers_nonbatch(
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param azure.storage.queue.QueueService queue_service: queue client
:param str kind: kind, "remotefs", "monitoring", "federation", or "slurm"
"""
if kind == 'federation':
create_storage_containers_nonbatch(
@ -2143,6 +2238,46 @@ def delete_storage_containers_nonbatch(
logger.warning('queue not found: {}'.format(contname))
def delete_file_share_directory(storage_settings, share, directory):
# type: (StorageCredentialsSettings, str, str) -> None
"""Delete file share directory recursively
:param StorageCredentialsSettings storage_settings: storage settings
:param str share: share
:param str directory: directory to delete
"""
file_client = azurefile.FileService(
account_name=storage_settings.account,
account_key=storage_settings.account_key,
endpoint_suffix=storage_settings.endpoint)
logger.info(
'recursively deleting files and directories in share {} at '
'directory {}'.format(share, directory))
del_dirs = []
dirs = [directory]
while len(dirs) > 0:
dir = dirs.pop()
try:
objects = file_client.list_directories_and_files(
share, directory_name=dir)
except azure.common.AzureMissingResourceHttpError:
logger.warning('directory {} does not exist on share {}'.format(
directory, share))
continue
del_dirs.append(dir)
for obj in objects:
path = '{}/{}'.format(dir or '', obj.name)
if type(obj) == azurefile.models.File:
logger.debug('deleting file {} on share {}'.format(
path, share))
file_client.delete_file(share, '', path)
else:
dirs.append(path)
del_dirs.append(path)
for dir in del_dirs[::-1]:
logger.debug('deleting directory {} on share {}'.format(dir, share))
file_client.delete_directory(share, dir)
def delete_storage_containers_boot_diagnostics(
blob_client, vm_name, vm_id):
# type: (azureblob.BlockBlobService, str, str) -> None


@ -458,6 +458,16 @@ def compute_md5_for_file(file, as_base64, blocksize=65536):
return hasher.hexdigest()
def hash_string(strdata):
# type: (str) -> str
"""Hash a string
:param str strdata: string data to hash
:rtype: str
:return: hexdigest
"""
return hashlib.sha1(strdata.encode('utf8')).hexdigest()
def subprocess_with_output(
cmd, shell=False, cwd=None, env=None, suppress_output=False):
# type: (str, bool, str, dict, bool) -> int
@ -622,3 +632,25 @@ def ip_from_address_prefix(cidr, start_offset=None, max=None):
last = first + max - 1
for i in range(first, last + 1):
yield socket.inet_ntoa(struct.pack('>L', i))
def explode_arm_subnet_id(arm_subnet_id):
# type: (str) -> Tuple[str, str, str, str, str]
"""Parses components from ARM subnet id
:param str arm_subnet_id: ARM subnet id
:rtype: tuple
:return: subid, rg, provider, vnet, subnet
"""
tmp = arm_subnet_id.split('/')
try:
subid = tmp[2]
rg = tmp[4]
provider = tmp[6]
vnet = tmp[8]
subnet = tmp[10]
except IndexError:
raise ValueError(
'Error parsing arm_subnet_id. Make sure the virtual network '
'resource id is correct and is postfixed with the '
'/subnets/<subnet_id> portion.')
return subid, rg, provider, vnet, subnet


@ -59,6 +59,7 @@ class ConfigType(enum.Enum):
RemoteFS = 5,
Monitor = 6,
Federation = 7,
Slurm = 8,
# global defines
@ -92,6 +93,10 @@ _SCHEMAS = {
'name': 'Federation',
'schema': pathlib.Path(_ROOT_PATH, 'schemas/federation.yaml'),
},
ConfigType.Slurm: {
'name': 'Slurm',
'schema': pathlib.Path(_ROOT_PATH, 'schemas/slurm.yaml'),
},
}
# configure loggers


@ -0,0 +1,41 @@
# Dockerfile for Slurm on CentOS 7 for Batch Shipyard
FROM centos:7
MAINTAINER Fred Park <https://github.com/Azure/batch-shipyard>
WORKDIR /tmp
ENV SLURM_VERSION=18.08.5-2
RUN yum install -y epel-release \
&& yum makecache -y fast \
&& yum groupinstall -y \
"Development Tools" \
&& yum install -y \
curl \
file \
python \
perl-devel \
ruby \
ruby-devel \
munge-devel \
pam-devel \
mariadb-devel \
numactl-devel \
&& gem install fpm \
&& yum clean all
RUN yum install -y numactl-devel perl-devel
RUN curl -fSsL https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 | tar -jxpf - \
&& cd slurm-${SLURM_VERSION} \
&& ./configure --prefix=/tmp/slurm-build --sysconfdir=/etc/slurm --with-pam_dir=/usr/lib64/security/ \
&& make -j4 \
&& make -j4 contrib \
&& make install \
&& cd /root \
&& fpm -s dir -t rpm -v 1.0 -n slurm-${SLURM_VERSION} --prefix=/usr -C /tmp/slurm-build .
FROM alpine:3.9
COPY --from=0 /root/slurm-*.rpm /root/
COPY slurm*.service /root/


@ -0,0 +1,15 @@
[Unit]
Description=Slurm controller daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf
Documentation=man:slurmctld(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmctld
ExecStart=/usr/sbin/slurmctld $SLURMCTLD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmctld.pid
[Install]
WantedBy=multi-user.target


@ -0,0 +1,19 @@
[Unit]
Description=Slurm node daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf
Documentation=man:slurmd(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmd
ExecStart=/usr/sbin/slurmd -d /usr/sbin/slurmstepd $SLURMD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmd.pid
KillMode=process
LimitNOFILE=51200
LimitMEMLOCK=infinity
LimitSTACK=infinity
[Install]
WantedBy=multi-user.target


@ -0,0 +1,15 @@
[Unit]
Description=Slurm DBD accounting daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurmdbd.conf
Documentation=man:slurmdbd(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmdbd
ExecStart=/usr/sbin/slurmdbd $SLURMDBD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmdbd.pid
[Install]
WantedBy=multi-user.target


@ -0,0 +1,38 @@
# Dockerfile for Slurm on Ubuntu 16.04 for Batch Shipyard
FROM ubuntu:16.04
MAINTAINER Fred Park <https://github.com/Azure/batch-shipyard>
WORKDIR /tmp
ENV SLURM_VERSION=18.08.5-2
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
file \
python \
ruby \
ruby-dev \
libmunge-dev \
libpam0g-dev \
libmariadb-client-lgpl-dev \
libmysqlclient-dev \
numactl \
&& gem install fpm \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN curl -fSsL https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 | tar -jxvpf - \
&& cd slurm-${SLURM_VERSION} \
&& ./configure --prefix=/tmp/slurm-build --sysconfdir=/etc/slurm --with-pam_dir=/lib/x86_64-linux-gnu/security/ \
&& make -j4 \
&& make -j4 contrib \
&& make install \
&& cd /root \
&& fpm -s dir -t deb -v 1.0 -n slurm-${SLURM_VERSION} --prefix=/usr -C /tmp/slurm-build .
FROM alpine:3.9
COPY --from=0 /root/slurm-*.deb /root/
COPY slurm*.service /root/


@ -0,0 +1,15 @@
[Unit]
Description=Slurm controller daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf
Documentation=man:slurmctld(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmctld
ExecStart=/usr/sbin/slurmctld $SLURMCTLD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmctld.pid
[Install]
WantedBy=multi-user.target


@ -0,0 +1,19 @@
[Unit]
Description=Slurm node daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf
Documentation=man:slurmd(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmd
ExecStart=/usr/sbin/slurmd -d /usr/sbin/slurmstepd $SLURMD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmd.pid
KillMode=process
LimitNOFILE=51200
LimitMEMLOCK=infinity
LimitSTACK=infinity
[Install]
WantedBy=multi-user.target


@ -0,0 +1,15 @@
[Unit]
Description=Slurm DBD accounting daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurmdbd.conf
Documentation=man:slurmdbd(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmdbd
ExecStart=/usr/sbin/slurmdbd $SLURMDBD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmdbd.pid
[Install]
WantedBy=multi-user.target


@ -0,0 +1,38 @@
# Dockerfile for Slurm on Ubuntu 18.04 for Batch Shipyard
FROM ubuntu:18.04
MAINTAINER Fred Park <https://github.com/Azure/batch-shipyard>
WORKDIR /tmp
ENV SLURM_VERSION=18.08.5-2
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
file \
python \
ruby \
ruby-dev \
libmunge-dev \
libpam0g-dev \
libmariadb-client-lgpl-dev \
libmysqlclient-dev \
numactl \
&& gem install fpm \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN curl -fSsL https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 | tar -jxvpf - \
&& cd slurm-${SLURM_VERSION} \
&& ./configure --prefix=/tmp/slurm-build --sysconfdir=/etc/slurm --with-pam_dir=/lib/x86_64-linux-gnu/security/ \
&& make -j4 \
&& make -j4 contrib \
&& make install \
&& cd /root \
&& fpm -s dir -t deb -v 1.0 -n slurm-${SLURM_VERSION} --prefix=/usr -C /tmp/slurm-build .
FROM alpine:3.9
COPY --from=0 /root/slurm-*.deb /root/
COPY slurm*.service /root/


@ -0,0 +1,15 @@
[Unit]
Description=Slurm controller daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf
Documentation=man:slurmctld(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmctld
ExecStart=/usr/sbin/slurmctld $SLURMCTLD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmctld.pid
[Install]
WantedBy=multi-user.target


@ -0,0 +1,19 @@
[Unit]
Description=Slurm node daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf
Documentation=man:slurmd(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmd
ExecStart=/usr/sbin/slurmd -d /usr/sbin/slurmstepd $SLURMD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmd.pid
KillMode=process
LimitNOFILE=51200
LimitMEMLOCK=infinity
LimitSTACK=infinity
[Install]
WantedBy=multi-user.target


@ -0,0 +1,15 @@
[Unit]
Description=Slurm DBD accounting daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurmdbd.conf
Documentation=man:slurmdbd(8)
[Service]
Type=forking
EnvironmentFile=-/etc/default/slurmdbd
ExecStart=/usr/sbin/slurmdbd $SLURMDBD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmdbd.pid
[Install]
WantedBy=multi-user.target


@ -26,6 +26,10 @@ Batch Shipyard.
7. [Federation](17-batch-shipyard-configuration-federation.md) -
Batch Shipyard federation proxy configuration. This configuration is entirely
optional unless using the federation capabilities of Batch Shipyard.
8. [Slurm](18-batch-shipyard-configuration-slurm.md) -
Batch Shipyard [Slurm](https://slurm.schedmd.com/) configuration. This
configuration is entirely optional unless using the Slurm on Batch
capabilities of Batch Shipyard.
Note that all potential properties are described here and that specifying
all such properties may result in invalid configuration as some properties


@ -0,0 +1,300 @@
# Batch Shipyard Slurm Configuration
This page contains in-depth details on how to configure a
[Slurm](https://slurm.schedmd.com/) configuration file for Batch Shipyard.
## Schema
The Slurm schema is as follows:
```yaml
slurm:
storage_account_settings: mystorageaccount
location: <Azure region, e.g., eastus>
resource_group: my-slurm-rg
cluster_id: slurm
controller:
ssh:
username: shipyard
ssh_public_key: /path/to/rsa/publickey.pub
ssh_public_key_data: ssh-rsa ...
ssh_private_key: /path/to/rsa/privatekey
generated_file_export_path: null
public_ip:
enabled: true
static: false
virtual_network:
name: myvnet
resource_group: my-vnet-resource-group
existing_ok: false
address_space: 10.0.0.0/16
subnet:
name: my-slurm-controller-subnet
address_prefix: 10.0.1.0/24
network_security:
ssh:
- '*'
custom_inbound_rules:
myrule:
destination_port_range: 5000-5001
protocol: '*'
source_address_prefix:
- 1.2.3.4
- 5.6.7.0/24
vm_size: STANDARD_D2_V2
vm_count: 2
accelerated_networking: false
additional_prep_script: /path/to/some/script-controller.sh
login:
ssh:
username: shipyard
ssh_public_key: /path/to/rsa/publickey.pub
ssh_public_key_data: ssh-rsa ...
ssh_private_key: /path/to/rsa/privatekey
generated_file_export_path: null
public_ip:
enabled: true
static: false
virtual_network:
name: myvnet
resource_group: my-vnet-resource-group
existing_ok: false
address_space: 10.0.0.0/16
subnet:
name: my-slurm-login-subnet
address_prefix: 10.0.2.0/24
network_security:
ssh:
- '*'
custom_inbound_rules:
myrule:
destination_port_range: 5000-5001
protocol: '*'
source_address_prefix:
- 1.2.3.4
- 5.6.7.0/24
vm_size: STANDARD_D4_V2
vm_count: 1
accelerated_networking: false
additional_prep_script: /path/to/some/script-login.sh
shared_data_volumes:
nfs_server:
mount_path: /shared
store_slurmctld_state: true
slurm_options:
idle_reclaim_time: 00:15:00
elastic_partitions:
partition_1:
batch_pools:
mypool1:
account_service_url: https://...
compute_node_type: dedicated
max_compute_nodes: 32
weight: 0
features:
- arbitrary_constraint_1
reclaim_exclude_num_nodes: 8
mypool2:
account_service_url: https://...
compute_node_type: low_priority
max_compute_nodes: 128
weight: 1
features:
- arbitrary_constraint_2
reclaim_exclude_num_nodes: 0
max_runtime_limit: null
default: true
partition_2:
batch_pools:
mypool3:
account_service_url: https://...
compute_node_type: low_priority
max_compute_nodes: 256
weight: 2
features: []
reclaim_exclude_num_nodes: 0
max_runtime_limit: 1.12:00:00
default: false
unmanaged_partitions:
- partition: 'PartitionName=onprem Nodes=onprem-[0-31] Default=No MaxTime=INFINITE State=UP'
nodes:
- 'NodeName=onprem-[0-31] CPUs=512 Sockets=1 CoresPerSocket=8 ThreadsPerCore=2 RealMemory=512128 State=UNKNOWN'
```
The `slurm` property has the following members:
* (required) `storage_account_settings` is the storage account link to store
all Slurm metadata. Any `slurm` command that must store metadata or
perform actions uses this storage account.
* (required) `location` is the Azure region name for the resources, e.g.,
`eastus` or `northeurope`.
* (required) `resource_group` this is the resource group to use for the
Slurm resources.
* (required) `cluster_id` is the name of the Slurm cluster to create. This
is also the DNS label prefix to apply to each virtual machine and resource
allocated for the Slurm cluster. It should be unique.
There are two required sections for resources that comprise the Slurm
cluster: `controller` and `login`. The `controller` section specifies the VM
configuration which hosts the Slurm controller (and possibly the Slurm DBD).
The `login` section specifies the VM configuration which hosts the login nodes
for the Slurm cluster.
Both the `controller` and `login` sections have the following identical
configuration properties:
* (required) `ssh` is the SSH admin user to create on the machine.
If you are running Batch Shipyard on Windows, please refer to
[these instructions](85-batch-shipyard-ssh-docker-tunnel.md#ssh-keygen)
on how to generate an SSH keypair for use with Batch Shipyard.
* (required) `username` is the admin user to create on all virtual machines
* (optional) `ssh_public_key` is the path to a pre-existing ssh public
key to use. If this is not specified, an RSA public/private key pair will
be generated for use in your current working directory (with a
non-colliding name for auto-generated SSH keys for compute pools, i.e.,
`id_rsa_shipyard_remotefs`). On Windows only, if this option is not
specified, the SSH keys are not auto-generated (unless `ssh-keygen.exe`
can be invoked in the current working directory or is in `%PATH%`).
This option cannot be specified with `ssh_public_key_data`.
* (optional) `ssh_public_key_data` is the raw RSA public key data in
OpenSSH format, e.g., a string starting with `ssh-rsa ...`. Only one
key may be specified. This option cannot be specified with
`ssh_public_key`.
* (optional) `ssh_private_key` is the path to an existing SSH private key
to use against either `ssh_public_key` or `ssh_public_key_data` for
connecting to storage nodes and performing operations that require SSH
such as cluster resize and detail status. This option should only be
specified if either `ssh_public_key` or `ssh_public_key_data` are
specified.
* (optional) `generated_file_export_path` is an optional path to specify
for where to create the RSA public/private key pair.
* (optional) `public_ip` are public IP properties for the virtual machine.
* (optional) `enabled` designates if public IPs should be assigned. The
default is `true`. Note that if public IP is disabled, then you must
create an alternate means for accessing the Slurm resource virtual
machine through a "jumpbox" on the virtual network. If this property
is set to `false` (disabled), then any action requiring SSH, or the
SSH command itself, will occur against the private IP address of the
virtual machine.
* (optional) `static` specifies whether static public IPs should be assigned
to each allocated virtual machine. The default is `false`, which
results in dynamic public IP addresses. A "static" FQDN is provided
per virtual machine regardless of this setting, as long as public IPs are
enabled.
* (required) `virtual_network` is the virtual network to use for the
Slurm resource.
* (required) `name` is the virtual network name
* (optional) `resource_group` is the resource group for the virtual
network. If this is not specified, the resource group name falls back
to the resource group specified in the Slurm resource.
* (optional) `existing_ok` allows use of a pre-existing virtual network.
The default is `false`.
* (required if creating, optional otherwise) `address_space` is the
allowed address space for the virtual network.
* (required) `subnet` specifies the subnet properties.
* (required) `name` is the subnet name.
* (required) `address_prefix` is the subnet address prefix to use for
allocation of the Slurm resource virtual machine to.
* (required) `network_security` defines the network security rules to apply
to the Slurm resource virtual machine.
* (required) `ssh` is the rule for which address prefixes to allow for
connecting to sshd port 22 on the virtual machine. In the example, `"*"`
allows any IP address to connect. This is an array property which allows
multiple address prefixes to be specified.
* (optional) `grafana` rule allows grafana HTTPS (443) server port to be
exposed to the specified address prefix. Multiple address prefixes
can be specified.
* (optional) `prometheus` rule allows the Prometheus server port to be
exposed to the specified address prefix. Multiple address prefixes
can be specified.
* (optional) `custom_inbound_rules` are custom inbound rules for other
services that you need to expose.
* (required) `<rule name>` is the name of the rule; the example uses
`myrule`. Each rule name should be unique.
* (required) `destination_port_range` is the ports on each virtual
machine that will be exposed. This can be a single port and
should be a string.
* (required) `source_address_prefix` is an array of address
prefixes to allow.
* (required) `protocol` is the protocol to allow. Valid values are
`tcp`, `udp` and `*` (which means any protocol).
* (required) `vm_size` is the virtual machine instance size to use.
* (required) `vm_count` is the number of virtual machines to allocate of
this instance type. For `controller`, a value greater than `1` will create
an HA Slurm cluster. Additionally, a value greater than `1` will
automatically place the VMs in an availability set.
* (optional) `accelerated_networking` enables or disables
[accelerated networking](https://docs.microsoft.com/azure/virtual-network/create-vm-accelerated-networking-cli).
The default is `false` if not specified.
* (optional) `additional_prep_script` property specifies a local file which
will be uploaded then executed for additional prep/configuration that should
be applied to each Slurm resource.
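
As a point of reference, a minimal sketch of a `login` section that supplies
only the required properties above might look like the following (values are
placeholder names taken from the example schema; SSH keys are auto-generated
since none are specified):

```yaml
login:
  ssh:
    username: shipyard
  virtual_network:
    name: myvnet
    address_space: 10.0.0.0/16
    subnet:
      name: my-slurm-login-subnet
      address_prefix: 10.0.2.0/24
  network_security:
    ssh:
      - '*'
  vm_size: STANDARD_D4_V2
  vm_count: 1
```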
There are two required sections for specifying how the Slurm
cluster is configured: `shared_data_volumes` and `slurm_options` sections.
The `shared_data_volumes` section configures shared file systems (or
RemoteFS clusters as provisioned by Batch Shipyard). The `slurm_options`
section configures the Slurm partitions.
The following describes the `shared_data_volumes` configuration:
* (required) Storage cluster id is a named dictionary key that refers
to a defined storage cluster in the global configuration file (and
subsequently the RemoteFS configuration).
* (required) `mount_path` is the mount path across all Slurm resources
and compute nodes.
* (required) `store_slurmctld_state` designates this shared data volume
as the volume that hosts the slurmctld state for HA failover.
The following describes the `slurm_options` configuration:
* (required) `idle_reclaim_time` specifies how long nodes must remain idle
before they are reclaimed (suspended) by Slurm.
The format for this property is a timedelta with a string
representation of "d.HH:mm:ss". "HH:mm:ss" is required but "d" is optional.
* (required) `elastic_partitions` specifies the Slurm partitions to create
for elastic cloud bursting onto Azure Batch
* (required) Unique name of the partition
* (required) `batch_pools` specifies the Batch pools which will be
dynamically sized by Batch Shipyard and Slurm. All Batch pools
should be pre-allocated with 0 nodes (unless using the `orchestrate`
command in conjunction with a single pool).
* (required) Batch Pool Id
* (optional) `account_service_url` is the Batch account
service URL associated with this Batch pool. Currently,
this is restricted to the service url specified in the
credentials file.
* (required) `compute_node_type` is the compute node type
to allocate, can be either `dedicated` or `low_priority`.
* (required) `max_compute_nodes` is the maximum number of
compute nodes that can be allocated.
* (required) `weight` is the scheduling weight for this Batch pool in
this partition. See the Slurm documentation for more details.
* (optional) `features` are additional features (constraints) labeled
on nodes allocated from this Batch pool.
* (optional) `reclaim_exclude_num_nodes` is the number of
nodes to exclude from reclaiming for this Batch pool.
* (optional) `max_runtime_limit` imposes a maximum runtime limit
for this partition. The format for this property is a timedelta
with a string representation of "d.HH:mm:ss". "HH:mm:ss" is
required but "d" is optional.
* (required) `default` designates this partition as the default
partition.
* (optional) `unmanaged_partitions` specifies partitions that are not
managed by Batch Shipyard but that you wish to join to the Slurm
controller. This is useful for joining on-premises nodes within the same
(or a peered) Virtual Network to the Slurm cluster. Each sequence member
has the properties:
* (required) `partition` specifies the partition entry in the Slurm
configuration file.
* (required) `nodes` is a sequence of Slurm node entries in the Slurm
configuration file as it relates to the partition.
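
To illustrate the timedelta string format used by `idle_reclaim_time` and
`max_runtime_limit`, the following hypothetical fragment reclaims nodes after
30 idle minutes and caps partition job runtimes at 1 day and 12 hours. This is
only a fragment; the other required partition properties are omitted here:

```yaml
slurm_options:
  # "HH:mm:ss" form
  idle_reclaim_time: 00:30:00
  elastic_partitions:
    partition_1:
      # "d.HH:mm:ss" form: 1 day and 12 hours
      max_runtime_limit: 1.12:00:00
      # batch_pools, default, etc. omitted in this fragment
```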
## Slurm with Batch Shipyard Guide
Please see the [full guide](69-batch-shipyard-slurm.md) for
relevant terminology and information on how this feature works in Batch
Shipyard.
## Full template
A full template of a Slurm cluster configuration file can be found
[here](https://github.com/Azure/batch-shipyard/tree/master/config_templates).
Note that these templates cannot be used as-is and must be modified to fit
your scenario.


@ -225,12 +225,14 @@ instead:
cert Certificate actions
data Data actions
diag Diagnostics actions
fed Federation actions
fs Filesystem in Azure actions
jobs Jobs actions
keyvault KeyVault actions
misc Miscellaneous actions
monitor Monitoring actions
pool Pool actions
slurm Slurm on Batch actions
storage Storage actions
```
@ -238,6 +240,7 @@ instead:
* `cert` commands deal with certificates to be used with Azure Batch
* `data` commands deal with data ingress and egress from Azure
* `diag` commands deal with diagnostics for Azure Batch
* `fed` commands deal with Batch Shipyard federations
* `fs` commands deal with Batch Shipyard provisioned remote filesystems in
Azure
* `jobs` commands deal with Azure Batch jobs and tasks
@ -246,6 +249,7 @@ Shipyard
* `misc` commands are miscellaneous commands that don't fall into other
categories
* `pool` commands deal with Azure Batch pools
* `slurm` commands deal with Slurm on Batch
* `storage` commands deal with Batch Shipyard metadata on Azure Storage
## `account` Command
@ -499,14 +503,15 @@ parts of a remote filesystem:
### `fs cluster` Command
`fs cluster` command has the following sub-commands:
```
add Create a filesystem storage cluster in Azure
del Delete a filesystem storage cluster in Azure
expand Expand a filesystem storage cluster in Azure
orchestrate Orchestrate a filesystem storage cluster in Azure with the...
resize Resize a filesystem storage cluster in Azure.
ssh Interactively login via SSH to a filesystem storage cluster...
start Starts a previously suspended filesystem storage cluster in...
status Query status of a filesystem storage cluster in Azure
suspend Suspend a filesystem storage cluster in Azure
```
As the `fs.yaml` configuration file can contain multiple storage cluster
definitions, all `fs cluster` commands require the argument
@ -534,6 +539,8 @@ storage cluster to perform actions against.
the file server.
* `--no-rebalance` rebalances the data and metadata among the disks for
better data spread and performance after the disk is added to the array.
* `orchestrate` will create the remote disks and the remote fs cluster as
defined in the fs config file
* `resize` resizes the storage cluster with additional virtual machines as
specified in the configuration. This is an experimental feature.
* `ssh` will interactively log into a virtual machine in the storage cluster.
@ -915,6 +922,73 @@ configuration file to all nodes in the specified pool
* `user del` will delete the SSH or RDP user defined in the pool
configuration file from all nodes in the specified pool
## `slurm` Command
The `slurm` command has the following sub-commands:
```
cluster Slurm cluster actions
ssh Slurm SSH actions
```
The `slurm cluster` sub-command has the following sub-sub-commands:
```
create Create a Slurm cluster with controllers and login nodes
destroy Destroy a Slurm controller
orchestrate Orchestrate a Slurm cluster with shared file system and
Batch...
status Query status of Slurm controllers and login nodes
```
The `slurm ssh` sub-command has the following sub-sub-commands:
```
controller Interactively login via SSH to a Slurm controller virtual...
login Interactively login via SSH to a Slurm login/gateway virtual...
node Interactively login via SSH to a Slurm compute node virtual...
```
* `cluster create` will create the Slurm controller and login portions of
the cluster
* `cluster destroy` will destroy the Slurm controller and login portions of
the cluster
* `--delete-resource-group` will delete the entire resource group that
contains the Slurm resources. Please take care when using this
option as any resource in the resource group is deleted, including
resources that are not related to Batch Shipyard.
* `--delete-virtual-network` will delete the virtual network and all of
its subnets
* `--generate-from-prefix` will attempt to generate all resource names
using standard naming conventions. This is helpful when there was an issue with
creation/deletion and the original virtual machine resources
cannot be enumerated. Note that OS disks cannot be deleted with this
option. Please use an alternate means (i.e., the Azure Portal) to
delete disks that may have been used by the Slurm resource VMs.
* `--no-wait` does not wait for deletion completion. It is not recommended
to use this parameter.
* `cluster orchestrate` will orchestrate the entire Slurm cluster with a
single Batch pool
* `--storage-cluster-id` will orchestrate the specified RemoteFS shared
file system
* `cluster status` queries the status of the Slurm controller and login nodes
* `ssh controller` will SSH into the Slurm controller nodes if permitted with
the controller SSH user
* `COMMAND` is an optional argument to specify the command to run. If your
command has switches, preface `COMMAND` with double dash as per POSIX
convention, e.g., `pool ssh -- sudo docker ps -a`.
* `--offset` is the cardinal offset of the controller node
* `--tty` allocates a pseudo-terminal
* `ssh login` will SSH into the Slurm login nodes with the cluster user
identity
* `COMMAND` is an optional argument to specify the command to run. If your
command has switches, preface `COMMAND` with double dash as per POSIX
convention, e.g., `pool ssh -- sudo docker ps -a`.
* `--offset` is the cardinal offset of the login node
* `--tty` allocates a pseudo-terminal
* `ssh node` will SSH into a Batch compute node with the cluster user identity
* `COMMAND` is an optional argument to specify the command to run. If your
command has switches, preface `COMMAND` with double dash as per POSIX
convention, e.g., `pool ssh -- sudo docker ps -a`.
* `--node-name` is the required Slurm node name
* `--tty` allocates a pseudo-terminal
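
A typical workflow with these sub-commands might resemble the following
sketch. The invocation prefix and the usual credentials/configuration options
are omitted, and `nfs_server` is the placeholder RemoteFS storage cluster id
from the example configuration:

```
# orchestrate the Slurm cluster, the RemoteFS shared file system and the
# single Batch pool in one command
slurm cluster orchestrate --storage-cluster-id nfs_server

# query the controller and login node status
slurm cluster status

# log into a login node as the cluster user to submit Slurm jobs
slurm ssh login

# inspect a specific Slurm compute node, if needed
slurm ssh node --node-name <slurm node name>

# tear down the cluster when finished
slurm cluster destroy
```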
## `storage` Command
The `storage` command has the following sub-commands:
```


@ -0,0 +1,319 @@
# Slurm on Batch with Batch Shipyard
The focus of this article is to explain the Slurm on Batch functionality
in Batch Shipyard and how to effectively deploy your workload for
traditional lift-and-shift scheduling while leveraging some
Platform-as-a-Service capabilities of Azure Batch.
## Overview
The [Slurm](https://slurm.schedmd.com/) workload manager is an open-source
job scheduler that is widely used among many institutional and supercomputing
sites. Azure Batch provides an abstraction for managing lower-layer VM
complexities and automated recovery through Batch pools. Batch Shipyard
provides an integration between Slurm and Batch pools where the Slurm cluster
controller and login nodes are provisioned and connected to compute nodes in
Batch pools in an on-demand fashion.
### Why?
Why is this feature useful when you can use
[Azure Batch](https://azure.microsoft.com/services/batch/) natively as a job
scheduler or leverage
[Azure CycleCloud](https://azure.microsoft.com/features/azure-cyclecloud/)?
Some users or organizations may prefer the use of Slurm native
tooling and execution workflows which are not currently possible with Azure
Batch; either due to workflow familiarity or existing investments in the
ecosystem. Additionally, Azure Batch may not provide some of the rich job
scheduling and accounting functionality available in Slurm that may be
required for some organizational workflows. Moreover, requirements such
as standing up a separate VM for CycleCloud or managing the underlying
Slurm compute node infrastructure may not be acceptable to some
users or organizations.
Slurm on Batch with Batch Shipyard attempts to mix the advantages of both
worlds by combining the Slurm scheduler with platform benefits of Azure Batch
compute node orchestration and management.
## Major Features
* Simple and automated Slurm cluster creation
* Automatic HA support of Slurm controllers and the ability to create
multiple login nodes
* Ability to specify arbitrary elastic partitions which may be comprised of a
heterogeneous mixture of Batch pools
* Automatic linking of shared file systems (RemoteFS clusters) between
all Slurm resources
* Support for concurrent dedicated and low priority compute nodes within
partitions
* Automatic feature tagging of nodes, including VM size and capabilities
* Automatic generic resource configuration for GPU VMs
* Automatic on-demand resizing of compute node resources including
user-specified idle reclaim timeouts and node reclaim exclusion filters
* Support for custom preparation scripts on all Slurm resources
* Goal-seeking engine to recover from compute node allocation failures
* Default cluster user SSH is linked to login nodes and compute nodes for
easy logins and file access across non-controller resources
* Supports most Batch Shipyard configuration options on the pool, including
distributed scratch, container runtime installations, monitoring integration,
shared file system mounting, automatic GPU setup, etc.
* Supports joining pre-existing partitions and nodes which may be on-premises
with elastic on-demand nodes
## Mental Model
### Slurm Dynamic Node Allocation and Deallocation
A Slurm cluster on Batch with Batch Shipyard utilizes the
[Slurm Elastic Computing (Cloud Bursting)](https://slurm.schedmd.com/elastic_computing.html)
functionality which is based on Slurm's
[Power Save](https://slurm.schedmd.com/power_save.html) capabilities.
In a nutshell, Slurm will `resume` nodes when needed to process jobs and
`suspend` nodes once there is no need for the nodes to run (i.e., relinquish
them back to the cloud).
When Slurm decides that new nodes should be provisioned, the `resume`
command triggers the `Batch Shipyard Slurm Helper` which allocates compute
nodes on the appropriate Batch pool targeting a specific Azure region.
Batch Shipyard handles the complexity of node name assignment, host DNS
registration, and ensuring the controller updates the node information with
the appropriate IP address.
When Slurm decides that nodes should be removed via `suspend`, the
`Batch Shipyard Slurm Helper` will deallocate these nodes in their
respective pools and release the node names back for availability.
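The `resume` and `suspend` hooks are wired through standard Slurm power save
settings in `slurm.conf`. The fragment below is only an illustrative sketch of
what such settings generally look like; Batch Shipyard generates the actual
configuration, and the program paths, node names, and values shown here are
assumptions:
```
# illustrative Slurm power save settings (not the generated file;
# program paths, node names, and values are assumptions)
SuspendProgram=/opt/batch-shipyard/suspend.sh   # hypothetical helper hook
ResumeProgram=/opt/batch-shipyard/resume.sh     # hypothetical helper hook
SuspendTime=900                 # e.g., an idle_reclaim_time of 00:15:00
ResumeTimeout=1800              # allow time for Batch pool resize
SuspendExcNodes=mypool1-[0-7]   # e.g., a reclaim_exclude_num_nodes of 8
```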
### Batch Pools as Slurm Compute Nodes
A Batch Shipyard provisioned Slurm cluster is built on top of different
resources in Azure. To more readily explain the concepts that form a Batch
Shipyard Slurm cluster, let's start with a high-level conceptual
layout of all of the components and possible interactions.
```
+---------------+
| |
+----------+ +-----------------> Azure Storage <----------------+
| | | | | |
| Azure | | +---------------+ |
| Resource | | |
| Manager | | +-------------+ |
| | | | | |
+------^---+ | +-------------> Azure Batch +------------+ |
| | | | | | |
MSI | MSI | | MSI +-------------+ | |
| | | | |
+-------------------------------------------------------------------------------+
| | | | | | |
| | | | +----v----+--------+ |
| +------------------------+ | | |
| | | | | | | +--------+ | |
| | +--+------+----+-+ | | | | | |
| | | | <-----------------------------> | slurmd | | |
| | | Batch Shipyard | | | | | | |
| | | Slurm Helper | | | +--------+ | |
| | | | | | | |
| | +----------------+ | +----------------+ | +--------------+ | |
| | | | | | | | | |
| | +-----------+ | | Batch Shipyard | | | Slurm client | | |
| | | | | | Remote FS VMs | | | tools | | |
| | | slurmctld | | | | | | | | |
| | | | +------> Subnet A <-----+ +--------------+ | |
| | +-----------+ | | 10.0.1.0/24 | | | |
| | | +-------^--------+ | Azure Batch | |
| | Slurm Controller Nodes | | | Compute Nodes | |
| | | | | | |
| | Subnet B | | | Subnet D | |
| | 10.0.2.0/24 | | | 10.1.0.0/16 | |
| +----------^-------------+ | +------------------+ |
| | +--------+---------+ |
| | | | |
| | | +--------------+ | |
| | | | | | |
| +-------------------+ | Slurm client | | |
| | | tools | | |
| | | | | |
| | +--------------+ | |
| | | |
| | Login Nodes | |
| | | |
| | Subnet C | |
| | 10.0.3.0/24 | |
| Virtual Network +---------^--------+ |
| 10.0.0.0/8 | |
+-------------------------------------------------------------------------------+
|
SSH |
|
+-------+------+
| |
| Cluster User |
| |
+--------------+
```
The base layer for all of the resources within a Slurm cluster on Batch is
an Azure Virtual Network. This virtual network can be shared
amongst other network-level resources such as network interfaces. The virtual
network can be "partitioned" into sub-address spaces through the use of
subnets. In the example above, we have four subnets where
`Subnet A 10.0.1.0/24` hosts the Batch Shipyard RemoteFS shared file system,
`Subnet B 10.0.2.0/24` contains the Slurm controller nodes,
`Subnet C 10.0.3.0/24` contains the login nodes,
and `Subnet D 10.1.0.0/16` contains a pool or a collection of pools of
Azure Batch compute nodes to serve as dynamically allocated Slurm
compute nodes.
One (or more) RemoteFS shared file systems can be used as a common file system
between login nodes and the Slurm compute nodes (provisioned as Batch compute
nodes). One of these file systems is also designated to store `slurmctld`
state for HA/failover to standby Slurm controller nodes. Cluster users
log in to the Slurm cluster via the login nodes, where the shared file
system is mounted and the Slurm client tools are installed; jobs submitted
through these tools are sent to the controller nodes.
Slurm configuration and munge keys are propagated to the provisioned compute
nodes in Batch pools along with mounting the appropriate RemoteFS shared
file systems. Once these nodes are provisioned and idle, the node information
is updated on the controller nodes to be available for Slurm job scheduling.
When Slurm signals that nodes are no longer needed, the Batch Shipyard
Slurm helper will then translate the Slurm node names back to Batch compute
node ids and deprovision appropriately.
## Walkthrough
The following is a brief walkthrough of configuring a Slurm on Batch cluster
with Batch Shipyard.
### Azure Active Directory Authentication Required
Azure Active Directory authentication is required to create a Slurm cluster.
When executing either the `slurm cluster create` or `slurm cluster orchestrate`
command, your service principal must be assigned at least the `Owner` role or a
[custom role](https://docs.microsoft.com/azure/active-directory/role-based-access-control-custom-roles)
that does not prohibit the following action, along with the ability to
create/read/write resources for the subscription:
* `Microsoft.Authorization/*/Write`
This action is required to enable
[Azure Managed Service Identity](https://docs.microsoft.com/azure/active-directory/managed-service-identity/overview)
on the Batch Shipyard Slurm Helper which runs on controller nodes.
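For reference, a minimal custom role definition satisfying this requirement
could resemble the following sketch; the role name, action list, and
subscription scope are placeholders to adapt to your organization's policies,
and such a definition can typically be registered with
`az role definition create`:
```json
{
  "Name": "Batch Shipyard Slurm Orchestrator",
  "IsCustom": true,
  "Description": "Create Slurm on Batch resources and MSI role assignments",
  "Actions": [
    "Microsoft.Authorization/*/Write",
    "Microsoft.Batch/*",
    "Microsoft.Compute/*",
    "Microsoft.Network/*",
    "Microsoft.Resources/*",
    "Microsoft.Storage/*"
  ],
  "NotActions": [],
  "AssignableScopes": [
    "/subscriptions/<subscription id>"
  ]
}
```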
### Configuration
The configuration for a Slurm cluster with Batch Shipyard is generally
composed of two major parts: the Slurm configuration and the normal global
and pool configurations.
#### Slurm Cluster Configuration
The Slurm cluster configuration is defined by a Slurm configuration
file. Please refer to the full
[Slurm cluster configuration documentation](18-batch-shipyard-configuration-slurm.md)
for more detailed explanations of each option and for those not shown below.
Conceptually, this file consists of five major parts:
```yaml
slurm:
# 1. define general settings
storage_account_settings: mystorageaccount
location: <Azure region, e.g., eastus>
resource_group: my-slurm-rg
cluster_id: slurm
# 2. define controller node settings
controller:
ssh:
# SSH access/user to the controller nodes, independent of other resources
public_ip:
# ...
virtual_network:
# Virtual Network should be the same for all resources, with a differing subnet
network_security:
# Optional, but recommended network security rules
vm_size: # appropriate VM size
vm_count: # Number greater than 1 will create an HA Slurm cluster
# 3. define login node settings
login:
ssh:
# The cluster user SSH and username settings
public_ip:
# ...
virtual_network:
# Virtual Network should be the same for all resources, with a differing subnet
network_security:
# Optional, but recommended network security rules
vm_size: # appropriate VM size
vm_count: # Number greater than 1 will create multiple login nodes
# 4. define shared file systems
shared_data_volumes:
nfs_server: # Batch Shipyard RemoteFS storage cluster id
mount_path: # The mount path across all Slurm resources
store_slurmctld_state: # at least one shared data volume must set this to true
# 5. define Slurm options
slurm_options:
idle_reclaim_time: # amount of idle time before Slurm issues suspend on nodes
elastic_partitions: # define Slurm elastic cloud bursting partitions
partition_1:
batch_pools:
mypool1: # pool id, must be pre-allocated with zero nodes
account_service_url: https://... # currently this must be the same as the Batch account specified in config.yaml
compute_node_type: # dedicated or low_priority nodes
max_compute_nodes: # maximum number of VMs to allocate
weight: # Slurm weight
features:
# arbitrary constraint sequence
reclaim_exclude_num_nodes: # number of nodes to exclude from idle reclaim.
# Once allocated, these number of nodes are not reclaimed.
# can define multiple pools
max_runtime_limit: # maximum runtime for jobs in this partition
default: # is the default partition, one partition must have this set to true
unmanaged_partitions:
# for pre-existing partitions (or on-prem partitions)
```
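As a rough sketch, an unmanaged partition entry pairs a Slurm partition
definition with its node definitions. The structure below follows the Slurm
configuration schema, but the string values are illustrative
`slurm.conf`-style entries only; consult the Slurm cluster configuration
documentation for the exact expected format:
```yaml
# resides under slurm:slurm_options: alongside elastic_partitions
unmanaged_partitions:
  - partition: 'PartitionName=onprem Nodes=onprem-[0-3] Default=NO MaxTime=INFINITE State=UP'
    nodes:
      - 'NodeName=onprem-[0-3] CPUs=16 State=UNKNOWN'
```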
#### Global Configuration
[Global configuration](12-batch-shipyard-configuration-global.md) should
contain the appropriate RemoteFS shared file system/data volumes that are
to be used across all Slurm resources under
`global_resources`:`volumes`:`shared_data_volumes`. More than one RemoteFS
shared data volume can be specified.
Optionally, if your workload will be container driven, you can specify
image pre-loads here as per normal convention under `global_resources`.
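A minimal sketch of the relevant fragment follows; the shared data volume
name `nfs` and its settings are assumptions that must match your RemoteFS
configuration, and the image pre-load list is optional:
```yaml
global_resources:
  volumes:
    shared_data_volumes:
      nfs:
        volume_driver: storage_cluster
        container_path: $AZ_BATCH_NODE_SHARED_DIR/nfs
  docker_images:
    # optional pre-loads if the workload is container driven
    - alpine:latest
```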
#### Pool Configuration
[Pool configuration](13-batch-shipyard-configuration-pool.md) should
be used to create all necessary pools used for Slurm elastic partitions
beforehand. This file is not used by `slurm cluster create`; it is only
consumed by `slurm cluster orchestrate` when orchestrating a Slurm cluster
with a single pool. When not using the orchestrate command, the required
pools must be created individually before issuing `slurm cluster create`.
Most pool properties apply with no modifications for Slurm clusters. By
default, all Slurm nodes have Docker installed. Do not use `native` mode
for Slurm compute nodes.
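For example, when not using the orchestrate command, the flow is roughly as
follows; the configuration directory is illustrative and `pool add` should be
repeated with the appropriate pool configuration for each pool referenced by
your elastic partitions:
```shell
# allocate each Batch pool with zero target nodes; Slurm resizes on demand
SHIPYARD_CONFIGDIR=. ./shipyard pool add
# then create the Slurm controller and login nodes
SHIPYARD_CONFIGDIR=. ./shipyard slurm cluster create
```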
### Limitations
This is a non-exhaustive list of potential limitations while using
the Slurm on Batch feature in Batch Shipyard.
* All pools must reside under the Batch account linked to the global
configuration. This limitation will be lifted at a later date.
* Shared file system (shared data volume) support is currently limited
to supported RemoteFS provisioned storage clusters: NFS and GlusterFS.
* Network Security Groups (NSGs) should permit communication between
Slurm resources for all required communication channels and ports.
* LDAP for centralized user control is not implemented, but can be
customized via the `additional_prep_script` option in the `controller` and
`login` sections of the Slurm configuration file and via
`additional_node_prep` for compute nodes.
* PAM-based auth restrictions for preventing users from logging into
compute nodes without a running job is not yet implemented.
* An action aggregator in the Batch Shipyard Slurm helper that would
improve resize operation performance is not yet implemented.
* Suspending and resuming the Slurm controller and login nodes is not
yet supported.
### Quotas
Ensure that you have sufficient core and pool quota for your Batch account.
Please note that *all* quotas (except for the number of Batch accounts
per region per subscription) apply to each individual Batch account
separately. User subscription based Batch accounts share the underlying
subscription regional core quotas.
## Sample Usage
Please see the sample [Slurm recipe](../recipes/Slurm-NFS) for a working
example.

Просмотреть файл

@ -148,7 +148,7 @@ the user that will execute the task should be present within the Docker
container. SSH clients will also need to be transparently directed to container. SSH clients will also need to be transparently directed to
connect to the alternate port and ignore input prompts since these connect to the alternate port and ignore input prompts since these
programs will be run in non-interactive mode. If you cannot override your MPI programs will be run in non-interactive mode. If you cannot override your MPI
runtime remote shell options, you can use an SSH config file stored in the runtime remote shell options, you can use an SSH `config` file stored in the
respective root or user's `.ssh` directory alongside the keys: respective root or user's `.ssh` directory alongside the keys:
``` ```

Просмотреть файл

@ -60,9 +60,9 @@ factors that Batch Shipyard has no control over.
#### What is `native` under pool `platform_image` and `custom_image`? #### What is `native` under pool `platform_image` and `custom_image`?
`native` designates to Batch Shipyard to attempt to create the pool such `native` designates to Batch Shipyard to attempt to create the pool such
that the pool works under native Docker mode where the compute nodes that the pool works under native Docker mode where the compute nodes
understand how to launch and execute Docker containers. Please understand "natively" understand how to launch and execute Docker containers. Please
that only a subset of `platform_image` combinations are compatible with understand that only a subset of `platform_image` combinations are compatible
`native` mode. You can refer to the with `native` mode. You can refer to the
[Batch Shipyard Platform Image support doc](25-batch-shipyard-platform-image-support.md) [Batch Shipyard Platform Image support doc](25-batch-shipyard-platform-image-support.md)
for more information. Compliant for more information. Compliant
[custom images](63-batch-shipyard-custom-images.md) are compatible with [custom images](63-batch-shipyard-custom-images.md) are compatible with

Просмотреть файл

@ -43,7 +43,7 @@ is found through `%PATH%` or is in the current working directory.
* Compute pool resize down (i.e., removing nodes from a pool) is not supported * Compute pool resize down (i.e., removing nodes from a pool) is not supported
when peer-to-peer transfer is enabled. when peer-to-peer transfer is enabled.
* The maximum number of compute nodes with peer-to-peer enabled is currently * The maximum number of compute nodes with peer-to-peer enabled is currently
40 for Linux pools for non-UserSubscription Batch accounts. This check is 100 for Linux pools for non-UserSubscription Batch accounts. This check is
no longer performed before a pool is created and will instead result in no longer performed before a pool is created and will instead result in
a ResizeError on the pool if not all compute nodes can be allocated. a ResizeError on the pool if not all compute nodes can be allocated.
* Data movement between Batch tasks as defined by `input_data`:`azure_batch` * Data movement between Batch tasks as defined by `input_data`:`azure_batch`

Просмотреть файл

@ -479,7 +479,7 @@ class ServiceProxy():
return self.batch_clients[batch_account] return self.batch_clients[batch_account]
except KeyError: except KeyError:
client = azure.batch.BatchServiceClient( client = azure.batch.BatchServiceClient(
self.creds.batch_creds, base_url=service_url) self.creds.batch_creds, batch_url=service_url)
self._modify_client_for_retry_and_user_agent(client) self._modify_client_for_retry_and_user_agent(client)
self.batch_clients[batch_account] = client self.batch_clients[batch_account] = client
logger.debug('batch client created for account: {}'.format( logger.debug('batch client created for account: {}'.format(

Просмотреть файл

@ -281,7 +281,7 @@ def _get_batch_credentials(
return _BATCH_CLIENTS[batch_account] return _BATCH_CLIENTS[batch_account]
except KeyError: except KeyError:
creds = create_msi_credentials(cloud, resource_id=resource_id) creds = create_msi_credentials(cloud, resource_id=resource_id)
client = azure.batch.BatchServiceClient(creds, base_url=service_url) client = azure.batch.BatchServiceClient(creds, batch_url=service_url)
_modify_client_for_retry_and_user_agent(client) _modify_client_for_retry_and_user_agent(client)
_BATCH_CLIENTS[batch_account] = client _BATCH_CLIENTS[batch_account] = client
logger.debug('batch client created for account: {}'.format( logger.debug('batch client created for account: {}'.format(

Просмотреть файл

@ -20,6 +20,7 @@ pages:
- RemoteFS: 15-batch-shipyard-configuration-fs.md - RemoteFS: 15-batch-shipyard-configuration-fs.md
- Monitoring: 16-batch-shipyard-configuration-monitor.md - Monitoring: 16-batch-shipyard-configuration-monitor.md
- Federation: 17-batch-shipyard-configuration-federation.md - Federation: 17-batch-shipyard-configuration-federation.md
- Slurm: 18-batch-shipyard-configuration-slurm.md
- CLI Commands and Usage: 20-batch-shipyard-usage.md - CLI Commands and Usage: 20-batch-shipyard-usage.md
- Platform Image support: 25-batch-shipyard-platform-image-support.md - Platform Image support: 25-batch-shipyard-platform-image-support.md
- In-Depth Feature Guides: - In-Depth Feature Guides:
@ -31,6 +32,7 @@ pages:
- Remote Filesystems: 65-batch-shipyard-remote-fs.md - Remote Filesystems: 65-batch-shipyard-remote-fs.md
- Resource Monitoring: 66-batch-shipyard-resource-monitoring.md - Resource Monitoring: 66-batch-shipyard-resource-monitoring.md
- Federations: 68-batch-shipyard-federation.md - Federations: 68-batch-shipyard-federation.md
- Slurm on Batch: 69-batch-shipyard-slurm.md
- Data Movement: 70-batch-shipyard-data-movement.md - Data Movement: 70-batch-shipyard-data-movement.md
- Azure KeyVault for Credential Management: 74-batch-shipyard-azure-keyvault.md - Azure KeyVault for Credential Management: 74-batch-shipyard-azure-keyvault.md
- Credential Encryption: 75-batch-shipyard-credential-encryption.md - Credential Encryption: 75-batch-shipyard-credential-encryption.md

Просмотреть файл

@ -66,7 +66,8 @@ shipyard jobs tasks list --jobid blast --taskid merge-task-00001 --poll-until-ta
# optionally egress the results.txt file from the compute node to local machine # optionally egress the results.txt file from the compute node to local machine
shipyard data files task --filespec blast,merge-task-00001,wd/results.txt shipyard data files task --filespec blast,merge-task-00001,wd/results.txt
# clean-up # clean up
shipyard jobs del -y --wiat jobs-blast.yaml shipyard jobs del -y jobs-split.yaml
shipyard jobs del -y jobs-blast.yaml
shipyard pool del -y shipyard pool del -y
``` ```

Просмотреть файл

@ -23,8 +23,9 @@ Use the following links to quickly navigate to recipe collections:
4. [Genomics and Bioinformatics](#genomics) 4. [Genomics and Bioinformatics](#genomics)
5. [Molecular Dynamics (MD)](#md) 5. [Molecular Dynamics (MD)](#md)
6. [RemoteFS](#remotefs) 6. [RemoteFS](#remotefs)
7. [Video Processing](#video) 7. [Slurm on Batch](#slurm)
8. [Windows](#windows) 8. [Video Processing](#video)
9. [Windows](#windows)
## <a name="benchmarks"></a>Benchmarks ## <a name="benchmarks"></a>Benchmarks
#### [HPCG-Infiniband-IntelMPI](./HPCG-Infiniband-IntelMPI) #### [HPCG-Infiniband-IntelMPI](./HPCG-Infiniband-IntelMPI)
@ -193,6 +194,11 @@ GlusterFS storage cluster.
This RemoteFS-NFS recipe contains information on how to provision a sample This RemoteFS-NFS recipe contains information on how to provision a sample
single VM NFS server. single VM NFS server.
## <a name="slurm"></a>Slurm on Batch
#### [Slurm+NFS](./Slurm+NFS)
This recipe contains information on how to orchestrate a
[Slurm](https://slurm.schedmd.com/) cluster with an NFS shared file system.
## <a name="video"></a>Video Processing ## <a name="video"></a>Video Processing
#### [FFmpeg-GPU](./FFmpeg-GPU) #### [FFmpeg-GPU](./FFmpeg-GPU)
This recipe contains information on how to containerize This recipe contains information on how to containerize

Просмотреть файл

@ -49,7 +49,7 @@ or unspecified
### Batch Shipyard Commands ### Batch Shipyard Commands
After you have created your RemoteFS GlusterFS storage cluster via After you have created your RemoteFS GlusterFS storage cluster via
`fs cluster add`, then you can issue `pool add` with the above config `fs cluster orchestrate`, then you can issue `pool add` with the above config
which will create a Batch pool and automatically link your GlusterFS which will create a Batch pool and automatically link your GlusterFS
storage cluster against your Batch pool. You can then use data placed on storage cluster against your Batch pool. You can then use data placed on
the storage cluster in your containerized workloads. the storage cluster in your containerized workloads.

Просмотреть файл

@ -2,7 +2,7 @@ remote_fs:
resource_group: my-resource-group resource_group: my-resource-group
location: <Azure region, e.g., eastus> location: <Azure region, e.g., eastus>
managed_disks: managed_disks:
premium: true sku: premium_lrs
disk_size_gb: 1023 disk_size_gb: 1023
disk_names: disk_names:
- p30-disk0a - p30-disk0a

Просмотреть файл

@ -45,20 +45,15 @@ there are at least 2 VMs, thus disks should be mapped in their respective
cardinal entry. cardinal entry.
### Commands to create the GlusterFS storage cluster ### Commands to create the GlusterFS storage cluster
After modifying the configuration files as required, then you must create After modifying the configuration files as required, you can orchestrate
the managed disks as the first step. The following assumes the configuration the entire GlusterFS shared file system with `fs cluster orchestrate`. The
files are in the current working directory. First all of the managed disks `orchestrate` command wraps up the disk allocation (`fs disks add`) and file
used by the storage cluster must be provisioned: server creation (`fs cluster add`) into one command. The commands can be
invoked separately if desired. The following assumes the configuration files
are in the current working directory.
```shell ```shell
SHIPYARD_CONFIGDIR=. ./shipyard fs disks add SHIPYARD_CONFIGDIR=. ./shipyard fs cluster orchestrate mystoragecluster
```
After the managed disks have been created, then you can create the cluster
with:
```shell
SHIPYARD_CONFIGDIR=. ./shipyard fs cluster add mystoragecluster
``` ```
This assumes that the storage cluster id is `mystoragecluster`. After the This assumes that the storage cluster id is `mystoragecluster`. After the

Просмотреть файл

@ -2,7 +2,7 @@ remote_fs:
resource_group: my-resource-group resource_group: my-resource-group
location: <Azure region, e.g., eastus> location: <Azure region, e.g., eastus>
managed_disks: managed_disks:
premium: true sku: premium_lrs
disk_size_gb: 1023 disk_size_gb: 1023
disk_names: disk_names:
- p30-disk0a - p30-disk0a

Просмотреть файл

@ -40,20 +40,15 @@ here.
is only a single VM, thus all disks should be mapped in the `"0"` entry. is only a single VM, thus all disks should be mapped in the `"0"` entry.
### Commands to create the NFS file server ### Commands to create the NFS file server
After modifying the configuration files as required, then you must create After modifying the configuration files as required, you can orchestrate
the managed disks as the first step. The following assumes the configuration the entire NFS file server with `fs cluster orchestrate`. The `orchestrate`
files are in the current working directory. First all of the managed disks command wraps up the disk allocation (`fs disks add`) and file server
used by the file server must be provisioned: creation (`fs cluster add`) into one command. The commands can be invoked
separately if desired. The following assumes the configuration files are in
the current working directory.
```shell ```shell
SHIPYARD_CONFIGDIR=. ./shipyard fs disks add SHIPYARD_CONFIGDIR=. ./shipyard fs cluster orchestrate mystoragecluster
```
After the managed disks have been created, then you can create the cluster
with:
```shell
SHIPYARD_CONFIGDIR=. ./shipyard fs cluster add mystoragecluster
``` ```
This assumes that the storage cluster id is `mystoragecluster`. After the This assumes that the storage cluster id is `mystoragecluster`. After the

100
recipes/Slurm+NFS/README.md Normal file
Просмотреть файл

@ -0,0 +1,100 @@
# Slurm+NFS
This recipe shows how to orchestrate a Slurm on Batch cluster with a single
Batch pool providing compute node VMs for Slurm workloads along with a shared
NFS filesystem.
## Configuration
Please refer to this [set of sample configuration files](./config) for
this recipe.
### Credentials Configuration
The credentials configuration should have `management` Azure Active Directory
credentials defined along with a valid storage account. The `management`
section can be supplied through environment variables instead if preferred.
The `batch` section, which associates all of the Batch pools used by Slurm
partitions, should also be populated. Additionally, a `slurm` section with
the `db_password` must be defined.
### Pool Configuration
The pool configuration can be modified as necessary for the requisite OS
and other tooling that should be installed. The `vm_count` should be kept
as `0` for both `dedicated` and `low_priority` during the initial allocation
as Slurm's elastic cloud bursting will size the pools appropriately.
### FS Configuration
The remote fs configuration file requires modification. Properties to
modify are:
* `resource_group` all resource groups should be modified to fit your
scenario.
* `location` should be modified to the Azure region where you would like
the storage cluster created. If linking against Azure Batch compute nodes,
it should be in the same region as your Azure Batch account.
* `managed_disks` should be modified for the number, size and type of
managed disks to allocate for the file server.
* `storage_clusters` should be modified to have a unique name instead of
`mystoragecluster` if you prefer.
* `hostname_prefix` should be modified to your preferred resource name
prefix.
* `virtual_network` should be modified for the address prefixes and subnet
properties that you prefer.
* `network_security` should be modified for inbound network security rules
to apply for SSH and external NFSv4 client mounts. If no NFSv4 clients
external to the virtual network are needed, then the entire `nfs` security
rule can be omitted.
* `file_server` options such as `mountpoint` and `mount_options` should be
modified for your scenario. The `type` should not be modified from `nfs`.
* `vm_size` can be modified for the file server depending upon your scenario.
If using premium managed disks, then a premium VM size must be selected
here.
* `vm_disk_map` contains all of the disks used for each VM. For `nfs`, there
is only a single VM, thus all disks should be mapped in the `"0"` entry.
### Slurm Configuration
The Slurm configuration should include the appropriate location and virtual
network settings for the controller and login nodes, in addition to defining
the appropriate elastic partitions. Please see the
[Slurm on Batch](../../docs/69-batch-shipyard-slurm.md) guide and the
[Slurm configuration](../../docs/18-batch-shipyard-configuration-slurm.md) document for more
information on each option.
### Commands to orchestrate the Slurm cluster
After modifying the configuration files as required, you can orchestrate
the entire Slurm cluster creation with `slurm cluster orchestrate`. The
`orchestrate` command wraps up the NFS disk allocation (`fs disks add`), NFS
file server creation (`fs cluster add`), Batch pool allocation (`pool add`),
and Slurm controller/login creation (`slurm cluster create`) into one command.
The commands can be invoked separately if desired. The following assumes the
configuration files are in the current working directory.
```shell
# ensure all configuration files are in the appropriate directory
export SHIPYARD_CONFIGDIR=.
# orchestrate the Slurm cluster
./shipyard slurm cluster orchestrate --storage-cluster-id nfs -y
```
You can log into the login nodes by issuing the command:
```shell
./shipyard slurm ssh login
```
which will default to logging into the first login node (since this cluster
only has one login node, it is the only possible node to log in to).
There you will be able to run Slurm commands such as `sbatch`, `squeue`,
`salloc`, `srun`, etc.
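For example, a quick end-to-end check of elastic allocation from a login node
might be the following; the partition name matches this recipe's sample
configuration and the node counts are arbitrary:
```shell
# run a trivial command on two elastically allocated nodes
srun -N 2 -p mypart1 hostname
# or submit a batch job and watch the queue while nodes are resumed
sbatch -N 2 -p mypart1 --wrap="hostname && sleep 60"
squeue
```
Note that the first submission to an idle partition will pause while Batch
Shipyard provisions the backing Batch compute nodes.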
To delete the Slurm cluster:
```shell
# delete the Batch pool providing Slurm compute nodes
./shipyard pool del -y
# delete the Slurm controller and login nodes
./shipyard slurm cluster destroy -y
# delete the RemoteFS shared file system
./shipyard fs cluster del nfs -y --delete-data-disks
```

Просмотреть файл

@ -0,0 +1,8 @@
batch_shipyard:
storage_account_settings: mystorageaccount
global_resources:
volumes:
shared_data_volumes:
nfs:
container_path: $AZ_BATCH_NODE_SHARED_DIR/nfs # this is not used
volume_driver: storage_cluster

Просмотреть файл

@ -0,0 +1,37 @@
credentials:
batch:
aad:
endpoint: https://batch.core.windows.net/
directory_id: <AAD directory id>
application_id: <AAD application id, if using SP login>
auth_key: <Auth key for SP login>
rsa_private_key_pem: <RSA private key for SP login>
x509_cert_sha1_thumbprint: <X509 cert thumbprint for SP login>
user: <AAD username for directory if using user login>
password: <AAD password for username above if using user login without multi-factor auth>
token_cache:
enabled: true
filename: .aad_token_cache
account_service_url: https://<myaccount>.<region>.batch.azure.com
resource_group: my-resource-group
management:
aad:
endpoint: https://management.azure.com/
directory_id: <AAD directory id>
application_id: <AAD application id, if using SP login>
auth_key: <Auth key for SP login>
rsa_private_key_pem: <RSA private key for SP login>
x509_cert_sha1_thumbprint: <X509 cert thumbprint for SP login>
user: <AAD username for directory if using user login>
password: <AAD password for username above if using user login without multi-factor auth>
token_cache:
enabled: true
filename: .aad_token_cache
subscription_id: <subscription id>
storage:
mystorageaccount:
account: <storage account name>
account_key: <storage account key>
endpoint: core.windows.net
slurm:
db_password: <slurm db pass>

Просмотреть файл

@ -0,0 +1,49 @@
remote_fs:
resource_group: my-resource-group
location: <Azure region, e.g., eastus>
managed_disks:
sku: premium_lrs
disk_size_gb: 1023
disk_names:
- p30-disk0a
- p30-disk1a
storage_clusters:
nfs:
hostname_prefix: mystoragecluster
ssh:
username: shipyard
file_server:
server_options:
'*':
- rw
- no_root_squash
- no_subtree_check
mount_options:
- noatime
- nodiratime
mountpoint: /shared
type: nfs
network_security:
nfs:
- <ip address prefix in cidr notation for allowable external clients>
ssh:
- '*'
virtual_network:
address_space: 10.0.0.0/8
existing_ok: true
name: myvnet
subnet:
address_prefix: 10.0.1.0/24
name: my-nfs-server-subnet
public_ip:
enabled: true
static: false
vm_count: 1
vm_size: STANDARD_F16S
vm_disk_map:
'0':
disk_array:
- p30-disk0a
- p30-disk1a
filesystem: btrfs
raid_level: 0

Просмотреть файл

@ -0,0 +1,18 @@
pool_specification:
id: slurmpool
virtual_network:
name: myvnet
resource_group: my-resource-group
address_space: 10.0.0.0/8
subnet:
name: batch-nodes
address_prefix: 10.1.0.0/16
vm_configuration:
platform_image:
offer: UbuntuServer
publisher: Canonical
sku: 18.04-LTS
vm_count:
dedicated: 0
low_priority: 0
vm_size: STANDARD_F2

Просмотреть файл

@ -0,0 +1,55 @@
slurm:
storage_account_settings: mystorageaccount
resource_group: my-resource-group
location: <Azure region, e.g., eastus>
cluster_id: myslurmcluster
controller:
ssh:
username: shipyardadmin
public_ip:
enabled: true
virtual_network:
address_space: 10.0.0.0/8
existing_ok: true
name: myvnet
subnet:
address_prefix: 10.0.2.0/24
name: slurm-controller-subnet
network_security:
ssh:
- '*'
vm_size: STANDARD_D2_V2
vm_count: 1
login:
ssh:
username: shipyard
public_ip:
enabled: true
virtual_network:
address_space: 10.0.0.0/8
existing_ok: true
name: myvnet
subnet:
address_prefix: 10.0.3.0/24
name: slurm-login-subnet
network_security:
ssh:
- '*'
vm_size: STANDARD_D2_V2
vm_count: 1
shared_data_volumes:
nfs:
host_mount_path: /shared
store_slurmctld_state: true
slurm_options:
idle_reclaim_time: 00:30:00
elastic_partitions:
mypart1:
batch_pools:
slurmpool:
account_service_url: https://<myaccount>.<region>.batch.azure.com
compute_node_type: dedicated
max_compute_nodes: 32
weight: 0
reclaim_exclude_num_nodes: 4
default: true

Просмотреть файл

@ -231,3 +231,9 @@ mapping:
type: str type: str
password_keyvault_secret_id: password_keyvault_secret_id:
type: str type: str
slurm:
type: map
mapping:
db_password:
type: text
required: true

268
schemas/slurm.yaml Normal file
Просмотреть файл

@ -0,0 +1,268 @@
desc: Slurm Configuration Schema
type: map
mapping:
slurm:
type: map
mapping:
storage_account_settings:
type: str
required: true
location:
type: str
required: true
resource_group:
type: str
required: true
cluster_id:
type: str
required: true
controller:
type: map
required: true
mapping:
ssh:
type: map
required: true
mapping:
username:
type: str
required: true
ssh_public_key:
type: str
ssh_public_key_data:
type: str
ssh_private_key:
type: str
generated_file_export_path:
type: str
public_ip:
type: map
mapping:
enabled:
type: bool
static:
type: bool
virtual_network:
type: map
required: true
mapping:
name:
type: str
required: true
resource_group:
type: str
existing_ok:
type: bool
address_space:
type: str
subnet:
type: map
mapping:
name:
type: str
required: true
address_prefix:
type: str
required: true
network_security:
type: map
required: true
mapping:
ssh:
type: seq
required: true
sequence:
- type: str
custom_inbound_rules:
type: map
mapping:
regex;([a-zA-Z0-9]+):
type: map
mapping:
destination_port_range:
type: str
required: true
protocol:
type: str
enum: ['*', 'tcp', 'udp']
source_address_prefix:
type: seq
required: true
sequence:
- type: str
vm_size:
type: str
required: true
vm_count:
type: int
required: true
range:
min: 1
max: 3
accelerated_networking:
type: bool
additional_prep_script:
type: str
login:
type: map
required: true
mapping:
ssh:
type: map
required: true
mapping:
username:
type: str
required: true
ssh_public_key:
type: str
ssh_public_key_data:
type: str
ssh_private_key:
type: str
generated_file_export_path:
type: str
public_ip:
type: map
mapping:
enabled:
type: bool
static:
type: bool
virtual_network:
type: map
required: true
mapping:
name:
type: str
required: true
resource_group:
type: str
existing_ok:
type: bool
address_space:
type: str
subnet:
type: map
mapping:
name:
type: str
required: true
address_prefix:
type: str
required: true
network_security:
type: map
required: true
mapping:
ssh:
type: seq
required: true
sequence:
- type: str
custom_inbound_rules:
type: map
mapping:
regex;([a-zA-Z0-9]+):
type: map
mapping:
destination_port_range:
type: str
required: true
protocol:
type: str
enum: ['*', 'tcp', 'udp']
source_address_prefix:
type: seq
required: true
sequence:
- type: str
vm_size:
type: str
required: true
vm_count:
type: int
required: true
range:
min: 1
accelerated_networking:
type: bool
additional_prep_script:
type: str
shared_data_volumes:
type: map
required: true
mapping:
regex;([a-zA-Z0-9]+):
type: map
required: true
mapping:
host_mount_path:
type: str
required: true
store_slurmctld_state:
type: bool
required: true
slurm_options:
type: map
required: true
mapping:
idle_reclaim_time:
type: str
elastic_partitions:
type: map
required: true
mapping:
regex;([a-zA-Z0-9]+):
type: map
mapping:
batch_pools:
type: map
required: true
mapping:
regex;([a-zA-Z0-9]+):
type: map
mapping:
account_service_url:
type: str
compute_node_type:
type: str
required: true
enum: ['dedicated', 'low_priority']
max_compute_nodes:
type: int
required: true
range:
min: 1
weight:
type: int
required: true
range:
min: 0
features:
type: seq
sequence:
- type: text
reclaim_exclude_num_nodes:
type: int
range:
min: 0
max_runtime_limit:
type: str
default:
type: bool
required: true
unmanaged_partitions:
type: seq
sequence:
- type: map
mapping:
partition:
type: str
required: true
nodes:
type: seq
required: true
sequence:
- type: str

Просмотреть файл

@ -6,10 +6,10 @@ set -e
set -o pipefail set -o pipefail
# version consts # version consts
DOCKER_CE_VERSION_DEBIAN=18.03.1 DOCKER_CE_VERSION_DEBIAN=18.09.1
# consts # consts
DOCKER_CE_PACKAGE_DEBIAN="docker-ce=${DOCKER_CE_VERSION_DEBIAN}~ce~3-0~" DOCKER_CE_PACKAGE_DEBIAN="docker-ce=5:${DOCKER_CE_VERSION_DEBIAN}~3-0~"
SHIPYARD_VAR_DIR=/var/batch-shipyard SHIPYARD_VAR_DIR=/var/batch-shipyard
SHIPYARD_CONF_FILE=${SHIPYARD_VAR_DIR}/federation.json SHIPYARD_CONF_FILE=${SHIPYARD_VAR_DIR}/federation.json

Просмотреть файл

@ -6,10 +6,10 @@ set -e
set -o pipefail set -o pipefail
# version consts # version consts
DOCKER_CE_VERSION_DEBIAN=18.03.1 DOCKER_CE_VERSION_DEBIAN=18.09.1
# consts # consts
DOCKER_CE_PACKAGE_DEBIAN="docker-ce=${DOCKER_CE_VERSION_DEBIAN}~ce~3-0~" DOCKER_CE_PACKAGE_DEBIAN="docker-ce=5:${DOCKER_CE_VERSION_DEBIAN}~3-0~"
SHIPYARD_VAR_DIR=/var/batch-shipyard SHIPYARD_VAR_DIR=/var/batch-shipyard
SHIPYARD_CONF_FILE=${SHIPYARD_VAR_DIR}/heimdall.json SHIPYARD_CONF_FILE=${SHIPYARD_VAR_DIR}/heimdall.json
PROMETHEUS_VAR_DIR=${SHIPYARD_VAR_DIR}/prometheus PROMETHEUS_VAR_DIR=${SHIPYARD_VAR_DIR}/prometheus

Просмотреть файл

@ -943,8 +943,9 @@ install_kata_containers() {
process_fstab_entry() { process_fstab_entry() {
local desc=$1 local desc=$1
local mountpoint=$2 local fstab_entry=$2
local fstab_entry=$3 IFS=' ' read -ra fs <<< "$fstab_entry"
local mountpoint="${fs[1]}"
log INFO "Creating host directory for $desc at $mountpoint" log INFO "Creating host directory for $desc at $mountpoint"
mkdir -p "$mountpoint" mkdir -p "$mountpoint"
chmod 777 "$mountpoint" chmod 777 "$mountpoint"
@ -976,15 +977,14 @@ process_fstab_entry() {
} }
mount_storage_clusters() { mount_storage_clusters() {
if [ -n "$sc_args" ]; then if [ -n "$SHIPYARD_STORAGE_CLUSTER_FSTAB" ]; then
log DEBUG "Mounting storage clusters" log DEBUG "Mounting storage clusters"
# eval and split fstab var to expand vars (this is ok since it is set by shipyard) IFS='#' read -ra fstab_mounts <<< "$SHIPYARD_STORAGE_CLUSTER_FSTAB"
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB") for fstab in "${fstab_mounts[@]}"; do
IFS='#' read -ra fstabs <<< "$fstab_mounts" # eval and split fstab var to expand vars
i=0 fstab_entry=$(eval echo "$fstab")
for sc_arg in "${sc_args[@]}"; do IFS=' ' read -ra parts <<< "$fstab_entry"
IFS=':' read -ra sc <<< "$sc_arg" mount "${parts[1]}"
mount "${MOUNTS_PATH}"/"${sc[1]}"
done done
log INFO "Storage clusters mounted" log INFO "Storage clusters mounted"
fi fi
@ -1000,10 +1000,10 @@ process_storage_clusters() {
for sc_arg in "${sc_args[@]}"; do for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg" IFS=':' read -ra sc <<< "$sc_arg"
fstab_entry="${fstabs[$i]}" fstab_entry="${fstabs[$i]}"
process_fstab_entry "$sc_arg" "$MOUNTS_PATH/${sc[1]}" "$fstab_entry" process_fstab_entry "$sc_arg" "$fstab_entry"
i=$((i + 1)) i=$((i + 1))
done done
log INFO "Storage clusters mounted" log INFO "Storage clusters processed"
fi fi
} }
@ -1029,9 +1029,9 @@ process_custom_fstab() {
# eval and split fstab var to expand vars # eval and split fstab var to expand vars
fstab_entry=$(eval echo "$fstab") fstab_entry=$(eval echo "$fstab")
IFS=' ' read -ra parts <<< "$fstab_entry" IFS=' ' read -ra parts <<< "$fstab_entry"
process_fstab_entry "${parts[2]}" "${parts[1]}" "$fstab_entry" process_fstab_entry "${parts[2]}" "$fstab_entry"
done done
log INFO "Custom mounts via fstab mounted" log INFO "Custom mounts via fstab processed"
fi fi
} }
@ -1637,26 +1637,9 @@ elif [ -f "$nodeprepfinished" ]; then
install_and_start_node_exporter install_and_start_node_exporter
install_and_start_cadvisor install_and_start_cadvisor
# mount any storage clusters # mount any storage clusters
if [ -n "$sc_args" ]; then mount_storage_clusters
# eval and split fstab var to expand vars (this is ok since it is set by shipyard)
fstab_mounts=$(eval echo "$SHIPYARD_STORAGE_CLUSTER_FSTAB")
IFS='#' read -ra fstabs <<< "$fstab_mounts"
i=0
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
mount "${MOUNTS_PATH}"/"${sc[1]}"
done
fi
# mount any custom mounts # mount any custom mounts
if [ -n "$SHIPYARD_CUSTOM_MOUNTS_FSTAB" ]; then mount_custom_fstab
IFS='#' read -ra fstab_mounts <<< "$SHIPYARD_CUSTOM_MOUNTS_FSTAB"
for fstab in "${fstab_mounts[@]}"; do
# eval and split fstab var to expand vars
fstab_entry=$(eval echo "$fstab")
IFS=' ' read -ra parts <<< "$fstab_entry"
mount "${parts[1]}"
done
fi
# mount glusterfs on compute volumes # mount glusterfs on compute volumes
if [ $gluster_on_compute -eq 1 ]; then if [ $gluster_on_compute -eq 1 ]; then
if [ $custom_image -eq 1 ]; then if [ $custom_image -eq 1 ]; then

Просмотреть файл

@ -0,0 +1,576 @@
#!/usr/bin/env bash
# shellcheck disable=SC1091
set -e
set -o pipefail
# version consts
SLURM_VERSION=18.08.5-2
# consts
SLURM_PACKAGE_DEBIAN="slurm-${SLURM_VERSION}_1.0_amd64"
SLURM_PACKAGE_CENTOS="slurm-${SLURM_VERSION}-1.0-1.x86_64"
SLURM_CONF_DIR=/etc/slurm
AZFILE_MOUNT_DIR=/azfile-slurm
SHIPYARD_VAR_DIR=/var/batch-shipyard
SHIPYARD_CONF_FILE=${SHIPYARD_VAR_DIR}/slurm.json
SHIPYARD_HOST_FILE=${SHIPYARD_VAR_DIR}/slurm_host
SHIPYARD_COMPLETED_ASSIGNMENT_FILE=${SHIPYARD_VAR_DIR}/slurm_host.assigned
SHIPYARD_PROVISION_FAILED_FILE=${SHIPYARD_VAR_DIR}/slurm_host.failed
HOSTNAME=$(hostname -s)
HOSTNAME=${HOSTNAME,,}
IP_ADDRESS=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)
log() {
local level=$1
shift
echo "$(date -u -Ins) - $level - $*"
}
# dump uname immediately
uname -ar
# try to get distrib vars
if [ -e /etc/os-release ]; then
. /etc/os-release
DISTRIB_ID=$ID
DISTRIB_RELEASE=$VERSION_ID
DISTRIB_CODENAME=$VERSION_CODENAME
if [ -z "$DISTRIB_CODENAME" ]; then
if [ "$DISTRIB_ID" == "debian" ] && [ "$DISTRIB_RELEASE" == "9" ]; then
DISTRIB_CODENAME=stretch
fi
fi
else
if [ -e /etc/lsb-release ]; then
. /etc/lsb-release
fi
fi
if [ -z "${DISTRIB_ID+x}" ] || [ -z "${DISTRIB_RELEASE+x}" ]; then
log ERROR "Unknown DISTRIB_ID or DISTRIB_RELEASE."
exit 1
fi
if [ -z "${DISTRIB_CODENAME}" ]; then
log WARNING "Unknown DISTRIB_CODENAME."
fi
DISTRIB_ID=${DISTRIB_ID,,}
DISTRIB_RELEASE=${DISTRIB_RELEASE,,}
DISTRIB_CODENAME=${DISTRIB_CODENAME,,}
# set distribution specific vars
PACKAGER=
PACKAGE_SUFFIX=
SLURM_PACKAGE=
if [ "$DISTRIB_ID" == "ubuntu" ]; then
PACKAGER=apt
PACKAGE_SUFFIX=deb
SLURM_PACKAGE="${SLURM_PACKAGE_DEBIAN}.${PACKAGE_SUFFIX}"
elif [ "$DISTRIB_ID" == "debian" ]; then
PACKAGER=apt
PACKAGE_SUFFIX=deb
SLURM_PACKAGE="${SLURM_PACKAGE_DEBIAN}.${PACKAGE_SUFFIX}"
elif [[ $DISTRIB_ID == centos* ]] || [ "$DISTRIB_ID" == "rhel" ]; then
PACKAGER=yum
PACKAGE_SUFFIX=rpm
SLURM_PACKAGE="${SLURM_PACKAGE_CENTOS}.${PACKAGE_SUFFIX}"
else
PACKAGER=zypper
PACKAGE_SUFFIX=rpm
SLURM_PACKAGE="${SLURM_PACKAGE_CENTOS}.${PACKAGE_SUFFIX}"
fi
if [ "$PACKAGER" == "apt" ]; then
export DEBIAN_FRONTEND=noninteractive
fi
# globals
aad_cloud=
cluster_id=
cluster_user=
queue_assign=
storage_account=
storage_key=
storage_ep=
storage_prefix=
shipyardversion=
# process command line options
while getopts "h?a:i:q:s:u:v:" opt; do
case "$opt" in
h|\?)
echo "shipyard_slurm_computenode_bootstrap.sh parameters"
echo ""
echo "-a [aad cloud type] AAD cloud type for MSI"
echo "-i [id] cluster id"
echo "-q [assign] queue names"
echo "-s [storage account:storage key:storage ep:prefix] storage config"
echo "-u [user] cluster username"
echo "-v [version] batch-shipyard version"
echo ""
exit 1
;;
a)
aad_cloud=${OPTARG,,}
;;
i)
cluster_id=${OPTARG}
;;
q)
queue_assign=${OPTARG}
;;
s)
IFS=':' read -ra ss <<< "${OPTARG}"
storage_account=${ss[0]}
storage_key=${ss[1]}
storage_ep=${ss[2]}
storage_prefix=${ss[3]}
;;
u)
cluster_user=${OPTARG}
;;
v)
shipyardversion=$OPTARG
;;
esac
done
shift $((OPTIND-1))
[ "$1" = "--" ] && shift
# check required params
if [ -z "$aad_cloud" ]; then
log ERROR "AAD cloud type not specified"
exit 1
fi
check_for_buggy_ntfs_mount() {
# Check to ensure sdb1 mount is not mounted as ntfs
set +e
mount | grep /dev/sdb1 | grep fuseblk
local rc=$?
set -e
if [ $rc -eq 0 ]; then
log ERROR "/dev/sdb1 temp disk is mounted as fuseblk/ntfs"
exit 1
fi
}
download_file_as() {
log INFO "Downloading: $1 as $2"
local retries=10
set +e
while [ $retries -gt 0 ]; do
if curl -fSsL "$1" -o "$2"; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not download: $1"
exit 1
fi
sleep 1
done
set -e
}
add_repo() {
local url=$1
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [ "$PACKAGER" == "apt" ]; then
curl -fSsL "$url" | apt-key add -
rc=$?
elif [ "$PACKAGER" == "yum" ]; then
yum-config-manager --add-repo "$url"
rc=$?
elif [ "$PACKAGER" == "zypper" ]; then
zypper addrepo "$url"
rc=$?
fi
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not add repo: $url"
exit 1
fi
sleep 1
done
set -e
}
refresh_package_index() {
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [ "$PACKAGER" == "apt" ]; then
apt-get update
rc=$?
elif [ "$PACKAGER" == "yum" ]; then
yum makecache -y fast
rc=$?
elif [ "$PACKAGER" == "zypper" ]; then
zypper -n --gpg-auto-import-keys ref
rc=$?
fi
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not update package index"
exit 1
fi
sleep 1
done
set -e
}
install_packages() {
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [ "$PACKAGER" == "apt" ]; then
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends "$@"
rc=$?
elif [ "$PACKAGER" == "yum" ]; then
yum install -y "$@"
rc=$?
elif [ "$PACKAGER" == "zypper" ]; then
zypper -n in "$@"
rc=$?
fi
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not install packages ($PACKAGER): $*"
exit 1
fi
sleep 1
done
set -e
}
install_local_packages() {
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [ "$PACKAGER" == "apt" ]; then
dpkg -i "$@"
rc=$?
else
rpm -Uvh --nodeps "$@"
rc=$?
fi
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not install local packages: $*"
exit 1
fi
sleep 1
done
set -e
}
start_and_check_slurmd() {
local retries=120
local rc
set +e
systemctl start slurmd
while [ $retries -gt 0 ]; do
if systemctl --no-pager status slurmd; then
break
fi
sleep 1
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "slurmd could not start properly"
exit 1
fi
systemctl restart slurmd
done
set -e
}
create_batch_shipyard_slurm_config() {
mkdir -p ${SHIPYARD_VAR_DIR}
chmod 755 ${SHIPYARD_VAR_DIR}
cat > ${SHIPYARD_CONF_FILE} << EOF
{
"aad_cloud": "$aad_cloud",
"storage": {
"account": "$storage_account",
"account_key": "$storage_key",
"endpoint": "$storage_ep",
"entity_prefix": "$storage_prefix",
"queues": {
"assign": "$queue_assign"
},
"azfile_mount_dir": "$AZFILE_MOUNT_DIR"
},
"cluster_id": "$cluster_id",
"cluster_user": "$cluster_user",
"ip_address": "$IP_ADDRESS",
"logging_id": "$AZ_BATCH_NODE_ID",
"batch": {
"account": "$AZ_BATCH_ACCOUNT_NAME",
"pool_id": "$AZ_BATCH_POOL_ID",
"node_id": "$AZ_BATCH_NODE_ID",
"is_dedicated": "$AZ_BATCH_NODE_IS_DEDICATED"
},
"batch_shipyard": {
"var_path": "$SHIPYARD_VAR_DIR",
"version": "$shipyardversion"
}
}
EOF
chmod 600 "$SHIPYARD_CONF_FILE"
log INFO "Batch Shipyard slurm config created"
}
check_provisioning_status() {
local host=$1
local reset_host=$2
set +e
docker run --rm -v "${SHIPYARD_VAR_DIR}:${SHIPYARD_VAR_DIR}:ro" \
"alfpark/batch-shipyard:${shipyardversion}-slurm" \
check-provisioning-status --conf "${SHIPYARD_CONF_FILE}" \
--host "$1"
rc=$?
set -e
if [ $rc -ne 0 ]; then
log ERROR "Provisioning interrupt detected for host $1"
if [ "$reset_host" -eq 1 ] && [ ! -s "$SHIPYARD_PROVISION_FAILED_FILE" ]; then
host="${host}-$RANDOM"
log DEBUG "Resetting host name to avoid collision: $host"
hostnamectl set-hostname "${host}"
hostnamectl status
log DEBUG "Rebooting for hostname propagation to DNS"
touch "$SHIPYARD_PROVISION_FAILED_FILE"
shutdown -r now
fi
exit $rc
fi
}
log INFO "Bootstrap start"
echo "Configuration:"
echo "--------------"
echo "OS Distribution: $DISTRIB_ID $DISTRIB_RELEASE $DISTRIB_CODENAME"
echo "Hostname: $HOSTNAME"
echo "IP Address: $IP_ADDRESS"
echo "Batch Shipyard Version: $shipyardversion"
echo "AAD cloud: $aad_cloud"
echo "Storage: $storage_account:$storage_prefix"
echo "Cluster Id: $cluster_id"
echo "Cluster user: $cluster_user"
echo "Assign queue: $queue_assign"
echo ""
# check sdb1 mount
check_for_buggy_ntfs_mount
# set sudoers to not require tty
sed -i 's/^Defaults[ ]*requiretty/# Defaults requiretty/g' /etc/sudoers
# if provisioning failed previously, don't proceed further
if [ -s "$SHIPYARD_PROVISION_FAILED_FILE" ]; then
log ERROR "Slurm host provisioning failed."
exit 1
fi
# post-reboot token push steps
if [ -s "$SHIPYARD_HOST_FILE" ]; then
log INFO "Host assignment file found. Assuming reboot was successful."
hostnamectl status
# slurmd is manually started since storage clusters are manually mounted
# check slurmd in a loop, sometimes it can fail starting due to GPU not ready
start_and_check_slurmd
# update host entity with batch node id and ip address
if [ ! -s "$SHIPYARD_COMPLETED_ASSIGNMENT_FILE" ]; then
host=$(<${SHIPYARD_HOST_FILE})
log DEBUG "Host from hostfile is: $host"
check_provisioning_status "$host" 1
docker run --rm -v "${SHIPYARD_CONF_FILE}:${SHIPYARD_CONF_FILE}:ro" \
-v "${AZFILE_MOUNT_DIR}:${AZFILE_MOUNT_DIR}:rw" \
"alfpark/batch-shipyard:${shipyardversion}-slurm" \
complete-node-assignment --conf "${SHIPYARD_CONF_FILE}" \
--host "$host"
touch "$SHIPYARD_COMPLETED_ASSIGNMENT_FILE"
fi
log INFO "Bootstrap completed"
exit 0
fi
# write batch shipyard config
create_batch_shipyard_slurm_config
echo "Fetching host assignment"
docker run --rm -v "${SHIPYARD_VAR_DIR}:${SHIPYARD_VAR_DIR}:rw" \
-v "${AZFILE_MOUNT_DIR}:${AZFILE_MOUNT_DIR}:rw" \
"alfpark/batch-shipyard:${shipyardversion}-slurm" \
get-node-assignment --conf "${SHIPYARD_CONF_FILE}"
host=$(<${SHIPYARD_HOST_FILE})
echo "Hostname assignment retrieved: $host"
check_provisioning_status "$host" 0
# set cluster user and passwordless SSH for MPI jobs
echo "Setting up cluster user: ${cluster_user}"
useradd -o -u 2000 -N -g 1000 -p '!' -s /bin/bash -m -d "/home/${cluster_user}" "${cluster_user}"
ssh_dir="/home/${cluster_user}/.ssh"
mkdir -p "$ssh_dir"
chmod 700 "$ssh_dir"
echo "$SHIPYARD_SLURM_CLUSTER_USER_SSH_PUBLIC_KEY" > "${ssh_dir}/id_rsa.pub"
chmod 644 "${ssh_dir}/id_rsa.pub"
echo "$SHIPYARD_SLURM_CLUSTER_USER_SSH_PUBLIC_KEY" >> "${ssh_dir}/authorized_keys"
chmod 600 "${ssh_dir}/authorized_keys"
cat > "${ssh_dir}/config" << EOF
Host 10.*
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
EOF
chmod 600 "${ssh_dir}/config"
mv slurm_cluster_user_ssh_private_key "${ssh_dir}/id_rsa"
chmod 600 "${ssh_dir}/id_rsa"
chown -R "${cluster_user}:_azbatchgrp" "$ssh_dir"
echo "Cluster user setup complete"
# add slurm user
groupadd -g 64030 slurm
useradd -u 64030 -N -g 64030 -p '!' -s /bin/bash -m -d /home/slurm slurm
slurm_uid=$(id -u slurm)
slurm_gid=$(id -g slurm)
# install slurm packages
if [ "$DISTRIB_ID" == "centos" ]; then
install_packages epel-release
fi
install_packages hwloc numactl munge
slurm_docker_image="alfpark/slurm:${SLURM_VERSION}-${DISTRIB_ID}-${DISTRIB_RELEASE}"
docker pull "$slurm_docker_image"
mkdir -p /tmp/slurm
docker run --rm -v /tmp/slurm:/tmp/slurm "$slurm_docker_image" \
/bin/sh -c 'cp -r /root/* /tmp/slurm/'
install_local_packages "/tmp/slurm/${SLURM_PACKAGE}"
cp /tmp/slurm/slurmd.service /etc/systemd/system/
rm -rf /tmp/slurm
docker rmi "$slurm_docker_image"
mkdir -p "$SLURM_CONF_DIR" /var/spool/slurm /var/log/slurm
chown -R slurm:slurm /var/spool/slurm /var/log/slurm
cat << EOF > "/etc/ld.so.conf.d/slurm.conf"
/usr/lib/slurm
EOF
ldconfig
ldconfig -p | grep libslurmfull
systemctl daemon-reload
# mount Azure file share
cat << EOF > "/root/.azfile_creds"
username=$storage_account
password=$storage_key
EOF
chmod 600 /root/.azfile_creds
mkdir -p "$AZFILE_MOUNT_DIR"
chmod 755 "$AZFILE_MOUNT_DIR"
share="${storage_prefix}slurm"
echo "//${storage_account}.file.${storage_ep}/${share} ${AZFILE_MOUNT_DIR} cifs vers=3.0,credentials=/root/.azfile_creds,uid=${slurm_uid},gid=${slurm_gid},_netdev,serverino 0 0" >> /etc/fstab
mount "$AZFILE_MOUNT_DIR"
azfile_cluster_path="${AZFILE_MOUNT_DIR}/${cluster_id}"
# configure munge
shared_munge_key_path="${azfile_cluster_path}/munge"
shared_munge_key="${shared_munge_key_path}/munge.key"
# poll for munge key on the shared file share
echo "Waiting for munge key"
while [ ! -s "$shared_munge_key" ]; do
sleep 1
done
echo "Munge key found."
cp -f "$shared_munge_key" /etc/munge/munge.key
chmod 400 /etc/munge/munge.key
chown munge:munge /etc/munge/munge.key
if [ "$DISTRIB_ID" == "centos" ]; then
systemctl start munge
fi
munge -n | unmunge
systemctl enable munge
systemctl restart munge
systemctl --no-pager status munge
# configure slurm
mkdir -p /var/spool/slurmd
chown -R slurm:slurm /var/spool/slurmd
# construct cgroup conf files
cat << EOF > "${SLURM_CONF_DIR}/cgroup.conf"
CgroupAutomount=yes
ConstrainCores=yes
ConstrainDevices=yes
#ConstrainRAMSpace=yes
EOF
cat << EOF > "${SLURM_CONF_DIR}/cgroup_allowed_devices_file.conf"
/dev/null
/dev/urandom
/dev/zero
/dev/sda*
/dev/sdb*
/dev/cpu/*/*
/dev/pts/*
/dev/nvidia*
/dev/infiniband/*
EOF
# copy configuration file
slurm_conf_azfile_path="${azfile_cluster_path}/slurm/conf"
echo "Waiting for slurm configuration file in $slurm_conf_azfile_path"
while [ ! -s "${slurm_conf_azfile_path}/slurm.conf" ]; do
sleep 1
done
echo "Slurm configuration file found."
cp -f "${slurm_conf_azfile_path}/slurm.conf" "${SLURM_CONF_DIR}/slurm.conf"
chmod 644 "${SLURM_CONF_DIR}/slurm.conf"
check_provisioning_status "$host" 0
# set hostname, reboot required
hostnamectl set-hostname "$host"
hostnamectl status
# construct gres.conf for GPUs
set +e
gpus=$(lspci | grep -i nvidia | awk '{print $1}' | cut -d : -f 1)
set -e
if [ -n "$gpus" ]; then
gres_file="${SLURM_CONF_DIR}/gres.conf"
count=0
for i in $gpus; do
CPUAFFINITY=$(cat /sys/class/pci_bus/"$i":00/cpulistaffinity)
echo "NodeName=${host} Name=gpu File=/dev/nvidia${count} CPUs=${CPUAFFINITY}" >> "$gres_file"
count=$((count+1))
done
chmod 644 "$gres_file"
chown slurm:slurm "$gres_file"
fi
log INFO "Rebooting for hostname propagation to DNS"
shutdown -r now
# TODO add slurm pam auth (prevent user from SSHing into a compute node without an allocation)
#install_packages libpam-slurm
#echo "" >> /etc/pam.d/sshd
#echo "account required pam_slurm.so" >> /etc/pam.d/sshd

Просмотреть файл

@ -0,0 +1,858 @@
#!/usr/bin/env bash
# shellcheck disable=SC1039,SC1091,SC2129
set -e
set -o pipefail
# version consts
SLURM_VERSION=18.08.5-2
DOCKER_CE_VERSION_DEBIAN=18.09.2
GLUSTER_VERSION_DEBIAN=4.1
GLUSTER_VERSION_CENTOS=41
# consts
DOCKER_CE_PACKAGE_DEBIAN="docker-ce=5:${DOCKER_CE_VERSION_DEBIAN}~3-0~"
SLURM_CONF_DIR=/etc/slurm
AZFILE_MOUNT_DIR=/azfile-slurm
SHIPYARD_VAR_DIR=/var/batch-shipyard
SHIPYARD_SLURM_PY=${SHIPYARD_VAR_DIR}/slurm.py
SHIPYARD_CONF_FILE=${SHIPYARD_VAR_DIR}/slurm.json
HOSTNAME=$(hostname -s)
HOSTNAME=${HOSTNAME,,}
SHIPYARD_STORAGE_CLUSTER_FSTAB=$(<sdv.fstab)
log() {
local level=$1
shift
echo "$(date -u -Ins) - $level - $*"
}
# dump uname immediately
uname -ar
# try to get distrib vars
if [ -e /etc/os-release ]; then
. /etc/os-release
DISTRIB_ID=$ID
DISTRIB_RELEASE=$VERSION_ID
DISTRIB_CODENAME=$VERSION_CODENAME
if [ -z "$DISTRIB_CODENAME" ]; then
if [ "$DISTRIB_ID" == "debian" ] && [ "$DISTRIB_RELEASE" == "9" ]; then
DISTRIB_CODENAME=stretch
fi
fi
else
if [ -e /etc/lsb-release ]; then
. /etc/lsb-release
fi
fi
if [ -z "${DISTRIB_ID+x}" ] || [ -z "${DISTRIB_RELEASE+x}" ]; then
log ERROR "Unknown DISTRIB_ID or DISTRIB_RELEASE."
exit 1
fi
if [ -z "${DISTRIB_CODENAME}" ]; then
log WARNING "Unknown DISTRIB_CODENAME."
fi
DISTRIB_ID=${DISTRIB_ID,,}
DISTRIB_RELEASE=${DISTRIB_RELEASE,,}
DISTRIB_CODENAME=${DISTRIB_CODENAME,,}
# set distribution specific vars
PACKAGER=
USER_MOUNTPOINT=/mnt/resource
SYSTEMD_PATH=/lib/systemd/system
if [ "$DISTRIB_ID" == "ubuntu" ]; then
PACKAGER=apt
USER_MOUNTPOINT=/mnt
elif [ "$DISTRIB_ID" == "debian" ]; then
PACKAGER=apt
elif [[ $DISTRIB_ID == centos* ]] || [ "$DISTRIB_ID" == "rhel" ]; then
PACKAGER=yum
else
PACKAGER=zypper
SYSTEMD_PATH=/usr/lib/systemd/system
fi
if [ "$PACKAGER" == "apt" ]; then
export DEBIAN_FRONTEND=noninteractive
fi
# globals
aad_cloud=
cluster_id=
cluster_name=
cluster_user=
controller_primary=
controller_secondary=
controller_tertiary=
is_primary=0
is_login_node=0
num_controllers=
sc_args=
slurm_state_path=
storage_account=
storage_prefix=
storage_rg=
shipyardversion=
# process command line options
while getopts "h?a:c:i:lm:p:s:u:v:" opt; do
case "$opt" in
h|\?)
echo "shipyard_slurm_master_bootstrap.sh parameters"
echo ""
echo "-a [aad cloud type] AAD cloud type for MSI"
echo "-c [primary:secondary:tertiary] controller hosts"
echo "-i [id] cluster id"
echo "-l is login node"
echo "-m [type:scid] mount storage cluster"
echo "-p [path] state save path"
echo "-s [storage account:resource group:prefix] storage config"
echo "-u [user] cluster username"
echo "-v [version] batch-shipyard version"
echo ""
exit 1
;;
a)
aad_cloud=${OPTARG,,}
;;
c)
IFS=':' read -ra cont <<< "${OPTARG,,}"
controller_primary=${cont[0]}
if [ "$controller_primary" == "$HOSTNAME" ]; then
is_primary=1
fi
controller_secondary=${cont[1]}
controller_tertiary=${cont[2]}
num_controllers=${#cont[@]}
;;
i)
IFS='-' read -ra clus <<< "${OPTARG,,}"
cluster_id=${OPTARG}
cluster_name=${clus[0]}
;;
l)
is_login_node=1
;;
m)
IFS=',' read -ra sc_args <<< "${OPTARG,,}"
;;
p)
slurm_state_path=${OPTARG}
;;
s)
IFS=':' read -ra ss <<< "${OPTARG,,}"
storage_account=${ss[0]}
storage_rg=${ss[1]}
storage_prefix=${ss[2]}
;;
u)
cluster_user=${OPTARG}
;;
v)
shipyardversion=$OPTARG
;;
esac
done
shift $((OPTIND-1))
[ "$1" = "--" ] && shift
check_for_buggy_ntfs_mount() {
# Check to ensure sdb1 mount is not mounted as ntfs
set +e
mount | grep /dev/sdb1 | grep fuseblk
local rc=$?
set -e
if [ $rc -eq 0 ]; then
log ERROR "/dev/sdb1 temp disk is mounted as fuseblk/ntfs"
exit 1
fi
}
execute_command_with_retry() {
local retries=30
set +e
while [ $retries -gt 0 ]; do
"$@"
rc=$?
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Command failed: $*"
exit $rc
fi
sleep 1
done
set -e
}
download_file_as() {
log INFO "Downloading: $1 as $2"
local retries=10
set +e
while [ $retries -gt 0 ]; do
if curl -fSsL "$1" -o "$2"; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not download: $1"
exit 1
fi
sleep 1
done
set -e
}
add_repo() {
local url=$1
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [ "$PACKAGER" == "apt" ]; then
curl -fSsL "$url" | apt-key add -
rc=$?
elif [ "$PACKAGER" == "yum" ]; then
yum-config-manager --add-repo "$url"
rc=$?
elif [ "$PACKAGER" == "zypper" ]; then
zypper addrepo "$url"
rc=$?
fi
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not add repo: $url"
exit 1
fi
sleep 1
done
set -e
}
refresh_package_index() {
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [ "$PACKAGER" == "apt" ]; then
apt-get update
rc=$?
elif [ "$PACKAGER" == "yum" ]; then
yum makecache -y fast
rc=$?
elif [ "$PACKAGER" == "zypper" ]; then
zypper -n --gpg-auto-import-keys ref
rc=$?
fi
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not update package index"
exit 1
fi
sleep 1
done
set -e
}
install_packages() {
set +e
local retries=120
local rc
while [ $retries -gt 0 ]; do
if [ "$PACKAGER" == "apt" ]; then
apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends "$@"
rc=$?
elif [ "$PACKAGER" == "yum" ]; then
yum install -y "$@"
rc=$?
elif [ "$PACKAGER" == "zypper" ]; then
zypper -n in "$@"
rc=$?
fi
if [ $rc -eq 0 ]; then
break
fi
retries=$((retries-1))
if [ $retries -eq 0 ]; then
log ERROR "Could not install packages ($PACKAGER): $*"
exit 1
fi
sleep 1
done
set -e
}
install_docker_host_engine() {
log DEBUG "Installing Docker Host Engine"
# set vars
local srvstart="systemctl start docker.service"
local srvstop="systemctl stop docker.service"
local srvdisable="systemctl disable docker.service"
local srvstatus="systemctl --no-pager status docker.service"
if [ "$PACKAGER" == "apt" ]; then
local repo=https://download.docker.com/linux/"${DISTRIB_ID}"
local gpgkey="${repo}"/gpg
local dockerversion="${DOCKER_CE_PACKAGE_DEBIAN}${DISTRIB_ID}-${DISTRIB_CODENAME}"
local prereq_pkgs="apt-transport-https ca-certificates curl gnupg2 software-properties-common"
elif [ "$PACKAGER" == "yum" ]; then
local repo=https://download.docker.com/linux/centos/docker-ce.repo
local dockerversion="${DOCKER_CE_PACKAGE_CENTOS}"
local prereq_pkgs="yum-utils device-mapper-persistent-data lvm2"
elif [ "$PACKAGER" == "zypper" ]; then
if [[ "$DISTRIB_RELEASE" == 12-sp3* ]]; then
local repodir=SLE_12_SP3
fi
local repo="http://download.opensuse.org/repositories/Virtualization:containers/${repodir}/Virtualization:containers.repo"
local dockerversion="${DOCKER_CE_PACKAGE_SLES}"
fi
# refresh package index
refresh_package_index
# install required software first
# shellcheck disable=SC2086
install_packages $prereq_pkgs
if [ "$PACKAGER" == "apt" ]; then
# add gpgkey for repo
add_repo "$gpgkey"
# add repo
add-apt-repository "deb [arch=amd64] $repo $(lsb_release -cs) stable"
else
add_repo "$repo"
fi
# refresh index
refresh_package_index
# install docker engine
install_packages "$dockerversion"
# disable docker from auto-start due to temp disk issues
$srvstop
$srvdisable
# ensure docker daemon modifications are idempotent
rm -rf /var/lib/docker
mkdir -p /etc/docker
echo "{ \"data-root\": \"$USER_MOUNTPOINT/docker\", \"hosts\": [ \"unix:///var/run/docker.sock\", \"tcp://127.0.0.1:2375\" ] }" > /etc/docker/daemon.json
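    # the rendered /etc/docker/daemon.json (e.g. with USER_MOUNTPOINT=/mnt on Ubuntu) is:
    #   { "data-root": "/mnt/docker",
    #     "hosts": [ "unix:///var/run/docker.sock", "tcp://127.0.0.1:2375" ] }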
# ensure no options are specified after dockerd
sed -i 's|^ExecStart=/usr/bin/dockerd.*|ExecStart=/usr/bin/dockerd|' "${SYSTEMD_PATH}"/docker.service
systemctl daemon-reload
$srvstart
$srvstatus
docker info
log INFO "Docker Host Engine installed"
}
install_storage_cluster_dependencies() {
if [ -z "$sc_args" ]; then
return
fi
log DEBUG "Installing storage cluster dependencies"
if [ "$PACKAGER" == "zypper" ]; then
if [[ "$DISTRIB_RELEASE" == 12-sp3* ]]; then
local repodir=SLE_12_SP3
fi
local repo="http://download.opensuse.org/repositories/filesystems/${repodir}/filesystems.repo"
fi
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
server_type=${sc[0]}
if [ "$server_type" == "nfs" ]; then
if [ "$PACKAGER" == "apt" ]; then
install_packages nfs-common nfs4-acl-tools
elif [ "$PACKAGER" == "yum" ] ; then
install_packages nfs-utils nfs4-acl-tools
systemctl enable rpcbind
systemctl start rpcbind
elif [ "$PACKAGER" == "zypper" ]; then
install_packages nfs-client nfs4-acl-tools
systemctl enable rpcbind
systemctl start rpcbind
fi
elif [ "$server_type" == "glusterfs" ]; then
if [ "$PACKAGER" == "apt" ]; then
if [ "$DISTRIB_ID" == "debian" ]; then
add_repo "http://download.gluster.org/pub/gluster/glusterfs/${GLUSTER_VERSION_DEBIAN}/rsa.pub"
else
add-apt-repository ppa:gluster/glusterfs-${GLUSTER_VERSION_DEBIAN}
fi
install_packages glusterfs-client acl
elif [ "$PACKAGER" == "yum" ] ; then
install_packages centos-release-gluster${GLUSTER_VERSION_CENTOS}
install_packages glusterfs-server acl
elif [ "$PACKAGER" == "zypper" ]; then
add_repo "$repo"
"$PACKAGER" -n --gpg-auto-import-keys ref
install_packages glusterfs acl
fi
else
log ERROR "Unknown file server type ${sc[0]} for ${sc[1]}"
exit 1
fi
done
log INFO "Storage cluster dependencies installed"
}
process_fstab_entry() {
local desc=$1
local fstab_entry=$2
IFS=' ' read -ra fs <<< "$fstab_entry"
local mountpoint="${fs[1]}"
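    # fstab_entry is a standard fstab line, e.g. for an NFS storage cluster (illustrative):
    #   "10.0.0.4:/data /shared nfs4 _netdev,noauto 0 2" -> mountpoint=/shared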
log INFO "Creating host directory for $desc at $mountpoint"
mkdir -p "$mountpoint"
chmod 777 "$mountpoint"
    log INFO "Adding $mountpoint to fstab"
    echo "$fstab_entry" >> /etc/fstab
    tail -n1 /etc/fstab
    log INFO "Mounting $mountpoint"
local START
START=$(date -u +"%s")
set +e
while :
do
if mount "$mountpoint"; then
break
else
local NOW
NOW=$(date -u +"%s")
local DIFF=$(((NOW-START)/60))
# fail after 5 minutes of attempts
if [ $DIFF -ge 5 ]; then
                log ERROR "Could not mount $desc on $mountpoint"
exit 1
fi
sleep 1
fi
done
set -e
log INFO "$mountpoint mounted."
}
process_storage_clusters() {
if [ -n "$sc_args" ]; then
log DEBUG "Processing storage clusters"
IFS='#' read -ra fstabs <<< "$SHIPYARD_STORAGE_CLUSTER_FSTAB"
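        # SHIPYARD_STORAGE_CLUSTER_FSTAB (read from sdv.fstab above) holds one fstab line
        # per storage cluster in sc_args order, '#'-delimited; swapping ,noauto for ,auto
        # below ensures the share is also remounted automatically at boot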
i=0
for sc_arg in "${sc_args[@]}"; do
IFS=':' read -ra sc <<< "$sc_arg"
fstab_entry="${fstabs[$i]//,noauto/,auto}"
process_fstab_entry "$sc_arg" "$fstab_entry"
i=$((i + 1))
done
log INFO "Storage clusters processed"
fi
}
install_systemd_unit_file() {
cat << EOF > /etc/systemd/system/batch-shipyard-slurm.service
[Unit]
Description=Batch Shipyard Slurm Helper
After=network.target network-online.target
Wants=network-online.target
[Service]
Type=simple
TimeoutStartSec=0
Restart=always
LimitNOFILE=65536
LimitCORE=infinity
OOMScoreAdjust=-100
IOSchedulingClass=best-effort
IOSchedulingPriority=0
Environment=LC_CTYPE=en_US.UTF-8 PYTHONIOENCODING=utf-8
WorkingDirectory=/var/batch-shipyard
ExecStart=${SHIPYARD_SLURM_PY} daemon --conf ${SHIPYARD_CONF_FILE}
StandardOutput=null
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
log INFO "systemd unit file installed"
}
create_batch_shipyard_slurm_config() {
mkdir -p ${SHIPYARD_VAR_DIR}
chmod 755 ${SHIPYARD_VAR_DIR}
# get timeouts
local resume_timeout
local suspend_timeout
resume_timeout=$(grep '^ResumeTimeout=' slurm.conf | cut -d '=' -f 2)
suspend_timeout=$(grep '^SuspendTimeout=' slurm.conf | cut -d '=' -f 2)
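    # note: with the slurm.conf template in this repo both timeouts resolve to 1200 seconds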
cat > ${SHIPYARD_CONF_FILE} << EOF
{
"aad_cloud": "$aad_cloud",
"storage": {
"account": "$storage_account",
"resource_group": "$storage_rg",
"entity_prefix": "$storage_prefix",
"queues": {
"action": "$cluster_id"
},
"azfile_mount_dir": "$AZFILE_MOUNT_DIR"
},
"cluster_id": "$cluster_id",
"cluster_name": "$cluster_name",
"logging_id": "$HOSTNAME",
"is_primary": "$is_primary",
"timeouts": {
"resume": $resume_timeout,
"suspend": $suspend_timeout
},
"batch_shipyard": {
"var_path": "$SHIPYARD_VAR_DIR",
"version": "$shipyardversion"
}
}
EOF
chmod 600 "$SHIPYARD_CONF_FILE"
log INFO "Batch Shipyard slurm config created"
}
log INFO "Bootstrap start"
echo "Configuration:"
echo "--------------"
echo "OS Distribution: $DISTRIB_ID $DISTRIB_RELEASE $DISTRIB_CODENAME"
echo "Hostname: $HOSTNAME"
echo "Batch Shipyard Version: $shipyardversion"
echo "AAD cloud: $aad_cloud"
echo "Storage: $storage_account:$storage_rg:$storage_prefix"
echo "Storage cluster mount: ${sc_args[*]}"
echo "Cluster Id: $cluster_id"
echo "Cluster Name: $cluster_name"
echo "Cluster user: $cluster_user"
echo "Controllers: $controller_primary backups($controller_secondary,$controller_tertiary)"
echo "Number of controllers: $num_controllers"
echo "Is Primary Controller: $is_primary"
echo "Is Login node: $is_login_node"
echo ""
if [ "$is_primary" -eq 1 ] && [ "$is_login_node" -eq 1 ]; then
log ERROR "Cannot be designated as primary and login simultaneously"
exit 1
fi
# check sdb1 mount
check_for_buggy_ntfs_mount
# set sudoers to not require tty
sed -i 's/^Defaults[ ]*requiretty/# Defaults requiretty/g' /etc/sudoers
# install docker
install_docker_host_engine
# install required base software
install_packages build-essential libffi-dev libssl-dev python3-dev
curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3
# check or install dependencies for storage cluster mount
if [ -n "$sc_args" ]; then
install_storage_cluster_dependencies
fi
# process and mount storage clusters
process_storage_clusters
# write batch shipyard config
create_batch_shipyard_slurm_config
# align uid/gid/permissions to batch pool
usermod -u 2000 "$cluster_user"
groupmod -n _azbatchgrp "$cluster_user"
chown -R "${cluster_user}:_azbatchgrp" "/home/${cluster_user}"
useradd -o -u 1000 -N -g 1000 -p '!' -s /bin/bash -m -d /home/_azbatch _azbatch
# install program deps and copy main program
pip3 install -r requirements.txt
chmod 755 slurm.py
cp -f slurm.py "$SHIPYARD_SLURM_PY"
# add slurm user
groupadd -g 64030 slurm
useradd -u 64030 -N -g 64030 -p '!' -s /bin/bash -m -d /home/slurm slurm
slurm_uid=$(id -u slurm)
slurm_gid=$(id -g slurm)
# install all slurm-related packages
if [ "$is_login_node" -eq 1 ]; then
install_packages munge
else
install_packages munge
if [ "$is_primary" -eq 1 ]; then
install_packages mariadb-server libmysqlclient20 libmariadb3
fi
fi
slurm_docker_image="alfpark/slurm:${SLURM_VERSION}-${DISTRIB_ID}-${DISTRIB_RELEASE}"
docker pull "$slurm_docker_image"
mkdir -p /tmp/slurm
docker run --rm -v /tmp/slurm:/tmp/slurm "$slurm_docker_image" \
/bin/sh -c 'cp -r /root/* /tmp/slurm/'
dpkg -i "/tmp/slurm/slurm-${SLURM_VERSION}_1.0_amd64.deb"
if [ "$is_login_node" -eq 0 ]; then
cp /tmp/slurm/slurmctld.service /etc/systemd/system/
if [ "$is_primary" -eq 1 ]; then
cp /tmp/slurm/slurmdbd.service /etc/systemd/system/
fi
fi
rm -rf /tmp/slurm
docker rmi "$slurm_docker_image"
mkdir -p "$SLURM_CONF_DIR" /var/spool/slurm /var/log/slurm
chown -R slurm:slurm /var/spool/slurm /var/log/slurm
cat << EOF > "/etc/ld.so.conf.d/slurm.conf"
/usr/lib/slurm
EOF
ldconfig
ldconfig -p | grep libslurmfull
systemctl daemon-reload
# retrieve storage account key and endpoint
echo "Retrieving storage account credentials for fileshare"
sa=$(${SHIPYARD_SLURM_PY} sakey --conf "${SHIPYARD_CONF_FILE}")
IFS=' ' read -ra ss <<< "${sa}"
storage_key=${ss[0]}
storage_ep=${ss[1]}
storage_ep="${storage_ep%"${storage_ep##*[![:space:]]}"}"
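# sakey prints "<storage key> <endpoint suffix>"; the suffix is typically core.windows.net
# for public Azure, so the share below resolves to //<account>.file.core.windows.net/<share>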
# mount Azure file share
cat << EOF > "/root/.azfile_creds"
username=$storage_account
password=$storage_key
EOF
chmod 600 /root/.azfile_creds
mkdir -p "$AZFILE_MOUNT_DIR"
chmod 755 "$AZFILE_MOUNT_DIR"
share="${storage_prefix}slurm"
echo "//${storage_account}.file.${storage_ep}/${share} ${AZFILE_MOUNT_DIR} cifs vers=3.0,credentials=/root/.azfile_creds,uid=${slurm_uid},gid=${slurm_gid},_netdev,serverino 0 0" >> /etc/fstab
mount "$AZFILE_MOUNT_DIR"
azfile_cluster_path="${AZFILE_MOUNT_DIR}/${cluster_id}"
mkdir -p "$azfile_cluster_path"
slurm_log_path="${azfile_cluster_path}/slurm/logs"
mkdir -p "$slurm_log_path"
# create resume/suspend scripts
if [ "$is_login_node" -eq 0 ]; then
resume_script="${SHIPYARD_VAR_DIR}/resume.sh"
resume_fail_script="${SHIPYARD_VAR_DIR}/resume-fail.sh"
suspend_script="${SHIPYARD_VAR_DIR}/suspend.sh"
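    # note on the heredocs below: quoted delimiters ('EOF') keep $hostfile and friends
    # literal so they expand when Slurm invokes the generated scripts, while unquoted
    # delimiters (EOF) expand ${SHIPYARD_SLURM_PY}, ${SHIPYARD_CONF_FILE} and the log
    # path now, at bootstrap time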
cat > ${resume_script} << 'EOF'
#!/usr/bin/env bash
hostfile="$(mktemp /tmp/slurm_resume.XXXXXX)"
hosts=$(scontrol show hostnames $1)
touch $hostfile
for host in $hosts; do
part=$(sinfo -h -n $host -N -o "%R")
echo "$host $part" >> $hostfile
done
EOF
cat >> ${resume_script} << EOF
${SHIPYARD_SLURM_PY} resume --conf ${SHIPYARD_CONF_FILE} \\
EOF
cat >> ${resume_script} << 'EOF'
--hostfile $hostfile \
EOF
cat >> ${resume_script} << EOF
>> ${slurm_log_path}/power-save.log 2>&1
EOF
cat >> ${resume_script} << 'EOF'
ec=$?
rm -f $hostfile
exit $ec
EOF
cat > ${resume_fail_script} << 'EOF'
#!/usr/bin/env bash
hostfile="$(mktemp /tmp/slurm_resume_fail.XXXXXX)"
hosts=$(scontrol show hostnames $1)
touch $hostfile
for host in $hosts; do
part=$(sinfo -h -n $host -N -o "%R")
echo "$host $part" >> $hostfile
done
EOF
cat >> ${resume_fail_script} << EOF
${SHIPYARD_SLURM_PY} resume-fail --conf ${SHIPYARD_CONF_FILE} \\
EOF
cat >> ${resume_fail_script} << 'EOF'
--hostfile $hostfile \
EOF
cat >> ${resume_fail_script} << EOF
>> ${slurm_log_path}/power-save.log 2>&1
EOF
cat >> ${resume_fail_script} << 'EOF'
ec=$?
rm -f $hostfile
exit $ec
EOF
cat > ${suspend_script} << 'EOF'
#!/usr/bin/env bash
hostfile="$(mktemp /tmp/slurm_suspend.XXXXXX)"
scontrol show hostnames $1 > $hostfile
EOF
cat >> ${suspend_script} << EOF
${SHIPYARD_SLURM_PY} suspend --conf ${SHIPYARD_CONF_FILE} \\
EOF
cat >> ${suspend_script} << 'EOF'
--hostfile $hostfile \
EOF
cat >> ${suspend_script} << EOF
>> ${slurm_log_path}/power-save.log 2>&1
EOF
cat >> ${suspend_script} << 'EOF'
ec=$?
rm -f $hostfile
exit $ec
EOF
chmod 755 "${resume_script}" "${resume_fail_script}" "${suspend_script}"
fi
chown -R slurm:slurm "${SHIPYARD_VAR_DIR}"
# configure munge
shared_munge_key_path="${azfile_cluster_path}/munge"
shared_munge_key="${shared_munge_key_path}/munge.key"
# export munge key to storage
if [ "$is_primary" -eq 1 ]; then
munge -n | unmunge
mkdir -p "$shared_munge_key_path"
cp -f /etc/munge/munge.key "$shared_munge_key"
# ensure munge key is "marked" read/write to prevent read-only deletion failures
chmod 660 "$shared_munge_key"
else
# poll for munge key
echo "Waiting for primary munge key"
while [ ! -s "$shared_munge_key" ]; do
sleep 1
done
cp -f "$shared_munge_key" /etc/munge/munge.key
chmod 400 /etc/munge/munge.key
chown munge:munge /etc/munge/munge.key
munge -n | unmunge
fi
systemctl enable munge
systemctl restart munge
systemctl --no-pager status munge
# start mariadb and prepare database
if [ "$is_primary" -eq 1 ]; then
systemctl enable mariadb
systemctl start mariadb
systemctl --no-pager status mariadb
# create db table
chmod 600 slurmdb.sql
cp slurmdb.sql "${SLURM_CONF_DIR}/"
# shellcheck disable=SC2002
cat "${SLURM_CONF_DIR}/slurmdb.sql" | mysql -u root
fi
# copy and modify configuration files
if [ "$is_primary" -eq 1 ]; then
# create state save location
mkdir -p "${slurm_state_path}"
chown -R slurm:slurm "${slurm_state_path}"
chmod 750 "${slurm_state_path}"
cp slurm.conf "${SLURM_CONF_DIR}/"
sed -i "s|{SHIPYARD_VAR_DIR}|${SHIPYARD_VAR_DIR}|g" "${SLURM_CONF_DIR}/slurm.conf"
sed -i "s|{SLURM_LOG_PATH}|${slurm_log_path}|g" "${SLURM_CONF_DIR}/slurm.conf"
sed -i "s|{HOSTNAME}|${HOSTNAME}|g" "${SLURM_CONF_DIR}/slurm.conf"
sed -i "s|{SLURMCTLD_STATE_SAVE_PATH}|${slurm_state_path}|g" "${SLURM_CONF_DIR}/slurm.conf"
sed -i "s|{SLURMCTLD_HOST_PRIMARY}|${controller_primary}|g" "${SLURM_CONF_DIR}/slurm.conf"
if [ -n "$controller_secondary" ]; then
sed -i "s|^#{SLURMCTLD_HOST_SECONDARY}|SlurmctldHost=${controller_secondary}|g" "${SLURM_CONF_DIR}/slurm.conf"
fi
if [ -n "$controller_tertiary" ]; then
sed -i "s|^#{SLURMCTLD_HOST_TERTIARY}|SlurmctldHost=${controller_tertiary}|g" "${SLURM_CONF_DIR}/slurm.conf"
fi
cp slurmdbd.conf "${SLURM_CONF_DIR}/"
sed -i "s|{SLURM_LOG_PATH}|${slurm_log_path}|g" "${SLURM_CONF_DIR}/slurmdbd.conf"
sed -i "s|{HOSTNAME}|${HOSTNAME}|g" "${SLURM_CONF_DIR}/slurmdbd.conf"
chown slurm:slurm "${SLURM_CONF_DIR}/slurm.conf"
chmod 644 "${SLURM_CONF_DIR}/slurm.conf"
chmod 600 "${SLURM_CONF_DIR}/slurmdbd.conf"
fi
# start slurm db service
if [ "$is_primary" -eq 1 ]; then
systemctl enable slurmdbd
systemctl start slurmdbd
systemctl --no-pager status slurmdbd
# delay before executing as dbd may not be fully up
sleep 5
# initialize account in db
execute_command_with_retry sacctmgr -i add cluster "$cluster_name"
execute_command_with_retry sacctmgr -i add account compute-account description="Compute accounts" Organization="$cluster_name"
execute_command_with_retry sacctmgr -i create user "$cluster_user" account=compute-account adminlevel=None
fi
# copy config and block for secondary/tertiary
# start slurm controller service
slurm_conf_azfile_path="${azfile_cluster_path}/slurm/conf"
if [ "$is_primary" -eq 1 ]; then
systemctl enable slurmctld
systemctl start slurmctld
systemctl --no-pager status slurmctld
mkdir -p "$slurm_conf_azfile_path"
cp "${SLURM_CONF_DIR}/slurm.conf" "${slurm_conf_azfile_path}/"
# ensure slurm conf is "marked" read/write to prevent read-only deletion failures
chmod 660 "${slurm_conf_azfile_path}/slurm.conf"
else
echo "Waiting for primary Slurm configuration file"
while [ ! -s "${slurm_conf_azfile_path}/slurm.conf" ]; do
sleep 1
done
echo "Slurm configuration file found."
cp -f "${slurm_conf_azfile_path}/slurm.conf" "${SLURM_CONF_DIR}/slurm.conf"
chown slurm:slurm "${SLURM_CONF_DIR}/slurm.conf"
chmod 644 "${SLURM_CONF_DIR}/slurm.conf"
if [ "$is_login_node" -eq 0 ]; then
systemctl enable slurmctld
systemctl start slurmctld
systemctl --no-pager status slurmctld
fi
fi
# start daemon
if [ "$is_login_node" -eq 0 ]; then
# setup systemd unit file
install_systemd_unit_file
# start batch shipyard slurm daemon mode
systemctl enable batch-shipyard-slurm
systemctl start batch-shipyard-slurm
systemctl --no-pager status batch-shipyard-slurm
fi
log INFO "Bootstrap completed"

@ -56,6 +56,7 @@ class CliContext(object):
"""CliContext class: holds context for CLI commands"""
def __init__(self):
"""Ctor for CliContext"""
self.cleanup = True
self.show_config = False
self.verbose = False
self.yes = False
@ -67,6 +68,7 @@ class CliContext(object):
self.conf_fs = None
self.conf_monitor = None
self.conf_federation = None
self.conf_slurm = None
# clients
self.batch_mgmt_client = None
self.batch_client = None
@ -221,6 +223,50 @@ class CliContext(object):
convoy.clients.create_storage_clients()
self._cleanup_after_initialize()
def initialize_for_slurm(self, init_batch=False):
# type: (CliContext, bool) -> None
"""Initialize context for slurm commands
:param CliContext self: this
:param bool init_batch: initialize batch
"""
self._read_credentials_config()
self._set_global_cli_options()
if self.verbose:
logger.debug('initializing for slurm actions')
self._init_keyvault_client()
self._init_config(
skip_global_config=False, skip_pool_config=not init_batch,
skip_monitor_config=True, skip_federation_config=True,
fs_storage=not init_batch)
self.conf_slurm = self._form_conf_path(
self.conf_slurm, 'slurm')
if self.conf_slurm is None:
raise ValueError('slurm conf file was not specified')
self.conf_slurm = CliContext.ensure_pathlib_conf(
self.conf_slurm)
convoy.validator.validate_config(
convoy.validator.ConfigType.Slurm, self.conf_slurm)
self._read_config_file(self.conf_slurm)
self._ensure_credentials_section('storage')
self._ensure_credentials_section('slurm')
self.auth_client, self.resource_client, self.compute_client, \
self.network_client, self.storage_mgmt_client, \
self.batch_mgmt_client, self.batch_client = \
convoy.clients.create_all_clients(
self, batch_clients=init_batch)
# inject storage account keys if via aad
convoy.fleet.fetch_storage_account_keys_from_aad(
self.storage_mgmt_client, self.config, fs_storage=not init_batch)
# call populate global settings again to adjust for slurm storage
sc = convoy.settings.slurm_credentials_storage(self.config)
convoy.fleet.populate_global_settings(
self.config, fs_storage=not init_batch, sc=sc)
self.blob_client, self.table_client, self.queue_client = \
convoy.clients.create_storage_clients()
self._cleanup_after_initialize()
def initialize_for_keyvault(self):
# type: (CliContext) -> None
"""Initialize context for keyvault commands
@ -311,6 +357,8 @@ class CliContext(object):
"""Cleanup after initialize_for_* funcs
:param CliContext self: this
"""
if not self.cleanup:
return
# free conf objects
del self.conf_credentials
del self.conf_fs
@ -319,6 +367,7 @@ class CliContext(object):
del self.conf_jobs
del self.conf_monitor
del self.conf_federation
del self.conf_slurm
# free cli options
del self.verbose
del self.yes
@ -860,6 +909,19 @@ def monitor_option(f):
callback=callback)(f)
def slurm_option(f):
def callback(ctx, param, value):
clictx = ctx.ensure_object(CliContext)
clictx.conf_slurm = value
return value
return click.option(
'--slurm',
expose_value=False,
envvar='SHIPYARD_SLURM_CONF',
help='Slurm config file',
callback=callback)(f)
def _storage_cluster_id_argument(f):
def callback(ctx, param, value):
return value
@ -930,6 +992,12 @@ def federation_options(f):
return f
def slurm_options(f):
f = slurm_option(f)
f = _azure_subscription_id_option(f)
return f
@click.group(context_settings=_CONTEXT_SETTINGS)
@click.version_option(version=convoy.__version__)
@click.pass_context
@ -1019,6 +1087,23 @@ def fs_cluster_add(ctx, storage_cluster_id):
ctx.blob_client, ctx.config, storage_cluster_id)
@cluster.command('orchestrate')
@common_options
@fs_cluster_options
@keyvault_options
@aad_options
@pass_cli_context
def fs_cluster_orchestrate(ctx, storage_cluster_id):
"""Orchestrate a filesystem storage cluster in Azure with the
specified disks"""
ctx.initialize_for_fs()
convoy.fleet.action_fs_disks_add(
ctx.resource_client, ctx.compute_client, ctx.config)
convoy.fleet.action_fs_cluster_add(
ctx.resource_client, ctx.compute_client, ctx.network_client,
ctx.blob_client, ctx.config, storage_cluster_id)
@cluster.command('resize')
@common_options
@fs_cluster_options
@ -2807,6 +2892,180 @@ def fed_jobs_zap(
ctx.blob_client, ctx.config, federation_id, unique_id)
@cli.group()
@pass_cli_context
def slurm(ctx):
"""Slurm on Batch actions"""
pass
@slurm.group()
@pass_cli_context
def ssh(ctx):
"""Slurm SSH actions"""
pass
@ssh.command('controller')
@click.option(
'--offset', help='Controller VM offset')
@click.option(
'--tty', is_flag=True, help='Allocate a pseudo-tty')
@common_options
@slurm_options
@click.argument('command', nargs=-1)
@keyvault_options
@aad_options
@pass_cli_context
def slurm_ssh_controller(ctx, offset, tty, command):
"""Interactively login via SSH to a Slurm controller virtual
machine in Azure"""
ctx.initialize_for_slurm()
convoy.fleet.action_slurm_ssh(
ctx.compute_client, ctx.network_client, None, None, ctx.config,
tty, command, 'controller', offset, None)
@ssh.command('login')
@click.option(
    '--offset', help='Login VM offset')
@click.option(
'--tty', is_flag=True, help='Allocate a pseudo-tty')
@common_options
@slurm_options
@click.argument('command', nargs=-1)
@keyvault_options
@aad_options
@pass_cli_context
def slurm_ssh_login(ctx, offset, tty, command):
"""Interactively login via SSH to a Slurm login/gateway virtual
machine in Azure"""
ctx.initialize_for_slurm()
convoy.fleet.action_slurm_ssh(
ctx.compute_client, ctx.network_client, None, None, ctx.config,
tty, command, 'login', offset, None)
@ssh.command('node')
@click.option(
'--node-name', help='Slurm node name')
@click.option(
'--tty', is_flag=True, help='Allocate a pseudo-tty')
@common_options
@slurm_options
@click.argument('command', nargs=-1)
@keyvault_options
@aad_options
@pass_cli_context
def slurm_ssh_node(ctx, node_name, tty, command):
"""Interactively login via SSH to a Slurm compute node virtual
machine in Azure"""
ctx.initialize_for_slurm(init_batch=True)
if convoy.util.is_none_or_empty(node_name):
raise ValueError('node name must be specified')
convoy.fleet.action_slurm_ssh(
ctx.compute_client, ctx.network_client, ctx.table_client,
ctx.batch_client, ctx.config, tty, command, 'node', None, node_name)
@slurm.group()
@pass_cli_context
def cluster(ctx):
"""Slurm cluster actions"""
pass
@cluster.command('create')
@common_options
@slurm_options
@keyvault_options
@aad_options
@pass_cli_context
def slurm_cluster_create(ctx):
"""Create a Slurm cluster with controllers and login nodes"""
ctx.initialize_for_slurm(init_batch=True)
convoy.fleet.action_slurm_cluster_create(
ctx.auth_client, ctx.resource_client, ctx.compute_client,
ctx.network_client, ctx.blob_client, ctx.table_client,
ctx.queue_client, ctx.batch_client, ctx.config)
@cluster.command('orchestrate')
@click.option(
'--storage-cluster-id', help='Storage cluster id to create')
@common_options
@slurm_options
@batch_options
@keyvault_options
@aad_options
@pass_cli_context
def slurm_cluster_orchestrate(ctx, storage_cluster_id):
"""Orchestrate a Slurm cluster with shared file system and Batch pool"""
if convoy.util.is_not_empty(storage_cluster_id):
ctx.cleanup = False
ctx.initialize_for_fs()
convoy.fleet.action_fs_disks_add(
ctx.resource_client, ctx.compute_client, ctx.config)
convoy.fleet.action_fs_cluster_add(
ctx.resource_client, ctx.compute_client, ctx.network_client,
ctx.blob_client, ctx.config, storage_cluster_id)
ctx.cleanup = True
else:
logger.warning(
'skipping fs cluster orchestration as no storage cluster id '
'was specified')
ctx.initialize_for_slurm(init_batch=True)
convoy.fleet.action_pool_add(
ctx.resource_client, ctx.compute_client, ctx.network_client,
ctx.batch_mgmt_client, ctx.batch_client, ctx.blob_client,
ctx.table_client, ctx.keyvault_client, ctx.config)
convoy.fleet.action_slurm_cluster_create(
ctx.auth_client, ctx.resource_client, ctx.compute_client,
ctx.network_client, ctx.blob_client, ctx.table_client,
ctx.queue_client, ctx.batch_client, ctx.config)
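# illustrative invocation (CLI entry point name and config wiring are assumed):
#   shipyard slurm cluster orchestrate --storage-cluster-id mystoragecluster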
@cluster.command('status')
@common_options
@slurm_options
@keyvault_options
@aad_options
@pass_cli_context
def slurm_cluster_status(ctx):
"""Query status of Slurm controllers and login nodes"""
ctx.initialize_for_slurm()
convoy.fleet.action_slurm_cluster_status(
ctx.compute_client, ctx.network_client, ctx.config)
@cluster.command('destroy')
@click.option(
'--delete-resource-group', is_flag=True,
help='Delete all resources in the Slurm controller resource group')
@click.option(
'--delete-virtual-network', is_flag=True, help='Delete virtual network')
@click.option(
'--generate-from-prefix', is_flag=True,
help='Generate resources to delete from Slurm controller hostname prefix')
@click.option(
'--no-wait', is_flag=True, help='Do not wait for deletion to complete')
@common_options
@slurm_options
@keyvault_options
@aad_options
@pass_cli_context
def slurm_cluster_destroy(
ctx, delete_resource_group, delete_virtual_network,
generate_from_prefix, no_wait):
"""Destroy a Slurm cluster"""
ctx.initialize_for_slurm(init_batch=True)
convoy.fleet.action_slurm_cluster_destroy(
ctx.resource_client, ctx.compute_client, ctx.network_client,
ctx.blob_client, ctx.table_client, ctx.queue_client, ctx.config,
delete_resource_group, delete_virtual_network, generate_from_prefix,
not no_wait)
if __name__ == '__main__':
convoy.util.setup_logger(logger)
cli()

25
slurm/Dockerfile Normal file
@ -0,0 +1,25 @@
# Dockerfile for Azure/batch-shipyard (Slurm)
FROM alpine:3.9
MAINTAINER Fred Park <https://github.com/Azure/batch-shipyard>
# copy in files
COPY slurm.py requirements.txt /opt/batch-shipyard/
# add base packages and python dependencies
RUN apk update \
&& apk add --update --no-cache \
musl build-base python3 python3-dev openssl-dev libffi-dev \
ca-certificates cifs-utils bash \
&& python3 -m pip install --no-cache-dir --upgrade pip \
&& pip3 install --no-cache-dir --upgrade -r /opt/batch-shipyard/requirements.txt \
&& apk del --purge \
build-base python3-dev openssl-dev libffi-dev \
&& rm /var/cache/apk/* \
&& rm -f /opt/batch-shipyard/requirements.txt
# pre-compile files
RUN python3 -m compileall -f /opt/batch-shipyard
# set entrypoint
ENTRYPOINT ["python3", "/opt/batch-shipyard/slurm.py"]
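# illustrative build, run from the repository root (image tag is hypothetical):
#   docker build -t batch-shipyard-slurm slurm/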

8
slurm/requirements.txt Normal file
@ -0,0 +1,8 @@
azure-batch==6.0.0
azure-cosmosdb-table==1.0.5
azure-mgmt-storage==3.1.1
azure-mgmt-resource==2.1.0
azure-storage-queue==1.4.0
msrestazure==0.5.1
python-dateutil==2.7.5
requests==2.21.0

119
slurm/slurm.conf Normal file
@ -0,0 +1,119 @@
#
# See the slurm.conf man page for more information.
#
ClusterName={CLUSTER_NAME}
SlurmctldHost={SLURMCTLD_HOST_PRIMARY}
#{SLURMCTLD_HOST_SECONDARY}
#{SLURMCTLD_HOST_TERTIARY}
SlurmUser=slurm
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
StateSaveLocation={SLURMCTLD_STATE_SAVE_PATH}
#SlurmdSpoolDir=/var/lib/slurm-llnl/slurmd
#SlurmctldPidFile=/var/run/slurm-llnl/slurmctld.pid
#SlurmdPidFile=/var/run/slurm-llnl/slurmd.pid
#PluginDir=/usr/lib/x86_64-linux-gnu/slurm-wlm
SlurmdSpoolDir=/var/spool/slurm/slurmd
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
PluginDir=/usr/lib/slurm
# LOGGING
SlurmctldDebug=5
# slurmctld log file has no %h option, so need to log locally
#SlurmctldLogFile=/var/log/slurm-llnl/slurmctld.log
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdDebug=5
SlurmdLogFile={SLURM_LOG_PATH}/slurmd-%h.log
# PROCESS TRACKING
#ProctrackType=proctrack/pgid
ProctrackType=proctrack/cgroup
SwitchType=switch/none
MpiDefault=none
#FirstJobId=
ReturnToService=1
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=/etc/slurm/prolog.d/*
#Epilog=/etc/slurm/epilog.d/*
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
TaskPlugin=task/affinity,task/cgroup
#TrackWCKey=no
#TmpFS=
#UsePAM=
# TIMERS
SlurmctldTimeout=300
SlurmdTimeout=300
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0
# SCHEDULING
SchedulerType=sched/backfill
#SchedulerAuth=
SelectType=select/linear
#SelectType=select/cons_res
#SelectTypeParameters=CR_Core_Memory,CR_CORE_DEFAULT_DIST_BLOCK,CR_ONE_TASK_PER_CORE
FastSchedule=1
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
JobCompType=jobcomp/none
#JobCompLoc=
# ACCOUNTING
JobAcctGatherType=jobacct_gather/cgroup
#JobAcctGatherFrequency=30
AccountingStorageTRES=gres/gpu
DebugFlags=CPU_Bind,gres
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost={HOSTNAME}
AccountingStoragePort=6819
#AccountingStorageLoc=
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStorageUser=slurm
# POWER SAVE
SuspendProgram={SHIPYARD_VAR_DIR}/suspend.sh
ResumeProgram={SHIPYARD_VAR_DIR}/resume.sh
ResumeFailProgram={SHIPYARD_VAR_DIR}/resume-fail.sh
SuspendTime={IDLE_RECLAIM_TIME_SEC}
SuspendRate=0
ResumeRate=0
SuspendTimeout=1200
ResumeTimeout=1200
#{SUSPEND_EXC_NODES}
# TreeWidth must be at least as large as the maximum node count for cloud nodes
TreeWidth={MAX_NODES}
# GENERIC RESOURCES
#{GRES_TYPES}
# PARTITIONS AND NODES
#{ADDITIONAL_NODES}
#{ADDITIONAL_PARTITIONS}
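# a generated partition entry follows standard Slurm syntax, e.g. (illustrative):
#   PartitionName=partition_1 Nodes=<generated node names> Default=YES MaxTime=INFINITE State=UP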

1472
slurm/slurm.py Executable file

File diff suppressed because it is too large
6
slurm/slurmdb.sql Normal file
@ -0,0 +1,6 @@
create database slurm_acct_db;
create user 'slurm'@'localhost';
set password for 'slurm'@'localhost' = password('{SLURM_DB_PASSWORD}');
grant usage on *.* to 'slurm'@'localhost';
grant all privileges on slurm_acct_db.* to 'slurm'@'localhost';
flush privileges;

44
slurm/slurmdbd.conf Normal file
@ -0,0 +1,44 @@
#
# Example slurmdbd.conf file.
#
# See the slurmdbd.conf man page for more information.
# Archive info
#ArchiveJobs=yes
#ArchiveDir="/tmp"
#ArchiveSteps=yes
#ArchiveScript=
#JobPurge=12
#StepPurge=1
# Authentication info
AuthType=auth/munge
#AuthInfo=/var/run/munge/munge.socket.2
# slurmDBD info
DbdAddr={HOSTNAME}
DbdHost={HOSTNAME}
DbdPort=6819
SlurmUser=slurm
#MessageTimeout=300
#DefaultQOS=normal,standby
#PrivateData=accounts,users,usage,jobs
#TrackWCKey=yes
DebugLevel=4
LogFile={SLURM_LOG_PATH}/slurmdbd-{HOSTNAME}.log
#PidFile=/var/run/slurm-llnl/slurmdbd.pid
#PluginDir=/usr/lib/x86_64-linux-gnu/slurm-wlm
PidFile=/var/run/slurmdbd.pid
PluginDir=/usr/lib/slurm
# Database info
StorageType=accounting_storage/mysql
#StorageHost=localhost
#StoragePort=1234
StoragePass={SLURM_DB_PASSWORD}
StorageUser=slurm
StorageLoc=slurm_acct_db