Parent: c5fa85adcb
Commit: 8a396f0e18
@@ -5,6 +5,8 @@
 ### Added
 - `random` and `file` task factories. See task factory guide for more
 information.
+- Summary statistics: `pool stats` and `jobs stats`. See the usage doc for
+more information.
 - Delete unusable nodes from pool with `--all-unusable` option for
 `pool delnode`
 - CNTK-GPU-Infiniband-IntelMPI recipe
@@ -34,6 +36,8 @@ guide for more information.
 - Job priority support
 - Job migration support
 - Compute node fill type support
+- New commands: `jobs enable` and `jobs disable`. Please see the usage doc
+for more information.
 - From Scratch: Step-by-Step guide
 - Azure Cloud Shell information

convoy/batch.py (309 changed lines)

@@ -45,6 +45,7 @@ import tempfile
 import time
 # non-stdlib imports
 import azure.batch.models as batchmodels
+import dateutil.tz
 # local imports
 from . import autoscale
 from . import crypto
@@ -820,6 +821,178 @@ def del_pool(batch_client, config, pool_id=None):
     return True


+def pool_stats(batch_client, config, pool_id=None):
+    # type: (azure.batch.batch_service_client.BatchServiceClient, dict,
+    #        str) -> None
+    """Get pool stats
+    :param batch_client: The batch client to use.
+    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
+    :param dict config: configuration dict
+    :param str pool_id: pool id
+    """
+    if util.is_none_or_empty(pool_id):
+        pool_id = settings.pool_id(config)
+    try:
+        pool = batch_client.pool.get(
+            pool_id=pool_id,
+            pool_get_options=batchmodels.PoolGetOptions(expand='stats'),
+        )
+    except batchmodels.batch_error.BatchErrorException as ex:
+        if 'The specified pool does not exist' in ex.message.value:
+            logger.error('pool {} does not exist'.format(pool_id))
+            return
+    if pool.stats is not None and pool.stats.usage_stats is not None:
+        usage_stats = '{} * Total core hours: {} (last updated: {})'.format(
+            os.linesep,
+            pool.stats.usage_stats.dedicated_core_time,
+            pool.stats.usage_stats.last_update_time,
+        )
+    else:
+        usage_stats = ''
+    nodes = list(batch_client.compute_node.list(pool_id))
+    nsc = []
+    runnable_nodes = 0
+    for key, value in _node_state_counts(nodes)._asdict().items():
+        if key == 'running' or key == 'idle':
+            runnable_nodes += value
+        nsc.append(' * {}: {}'.format(key, value))
+    node_up_times = []
+    node_alloc_times = []
+    node_start_times = []
+    tasks_run = []
+    tasks_running = []
+    now = datetime.datetime.now(dateutil.tz.tzutc())
+    for node in nodes:
+        if node.last_boot_time is not None:
+            node_up_times.append((now - node.last_boot_time).total_seconds())
+        if (node.start_task_info is not None and
+                node.start_task_info.end_time is not None):
+            node_alloc_times.append(
+                (node.start_task_info.end_time -
+                 node.allocation_time).total_seconds()
+            )
+            node_start_times.append(
+                (node.start_task_info.end_time -
+                 node.last_boot_time).total_seconds()
+            )
+        tasks_run.append(node.total_tasks_run)
+        tasks_running.append(node.running_tasks_count)
+    total_running_tasks = sum(tasks_running)
+    runnable_task_slots = runnable_nodes * pool.max_tasks_per_node
+    total_task_slots = (
+        pool.current_dedicated_nodes + pool.current_low_priority_nodes
+    ) * pool.max_tasks_per_node
+    busy_task_slots_fraction = (
+        0 if total_task_slots == 0 else
+        total_running_tasks / runnable_task_slots
+    )
+    version = 'N/A'
+    for md in pool.metadata:
+        if md.name == settings.get_metadata_version_name():
+            version = md.value
+            break
+    log = [
+        '* Batch Shipyard version: {}'.format(version),
+        '* Total nodes: {}'.format(
+            pool.current_dedicated_nodes + pool.current_low_priority_nodes
+        ),
+        ' * Dedicated nodes: {0} ({1:.1f}% of target){2}'.format(
+            pool.current_dedicated_nodes,
+            100 * (
+                1 if pool.target_dedicated_nodes == 0 else
+                pool.current_dedicated_nodes / pool.target_dedicated_nodes),
+            usage_stats,
+        ),
+        ' * Low Priority nodes: {0} ({1:.1f}% of target)'.format(
+            pool.current_low_priority_nodes,
+            100 * (
+                1 if pool.target_low_priority_nodes == 0 else
+                pool.current_low_priority_nodes /
+                pool.target_low_priority_nodes)
+        ),
+        '* Node states:',
+        os.linesep.join(nsc),
+    ]
+    if len(node_up_times) > 0:
+        log.extend([
+            '* Node uptime:',
+            ' * Mean: {}'.format(
+                datetime.timedelta(
+                    seconds=(sum(node_up_times) / len(node_up_times)))
+            ),
+            ' * Min: {}'.format(
+                datetime.timedelta(seconds=min(node_up_times))
+            ),
+            ' * Max: {}'.format(
+                datetime.timedelta(seconds=max(node_up_times))
+            ),
+        ])
+    if len(node_alloc_times) > 0:
+        log.extend([
+            '* Time taken for node creation to ready:',
+            ' * Mean: {}'.format(
+                datetime.timedelta(
+                    seconds=(sum(node_alloc_times) / len(node_alloc_times)))
+            ),
+            ' * Min: {}'.format(
+                datetime.timedelta(seconds=min(node_alloc_times))
+            ),
+            ' * Max: {}'.format(
+                datetime.timedelta(seconds=max(node_alloc_times))
+            ),
+        ])
+    if len(node_start_times) > 0:
+        log.extend([
+            '* Time taken for last boot startup (includes prep):',
+            ' * Mean: {}'.format(
+                datetime.timedelta(
+                    seconds=(sum(node_start_times) / len(node_start_times)))
+            ),
+            ' * Min: {}'.format(
+                datetime.timedelta(seconds=min(node_start_times))
+            ),
+            ' * Max: {}'.format(
+                datetime.timedelta(seconds=max(node_start_times))
+            ),
+        ])
+    if len(tasks_running) > 0:
+        log.extend([
+            '* Running tasks:',
+            ' * Sum: {}'.format(total_running_tasks),
+            ' * Mean: {}'.format(total_running_tasks / len(tasks_running)),
+            ' * Min: {}'.format(min(tasks_running)),
+            ' * Max: {}'.format(max(tasks_running)),
+        ])
+    if len(tasks_run) > 0:
+        log.extend([
+            '* Total tasks run:',
+            ' * Sum: {}'.format(sum(tasks_run)),
+            ' * Mean: {}'.format(sum(tasks_run) / len(tasks_run)),
+            ' * Min: {}'.format(min(tasks_run)),
+            ' * Max: {}'.format(max(tasks_run)),
+        ])
+    log.extend([
+        '* Task scheduling slots:',
+        ' * Busy: {0} ({1:.2f}% of runnable)'.format(
+            total_running_tasks, 100 * busy_task_slots_fraction
+        ),
+        ' * Available: {0} ({1:.2f}% of runnable)'.format(
+            runnable_task_slots - total_running_tasks,
+            100 * (1 - busy_task_slots_fraction)
+        ),
+        ' * Runnable: {0} ({1:.2f}% of total)'.format(
+            runnable_task_slots,
+            100 * (
+                runnable_task_slots / total_task_slots
+                if total_task_slots > 0 else 0
+            ),
+        ),
+        ' * Total: {}'.format(total_task_slots),
+    ])
+    logger.info('statistics summary for pool {}{}{}'.format(
+        pool_id, os.linesep, os.linesep.join(log)))
+
+
 def pool_autoscale_disable(batch_client, config):
     # type: (batch.BatchServiceClient, dict) -> None
     """Enable autoscale formula
@@ -1069,6 +1242,131 @@ def update_job_with_pool(batch_client, config, jobid=None, poolid=None):
         job_id, poolid))


+def job_stats(batch_client, config, jobid=None):
+    # type: (azure.batch.batch_service_client.BatchServiceClient, dict,
+    #        str) -> None
+    """Job stats
+    :param batch_client: The batch client to use.
+    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
+    :param dict config: configuration dict
+    :param str jobid: job id to query
+    """
+    if jobid is not None:
+        try:
+            job = batch_client.job.get(
+                job_id=jobid,
+                job_get_options=batchmodels.JobGetOptions(expand='stats'),
+            )
+        except batchmodels.batch_error.BatchErrorException as ex:
+            if 'The specified job does not exist' in ex.message.value:
+                raise RuntimeError('job {} does not exist'.format(jobid))
+        jobs = [job]
+    else:
+        jobs = list(batch_client.job.list(
+            job_list_options=batchmodels.JobListOptions(expand='stats')))
+    job_count = 0
+    job_times = []
+    task_times = []
+    task_wall_times = []
+    task_counts = batchmodels.TaskCounts(0, 0, 0, 0, 0, 'validated')
+    total_tasks = 0
+    for job in jobs:
+        job_count += 1
+        # get task counts
+        tc = batch_client.job.get_task_counts(job_id=job.id)
+        task_counts.active += tc.active
+        task_counts.running += tc.running
+        task_counts.completed += tc.completed
+        task_counts.succeeded += tc.succeeded
+        task_counts.failed += tc.failed
+        total_tasks += tc.active + tc.running + tc.completed
+        if (tc.validation_status !=
+                batchmodels.TaskCountValidationStatus.validated):
+            task_counts.validation_status = tc.validation_status
+        if job.execution_info.end_time is not None:
+            job_times.append(
+                (job.execution_info.end_time -
+                 job.execution_info.start_time).total_seconds())
+        # get task-level execution info
+        tasks = batch_client.task.list(
+            job_id=job.id,
+            task_list_options=batchmodels.TaskListOptions(
+                filter='(state eq \'running\') or (state eq \'completed\')',
+                select='id,state,stats,executionInfo',
+            ))
+        for task in tasks:
+            if task.stats is not None:
+                task_wall_times.append(
+                    task.stats.wall_clock_time.total_seconds())
+            if (task.execution_info is not None and
+                    task.execution_info.end_time is not None):
+                task_times.append(
+                    (task.execution_info.end_time -
+                     task.execution_info.start_time).total_seconds())
+    log = [
+        '* Total jobs: {}'.format(job_count),
+        '* Total tasks: {} ({})'.format(
+            total_tasks, task_counts.validation_status
+        ),
+        ' * Active: {}'.format(task_counts.active),
+        ' * Running: {}'.format(task_counts.running),
+        ' * Completed: {}'.format(task_counts.completed),
+        ' * Succeeded: {0} ({1:.2f}% of completed)'.format(
+            task_counts.succeeded,
+            100 * task_counts.succeeded / task_counts.completed
+            if task_counts.completed > 0 else 0
+        ),
+        ' * Failed: {0} ({1:.2f}% of completed)'.format(
+            task_counts.failed,
+            100 * task_counts.failed / task_counts.completed
+            if task_counts.completed > 0 else 0
+        ),
+    ]
+    if len(job_times) > 0:
+        log.extend([
+            '* Job creation to completion time:',
+            ' * Mean: {}'.format(
+                datetime.timedelta(seconds=(sum(job_times) / len(job_times)))
+            ),
+            ' * Min: {}'.format(
+                datetime.timedelta(seconds=min(job_times))
+            ),
+            ' * Max: {}'.format(
+                datetime.timedelta(seconds=max(job_times))
+            ),
+        ])
+    if len(task_times) > 0:
+        log.extend([
+            '* Task end-to-end time:',
+            ' * Mean: {}'.format(
+                datetime.timedelta(seconds=(sum(task_times) / len(task_times)))
+            ),
+            ' * Min: {}'.format(
+                datetime.timedelta(seconds=min(task_times))
+            ),
+            ' * Max: {}'.format(
+                datetime.timedelta(seconds=max(task_times))
+            ),
+        ])
+    if len(task_wall_times) > 0:
+        log.extend([
+            '* Task command walltime (running and completed):',
+            ' * Mean: {}'.format(
+                datetime.timedelta(
+                    seconds=(sum(task_wall_times) / len(task_wall_times)))
+            ),
+            ' * Min: {}'.format(
+                datetime.timedelta(seconds=min(task_wall_times))
+            ),
+            ' * Max: {}'.format(
+                datetime.timedelta(seconds=max(task_wall_times))
+            ),
+        ])
+    logger.info('statistics summary for {}{}{}'.format(
+        'job {}'.format(jobid) if jobid is not None else 'all jobs',
+        os.linesep, os.linesep.join(log)))
+
+
 def disable_jobs(
         batch_client, config, disable_tasks_action, jobid=None,
         disabling_state_ok=False, term_tasks=False):
@@ -1661,15 +1959,17 @@ def terminate_tasks(
         raise


-def list_nodes(batch_client, config, nodes=None):
-    # type: (batch.BatchServiceClient, dict, list) -> None
+def list_nodes(batch_client, config, pool_id=None, nodes=None):
+    # type: (batch.BatchServiceClient, dict, str, list) -> None
     """Get a list of nodes
     :param batch_client: The batch client to use.
     :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
     :param dict config: configuration dict
-    :param lsit nodes: list of nodes
+    :param str pool_id: pool id
+    :param list nodes: list of nodes
     """
-    pool_id = settings.pool_id(config)
+    if util.is_none_or_empty(pool_id):
+        pool_id = settings.pool_id(config)
     logger.debug('listing nodes for pool {}'.format(pool_id))
     if nodes is None:
         nodes = batch_client.compute_node.list(pool_id)
@@ -2325,7 +2625,6 @@ def check_jobs_for_auto_pool(config):
         else:
             autopool.append(True)
     if autopool.count(False) == len(autopool):
-        logger.debug('autopool not detected for jobs')
         return False
     elif autopool.count(True) == len(autopool):
         logger.debug('autopool detected for jobs')
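
The task scheduling slot figures reported by the new `pool_stats` function above reduce to a few lines of arithmetic. Below is a minimal, self-contained sketch of that calculation; the function name, inputs, and sample numbers are illustrative only and not part of the commit.

```python
# Illustrative sketch of the slot arithmetic used by pool_stats above.
# Plain integers stand in for values that the real code reads from the
# Batch pool object and the per-node state counts.
def slot_summary(dedicated, low_priority, runnable_nodes, max_tasks_per_node,
                 running_tasks):
    total_slots = (dedicated + low_priority) * max_tasks_per_node
    runnable_slots = runnable_nodes * max_tasks_per_node
    # guard the denominator to keep this sketch division-safe
    busy_fraction = 0 if runnable_slots == 0 else running_tasks / runnable_slots
    return {
        'busy': running_tasks,
        'available': runnable_slots - running_tasks,
        'runnable': runnable_slots,
        'total': total_slots,
        'busy_pct_of_runnable': 100 * busy_fraction,
    }


# 6 nodes (4 dedicated + 2 low priority), 2 slots each, 5 runnable nodes,
# 7 running tasks -> 7 busy, 3 available, 10 runnable, 12 total slots
print(slot_summary(4, 2, 5, 2, 7))
```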

convoy/fleet.py

@@ -2509,6 +2509,17 @@ def action_pool_listimages(batch_client, config):
     _list_docker_images(batch_client, config)


+def action_pool_stats(batch_client, config, pool_id):
+    # type: (batchsc.BatchServiceClient, dict, str) -> None
+    """Action: Pool Stats
+    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
+        batch client
+    :param dict config: configuration dict
+    :param str pool_id: pool id
+    """
+    batch.pool_stats(batch_client, config, pool_id=pool_id)
+
+
 def action_pool_autoscale_disable(batch_client, config):
     # type: (batchsc.BatchServiceClient, dict, str, str, bool) -> None
     """Action: Pool Autoscale Disable
@@ -2867,6 +2878,17 @@ def action_jobs_enable(batch_client, config, jobid):
     batch.enable_jobs(batch_client, config, jobid=jobid)


+def action_jobs_stats(batch_client, config, job_id):
+    # type: (batchsc.BatchServiceClient, dict, str) -> None
+    """Action: Jobs Stats
+    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
+        batch client
+    :param dict config: configuration dict
+    :param str job_id: job id
+    """
+    batch.job_stats(batch_client, config, jobid=job_id)
+
+
 def action_storage_del(
         blob_client, queue_client, table_client, config, clear_tables, poolid):
     # type: (azureblob.BlockBlobService, azurequeue.QueueService,
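
The two action wrappers above can also be driven directly from Python rather than through the CLI. A minimal sketch follows; it assumes an already-authenticated `BatchServiceClient` and an already-loaded Batch Shipyard configuration dict (their construction is omitted), and the helper function name is illustrative.

```python
# Illustrative only: invoke the new stats summaries programmatically.
# `batch_client` and `config` are assumed to be produced by the normal
# Batch Shipyard bootstrap; they are not constructed in this sketch.
from convoy import fleet


def print_summaries(batch_client, config):
    # Pool summary; pool_id=None falls back to the pool in the config.
    fleet.action_pool_stats(batch_client, config, pool_id=None)
    # Jobs summary; job_id=None aggregates across all jobs.
    fleet.action_jobs_stats(batch_client, config, job_id=None)
```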
@@ -388,6 +388,8 @@ file
   * `--requeue` requeue running tasks
   * `--terminate` terminate running tasks
   * `--wait` wait for running tasks to complete
+* `stats` will generate a statistics summary of a job or jobs
+  * `--jobid` will query the specified job instead of all jobs
 * `term` will terminate jobs found in the jobs configuration file. If an
 autopool is specified for all jobs and a jobid option is not specified,
 the storage associated with the autopool will be cleaned up.
@@ -456,12 +458,13 @@ The `pool` command has the following sub-commands:
   dsu         Delete an SSH user from all nodes in pool
   grls        Get remote login settings for all nodes in...
   list        List all pools in the Batch account
-  listimages  List Docker images in the pool
+  listimages  List Docker images in a pool
   listnodes   List nodes in pool
   listskus    List available VM configurations available to...
   rebootnode  Reboot a node or nodes in a pool
   resize      Resize a pool
-  ssh         Interactively login via SSH to a node in the...
+  ssh         Interactively login via SSH to a node in a...
+  stats       Get statistics about a pool
   udi         Update Docker images in a pool
 ```
 * `add` will add the pool defined in the pool configuration file to the
@@ -514,6 +517,9 @@ configuration file
 the pool to connect to as listed by `grls`
   * `--nodeid` is the node id to connect to in the pool
   * `--tty` allocates a pseudo-terminal
+* `stats` will generate a statistics summary of the pool
+  * `--poolid` will query the specified pool instead of the pool from the
+pool configuration file
 * `udi` will update Docker images on all compute nodes of the pool. This
 command requires a valid SSH user.
   * `--image` will restrict the update to just the image or image:tag

shipyard.py (31 changed lines)

@@ -1178,7 +1178,7 @@ def pool_dsu(ctx):
 @aad_options
 @pass_cli_context
 def pool_ssh(ctx, cardinal, nodeid, tty, command):
-    """Interactively login via SSH to a node in the pool"""
+    """Interactively login via SSH to a node in a pool"""
     ctx.initialize_for_batch()
     convoy.fleet.action_pool_ssh(
         ctx.batch_client, ctx.config, cardinal, nodeid, tty, command)
@@ -1253,11 +1253,25 @@ def pool_udi(ctx, image, digest, ssh):
 @aad_options
 @pass_cli_context
 def pool_listimages(ctx):
-    """List Docker images in the pool"""
+    """List Docker images in a pool"""
     ctx.initialize_for_batch()
     convoy.fleet.action_pool_listimages(ctx.batch_client, ctx.config)


+@pool.command('stats')
+@click.option('--poolid', help='Get stats on specified pool')
+@common_options
+@batch_options
+@keyvault_options
+@aad_options
+@pass_cli_context
+def pool_stats(ctx, poolid):
+    """Get statistics about a pool"""
+    ctx.initialize_for_batch()
+    convoy.fleet.action_pool_stats(
+        ctx.batch_client, ctx.config, pool_id=poolid)
+
+
 @pool.group()
 @pass_cli_context
 def autoscale(ctx):
@@ -1530,6 +1544,19 @@ def jobs_enable(ctx, jobid):
     convoy.fleet.action_jobs_enable(ctx.batch_client, ctx.config, jobid)


+@jobs.command('stats')
+@click.option('--jobid', help='Get stats only on the specified job id')
+@common_options
+@batch_options
+@keyvault_options
+@aad_options
+@pass_cli_context
+def jobs_stats(ctx, jobid):
+    """Get statistics about jobs"""
+    ctx.initialize_for_batch()
+    convoy.fleet.action_jobs_stats(ctx.batch_client, ctx.config, job_id=jobid)
+
+
 @cli.group()
 @pass_cli_context
 def data(ctx):
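
For orientation, the new `stats` commands reuse the existing layering; the call chain below is read directly from the hunks in this commit.

```python
# Call chain added by this commit (per the hunks above):
#
#   shipyard.py  pool_stats(ctx, poolid)
#     -> convoy.fleet.action_pool_stats(batch_client, config, pool_id)
#     -> convoy.batch.pool_stats(batch_client, config, pool_id=None)
#
#   shipyard.py  jobs_stats(ctx, jobid)
#     -> convoy.fleet.action_jobs_stats(batch_client, config, job_id)
#     -> convoy.batch.job_stats(batch_client, config, jobid=None)
```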
|
||||||
|
|
Загрузка…
Ссылка в новой задаче