Prevent invalid mix of HPC offer and non-RDMA VM
- Fix unusable nodes on allocation exception in pool stats
- Expand network tuning exemptions
This commit is contained in:
Родитель
260e1609ee
Коммит
093cfdbc83
11
CHANGELOG.md
11
CHANGELOG.md
|
@ -2,6 +2,17 @@
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Changed
|
||||
- Prevent invalid configuration of HPC offers with non-RDMA VM sizes
|
||||
- Expanded network tuning exemptions for new Dv3 and Ev3 sizes
|
||||
|
||||
### Fixed
|
||||
- NV driver updates
|
||||
- Various OS updates and Docker issues
|
||||
- CentOS 7.3 to 7.4 Nvidia driver breakage
|
||||
- Regression in `pool ssh` on Windows
|
||||
- Exception in pool stats with unusable nodes on allocation
|
||||
|
||||
## [2.9.4] - 2017-09-12
|
||||
### Changed
|
||||
- Update dependencies to latest available
|
||||
|
|
|
@ -288,7 +288,9 @@ def _block_for_nodes_ready(
|
|||
errors.append('{}: {}'.format(err.code, err.message))
|
||||
if (err.code == 'AccountCoreQuotaReached' or
|
||||
(err.code == 'AccountLowPriorityCoreQuotaReached' and
|
||||
pool.target_dedicated_nodes == 0)):
|
||||
pool.target_dedicated_nodes == 0) or
|
||||
(err.code == 'AllocationTimedout' and
|
||||
pool.target_dedicated_nodes > 0)):
|
||||
fatal_resize_error = True
|
||||
if fatal_resize_error:
|
||||
pool_stats(batch_client, config, pool_id=pool_id)
|
||||
|
@ -908,8 +910,10 @@ def pool_stats(batch_client, config, pool_id=None):
|
|||
(node.start_task_info.end_time -
|
||||
node.last_boot_time).total_seconds()
|
||||
)
|
||||
tasks_run.append(node.total_tasks_run)
|
||||
tasks_running.append(node.running_tasks_count)
|
||||
if node.total_tasks_run is not None:
|
||||
tasks_run.append(node.total_tasks_run)
|
||||
if node.running_tasks_count is not None:
|
||||
tasks_running.append(node.running_tasks_count)
|
||||
total_running_tasks = sum(tasks_running)
|
||||
runnable_task_slots = runnable_nodes * pool.max_tasks_per_node
|
||||
total_task_slots = (
|
||||
|
|
|
@ -1696,8 +1696,15 @@ def _adjust_settings_for_pool_creation(config):
|
|||
config, vm_size=pool.vm_size)
|
||||
if not allowed and util.is_none_or_empty(node_agent):
|
||||
raise ValueError(
|
||||
('Unsupported Docker Host VM Config, publisher={} offer={} '
|
||||
('unsupported Docker Host VM Config, publisher={} offer={} '
|
||||
'sku={} vm_size={}').format(publisher, offer, sku, pool.vm_size))
|
||||
# ensure HPC offers are matched with RDMA sizes
|
||||
if ((offer == 'centos-hpc' or offer == 'sles-hpc') and
|
||||
not settings.is_rdma_pool(pool.vm_size)):
|
||||
raise ValueError(
|
||||
('cannot allocate an HPC VM config of publisher={} offer={} '
|
||||
'sku={} with a non-RDMA vm_size={}').format(
|
||||
publisher, offer, sku, pool.vm_size))
|
||||
# compute total vm count
|
||||
pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
|
||||
# ensure enough vhds for custom image pools
|
||||
|
@ -2250,9 +2257,9 @@ def action_pool_add(
|
|||
raise RuntimeError(
|
||||
'attempting to create a pool that already exists: {}'.format(
|
||||
settings.pool_id(config)))
|
||||
_adjust_settings_for_pool_creation(config)
|
||||
storage.create_storage_containers(blob_client, table_client, config)
|
||||
storage.clear_storage_containers(blob_client, table_client, config)
|
||||
_adjust_settings_for_pool_creation(config)
|
||||
storage.populate_global_resource_blobs(blob_client, table_client, config)
|
||||
_add_pool(
|
||||
resource_client, compute_client, network_client, batch_mgmt_client,
|
||||
|
@ -2681,10 +2688,10 @@ def action_jobs_add(
|
|||
else:
|
||||
raise RuntimeError(
|
||||
'pool with id of {} already exists'.format(pool_id))
|
||||
_adjust_settings_for_pool_creation(config)
|
||||
# create storage containers and clear
|
||||
storage.create_storage_containers(blob_client, table_client, config)
|
||||
storage.clear_storage_containers(blob_client, table_client, config)
|
||||
_adjust_settings_for_pool_creation(config)
|
||||
storage.populate_global_resource_blobs(
|
||||
blob_client, table_client, config)
|
||||
# create autopool specification object
|
||||
|
|
|
@ -69,13 +69,13 @@ _PREMIUM_STORAGE_INSTANCE_PREFIXES = frozenset((
|
|||
_PREMIUM_STORAGE_INSTANCE_SUFFIXES = frozenset((
|
||||
's', 's_v3',
|
||||
))
|
||||
_VM_TCP_NO_TUNE = (
|
||||
_VM_TCP_NO_TUNE = frozenset((
|
||||
'basic_a0', 'basic_a1', 'basic_a2', 'basic_a3', 'basic_a4', 'standard_a0',
|
||||
'standard_a1', 'standard_a2', 'standard_a3', 'standard_a5', 'standard_a6',
|
||||
'standard_a1_v2', 'standard_a2_v2', 'standard_a3_v2', 'standard_a4_v2',
|
||||
'standard_a2m_v2', 'standard_a4m_v2', 'standard_d1', 'standard_d2',
|
||||
'standard_d1_v2', 'standard_f1'
|
||||
)
|
||||
'standard_d1_v2', 'standard_f1', 'standard_d2_v3', 'standard_e2_v3',
|
||||
))
|
||||
# named tuples
|
||||
PoolVmCountSettings = collections.namedtuple(
|
||||
'PoolVmCountSettings', [
|
||||
|
|
Загрузка…
Ссылка в новой задаче