Prevent invalid mix of HPC offer and non-RDMA VM

- Fix exception in pool stats for unusable nodes on allocation
- Expand network tuning exemptions
This commit is contained in:
Fred Park 2017-09-22 08:21:16 -07:00
Родитель 260e1609ee
Коммит 093cfdbc83
4 изменённых файлов: 31 добавлений и 9 удалений

Просмотреть файл

@@ -2,6 +2,17 @@
## [Unreleased]
### Changed
- Prevent invalid configuration of HPC offers with non-RDMA VM sizes
- Expanded network tuning exemptions for new Dv3 and Ev3 sizes
### Fixed
- NV driver updates
- Various OS updates and Docker issues
- CentOS 7.3 to 7.4 Nvidia driver breakage
- Regression in `pool ssh` on Windows
- Exception in `pool stats` for unusable nodes on allocation
## [2.9.4] - 2017-09-12
### Changed
- Update dependencies to latest available

Просмотреть файл

@@ -288,7 +288,9 @@ def _block_for_nodes_ready(
errors.append('{}: {}'.format(err.code, err.message))
if (err.code == 'AccountCoreQuotaReached' or
(err.code == 'AccountLowPriorityCoreQuotaReached' and
pool.target_dedicated_nodes == 0)):
pool.target_dedicated_nodes == 0) or
(err.code == 'AllocationTimedout' and
pool.target_dedicated_nodes > 0)):
fatal_resize_error = True
if fatal_resize_error:
pool_stats(batch_client, config, pool_id=pool_id)
@@ -908,8 +910,10 @@ def pool_stats(batch_client, config, pool_id=None):
(node.start_task_info.end_time -
node.last_boot_time).total_seconds()
)
tasks_run.append(node.total_tasks_run)
tasks_running.append(node.running_tasks_count)
if node.total_tasks_run is not None:
tasks_run.append(node.total_tasks_run)
if node.running_tasks_count is not None:
tasks_running.append(node.running_tasks_count)
total_running_tasks = sum(tasks_running)
runnable_task_slots = runnable_nodes * pool.max_tasks_per_node
total_task_slots = (

Просмотреть файл

@@ -1696,8 +1696,15 @@ def _adjust_settings_for_pool_creation(config):
config, vm_size=pool.vm_size)
if not allowed and util.is_none_or_empty(node_agent):
raise ValueError(
('Unsupported Docker Host VM Config, publisher={} offer={} '
('unsupported Docker Host VM Config, publisher={} offer={} '
'sku={} vm_size={}').format(publisher, offer, sku, pool.vm_size))
# ensure HPC offers are matched with RDMA sizes
if ((offer == 'centos-hpc' or offer == 'sles-hpc') and
not settings.is_rdma_pool(pool.vm_size)):
raise ValueError(
('cannot allocate an HPC VM config of publisher={} offer={} '
'sku={} with a non-RDMA vm_size={}').format(
publisher, offer, sku, pool.vm_size))
# compute total vm count
pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
# ensure enough vhds for custom image pools
@@ -2250,9 +2257,9 @@ def action_pool_add(
raise RuntimeError(
'attempting to create a pool that already exists: {}'.format(
settings.pool_id(config)))
_adjust_settings_for_pool_creation(config)
storage.create_storage_containers(blob_client, table_client, config)
storage.clear_storage_containers(blob_client, table_client, config)
_adjust_settings_for_pool_creation(config)
storage.populate_global_resource_blobs(blob_client, table_client, config)
_add_pool(
resource_client, compute_client, network_client, batch_mgmt_client,
@@ -2681,10 +2688,10 @@ def action_jobs_add(
else:
raise RuntimeError(
'pool with id of {} already exists'.format(pool_id))
_adjust_settings_for_pool_creation(config)
# create storage containers and clear
storage.create_storage_containers(blob_client, table_client, config)
storage.clear_storage_containers(blob_client, table_client, config)
_adjust_settings_for_pool_creation(config)
storage.populate_global_resource_blobs(
blob_client, table_client, config)
# create autopool specification object

Просмотреть файл

@@ -69,13 +69,13 @@ _PREMIUM_STORAGE_INSTANCE_PREFIXES = frozenset((
_PREMIUM_STORAGE_INSTANCE_SUFFIXES = frozenset((
's', 's_v3',
))
_VM_TCP_NO_TUNE = (
_VM_TCP_NO_TUNE = frozenset((
'basic_a0', 'basic_a1', 'basic_a2', 'basic_a3', 'basic_a4', 'standard_a0',
'standard_a1', 'standard_a2', 'standard_a3', 'standard_a5', 'standard_a6',
'standard_a1_v2', 'standard_a2_v2', 'standard_a3_v2', 'standard_a4_v2',
'standard_a2m_v2', 'standard_a4m_v2', 'standard_d1', 'standard_d2',
'standard_d1_v2', 'standard_f1'
)
'standard_d1_v2', 'standard_f1', 'standard_d2_v3', 'standard_e2_v3',
))
# named tuples
PoolVmCountSettings = collections.namedtuple(
'PoolVmCountSettings', [