Support max increment of VMs in scenario autoscale
- Allow definition of weekdays/workhours - Resolves #210
This commit is contained in:
Родитель
e65dd9c196
Коммит
cf0797790f
|
@ -21,19 +21,29 @@ pool_specification:
|
|||
resize_timeout: 00:20:00
|
||||
node_fill_type: pack
|
||||
autoscale:
|
||||
evaluation_interval: 00:05:00
|
||||
evaluation_interval: 00:15:00
|
||||
scenario:
|
||||
name: active_tasks
|
||||
maximum_vm_count:
|
||||
dedicated: 16
|
||||
low_priority: 8
|
||||
maximum_vm_increment_per_evaluation:
|
||||
dedicated: 4
|
||||
low_priority: -1
|
||||
node_deallocation_option: taskcompletion
|
||||
sample_lookback_interval: 00:10:00
|
||||
required_sample_percentage: 70
|
||||
bias_last_sample: true
|
||||
bias_node_type: low_priority
|
||||
rebalance_preemption_percentage: 50
|
||||
formula: ''
|
||||
time_ranges:
|
||||
weekdays:
|
||||
start: 1
|
||||
end: 5
|
||||
work_hours:
|
||||
start: 8
|
||||
end: 17
|
||||
formula: null
|
||||
inter_node_communication_enabled: true
|
||||
reboot_on_start_task_failed: false
|
||||
attempt_recovery_on_unusable: false
|
||||
|
|
|
@ -44,6 +44,12 @@ AutoscaleMinMax = collections.namedtuple(
|
|||
'min_target_low_priority',
|
||||
'max_target_dedicated',
|
||||
'max_target_low_priority',
|
||||
'max_inc_dedicated',
|
||||
'max_inc_low_priority',
|
||||
'weekday_start',
|
||||
'weekday_end',
|
||||
'workhour_start',
|
||||
'workhour_end',
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -142,19 +148,28 @@ def _formula_tasks(pool):
|
|||
'redistVMs = rebalance ? min(preemptcount, remainingVMs) : 0',
|
||||
'dedicatedVMs = min(maxTargetDedicated, '
|
||||
'dedicatedVMs + redistVMs + minTargetDedicated)',
|
||||
'dedicatedVMs = min($CurrentDedicatedNodes + maxIncDedicated, '
|
||||
'dedicatedVMs)',
|
||||
'remainingVMs = max(0, reqVMs - dedicatedVMs)',
|
||||
'lowPriVMs = min(maxTargetLowPriority, '
|
||||
'remainingVMs + minTargetLowPriority)',
|
||||
'lowPriVMs = min($CurrentLowPriorityNodes + maxIncLowPriority, '
|
||||
'lowPriVMs)',
|
||||
'$TargetDedicatedNodes = dedicatedVMs',
|
||||
'$TargetLowPriorityNodes = lowPriVMs',
|
||||
]
|
||||
elif pool.autoscale.scenario.bias_node_type == 'dedicated':
|
||||
target_vms = [
|
||||
'dedicatedVMs = min(maxTargetDedicated, reqVMs)',
|
||||
'dedicatedVMs = min($CurrentDedicatedNodes + maxIncDedicated, '
|
||||
'dedicatedVMs)',
|
||||
'remainingVMs = max(0, reqVMs - dedicatedVMs)',
|
||||
'$TargetDedicatedNodes = dedicatedVMs',
|
||||
'$TargetLowPriorityNodes = min(maxTargetLowPriority, '
|
||||
'lowPriVMs = min(maxTargetLowPriority, '
|
||||
'remainingVMs + minTargetLowPriority)',
|
||||
'lowPriVMs = min($CurrentLowPriorityNodes + maxIncLowPriority, '
|
||||
'lowPriVMs)',
|
||||
'$TargetDedicatedNodes = dedicatedVMs',
|
||||
'$TargetLowPriorityNodes = lowPriVMs',
|
||||
]
|
||||
elif pool.autoscale.scenario.bias_node_type == 'low_priority':
|
||||
target_vms = [
|
||||
|
@ -163,10 +178,15 @@ def _formula_tasks(pool):
|
|||
'redistVMs = rebalance ? min(preemptcount, remainingVMs) : 0',
|
||||
'lowPriVMs = max(minTargetLowPriority, '
|
||||
'reqVMs - redistVMs + minTargetLowPriority)',
|
||||
'lowPriVMs = min($CurrentLowPriorityNodes + maxIncLowPriority, '
|
||||
'lowPriVMs)',
|
||||
'remainingVMs = max(0, reqVMs - lowPriVMs)',
|
||||
'$TargetLowPriorityNodes = lowPriVMs',
|
||||
'$TargetDedicatedNodes = min(maxTargetDedicated, '
|
||||
'dedicatedVMs = min(maxTargetDedicated, '
|
||||
'remainingVMs + minTargetDedicated)',
|
||||
'dedicatedVMs = min($CurrentDedicatedNodes + maxIncDedicated, '
|
||||
'dedicatedVMs)',
|
||||
'$TargetLowPriorityNodes = lowPriVMs',
|
||||
'$TargetDedicatedNodes = dedicatedVMs',
|
||||
]
|
||||
else:
|
||||
raise ValueError(
|
||||
|
@ -179,6 +199,8 @@ def _formula_tasks(pool):
|
|||
'minTargetLowPriority = {}'.format(minmax.min_target_low_priority),
|
||||
'maxTargetDedicated = {}'.format(minmax.max_target_dedicated),
|
||||
'maxTargetLowPriority = {}'.format(minmax.max_target_low_priority),
|
||||
'maxIncDedicated = {}'.format(minmax.max_inc_dedicated),
|
||||
'maxIncLowPriority = {}'.format(minmax.max_inc_low_priority),
|
||||
req_vms,
|
||||
target_vms,
|
||||
'$NodeDeallocationOption = {}'.format(
|
||||
|
@ -198,16 +220,20 @@ def _formula_day_of_week(pool):
|
|||
if pool.autoscale.scenario.name == 'workday':
|
||||
target_vms = [
|
||||
'now = time()',
|
||||
'isWorkHours = now.hour >= 8 && now.hour < 18',
|
||||
'isWeekday = now.weekday >= 1 && now.weekday <= 5',
|
||||
'isWorkHours = now.hour >= workhourStart && '
|
||||
'now.hour <= workhourEnd',
|
||||
'isWeekday = now.weekday >= weekdayStart && '
|
||||
'now.weekday <= weekdayEnd',
|
||||
'isPeakTime = isWeekday && isWorkHours',
|
||||
]
|
||||
elif (pool.autoscale.scenario.name ==
|
||||
'workday_with_offpeak_max_low_priority'):
|
||||
target_vms = [
|
||||
'now = time()',
|
||||
'isWorkHours = now.hour >= 8 && now.hour < 18',
|
||||
'isWeekday = now.weekday >= 1 && now.weekday <= 5',
|
||||
'isWorkHours = now.hour >= workhourStart && '
|
||||
'now.hour <= workhourEnd',
|
||||
'isWeekday = now.weekday >= weekdayStart && '
|
||||
'now.weekday <= weekdayEnd',
|
||||
'isPeakTime = isWeekday && isWorkHours',
|
||||
'$TargetLowPriorityNodes = maxTargetLowPriority',
|
||||
]
|
||||
|
@ -220,12 +246,14 @@ def _formula_day_of_week(pool):
|
|||
elif pool.autoscale.scenario.name == 'weekday':
|
||||
target_vms = [
|
||||
'now = time()',
|
||||
'isPeakTime = now.weekday >= 1 && now.weekday <= 5',
|
||||
'isPeakTime = now.weekday >= weekdayStart && '
|
||||
'now.weekday <= weekdayEnd',
|
||||
]
|
||||
elif pool.autoscale.scenario.name == 'weekend':
|
||||
target_vms = [
|
||||
'now = time()',
|
||||
'isPeakTime = now.weekday >= 6 && now.weekday <= 7',
|
||||
'isPeakTime = now.weekday < weekdayStart && '
|
||||
'now.weekday > weekdayEnd',
|
||||
]
|
||||
else:
|
||||
raise ValueError('autoscale scenario name invalid: {}'.format(
|
||||
|
@ -259,6 +287,10 @@ def _formula_day_of_week(pool):
|
|||
'minTargetLowPriority = {}'.format(minmax.min_target_low_priority),
|
||||
'maxTargetDedicated = {}'.format(minmax.max_target_dedicated),
|
||||
'maxTargetLowPriority = {}'.format(minmax.max_target_low_priority),
|
||||
'weekdayStart = {}'.format(minmax.weekday_start),
|
||||
'weekdayEnd = {}'.format(minmax.weekday_end),
|
||||
'workhourStart = {}'.format(minmax.workhour_start),
|
||||
'workhourEnd = {}'.format(minmax.workhour_end),
|
||||
target_vms,
|
||||
'$NodeDeallocationOption = {}'.format(
|
||||
pool.autoscale.scenario.node_deallocation_option),
|
||||
|
@ -291,12 +323,29 @@ def _get_minmax(pool):
|
|||
raise ValueError(
|
||||
'min target low priority {} > max target low priority {}'.format(
|
||||
min_target_low_priority, max_target_low_priority))
|
||||
max_inc_dedicated = (
|
||||
pool.autoscale.scenario.maximum_vm_increment_per_evaluation.dedicated
|
||||
)
|
||||
max_inc_low_priority = (
|
||||
pool.autoscale.scenario.
|
||||
maximum_vm_increment_per_evaluation.low_priority
|
||||
)
|
||||
if max_inc_dedicated <= 0:
|
||||
max_inc_dedicated = _UNBOUND_MAX_NODES
|
||||
if max_inc_low_priority <= 0:
|
||||
max_inc_low_priority = _UNBOUND_MAX_NODES
|
||||
return AutoscaleMinMax(
|
||||
max_tasks_per_node=pool.max_tasks_per_node,
|
||||
min_target_dedicated=min_target_dedicated,
|
||||
min_target_low_priority=min_target_low_priority,
|
||||
max_target_dedicated=max_target_dedicated,
|
||||
max_target_low_priority=max_target_low_priority,
|
||||
max_inc_dedicated=max_inc_dedicated,
|
||||
max_inc_low_priority=max_inc_low_priority,
|
||||
weekday_start=pool.autoscale.scenario.weekday_start,
|
||||
weekday_end=pool.autoscale.scenario.weekday_end,
|
||||
workhour_start=pool.autoscale.scenario.workhour_start,
|
||||
workhour_end=pool.autoscale.scenario.workhour_end,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -153,12 +153,17 @@ PoolAutoscaleScenarioSettings = collections.namedtuple(
|
|||
'PoolAutoscaleScenarioSettings', [
|
||||
'name',
|
||||
'maximum_vm_count',
|
||||
'maximum_vm_increment_per_evaluation',
|
||||
'node_deallocation_option',
|
||||
'sample_lookback_interval',
|
||||
'required_sample_percentage',
|
||||
'rebalance_preemption_percentage',
|
||||
'bias_last_sample',
|
||||
'bias_node_type',
|
||||
'weekday_start',
|
||||
'weekday_end',
|
||||
'workhour_start',
|
||||
'workhour_end',
|
||||
]
|
||||
)
|
||||
PoolAutoscaleSettings = collections.namedtuple(
|
||||
|
@ -965,6 +970,8 @@ def pool_autoscale_settings(config):
|
|||
mvc = _kv_read_checked(scenconf, 'maximum_vm_count')
|
||||
if mvc is None:
|
||||
raise ValueError('maximum_vm_count must be specified')
|
||||
mvipe = _kv_read_checked(
|
||||
scenconf, 'maximum_vm_increment_per_evaluation', default={})
|
||||
ndo = _kv_read_checked(
|
||||
scenconf, 'node_deallocation_option', 'taskcompletion')
|
||||
if (ndo is not None and
|
||||
|
@ -977,9 +984,14 @@ def pool_autoscale_settings(config):
|
|||
sli = util.convert_string_to_timedelta(sli)
|
||||
else:
|
||||
sli = datetime.timedelta(minutes=10)
|
||||
tr = _kv_read_checked(scenconf, 'time_ranges', default={})
|
||||
trweekday = _kv_read_checked(tr, 'weekdays', default={})
|
||||
trworkhour = _kv_read_checked(tr, 'work_hours', default={})
|
||||
scenario = PoolAutoscaleScenarioSettings(
|
||||
name=_kv_read_checked(scenconf, 'name').lower(),
|
||||
maximum_vm_count=_pool_vm_count(config, conf=mvc),
|
||||
maximum_vm_increment_per_evaluation=_pool_vm_count(
|
||||
config, conf=mvipe),
|
||||
node_deallocation_option=ndo,
|
||||
sample_lookback_interval=sli,
|
||||
required_sample_percentage=_kv_read(
|
||||
|
@ -990,6 +1002,10 @@ def pool_autoscale_settings(config):
|
|||
scenconf, 'bias_last_sample', True),
|
||||
bias_node_type=_kv_read_checked(
|
||||
scenconf, 'bias_node_type', 'auto').lower(),
|
||||
weekday_start=_kv_read(trweekday, 'start', default=1),
|
||||
weekday_end=_kv_read(trweekday, 'end', default=5),
|
||||
workhour_start=_kv_read(trworkhour, 'start', default=8),
|
||||
workhour_end=_kv_read(trworkhour, 'end', default=17),
|
||||
)
|
||||
else:
|
||||
scenario = None
|
||||
|
|
|
@ -29,19 +29,29 @@ pool_specification:
|
|||
resize_timeout: 00:20:00
|
||||
node_fill_type: pack
|
||||
autoscale:
|
||||
evaluation_interval: 00:05:00
|
||||
evaluation_interval: 00:15:00
|
||||
scenario:
|
||||
name: active_tasks
|
||||
maximum_vm_count:
|
||||
dedicated: 16
|
||||
low_priority: 8
|
||||
maximum_vm_increment_per_evaluation:
|
||||
dedicated: 4
|
||||
low_priority: -1
|
||||
node_deallocation_option: taskcompletion
|
||||
sample_lookback_interval: 00:10:00
|
||||
required_sample_percentage: 70
|
||||
bias_last_sample: true
|
||||
bias_node_type: low_priority
|
||||
rebalance_preemption_percentage: 50
|
||||
formula: ''
|
||||
time_ranges:
|
||||
weekdays:
|
||||
start: 1
|
||||
end: 5
|
||||
work_hours:
|
||||
start: 8
|
||||
end: 17
|
||||
formula: null
|
||||
inter_node_communication_enabled: true
|
||||
reboot_on_start_task_failed: false
|
||||
attempt_recovery_on_unusable: false
|
||||
|
@ -217,7 +227,9 @@ each node type for `scenario` based autoscale.
|
|||
timedelta with a string representation of "d.HH:mm:ss". "HH:mm:ss" is
|
||||
required, but "d" is optional, if specified. If not specified, the
|
||||
default is 15 minutes. The smallest value that can be specified is 5
|
||||
minutes.
|
||||
minutes. Use caution when specifying a small `evaluation_interval`
|
||||
value, which can cause pool resizing errors and instability with
|
||||
volatile target counts.
|
||||
* (optional) `scenario` is a pre-set autoscale scenario where a formula
|
||||
will be generated with the parameters specified within this property.
|
||||
* (required) `name` is the autoscale scenario name to apply. Valid
|
||||
|
@ -234,6 +246,14 @@ each node type for `scenario` based autoscale.
|
|||
nodes that can be allocated.
|
||||
* (optional) `low_priority` is the maximum number of low priority
|
||||
compute nodes that can be allocated.
|
||||
* (optional) `maximum_vm_increment_per_evaluation` is the maximum
|
||||
number of VMs to increase per evaluation. Specifying a non-positive
|
||||
value (i.e., less than or equal to `0`) for either of the following
|
||||
properties will result in effectively no increment limit.
|
||||
* (optional) `dedicated` is the maximum increase in VMs per
|
||||
evaluation.
|
||||
* (optional) `low_priority` is the maximum increase in VMs per
|
||||
evaluation.
|
||||
* (optional) `node_deallocation_option` is the node deallocation option
|
||||
to apply. When a pool is resized down and a node is selected for
|
||||
removal, what action is performed for the running task is specified
|
||||
|
@ -264,6 +284,20 @@ each node type for `scenario` based autoscale.
|
|||
count reaches the indicated threshold percentage of the total
|
||||
current dedicated and low priority nodes. The default is `null`
|
||||
or no rebalancing is performed.
|
||||
* (optional) `time_ranges` defines the time ranges for the day-of-week
|
||||
based scenarios.
|
||||
* (optional) `weekdays` defines the days of the week which should
|
||||
be considered weekdays, where `1` = Monday.
|
||||
* (optional) `start` defines the inclusive start day
|
||||
of the week as an integer. The default is `1`.
|
||||
* (optional) `end` defines the inclusive end day
|
||||
of the week as an integer. The default is `5`.
|
||||
* (optional) `work_hours` defines the hours of the day in the
|
||||
work day with a range from `0` to `23`, inclusive.
|
||||
* (optional) `start` defines the inclusive start hour of
|
||||
the work day as an integer. The default is `8`.
|
||||
* (optional) `end` defines the inclusive end hour of
|
||||
the work day as an integer. The default is `17`.
|
||||
* (optional) `formula` is a custom autoscale formula to apply to the pool.
|
||||
If both `formula` and `scenario` are specified, then `formula` is used.
|
||||
* (optional) `inter_node_communication_enabled` designates if this pool is set
|
||||
|
|
|
@ -47,10 +47,9 @@ words, "tasks with satisified dependencies awaiting node assignment".
|
|||
pending tasks for the pool. Tasks categorized under this metric are
|
||||
tasks in active state with satisfied dependencies and running
|
||||
tasks, in other words, "tasks pending completion".
|
||||
* `workday` will autoscale the pool according to Monday-Friday workdays.
|
||||
* `workday` will autoscale the pool according to the workdays specified.
|
||||
* `workday_with_offpeak_max_low_priority` will autoscale the pool according
|
||||
to Monday-Friday workdays and for off work time, use maximum number of
|
||||
low priority nodes.
|
||||
to the specified workdays and, for off work time, use the maximum number of low priority nodes.
|
||||
* `weekday` will autoscale the pool if it is a weekday.
|
||||
* `weekend` will autoscale the pool if it is a weekend.
|
||||
|
||||
|
@ -69,7 +68,9 @@ pool to resize down to zero nodes.
|
|||
Additionally, there are options that can modify and fine-tune these scenarios
|
||||
as needed:
|
||||
|
||||
* `node_deallocation_option` which specify when a node is targeted for
|
||||
* `maximum_vm_increment_per_evaluation` sets limits on the maximum number
|
||||
of dedicated or low priority VMs to increase after an evaluation.
|
||||
* `node_deallocation_option` which specifies when a node is targeted for
|
||||
deallocation but has a running task, what should be the action applied to
|
||||
the task: `requeue`, `terminate`, `taskcompletion`, and `retaineddata`.
|
||||
Please see [this doc](https://docs.microsoft.com/azure/batch/batch-automatic-scaling#variables)
|
||||
|
@ -93,11 +94,14 @@ favor either `dedicated` or `low_priority`. This applies to all scenarios.
|
|||
for dedicated nodes when the pre-empted node count reaches the indicated
|
||||
threshold percentage of the total current dedicated and low priority nodes.
|
||||
This applies only to `active_tasks` and `pending_tasks` scenarios.
|
||||
* `time_ranges` allows specification of which days of the week should be
|
||||
considered weekdays and which hours should be considered as part of working
|
||||
hours. These options only apply to the day-of-the-week based scenarios.
|
||||
|
||||
An example autoscale specification in the pool configuration may be:
|
||||
```yaml
|
||||
autoscale:
|
||||
evaluation_interval: 00:05:00
|
||||
evaluation_interval: 00:10:00
|
||||
scenario:
|
||||
name: active_tasks
|
||||
maximum_vm_count:
|
||||
|
@ -106,15 +110,16 @@ An example autoscale specification in the pool configuration may be:
|
|||
```
|
||||
|
||||
This example would apply the `active_tasks` scenario to the associated
|
||||
pool with an evaluation interval of every 5 minutes. This means that the
|
||||
pool with an evaluation interval of every 10 minutes. This means that the
|
||||
autoscale formula is evaluated by the service and can have updates applied
|
||||
every 5 minutes. Note that having a small evaluation interval may result
|
||||
in undesirable behavior of the pool being resized constantly (or even
|
||||
resize failures if the prior resize is still ongoing when the autoscale
|
||||
evaluation happens again and results in a different target node count).
|
||||
The `active_tasks` scenario also includes a `maximum_vm_count` to ensure that
|
||||
the autoscale formula does not result in target node counts that exceed
|
||||
16 dedicated and 8 low priority nodes.
|
||||
every 10 minutes. Note that having a small evaluation interval may result
|
||||
in undesirable behavior of the pool being resized constantly. This can result
|
||||
in pool stability issues including resize failures if the prior resize is
|
||||
still ongoing when the autoscale evaluation happens again and results in
|
||||
a different target node count. For this example, the `active_tasks` scenario
|
||||
also includes a `maximum_vm_count` to ensure that the autoscale formula
|
||||
does not result in target node counts that exceed 16 dedicated and 8 low
|
||||
priority nodes.
|
||||
|
||||
### Formula-based Autoscaling
|
||||
Formula-based autoscaling allows users with expertise in creating autoscale
|
||||
|
|
|
@ -81,17 +81,21 @@ mapping:
|
|||
name:
|
||||
type: str
|
||||
required: true
|
||||
enum: ['active_tasks', 'pending_tasks', 'workday', 'workday_with_offpeak_max_low_priority', 'weekday', 'weekend']
|
||||
maximum_vm_count:
|
||||
type: map
|
||||
mapping:
|
||||
dedicated:
|
||||
type: int
|
||||
range:
|
||||
min: 0
|
||||
low_priority:
|
||||
type: int
|
||||
range:
|
||||
min: 0
|
||||
maximum_vm_increment_per_evaluation:
|
||||
type: map
|
||||
mapping:
|
||||
dedicated:
|
||||
type: int
|
||||
low_priority:
|
||||
type: int
|
||||
node_deallocation_option:
|
||||
type: str
|
||||
enum: ['requeue', 'retaineddata', 'taskcompletion', 'terminate']
|
||||
|
@ -112,6 +116,39 @@ mapping:
|
|||
range:
|
||||
min: 0
|
||||
max: 100
|
||||
time_ranges:
|
||||
type: map
|
||||
mapping:
|
||||
weekdays:
|
||||
type: map
|
||||
mapping:
|
||||
start:
|
||||
type: int
|
||||
required: true
|
||||
range:
|
||||
min: 0
|
||||
max: 6
|
||||
end:
|
||||
type: int
|
||||
required: true
|
||||
range:
|
||||
min: 0
|
||||
max: 6
|
||||
work_hours:
|
||||
type: map
|
||||
mapping:
|
||||
start:
|
||||
type: int
|
||||
required: true
|
||||
range:
|
||||
min: 0
|
||||
max: 23
|
||||
end:
|
||||
type: int
|
||||
required: true
|
||||
range:
|
||||
min: 0
|
||||
max: 23
|
||||
formula:
|
||||
type: str
|
||||
inter_node_communication_enabled:
|
||||
|
|
Загрузка…
Ссылка в новой задаче