зеркало из https://github.com/mozilla/gecko-dev.git
245 строки
8.8 KiB
Python
245 строки
8.8 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
from __future__ import absolute_import, print_function, unicode_literals
|
|
|
|
import logging
|
|
import re
|
|
import os
|
|
import requests
|
|
|
|
from .graph import Graph
|
|
from . import files_changed
|
|
from .taskgraph import TaskGraph
|
|
from .util.seta import is_low_value_task
|
|
from .util.taskcluster import find_task_id
|
|
from slugid import nice as slugid
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Matches `<name>` inside a task-reference string; group 1 is the text between
# the angle brackets.
TASK_REFERENCE_PATTERN = re.compile(r'<([^>]+)>')

# Registry mapping an optimization name to the function implementing it.
# Populated by the `optimization` decorator.
_optimizations = {}
|
|
|
|
|
|
def optimize_task_graph(target_task_graph, params, do_not_optimize, existing_tasks=None):
    """
    Optimize `target_task_graph`, leaving every task whose label appears in
    `do_not_optimize` untouched.

    The work happens in two phases:

    1. `annotate_task_graph` marks each task as optimized or not and records
       a replacement taskId where one exists.
    2. `get_subgraph` builds a new task graph holding only the non-optimized
       tasks, with labels resolved to taskIds and task['dependencies']
       populated.

    Returns a tuple `(optimized_task_graph, label_to_taskid)`.
    """
    # Filled in by both phases: replacement taskIds first, then freshly
    # generated taskIds for the tasks that remain.
    label_to_taskid = {}
    named_links_dict = target_task_graph.graph.named_links_dict()

    annotate_task_graph(
        target_task_graph,
        params,
        do_not_optimize,
        named_links_dict,
        label_to_taskid,
        existing_tasks)

    optimized_graph = get_subgraph(target_task_graph, named_links_dict, label_to_taskid)
    return optimized_graph, label_to_taskid
|
|
|
|
|
|
def resolve_task_references(label, task_def, taskid_for_edge_name):
    """
    Return a copy of `task_def` in which every `{'task-reference': '...'}`
    dictionary has been replaced by its string with each `<edge-name>`
    substituted by `taskid_for_edge_name[edge-name]`.

    Lists and dictionaries are rebuilt recursively; any other value is
    returned as-is.  The sequence `<<>` is an escape that yields a literal
    `<`.

    `label` is only used to build the error message.

    Raises KeyError if a reference names an edge that is not present in
    `taskid_for_edge_name`.
    """
    def repl(match):
        key = match.group(1)
        try:
            return taskid_for_edge_name[key]
        except KeyError:
            # handle escaping '<'
            if key == '<':
                return key
            raise KeyError("task '{}' has no dependency named '{}'".format(label, key))

    def recurse(val):
        if isinstance(val, list):
            return [recurse(v) for v in val]
        if isinstance(val, dict):
            # Compare a real list of keys: on Python 3 `val.keys()` is a view
            # object that never compares equal to a list, which would leave
            # every task-reference unresolved.
            if list(val) == ['task-reference']:
                return TASK_REFERENCE_PATTERN.sub(repl, val['task-reference'])
            # `.items()` exists on both Python 2 and Python 3.
            return {k: recurse(v) for k, v in val.items()}
        return val

    return recurse(task_def)
|
|
|
|
|
|
def optimize_task(task, params):
    """
    Run the optimizations listed in `task.optimizations`, in order, and
    return the first truthy result.

    Each entry is a sequence whose first element names a registered
    optimization and whose remaining elements are passed to it as extra
    positional arguments.  Returns False when no optimization succeeds.
    """
    for spec in task.optimizations:
        name = spec[0]
        extra_args = spec[1:]
        handler = _optimizations[name]
        outcome = handler(task, params, *extra_args)
        if not outcome:
            continue
        return outcome

    return False
|
|
|
|
|
|
def annotate_task_graph(target_task_graph, params, do_not_optimize,
                        named_links_dict, label_to_taskid, existing_tasks):
    """
    Annotate each task in the graph with .optimized (boolean) and .task_id
    (possibly None).

    For every task, in the order given by `graph.visit_postorder()`:

    - a task whose label is in `do_not_optimize` is never optimized;
    - otherwise, a task whose label is in `existing_tasks` (when that is not
      None) is optimized and replaced by the recorded taskId;
    - otherwise `optimize_task` decides: a falsy result means "keep", True
      means "optimize away", and any other truthy value is taken as the
      replacement taskId.

    As a side effect, label_to_taskid is updated with labels for all
    optimized tasks that are replaced with existing tasks.

    Raises Exception if a task depends on a task that was optimized away
    without a replacement.
    """
    # set .optimized for all tasks, and .task_id for optimized tasks
    # with replacements
    for label in target_task_graph.graph.visit_postorder():
        task = target_task_graph.tasks[label]
        named_task_dependencies = named_links_dict.get(label, {})

        # check whether any dependencies have been optimized away; this reads
        # the annotations set on the dependencies by earlier iterations.
        # `.values()` is used because `itervalues()` is Python 2 only.
        dependencies = [target_task_graph.tasks[dep_label]
                        for dep_label in named_task_dependencies.values()]
        for dep in dependencies:
            if dep.optimized and not dep.task_id:
                raise Exception(
                    "task {} was optimized away, but {} depends on it".format(
                        dep.label, label))

        # if this task is blacklisted, don't even consider optimizing
        replacement_task_id = None
        if label in do_not_optimize:
            optimized = False
        # Let's check whether this task has been created before
        elif existing_tasks is not None and label in existing_tasks:
            optimized = True
            replacement_task_id = existing_tasks[label]
        # otherwise, examine the task itself (which may be an expensive operation)
        else:
            opt_result = optimize_task(task, params)

            # use opt_result to determine values for optimized, replacement_task_id
            optimized = bool(opt_result)
            replacement_task_id = opt_result if opt_result and opt_result is not True else None

        task.optimized = optimized
        task.task_id = replacement_task_id
        if replacement_task_id:
            label_to_taskid[label] = replacement_task_id

        if optimized:
            if replacement_task_id:
                logger.debug("optimizing `{}`, replacing with task `{}`"
                             .format(label, replacement_task_id))
            else:
                logger.debug("optimizing `{}` away".format(label))
                # note: any dependent tasks will fail when they see this
        else:
            # Invariant guard: a task that is kept must not carry a
            # replacement taskId.
            if replacement_task_id:
                raise Exception("{}: optimize_task returned False with a taskId".format(label))
|
|
|
|
|
|
def get_subgraph(annotated_task_graph, named_links_dict, label_to_taskid):
    """
    Return the subgraph of annotated_task_graph consisting only of
    non-optimized tasks and edges between them.

    To avoid losing track of taskIds for tasks optimized away, this method
    simultaneously substitutes real taskIds for task labels in the graph, and
    populates each task definition's `dependencies` key with the appropriate
    taskIds.  Task references are resolved in the process.

    Side effects: every non-optimized task gets a newly generated `task_id`,
    which is also recorded in `label_to_taskid`; its `task` attribute is
    replaced by the reference-resolved definition.
    """
    # resolve labels to taskIds and populate task['dependencies']
    tasks_by_taskid = {}
    for label in annotated_task_graph.graph.visit_postorder():
        task = annotated_task_graph.tasks[label]
        if task.optimized:
            continue
        task.task_id = label_to_taskid[label] = slugid()

        # Map each named edge to the taskId of the task it points at.  The
        # lookup raises KeyError if that task has no taskId recorded yet.
        # `.items()` / `.values()` are used because the iter* variants are
        # Python 2 only.
        named_task_dependencies = {
            name: label_to_taskid[dep_label]
            for name, dep_label in named_links_dict.get(label, {}).items()}
        task.task = resolve_task_references(task.label, task.task, named_task_dependencies)
        task.task.setdefault('dependencies', []).extend(named_task_dependencies.values())
        tasks_by_taskid[task.task_id] = task

    # resolve edges to taskIds, dropping edges that are no longer in the task
    # graph (either end was optimized, with or without a replacement)
    edges_by_taskid = set()
    for left, right, name in annotated_task_graph.graph.edges:
        left_id = label_to_taskid.get(left)
        right_id = label_to_taskid.get(right)
        if left_id in tasks_by_taskid and right_id in tasks_by_taskid:
            edges_by_taskid.add((left_id, right_id, name))

    return TaskGraph(
        tasks_by_taskid,
        Graph(set(tasks_by_taskid), edges_by_taskid))
|
|
|
|
|
|
def optimization(name):
    """
    Decorator factory that registers the decorated function in the
    optimization registry under `name`.

    The function itself is returned unchanged.  Raises Exception if `name`
    has already been registered.
    """
    def register(func):
        if name not in _optimizations:
            _optimizations[name] = func
            return func
        raise Exception("multiple optimizations with name {}".format(name))

    return register
|
|
|
|
|
|
@optimization('index-search')
def opt_index_search(task, params, index_path):
    """
    Optimize a task by looking up `index_path` with `find_task_id`.

    Returns the taskId that was found, True if the lookup succeeded but
    returned a falsy value, or False if the lookup raised an HTTP error.
    """
    # The proxy is used whenever the TASK_ID environment variable is set to a
    # non-empty value (presumably meaning we are running inside a task).
    use_proxy = bool(os.environ.get('TASK_ID'))
    try:
        task_id = find_task_id(index_path, use_proxy=use_proxy)
    except requests.exceptions.HTTPError:
        return False

    return task_id or True
|
|
|
|
|
|
@optimization('seta')
def opt_seta(task, params):
    """
    Optimize a task away when `is_low_value_task` reports it as low value.

    Returns True for low-value tasks (which are always optimized away) and
    False otherwise; this optimization never supplies a replacement taskId.
    """
    # for bbb tasks we need to send in the buildbot buildername
    bbb_task = task.task.get('provisionerId', '') == 'buildbot-bridge'
    if bbb_task:
        label = task.task.get('payload').get('buildername')
    else:
        label = task.label

    low_value = is_low_value_task(
        label,
        params.get('project'),
        params.get('pushlog_id'),
        params.get('pushdate'),
        bbb_task)
    return bool(low_value)
|
|
|
|
|
|
@optimization('skip-unless-changed')
def opt_files_changed(task, params, file_patterns):
    """
    Optimize a task away unless `files_changed.check` reports a change
    matching one of `file_patterns`.

    Returns True (optimize away) when nothing matched or when the pushlog_id
    is -1, and False when a matching change was found.
    """
    # pushlog_id == -1 - this is the case when run from a cron.yml job
    if params.get('pushlog_id') == -1:
        return True

    if files_changed.check(params, file_patterns):
        return False

    logger.debug('no files found matching a pattern in `skip-unless-changed` for ' +
                 task.label)
    return True
|