Bug 1280231: Load kinds in order by dependencies; r=jonasfj

This enables kinds that generate tasks based on those output by another kind.
For example, the test kind might generate a set of test tasks for each build
task.

MozReview-Commit-ID: K7ha9OmJ6gd

--HG--
extra : source : 0852b38cd86c42ebba0f9e74d7470a263969b784
extra : amend_source : f3e8c306afe29ae75bd1f93d8b76ff2b27ad8ed1
extra : histedit_source : aa1ae93aba51025a0e1bd2ecf473aaa33235e4c7%2C2c704328e983a3d75a834b069431e4f166389b02
This commit is contained in:
Dustin J. Mitchell 2016-06-27 22:31:06 +00:00
Родитель 4cfe2b182c
Коммит c0fe75d7d2
6 изменённых файлов: 126 добавлений и 56 удалений

Просмотреть файл

@ -41,6 +41,19 @@ Python class implementing the kind in its ``implementation`` key. That
implementation may rely on lots of code shared with other kinds, or contain a
completely unique implementation of some functionality.
The full list of pre-defined keys in this file is:
``implementation``
Class implementing this kind, in the form ``<module-path>:<object-path>``.
This class should be a subclass of ``taskgraph.kind.base:Kind``.
``kind-dependencies``
Kinds which should be loaded before this one. This is useful when the kind
will use the list of already-created tasks to determine which tasks to
create, for example adding an upload-symbols task after every build task.
Any other keys are subject to interpretation by the kind implementation.
The result is a nice segmentation of implementation so that the more esoteric
in-tree projects can do their crazy stuff in an isolated kind without making
the bread-and-butter build and test configuration more complicated.

Просмотреть файл

@ -14,6 +14,38 @@ from .optimize import optimize_task_graph
logger = logging.getLogger(__name__)
class Kind(object):
    """
    A single task kind, as described by a parsed ``kind.yml``.

    A Kind records its name, the path it was loaded from, and its
    configuration.  The implementation class named by the configuration's
    ``implementation`` key is only resolved when it is needed, so kinds can be
    constructed and ordered before any of their tasks are generated.
    """

    def __init__(self, name, path, config):
        """
        Store the kind's `name`, the `path` it was loaded from, and its
        parsed `config`.
        """
        self.name = name
        self.path = path
        self.config = config

    def _get_impl_class(self):
        """
        Resolve and return the object named by ``config['implementation']``.

        The value must have the form ``<module-path>:<object-path>``; both
        parts may be dotted.

        Raises KeyError if the configuration has no ``implementation`` key,
        and TypeError if the value does not have the ``module:object`` form.
        Import and attribute-lookup failures propagate unchanged.
        """
        # imported here so this class does not depend on the module's
        # top-level import block
        import importlib

        try:
            impl = self.config['implementation']
        except KeyError:
            raise KeyError("{!r} does not define implementation".format(self.path))

        # A non-string value (for example an empty YAML key, which parses as
        # None) has no split(); report it as malformed rather than letting an
        # unrelated AttributeError escape.
        try:
            parts = impl.split(':')
        except AttributeError:
            parts = []

        # exactly one ':' is required, with a non-empty name on each side
        if len(parts) != 2 or not all(parts):
            raise TypeError('{!r} implementation does not have the form "module:object"'
                            .format(self.path))
        impl_module, impl_object = parts

        # import_module returns the leaf module of a dotted path, so only the
        # object path remains to be walked
        impl_class = importlib.import_module(impl_module)
        for attr in impl_object.split('.'):
            impl_class = getattr(impl_class, attr)
        return impl_class

    def load_tasks(self, parameters, loaded_tasks):
        """
        Load this kind's tasks by delegating to the implementation class.

        `parameters` and `loaded_tasks` are passed through unchanged, after
        this kind's name, path and config.  Returns whatever the
        implementation's ``load_tasks`` returns.
        """
        impl_class = self._get_impl_class()
        return impl_class.load_tasks(self.name, self.path, self.config,
                                     parameters, loaded_tasks)
class TaskGraphGenerator(object):
"""
The central controller for taskgraph. This handles all phases of graph
@ -122,33 +154,28 @@ class TaskGraphGenerator(object):
with open(kind_yml) as f:
config = yaml.load(f)
# load the class defined by implementation
try:
impl = config['implementation']
except KeyError:
raise KeyError("{!r} does not define implementation".format(kind_yml))
if impl.count(':') != 1:
raise TypeError('{!r} implementation does not have the form "module:object"'
.format(kind_yml))
impl_module, impl_object = impl.split(':')
impl_class = __import__(impl_module)
for a in impl_module.split('.')[1:]:
impl_class = getattr(impl_class, a)
for a in impl_object.split('.'):
impl_class = getattr(impl_class, a)
for task in impl_class.load_tasks(kind_name, path, config, self.parameters):
yield task
yield Kind(kind_name, path, config)
def _run(self):
logger.info("Loading kinds")
# put the kinds into a graph and sort topologically so that kinds are loaded
# in post-order
kinds = {kind.name: kind for kind in self._load_kinds()}
edges = set()
for kind in kinds.itervalues():
for dep in kind.config.get('kind-dependencies', []):
edges.add((kind.name, dep, 'kind-dependency'))
kind_graph = Graph(set(kinds), edges)
logger.info("Generating full task set")
all_tasks = {}
for task in self._load_kinds():
if task.label in all_tasks:
raise Exception("duplicate tasks with label " + task.label)
all_tasks[task.label] = task
for kind_name in kind_graph.visit_postorder():
logger.debug("Loading tasks for kind {}".format(kind_name))
kind = kinds[kind_name]
for task in kind.load_tasks(self.parameters, list(all_tasks.values())):
if task.label in all_tasks:
raise Exception("duplicate tasks with label " + task.label)
all_tasks[task.label] = task
full_task_set = TaskGraph(all_tasks, Graph(set(all_tasks), set()))
yield 'full_task_set', full_task_set

Просмотреть файл

@ -45,7 +45,7 @@ class Task(object):
@classmethod
@abc.abstractmethod
def load_tasks(cls, kind, path, config, parameters):
def load_tasks(cls, kind, path, config, parameters, loaded_tasks):
"""
Load the tasks for a given kind.
@ -58,6 +58,11 @@ class Task(object):
The `parameters` give details on which to base the task generation.
See `taskcluster/docs/parameters.rst` for details.
At the time this method is called, all kinds on which this kind depends
(that is, those specified in the `kind-dependencies` key in `config`)
have already loaded their tasks, and those tasks are available in
the list `loaded_tasks`.
The return value is a list of Task instances.
"""

Просмотреть файл

@ -35,7 +35,7 @@ class DockerImageTask(base.Task):
super(DockerImageTask, self).__init__(*args, **kwargs)
@classmethod
def load_tasks(cls, kind, path, config, params):
def load_tasks(cls, kind, path, config, params, loaded_tasks):
# TODO: make this match the pushdate (get it from a parameter rather than vcs)
pushdate = time.strftime('%Y%m%d%H%M%S', time.gmtime())

Просмотреть файл

@ -306,7 +306,7 @@ class LegacyTask(base.Task):
super(LegacyTask, self).__init__(*args, **kwargs)
@classmethod
def load_tasks(cls, kind, path, config, params):
def load_tasks(cls, kind, path, config, params, loaded_tasks):
root = os.path.abspath(os.path.join(path, config['legacy_path']))
project = params['project']

Просмотреть файл

@ -6,7 +6,7 @@ from __future__ import absolute_import, print_function, unicode_literals
import unittest
from ..generator import TaskGraphGenerator
from ..generator import TaskGraphGenerator, Kind
from .. import graph
from ..kind import base
from mozunit import main
@ -19,9 +19,9 @@ class FakeTask(base.Task):
super(FakeTask, self).__init__(**kwargs)
@classmethod
def load_tasks(cls, kind, path, config, parameters):
def load_tasks(cls, kind, path, config, parameters, loaded_tasks):
return [cls(kind=kind,
label='t-{}'.format(i),
label='{}-t-{}'.format(kind, i),
attributes={'tasknum': str(i)},
task={},
i=i)
@ -30,7 +30,7 @@ class FakeTask(base.Task):
def get_dependencies(self, full_task_set):
i = self.i
if i > 0:
return [('t-{}'.format(i - 1), 'prev')]
return [('{}-t-{}'.format(self.kind, i - 1), 'prev')]
else:
return []
@ -38,67 +38,92 @@ class FakeTask(base.Task):
return False, None
class WithFakeTask(TaskGraphGenerator):
class FakeKind(Kind):
    """
    A Kind whose implementation is always FakeTask, and which records the
    order in which kinds have their tasks loaded.
    """

    # Names of the kinds whose tasks have been loaded, in load order.  This is
    # deliberately shared at class level so a test can inspect the overall
    # order; declaring it here means load_tasks works even when nothing has
    # assigned the attribute beforehand.
    loaded_kinds = []

    def _get_impl_class(self):
        # skip the 'implementation' lookup entirely; tests always use FakeTask
        return FakeTask

    def load_tasks(self, parameters, loaded_tasks):
        # record the load before delegating, so the order is captured even if
        # the underlying load fails
        FakeKind.loaded_kinds.append(self.name)
        return super(FakeKind, self).load_tasks(parameters, loaded_tasks)
class WithFakeKind(TaskGraphGenerator):
def _load_kinds(self):
return FakeTask.load_tasks('fake', '/fake', {}, {})
for kind_name, deps in self.parameters['kinds']:
yield FakeKind(
kind_name, '/fake',
{'kind-dependencies': deps} if deps else {})
class TestGenerator(unittest.TestCase):
def setUp(self):
self.target_tasks = []
def maketgg(self, target_tasks=None, kinds=[('fake', [])]):
FakeKind.loaded_kinds = []
self.target_tasks = target_tasks or []
def target_tasks_method(full_task_graph, parameters):
return self.target_tasks
self.tgg = WithFakeTask('/root', {}, target_tasks_method)
return WithFakeKind('/root', {'kinds': kinds}, target_tasks_method)
def test_kind_ordering(self):
"When task kinds depend on each other, they are loaded in postorder"
self.tgg = self.maketgg(kinds=[
('fake3', ['fake2', 'fake1']),
('fake2', ['fake1']),
('fake1', []),
])
self.tgg._run_until('full_task_set')
self.assertEqual(FakeKind.loaded_kinds, ['fake1', 'fake2', 'fake3'])
def test_full_task_set(self):
"The full_task_set property has all tasks"
self.tgg = self.maketgg()
self.assertEqual(self.tgg.full_task_set.graph,
graph.Graph({'t-0', 't-1', 't-2'}, set()))
self.assertEqual(self.tgg.full_task_set.tasks.keys(),
['t-0', 't-1', 't-2'])
graph.Graph({'fake-t-0', 'fake-t-1', 'fake-t-2'}, set()))
self.assertEqual(sorted(self.tgg.full_task_set.tasks.keys()),
sorted(['fake-t-0', 'fake-t-1', 'fake-t-2']))
def test_full_task_graph(self):
"The full_task_graph property has all tasks, and links"
self.tgg = self.maketgg()
self.assertEqual(self.tgg.full_task_graph.graph,
graph.Graph({'t-0', 't-1', 't-2'},
graph.Graph({'fake-t-0', 'fake-t-1', 'fake-t-2'},
{
('t-1', 't-0', 'prev'),
('t-2', 't-1', 'prev'),
('fake-t-1', 'fake-t-0', 'prev'),
('fake-t-2', 'fake-t-1', 'prev'),
}))
self.assertEqual(self.tgg.full_task_graph.tasks.keys(),
['t-0', 't-1', 't-2'])
self.assertEqual(sorted(self.tgg.full_task_graph.tasks.keys()),
sorted(['fake-t-0', 'fake-t-1', 'fake-t-2']))
def test_target_task_set(self):
"The target_task_set property has the targeted tasks"
self.target_tasks = ['t-1']
self.tgg = self.maketgg(['fake-t-1'])
self.assertEqual(self.tgg.target_task_set.graph,
graph.Graph({'t-1'}, set()))
graph.Graph({'fake-t-1'}, set()))
self.assertEqual(self.tgg.target_task_set.tasks.keys(),
['t-1'])
['fake-t-1'])
def test_target_task_graph(self):
"The target_task_graph property has the targeted tasks and deps"
self.target_tasks = ['t-1']
self.tgg = self.maketgg(['fake-t-1'])
self.assertEqual(self.tgg.target_task_graph.graph,
graph.Graph({'t-0', 't-1'},
{('t-1', 't-0', 'prev')}))
graph.Graph({'fake-t-0', 'fake-t-1'},
{('fake-t-1', 'fake-t-0', 'prev')}))
self.assertEqual(sorted(self.tgg.target_task_graph.tasks.keys()),
sorted(['t-0', 't-1']))
sorted(['fake-t-0', 'fake-t-1']))
def test_optimized_task_graph(self):
"The optimized task graph contains task ids"
self.target_tasks = ['t-2']
self.tgg = self.maketgg(['fake-t-2'])
tid = self.tgg.label_to_taskid
self.assertEqual(
self.tgg.optimized_task_graph.graph,
graph.Graph({tid['t-0'], tid['t-1'], tid['t-2']}, {
(tid['t-1'], tid['t-0'], 'prev'),
(tid['t-2'], tid['t-1'], 'prev'),
})
)
graph.Graph({tid['fake-t-0'], tid['fake-t-1'], tid['fake-t-2']}, {
(tid['fake-t-1'], tid['fake-t-0'], 'prev'),
(tid['fake-t-2'], tid['fake-t-1'], 'prev'),
}))
if __name__ == '__main__':
main()