Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal

This adds some new optimization strategies. For tests, we use Either(SETA, SkipUnlessSchedules), thereby giving both mechanisms a chance to skip tasks. On try, SETA is omitted. MozReview-Commit-ID: GL4tlwyeBa6 --HG-- extra : rebase_source : 4cf3efc9c57bb14d2f44147c8881d0a0a18703d6 extra : source : 046d705929f7a41e977eec19c8503afccdec7592
2017-08-23 16:21:06 +00:00 · 2017-08-23 16:21:06 +00:00 · 0ad6fa01b1
--- a/build/sparse-profiles/taskgraph
+++ b/build/sparse-profiles/taskgraph
@ -22,6 +22,10 @@ path:tools/lint/
 # for new-style try pushes
 path:try_task_config.json
 # Moz.build files are read in filesystem mode
 glob:**/moz.build
 glob:**/*.mozbuild
 # Tooltool manifests also need to be opened. Assume they
 # are all somewhere in "tooltool-manifests" directories.
 glob:**/tooltool-manifests/**
--- a/taskcluster/taskgraph/optimize.py
+++ b/taskcluster/taskgraph/optimize.py
@ -24,10 +24,14 @@ from .taskgraph import TaskGraph
 from .util.seta import is_low_value_task
 from .util.taskcluster import find_task_id
 from .util.parameterization import resolve_task_references
 from mozbuild.util import memoize
 from slugid import nice as slugid
 from mozbuild.frontend import reader
 # module-level logger, named after this module
 logger = logging.getLogger(__name__)
 # absolute form of '../../../' resolved against this file's own path;
 # presumably the root of the source checkout -- confirm against the repo layout
 TOPSRCDIR = os.path.abspath(os.path.join(__file__, '../../../'))
 def optimize_task_graph(target_task_graph, params, do_not_optimize,
                        existing_tasks=None, strategies=None):
@ -71,6 +75,8 @@ def _make_default_strategies():
        'index-search': IndexSearch(),
        'seta': SETA(),
        'skip-unless-changed': SkipUnlessChanged(),
        'skip-unless-schedules': SkipUnlessSchedules(),
        'skip-unless-schedules-or-seta': Either(SkipUnlessSchedules(), SETA()),
    }
@ -244,6 +250,37 @@ class OptimizationStrategy(object):
        return False
 class Either(OptimizationStrategy):
    """Combine several optimization strategies into a single strategy.

    The substrategies are consulted in the order they were given.  A task is
    removed as soon as one of them asks for removal, and it is replaced with
    the first replacement any of them finds.  Each substrategy normally
    receives the same arg; pass a `split_args` callable to map the incoming
    arg to a list holding one arg per substrategy instead."""

    def __init__(self, *substrategies, **kwargs):
        self.substrategies = substrategies
        # fall back to handing the very same arg to every substrategy
        self.split_args = kwargs.pop('split_args', None) or (
            lambda arg: [arg] * len(substrategies))
        # 'split_args' is the only keyword this constructor understands
        if kwargs:
            raise TypeError("unexpected keyword args")

    def _for_substrategies(self, arg, fn):
        """Call fn(substrategy, its_arg) for each substrategy in turn and
        return the first truthy result, or False if there is none."""
        per_strategy_args = self.split_args(arg)
        for strategy, strategy_arg in zip(self.substrategies, per_strategy_args):
            result = fn(strategy, strategy_arg)
            if result:
                return result
        return False

    def should_remove_task(self, task, params, arg):
        def ask(strategy, strategy_arg):
            return strategy.should_remove_task(task, params, strategy_arg)

        return self._for_substrategies(arg, ask)

    def should_replace_task(self, task, params, arg):
        def ask(strategy, strategy_arg):
            return strategy.should_replace_task(task, params, strategy_arg)

        return self._for_substrategies(arg, ask)
 class IndexSearch(OptimizationStrategy):
    def should_remove_task(self, task, params, index_paths):
        "If this task has no dependencies, don't run it.."
@ -300,3 +337,30 @@ class SkipUnlessChanged(OptimizationStrategy):
                         task.label)
            return True
        return False
 class SkipUnlessSchedules(OptimizationStrategy):
    """Remove a task unless the push schedules one of the components the task
    is conditioned on."""

    @memoize
    def scheduled_by_push(self, repository, revision):
        """Return the set of SCHEDULES components collected from the files
        changed by `revision` of `repository`."""
        touched = files_changed.get_changed_files(repository, revision)
        build_reader = reader.BuildReader(reader.EmptyConfig(TOPSRCDIR))
        scheduled = set()
        for _path, info in build_reader.files_info(touched).items():
            scheduled.update(info['SCHEDULES'].components)
        return scheduled

    def should_remove_task(self, task, params, conditions):
        """Return True when none of the `conditions` components is scheduled
        by the push described in `params`."""
        # a pushlog_id of -1 means this strategy never removes the task
        if params.get('pushlog_id') == -1:
            return False
        scheduled = self.scheduled_by_push(params['head_repository'], params['head_rev'])
        # if *any* of the condition components are scheduled, do not optimize
        return not (set(conditions) & scheduled)
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@ -18,6 +18,7 @@ import time
 from copy import deepcopy
 from mozbuild.util import memoize
 from mozbuild import schedules
 from taskgraph.util.attributes import TRUNK_PROJECTS
 from taskgraph.util.hash import hash_path
 from taskgraph.util.treeherder import split_symbol
@ -179,6 +180,10 @@ task_description_schema = Schema({
        {'seta': None},
        # skip this task if none of the given file patterns match
        {'skip-unless-changed': [basestring]},
        # skip this task unless the changed files' SCHEDULES contains any of these components
        {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)},
        # skip if SETA or skip-unless-schedules says to
        {'skip-unless-schedules-or-seta': list(schedules.ALL_COMPONENTS)},
    ),
    # the provisioner-id/worker-type for the task.  The following parameters will