Bug 1629642 - [taskgraph] Add an option to sum confidence thresholds of manifests in bugbug scheduler, r=marco

This adds a parameter that causes a task to combine the confidences of all the relevant manifests it contains, yielding a larger overall task confidence than any single manifest provides. This also adds a new strategy and shadow-scheduler to go along with it. Differential Revision: https://phabricator.services.mozilla.com/D71314
2020-04-24 10:20:06 +00:00 · 2020-04-24 10:20:06 +00:00 · 542d9d9bb9
--- a/taskcluster/ci/source-test/shadow-scheduler.yml
+++ b/taskcluster/ci/source-test/shadow-scheduler.yml
@ -44,17 +44,6 @@ bugbug_all:
        env:
            TASKGRAPH_OPTIMIZE_STRATEGIES: taskgraph.optimize:experimental.bugbug_all

-bugbug_debug:
-    description: Runs the bugbug_debug optimization strategy instead of the default.
-    treeherder:
-        symbol: SS(bugbug_debug)
-    index:
-        product: source
-        job-name: shadow-scheduler-bugbug_debug
-    worker:
-        env:
-            TASKGRAPH_OPTIMIZE_STRATEGIES: taskgraph.optimize:experimental.bugbug_debug
-
 bugbug_all_low:
    description: Runs the bugbug_all optimization strategy (with a low confidence threshold) instead of the default.
    treeherder:
@ -77,6 +66,28 @@ bugbug_all_high:
        env:
            TASKGRAPH_OPTIMIZE_STRATEGIES: taskgraph.optimize:experimental.bugbug_all_high

+bugbug_combined_high:
+    description: Runs the bugbug_combined_high optimization strategy instead of the default.
+    treeherder:
+        symbol: SS(bugbug_combined_high)
+    index:
+        product: source
+        job-name: shadow-scheduler-bugbug_combined_high
+    worker:
+        env:
+            TASKGRAPH_OPTIMIZE_STRATEGIES: taskgraph.optimize:experimental.bugbug_combined_high
+
+bugbug_debug:
+    description: Runs the bugbug_debug optimization strategy instead of the default.
+    treeherder:
+        symbol: SS(bugbug_debug)
+    index:
+        product: source
+        job-name: shadow-scheduler-bugbug_debug
+    worker:
+        env:
+            TASKGRAPH_OPTIMIZE_STRATEGIES: taskgraph.optimize:experimental.bugbug_debug
+
 bugbug_reduced:
    description: Runs the bugbug_reduced optimization strategy instead of the default.
    treeherder:
--- a/taskcluster/taskgraph/optimize/__init__.py
+++ b/taskcluster/taskgraph/optimize/__init__.py
@ -408,6 +408,11 @@ class experimental(object):
    }
    """Doesn't limit platforms, high confidence threshold."""

+    bugbug_combined_high = {
+        'test': Any('skip-unless-schedules', 'bugbug-high'),
+    }
+    """Combines the weights of all groups, high confidence threshold."""
+
    bugbug_debug = {
        'test': Any('skip-unless-schedules', 'bugbug', 'platform-debug'),
    }
--- a/taskcluster/taskgraph/optimize/bugbug.py
+++ b/taskcluster/taskgraph/optimize/bugbug.py
@ -17,6 +17,11 @@ from taskgraph.util.taskcluster import requests_retry_session

 logger = logging.getLogger(__name__)

+# Preset confidence thresholds.
+CT_LOW = 0.5
+CT_MEDIUM = 0.7
+CT_HIGH = 0.9
+

 class BugbugTimeoutException(Exception):
    pass
@ -30,25 +35,32 @@ class SkipUnlessDebug(OptimizationStrategy):
        return not (task.attributes.get('build_type') == "debug")


-@register_strategy("bugbug", args=(0.7,))
-@register_strategy("bugbug-low", args=(0.5,))
-@register_strategy("bugbug-high", args=(0.9,))
-@register_strategy("bugbug-reduced", args=(0.7, True))
-@register_strategy("bugbug-reduced-high", args=(0.9, True))
+@register_strategy("bugbug", args=(CT_MEDIUM,))
+@register_strategy("bugbug-combined-high", args=(CT_HIGH, False, True))
+@register_strategy("bugbug-low", args=(CT_LOW,))
+@register_strategy("bugbug-high", args=(CT_HIGH,))
+@register_strategy("bugbug-reduced", args=(CT_MEDIUM, True))
+@register_strategy("bugbug-reduced-high", args=(CT_HIGH, True))
 class BugBugPushSchedules(OptimizationStrategy):
    """Query the 'bugbug' service to retrieve relevant tasks and manifests.

    Args:
        confidence_threshold (float): The minimum confidence threshold (in
            range [0, 1]) needed for a task to be scheduled.
+        use_reduced_tasks (bool): Whether or not to use the reduced set of tasks
+            provided by the bugbug service (default: False).
+        combine_weights (bool): If True, combine the confidences of all
+            groups within a task to find the overall task confidence. Otherwise
+            the maximum group confidence is used (default: False).
    """
    BUGBUG_BASE_URL = "https://bugbug.herokuapp.com"
    RETRY_TIMEOUT = 4 * 60  # seconds
    RETRY_INTERVAL = 5      # seconds

-    def __init__(self, confidence_threshold, use_reduced_tasks=False):
+    def __init__(self, confidence_threshold, use_reduced_tasks=False, combine_weights=False):
        self.confidence_threshold = confidence_threshold
        self.use_reduced_tasks = use_reduced_tasks
+        self.combine_weights = combine_weights

    @memoized_property
    def session(self):
@ -86,11 +98,6 @@ class BugBugPushSchedules(OptimizationStrategy):
        rev = params['head_rev']
        data = self.run_query('/push/{branch}/{rev}/schedules'.format(branch=branch, rev=rev))

-        groups = set(
-            group
-            for group, confidence in data.get('groups', {}).items()
-            if confidence >= self.confidence_threshold
-        )
        if not self.use_reduced_tasks:
            tasks = set(
                task
@ -109,7 +116,21 @@ class BugBugPushSchedules(OptimizationStrategy):
            if task.label not in tasks:
                return True

-        elif not bool(set(task.attributes['test_manifests']) & groups):
-            return True
-
            return False
+
+        # If a task contains more than one group, work out its overall
+        # confidence. If 'self.combine_weights' is set, combine the group
+        # confidences as a + b - a*b, rounded to 2 places. Otherwise use the max.
+        task_confidence = 0
+        for group, confidence in data.get("groups", {}).items():
+            if group not in test_manifests:
+                continue
+
+            if self.combine_weights:
+                task_confidence = round(
+                    task_confidence + confidence - task_confidence * confidence, 2
+                )
+            else:
+                task_confidence = max(task_confidence, confidence)
+
+        return task_confidence < self.confidence_threshold
--- a/taskcluster/taskgraph/test/test_optimize_strategies.py
+++ b/taskcluster/taskgraph/test/test_optimize_strategies.py
@ -98,6 +98,23 @@ def test_optimization_strategy(responses, params, tasks, strategy, expected):
        ['task-0'],
    ),

+    # tasks containing multiple groups have a higher overall confidence with combine_weights
+    pytest.param(
+        (0.75, False, False),
+        {'groups': {'foo/test.ini': 0.5, 'bar/test.ini': 0.5}},
+        [],
+    ),
+    pytest.param(
+        (0.75, False, True),
+        {'groups': {'foo/test.ini': 0.5, 'bar/test.ini': 0.5}},
+        ['task-0'],
+    ),
+    pytest.param(
+        (0.76, False, True),
+        {'groups': {'foo/test.ini': 0.5, 'bar/test.ini': 0.5}},
+        [],
+    ),
+
    # tasks matching "tasks" or "groups" selected
    pytest.param(
        (0.1,),