From 27d78d7059ecca9da6840779b5e400f40e9676cd Mon Sep 17 00:00:00 2001
From: Andrew Halberstadt <ahalberstadt@mozilla.com>
Date: Thu, 4 Jun 2020 18:45:09 +0000
Subject: [PATCH] Bug 1633866 - [taskgraph] Create a 'bugbug' based test
 manifest loader, r=marco

Loads manifests using bugbug's push/schedules endpoint. For now we use a low
confidence threshold to select manifests. This is to try to both:

1) Improve regression detection rate
2) Save resources

at the same time. This way we theoretically shouldn't regress either dimension.

Note that we still optimize with CT_MEDIUM. My thinking is that at least all
of the "extra" stuff is still guaranteed to be relevant. Though we may want to
consider making these two thresholds match.

Differential Revision: https://phabricator.services.mozilla.com/D76523
---
 taskcluster/taskgraph/transforms/tests.py | 10 +++++++-
 taskcluster/taskgraph/util/chunking.py    | 28 +++++++++++++++++++++--
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/taskcluster/taskgraph/transforms/tests.py b/taskcluster/taskgraph/transforms/tests.py
index ebedfc1209b8..f04dec2935a5 100644
--- a/taskcluster/taskgraph/transforms/tests.py
+++ b/taskcluster/taskgraph/transforms/tests.py
@@ -1403,7 +1403,8 @@ def set_test_manifests(config, tasks):
 
         mozinfo = guess_mozinfo_from_task(task)
 
-        loader = manifest_loaders[config.params['test_manifest_loader']]
+        loader_cls = manifest_loaders[config.params['test_manifest_loader']]
+        loader = loader_cls(config.params)
         task['test-manifests'] = loader.get_manifests(
             task['suite'],
             frozenset(mozinfo.items()),
@@ -1414,6 +1415,13 @@ def set_test_manifests(config, tasks):
         if not task['test-manifests']['active'] and not task['test-manifests']['skipped']:
             continue
 
+        # The default loader loads all manifests. If we use a non-default
+        # loader, we'll only run some subset of manifests and the hardcoded
+        # chunk numbers will no longer be valid. Dynamic chunking should yield
+        # better results.
+        if config.params['test_manifest_loader'] != 'default':
+            task['chunks'] = "dynamic"
+
         yield task
 
 
diff --git a/taskcluster/taskgraph/util/chunking.py b/taskcluster/taskgraph/util/chunking.py
index fa4602f533c6..fd63222d8cc3 100644
--- a/taskcluster/taskgraph/util/chunking.py
+++ b/taskcluster/taskgraph/util/chunking.py
@@ -22,6 +22,7 @@ from moztest.resolve import (
 )
 
 from taskgraph import GECKO
+from taskgraph.util.bugbug import CT_LOW, push_schedules
 
 here = os.path.abspath(os.path.dirname(__file__))
 resolver = TestResolver.from_environment(cwd=here, loader_cls=TestManifestLoader)
@@ -93,7 +94,8 @@ def chunk_manifests(suite, platform, chunks, manifests):
         A list of length `chunks` where each item contains a list of manifests
         that run in that chunk.
     """
-    runtimes = get_runtimes(platform, suite)
+    manifests = set(manifests)
+    runtimes = {k: v for k, v in get_runtimes(platform, suite).items() if k in manifests}
 
     if "web-platform-tests" not in suite:
         return [
@@ -154,6 +156,10 @@ def chunk_manifests(suite, platform, chunks, manifests):
 
 @six.add_metaclass(ABCMeta)
 class BaseManifestLoader(object):
+
+    def __init__(self, params):
+        self.params = params
+
     @abstractmethod
     def get_manifests(self, flavor, subsuite, mozinfo):
         """Compute which manifests should run for the given flavor, subsuite and mozinfo.
@@ -237,6 +243,24 @@ class DefaultLoader(BaseManifestLoader):
         return {"active": list(active), "skipped": list(skipped)}
 
 
+class BugbugLoader(DefaultLoader):
+    """Load manifests using metadata from the TestResolver, and then
+    filter them based on a query to bugbug."""
+    CONFIDENCE_THRESHOLD = CT_LOW
+
+    @memoize
+    def get_manifests(self, suite, mozinfo):
+        manifests = super(BugbugLoader, self).get_manifests(suite, mozinfo)
+
+        data = push_schedules(self.params['project'], self.params['head_rev'])
+        bugbug_manifests = {m for m, c in data.get('groups', {}).items()
+                            if c >= self.CONFIDENCE_THRESHOLD}
+
+        manifests['active'] = list(set(manifests['active']) & bugbug_manifests)
+        return manifests
+
+
 manifest_loaders = {
-    'default': DefaultLoader(),
+    'bugbug': BugbugLoader,
+    'default': DefaultLoader,
 }