diff --git a/ci_info/push.py b/ci_info/push.py index 30e4832..99e2271 100644 --- a/ci_info/push.py +++ b/ci_info/push.py @@ -11,10 +11,16 @@ from adr.util.memoize import memoize, memoized_property from loguru import logger HGMO_JSON_URL = "https://hg.mozilla.org/integration/{branch}/rev/{rev}?style=json" +HGMO_JSON_PUSHES_URL = "https://hg.mozilla.org/integration/{branch}/json-pushes?version=2&startID={push_id_start}&endID={push_id_end}" TASKGRAPH_ARTIFACT_URL = "https://index.taskcluster.net/v1/task/gecko.v2.autoland.revision.{rev}.taskgraph.decision/artifacts/public/{artifact}" SHADOW_SCHEDULER_ARTIFACT_URL = "https://index.taskcluster.net/v1/task/gecko.v2.autoland.revision.{rev}.source/shadow-scheduler-{name}/artifacts/public/shadow-scheduler/optimized_tasks.list" +# The maximum number of parents or children to look for previous/next task runs, +# when the task did not run on the currently considered push. +MAX_DEPTH = 14 + + class Status(Enum): PASS = 0 FAIL = 1 @@ -130,6 +136,20 @@ class Push: self._id = self._hgmo['pushid'] return self._id + def create_push(self, push_id): + url = HGMO_JSON_PUSHES_URL.format(branch=self.branch, push_id_start=push_id - 1, push_id_end=push_id) + print(url) + r = requests.get(url) + r.raise_for_status() + result = r.json()["pushes"][str(push_id)] + + push = Push(result["changesets"][::-1]) + # avoids the need to query hgmo to find this info + push._id = push_id + push._date = result["date"] + + return push + @memoized_property def parent(self): """Returns the parent push of this push. @@ -137,13 +157,16 @@ class Push: Returns: Push: A `Push` instance representing the parent push. """ - other = self - while True: - for rev in other._hgmo['parents']: - parent = Push(rev) - if parent.id != self.id: - return parent - other = parent + return self.create_push(self.id - 1) + + @memoized_property + def child(self): + """Returns the child push of this push. + + Returns: + Push: A `Push` instance representing the child push. + """ + return self.create_push(self.id + 1) @memoized_property def tasks(self): @@ -263,15 +286,32 @@ class Push: set: Set of task labels (str). """ failclass = ('not classified', 'fixed by commit') - candidate_regressions = set() - for label, summary in self.label_summaries.items(): - if summary.status == Status.PASS: - continue - if all(c not in failclass for c in summary.classifications): - continue + passing_labels = set() + candidate_regressions = {} + + count = 0 + other = self + while count < MAX_DEPTH + 1: + for label, summary in other.label_summaries.items(): + if label in passing_labels: + # It passed in one of the pushes between the current and its + # children, so it is definitely not a regression in the current. + continue + + if summary.status == Status.PASS: + passing_labels.add(label) + continue + + if all(c not in failclass for c in summary.classifications): + passing_labels.add(label) + continue + + candidate_regressions[label] = count + + other = other.child + count += 1 - candidate_regressions.add(label) return candidate_regressions @memoized_property @@ -279,26 +319,24 @@ class Push: """All regressions, both likely and definite. Each regression is associated with an integer, which is the number of - parent pushes that didn't run the label. A count of 0 means the label - passed on the previous push. A count of 3 means there were three pushes - between this one and the last time the task passed (so any one of them - could have caused it). A count of 99 means that the maximum number of - parents were searched without finding the task and we gave up. + parent and children pushes that didn't run the label. A count of 0 means + the label failed on the current push and passed on the previous push. + A count of 3 means there were three pushes between the failure and the + last time the task passed (so any one of them could have caused it). + A count of MAX_DEPTH means that the maximum number of parents were + searched without finding the task and we gave up. Returns: dict: A dict of the form {