Refactor push_health.py recipe logic into a reusable 'Push' class

This commit is contained in:
Andrew Halberstadt 2019-05-23 09:43:36 -04:00
Родитель 2ac5074d83
Коммит b5e4451eae
4 изменённых файлов: 287 добавлений и 101 удалений

1
ci_info/__init__.py Normal file
Просмотреть файл

@ -0,0 +1 @@
from .push import Push

268
ci_info/push.py Normal file
Просмотреть файл

@ -0,0 +1,268 @@
from argparse import Namespace
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List
import requests
from adr.query import run_query
from adr.util.memoize import memoize, memoized_property
HGMO_JSON_URL = "https://hg.mozilla.org/integration/{branch}/rev/{rev}?style=json"
class Status(Enum):
    """Overall outcome of a task label on a push."""

    # Every run of the label passed.
    PASS = 0
    # Every run of the label failed.
    FAIL = 1
    # Runs of the label disagreed with each other (some passed, some failed).
    INTERMITTENT = 2
@dataclass
class Task:
    """Record describing one task that ran on a push.

    Instances are built from keyword arguments, one per field below.
    """

    # Task label; retriggers of the same task share a label.
    label: str
    # Task run time (summed and divided by 3600 elsewhere to obtain hours).
    duration: int
    # Result string, e.g. 'busted', 'exception' or 'testfailed' for failures.
    result: str
    # Classification string, e.g. 'not classified' or 'fixed by commit'.
    classification: str
@dataclass
class LabelSummary:
    """Aggregate view of every task sharing one label (i.e. across retriggers)."""

    # The label common to all tasks in this summary.
    label: str
    # Every task that ran with that label.
    tasks: List[Task]

    def __post_init__(self):
        # Sanity check: a summary must never mix tasks from different labels.
        assert all(task.label == self.label for task in self.tasks)

    @property
    def classifications(self):
        """The distinct classifications found among the tasks (a set)."""
        return {task.classification for task in self.tasks}

    @property
    def results(self):
        """The distinct results found among the tasks (a set)."""
        return {task.result for task in self.tasks}

    @memoized_property
    def status(self):
        """The combined status of the label.

        Returns:
            Status or None: FAIL if every task failed, PASS if every task
                passed, INTERMITTENT if the tasks disagree, and None when
                there are no tasks at all.
        """
        failing_results = ('busted', 'exception', 'testfailed')
        observed = {
            Status.FAIL if task.result in failing_results else Status.PASS
            for task in self.tasks
        }
        if not observed:
            return None
        if len(observed) > 1:
            # Both passing and failing runs were seen.
            return Status.INTERMITTENT
        return observed.pop()
class Push:
    """A single push to a branch, with accessors for its CI task information.

    Data comes from three places: the hg.mozilla.org JSON view of the
    revision (`_hgmo`), the 'push_results' query (`tasks`) and the
    'decision_artifacts' query plus the artifacts it points at.
    """

    def __init__(self, rev, branch='autoland'):
        """A representation of a single push.

        Args:
            rev (str): Revision of the top-most commit in the push.
            branch (str): Branch to look on (default: autoland).
        """
        self.rev = rev
        self.branch = branch

    @property
    def backedoutby(self):
        """The revision of the commit which backs out this one or None.

        Returns:
            str or None: The commit revision which backs this push out (or None).
        """
        # A missing key and a falsy value are both normalized to None.
        return self._hgmo.get('backedoutby') or None

    @property
    def backedout(self):
        """Whether the push was backed out or not.

        Returns:
            bool: True if this push was backed out.
        """
        return bool(self.backedoutby)

    @property
    def pushid(self):
        """The push id.

        Returns:
            int: The push id.
        """
        return self._hgmo['pushid']

    @property
    def parent(self):
        """Returns the parent push of this push.

        Walks back through parent commits until one is found whose push id
        differs from this push's id.

        Returns:
            Push: A `Push` instance representing the parent push.

        Raises:
            ValueError: If a commit with no parents is reached before a
                different push is found.
        """
        # Start at this push's own revision and step backwards from there.
        other = self
        while True:
            parent_revs = other._hgmo['parents']
            if not parent_revs:
                # Without this guard the loop would spin forever.
                raise ValueError(
                    "could not find a parent push for revision {}".format(self.rev)
                )

            for rev in parent_revs:
                # Look the parent up on the same branch as this push.
                parent = Push(rev, branch=self.branch)
                if parent.pushid != self.pushid:
                    return parent
                # Same push id means the commit still belongs to this push;
                # keep walking back from it.
                other = parent

    @memoized_property
    def tasks(self):
        """All tasks that ran on the push, including retriggers and backfills.

        Returns:
            list: A list of `Task` objects.
        """
        args = Namespace(rev=self.rev)
        data = run_query('push_results', args)['data']
        return [Task(**kwargs) for kwargs in data]

    @property
    def task_labels(self):
        """The set of task labels that ran on this push.

        Returns:
            set: A set of task labels (str).
        """
        return {t.label for t in self.tasks}

    @memoized_property
    def target_task_labels(self):
        """The set of all task labels that could possibly run on this push.

        Built from the Decision task's 'target-tasks.json' artifact.

        Returns:
            set: A set of task labels.
        """
        return set(self._get_decision_artifact('target-tasks.json'))

    @memoized_property
    def scheduled_task_labels(self):
        """The set of task labels that were originally scheduled to run on this push.

        Built from the Decision task's 'task-graph.json' artifact, so it
        excludes retriggers and backfills.

        Returns:
            set: A set of task labels (str).
        """
        tasks = self._get_decision_artifact('task-graph.json').values()
        return {t['label'] for t in tasks}

    @property
    def unscheduled_task_labels(self):
        """The set of task labels from tasks that were not originally scheduled on
        the push (i.e. they were scheduled via backfill or Add New Jobs).

        Returns:
            set: A set of task labels (str).
        """
        return self.task_labels - self.scheduled_task_labels

    @memoized_property
    def label_summaries(self):
        """All label summaries combining retriggers.

        Returns:
            dict: A dictionary of the form {<label>: <LabelSummary>}.
        """
        tasks_by_label = defaultdict(list)
        for task in self.tasks:
            tasks_by_label[task.label].append(task)
        return {
            label: LabelSummary(label, tasks)
            for label, tasks in tasks_by_label.items()
        }

    @memoized_property
    def duration(self):
        """The total duration of all tasks that ran on the push.

        Returns:
            int: Runtime in hours (truncated to a whole number).
        """
        return int(sum(t.duration for t in self.tasks) / 3600)

    @memoized_property
    def scheduled_duration(self):
        """The total runtime of tasks, counting each label only once.

        Only the first task seen for each label contributes, so retriggers
        are left out of the total.

        Returns:
            int: Runtime in hours (truncated to a whole number).
        """
        # NOTE(review): labels are not checked against `scheduled_task_labels`,
        # so a label that only ran via backfill is still counted once here.
        seen = set()
        duration = 0
        for task in self.tasks:
            if task.label not in seen:
                seen.add(task.label)
                duration += task.duration
        return int(duration / 3600)

    @memoized_property
    def regressions(self):
        """The set of all task labels that were regressed by this push.

        A label counts as regressed when its summary status is not PASS and
        at least one of its classifications is 'not classified' or
        'fixed by commit'.

        Returns:
            set: Set of task labels (str).
        """
        regressions = set()
        for label, summary in self.label_summaries.items():
            if summary.status == Status.PASS:
                continue

            if any(c in ('not classified', 'fixed by commit') for c in summary.classifications):
                regressions.add(label)
        return regressions

    @property
    def regressions_missed(self):
        """The set of all task labels that were regressed by this push and were
        not caught by a task that was initially scheduled. E.g. the regression
        was found by a retrigger/backfill.

        Returns:
            set: Set of task labels (str).
        """
        return self.regressions - self.scheduled_task_labels

    @property
    def regressions_caught(self):
        """The set of all task labels that were regressed by this push and were
        caught by a task that was initially scheduled.

        Returns:
            set: Set of task labels (str).
        """
        return self.regressions & self.scheduled_task_labels

    @memoized_property
    def _decision_artifact_urls(self):
        """All artifact urls from the Decision task of this push.

        Returns:
            list: A list of urls.
        """
        return run_query('decision_artifacts', Namespace(rev=self.rev))['data'][0]['artifacts']

    @memoize
    def _get_decision_artifact(self, name):
        """Get an artifact from the Decision task of this push.

        Args:
            name (str): Name of the artifact to fetch (the last path
                component of its url).

        Returns:
            dict or None: JSON representation of the artifact, or None when
                no artifact url ends with `name`.
        """
        for url in self._decision_artifact_urls:
            if url.rsplit('/', 1)[1] == name:
                return requests.get(url).json()
        return None

    @memoized_property
    def _hgmo(self):
        """A JSON dict obtained from hg.mozilla.org.

        Returns:
            dict: Information regarding this push.
        """
        url = HGMO_JSON_URL.format(branch=self.branch, rev=self.rev)
        return requests.get(url).json()

Просмотреть файл

@ -4,6 +4,7 @@ version = "0.1.0"
description = "Collection of CI related ActiveData recipes."
packages = [
{ include = "ci_info" },
{ include = "recipes" },
]
include = [

Просмотреть файл

@ -1,119 +1,35 @@
"""
Show information related to how "healthy" a push looks. Only works with
autoland for now.
Show information related to how "healthy" a push looks (autoland only).
.. code-block:: bash
adr push_health -r <revision>
"""
from argparse import Namespace
from collections import defaultdict, namedtuple
from dataclasses import dataclass
from enum import Enum
import requests
from adr.query import run_query
from adr.util import memoize
class Status(Enum):
    """Outcome recorded for a task label while summarizing a push."""

    # The label has only been seen passing.
    PASS = 0
    # The label has only been seen failing.
    FAIL = 1
    # The label has been seen both passing and failing.
    INTERMITTENT = 2
@dataclass
class Task:
    """Record describing one task returned by the 'push_results' query."""

    # Task label; retriggers of the same task share a label.
    label: str
    # Task run time (divided by 3600 elsewhere to obtain hours).
    duration: int
    # Raw result string (compared against 'busted', 'exception' and
    # 'testfailed' by the consumer), so this is a str rather than a Status.
    result: str
    # Classification string, e.g. 'not classified' or 'fixed by commit'.
    classification: str
@memoize
def get_decision_artifact_urls(rev):
    """Return all artifact urls from the Decision task of the given revision.

    The function is wrapped in `memoize`, so the 'decision_artifacts' query
    is intended to run only once for any given revision.

    Args:
        rev (str): Revision associated with the push on treeherder.

    Returns:
        list: List of artifact urls.
    """
    query_args = Namespace(rev=rev)
    response = run_query('decision_artifacts', query_args)
    # Only the first row of the result is consulted.
    first_row = response['data'][0]
    return first_row['artifacts']
def get_decision_artifact(rev, name):
    """Get an artifact from the Decision task of the given revision.

    Args:
        rev (str): Revision associated with the push on treeherder.
        name (str): Name of the artifact to fetch (the last path component
            of its url).

    Returns:
        dict: JSON representation of the artifact, or None when no artifact
            url ends with `name`.
    """
    # Lazily scan the urls and stop at the first one whose file name matches.
    matching_urls = (
        candidate
        for candidate in get_decision_artifact_urls(rev)
        if candidate.rsplit('/', 1)[1] == name
    )
    url = next(matching_urls, None)
    if url is None:
        return None
    return requests.get(url).json()
from ci_info import Push
def run(args):
    """Build the push health table for a single revision.

    Args:
        args: Parsed arguments; only `args.rev` (the push revision) is read.

    Returns:
        list: Two rows - the column headers followed by the values for the
            push identified by `args.rev`.
    """
    push = Push(args.rev)

    num_scheduled = len(push.scheduled_task_labels)
    num_total = len(push.target_task_labels)
    # Share of the possible tasks that were actually scheduled, in percent.
    percentage = round(float(num_scheduled) / num_total * 100, 1)

    return [[
        'Tasks Scheduled',
        'Tasks Total',
        'Percentage',
        'Total Hours',
        'Backed Out',
        'Regressions Caught',
        'Regressions Missed',
    ], [
        num_scheduled,
        num_total,
        percentage,
        push.scheduled_duration,
        push.backedout,
        len(push.regressions_caught),
        len(push.regressions_missed),
    ]]