Bug 1571364 - Synthesize backfill report

This commit is contained in:
octavian-negru 2019-09-16 16:53:05 +03:00 коммит произвёл ionutgoldan
Родитель 9b095eb04b
Коммит 185ac4e43f
3 изменённых файлов: 139 добавлений и 26 удалений

Просмотреть файл

@ -5,7 +5,7 @@ from operator import itemgetter as get_key
import pytest
from treeherder.model.models import Job
from treeherder.perf.alerts import IdentifyingRetriggerables
from treeherder.perf.alerts import IdentifyAlertRetriggerables
from treeherder.perf.models import PerformanceDatum
NON_RETRIGGERABLE_JOB_ID = 9
@ -125,18 +125,18 @@ def prepare_graph_data_scenario(push_ids_to_keep, highlighted_push_id, perf_aler
# Unit tests
def test_identifying_retriggerables_as_unit():
def test_identify_retriggerables_as_unit():
# basic instantiation & usage
one_day = datetime.timedelta(days=1)
with pytest.raises(ValueError):
_ = IdentifyingRetriggerables(range_width=0, time_interval=one_day)
_ = IdentifyAlertRetriggerables(max_data_points=0, time_interval=one_day)
with pytest.raises(ValueError):
_ = IdentifyingRetriggerables(range_width=4, time_interval=one_day)
_ = IdentifyAlertRetriggerables(max_data_points=4, time_interval=one_day)
with pytest.raises(TypeError):
_ = IdentifyingRetriggerables(range_width=5, time_interval=1)
_ = IdentifyAlertRetriggerables(max_data_points=5, time_interval=1)
# its small private methods
annotated_data_points = [
@ -147,7 +147,7 @@ def test_identifying_retriggerables_as_unit():
{'job_id': 5, 'push_id': 3},
{'job_id': 6, 'push_id': 3},
]
operation = IdentifyingRetriggerables(range_width=5, time_interval=one_day)
operation = IdentifyAlertRetriggerables(max_data_points=5, time_interval=one_day)
flattened_data_points = operation._one_data_point_per_push(annotated_data_points)
push_counter = Counter([data_point['push_id'] for data_point in flattened_data_points])
@ -165,8 +165,8 @@ def test_identifying_retriggerables_as_unit():
# Component tests
def test_identifying_retriggerables_selects_all_data_points(gapped_performance_data, test_perf_alert):
identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL)
def test_identify_retriggerables_selects_all_data_points(gapped_performance_data, test_perf_alert):
identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL)
data_points_to_retrigger = identify_retriggerables(test_perf_alert)
assert len(data_points_to_retrigger) == 5
@ -181,17 +181,17 @@ def test_identifying_retriggerables_selects_all_data_points(gapped_performance_d
assert max_push_timestamp <= datetime.datetime(year=2013, month=11, day=14)
def test_identifying_retriggerables_selects_even_single_data_point(single_performance_datum, test_perf_alert):
identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL)
def test_identify_retriggerables_selects_even_single_data_point(single_performance_datum, test_perf_alert):
identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL)
data_points_to_retrigger = identify_retriggerables(test_perf_alert)
assert len(data_points_to_retrigger) == 1
assert {4} == set(map(get_key("job_id"), data_points_to_retrigger))
def test_identifying_retriggerables_doesnt_select_out_of_range_data_points(
def test_identify_retriggerables_doesnt_select_out_of_range_data_points(
retriggerable_and_nonretriggerable_performance_data, test_perf_alert):
identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL)
identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL)
data_points_to_retrigger = identify_retriggerables(test_perf_alert)
job_ids_to_retrigger = set(map(get_key("job_id"), data_points_to_retrigger))

Просмотреть файл

@ -1,7 +1,8 @@
import datetime
import logging
import time
from collections import namedtuple
from datetime import (datetime,
timedelta)
from itertools import zip_longest
from typing import (List,
Tuple)
@ -41,7 +42,7 @@ def generate_new_alerts_in_series(signature):
# (1) the last alert, if there is one
# (2) the alerts max age
# (use whichever is newer)
max_alert_age = (datetime.datetime.now() -
max_alert_age = (datetime.now() -
settings.PERFHERDER_ALERTS_MAX_AGE)
series = PerformanceDatum.objects.filter(signature=signature).filter(
push_timestamp__gte=max_alert_age).order_by('push_timestamp')
@ -104,7 +105,7 @@ def generate_new_alerts_in_series(signature):
prev_push_id=prev.push_id,
defaults={
'manually_created': False,
'created': datetime.datetime.utcfromtimestamp(
'created': datetime.utcfromtimestamp(
cur.push_timestamp)
})
@ -133,7 +134,7 @@ class AlertsPicker:
For this algorithm, regressions are considered the most important, followed by improvements.
'''
def __init__(self, max_alerts: int, max_improvements: int, platforms_of_interest: Tuple[str]):
def __init__(self, max_alerts: int, max_improvements: int, platforms_of_interest: Tuple[str, ...]):
'''
:param max_alerts: the maximum number of selected alerts
:param max_improvements: max when handling only improvements
@ -157,7 +158,7 @@ class AlertsPicker:
self.max_improvements = max_improvements
self.ordered_platforms_of_interest = platforms_of_interest
def extract_important_alerts(self, alerts: Tuple[PerformanceAlert]):
def extract_important_alerts(self, alerts: Tuple[PerformanceAlert, ...]):
if any(not isinstance(alert, PerformanceAlert) for alert in alerts):
raise ValueError('Provided parameter does not contain only PerformanceAlert objects.')
relevant_alerts = self._extract_by_relevant_platforms(alerts)
@ -247,18 +248,18 @@ class AlertsPicker:
return sorted_alerts
class IdentifyingRetriggerables:
def __init__(self, range_width: int, time_interval: datetime.timedelta, logger=None):
if range_width < 1:
class IdentifyAlertRetriggerables:
def __init__(self, max_data_points: int, time_interval: timedelta, logger=None):
if max_data_points < 1:
raise ValueError('Cannot set range width less than 1')
if range_width % 2 == 0:
if max_data_points % 2 == 0:
raise ValueError('Must provide odd range width')
if not isinstance(time_interval, datetime.timedelta):
if not isinstance(time_interval, timedelta):
raise TypeError('Must provide time interval as timedelta')
self._range_width = range_width
self._range_width = max_data_points
self._time_interval = time_interval
self.log = logger or logging.getLogger(self.__call__.__name__)
self.log = logger or logging.getLogger(self.__class__.__name__)
def __call__(self, alert: PerformanceAlert) -> List[dict]:
"""
@ -279,10 +280,10 @@ class IdentifyingRetriggerables:
return data_points_to_retrigger
def min_timestamp(self, alert_push_time: datetime.datetime) -> datetime.datetime:
def min_timestamp(self, alert_push_time: datetime) -> datetime:
return alert_push_time - self._time_interval
def max_timestamp(self, alert_push_time: datetime.datetime) -> datetime.datetime:
def max_timestamp(self, alert_push_time: datetime) -> datetime:
return alert_push_time + self._time_interval
def _fetch_suspect_data_points(self, alert: PerformanceAlert) -> QuerySet:
@ -331,3 +332,54 @@ class IdentifyingRetriggerables:
if retrigger_range < self._range_width:
self.log.warning('Found small backfill range (of size {} instead of {})'
.format(retrigger_range, self._range_width))
class IdentifyLatestRetriggerables:
    '''
    Collects, for recently created alert summaries, the data points to retrigger.

    For each matching alert summary, the most important alerts are chosen
    by an `AlertsPicker` and each of them is handed to an
    `IdentifyAlertRetriggerables` instance, which provides its data points.
    '''

    def __init__(self, since: datetime, data_points_lookup_interval: timedelta):
        '''
        Acquire/instantiate data used for finding alerts.

        :param since: only alert summaries created at or after this datetime are looked up.
        :param data_points_lookup_interval: time range before/after a data point
               in which the search for neighboring data points occurs.
        '''
        self.since = since
        self.picker = AlertsPicker(
            max_alerts=5,
            max_improvements=2,
            platforms_of_interest=('windows10', 'windows7', 'linux', 'osx', 'android'))
        self.find_alert_retriggerables = IdentifyAlertRetriggerables(
            max_data_points=5,
            time_interval=data_points_lookup_interval)

    def __call__(self, frameworks: List[str], repositories: List[str]) -> List[dict]:
        '''
        :param frameworks: framework names to restrict the lookup to;
               a falsy value (None or empty) disables the framework filter.
        :param repositories: repository names to restrict the lookup to.
        :return: one record per alert summary, shaped as
                 {"alert_summary_id": <id>,
                  "alerts": [{"id": <id>, "data_points_to_retrigger": [...]}, ...]}
        '''
        summaries_to_retrigger = self._fetch_by(
            self._summaries(self.since),
            frameworks,
            repositories
        )
        return self._identify_retriggerables(summaries_to_retrigger)

    def _summaries(self, timestamp: datetime) -> QuerySet:
        '''Alert summaries created at or after `timestamp`.'''
        return (PerformanceAlertSummary.objects
                .select_related('framework', 'repository')
                .filter(created__gte=timestamp))

    def _fetch_by(self, summaries_to_retrigger: QuerySet, frameworks: List[str], repositories: List[str]) -> QuerySet:
        '''Narrow the summaries down by framework (optional) and repository (always applied).'''
        if frameworks:
            summaries_to_retrigger = summaries_to_retrigger.filter(framework__name__in=frameworks)
        # NOTE: unlike frameworks, the repository filter is unconditional,
        # so an empty `repositories` list matches no summary at all.
        return summaries_to_retrigger.filter(repository__name__in=repositories)

    def _identify_retriggerables(self, summaries_to_retrigger: QuerySet) -> List[dict]:
        '''Build the report: one record per summary, listing its important alerts.'''
        json_output = []
        for summary in summaries_to_retrigger:
            important_alerts = self.picker.extract_important_alerts(
                summary.alerts.all())

            summary_record = {"alert_summary_id": summary.id, "alerts": []}
            for alert in important_alerts:
                data_points = self.find_alert_retriggerables(alert)
                summary_record["alerts"].append(
                    {"id": alert.id, "data_points_to_retrigger": data_points})
            json_output.append(summary_record)

        return json_output

Просмотреть файл

@ -0,0 +1,61 @@
from argparse import ArgumentError
from datetime import (datetime,
timedelta)
from typing import (List,
Tuple)
import simplejson
from django.core.management.base import BaseCommand
from treeherder.perf.alerts import IdentifyLatestRetriggerables
from treeherder.perf.models import PerformanceFramework
class Command(BaseCommand):
    '''
    Management command that reports which jobs to retrigger for the latest alerts.

    The report is returned from `handle` as a JSON string.
    '''
    # repositories used by default; also the only ones accepted by --repositories
    repos_to_retrigger_on = ['autoland', 'mozilla-inbound', 'mozilla-beta']
    help = "Select most relevant alerts and identify jobs to retrigger."

    def add_arguments(self, parser):
        '''Register the --time-window, --frameworks and --repositories options.'''
        parser.add_argument(
            '--time-window',
            action='store',
            type=int,
            default=60,
            help="How far back to look for alerts to retrigger (expressed in minutes)."
        )

        parser.add_argument(
            '--frameworks',
            nargs='+',
            default=None,
            help="Defaults to all registered performance frameworks."
        )

        parser.add_argument(
            '--repositories',
            nargs='+',
            default=Command.repos_to_retrigger_on,
            help=f"Defaults to {Command.repos_to_retrigger_on}."
        )

    def handle(self, *args, **options):
        '''
        Validate the options, compute the retriggerables and serialize them.

        :return: the report as a JSON string; values simplejson cannot
                 encode natively are serialized via `str`.
        :raises ArgumentError: on unknown framework or repository names.
        '''
        frameworks, repositories, since, days_to_lookup = self._parse_args(**options)
        self._validate_args(frameworks, repositories)

        latest_retriggerables = IdentifyLatestRetriggerables(since, days_to_lookup)(frameworks, repositories)
        return simplejson.dumps(latest_retriggerables, default=str)

    def _parse_args(self, **options) -> Tuple[List, List, datetime, timedelta]:
        '''
        Turn raw command options into the values the lookup works with.

        :return: (frameworks, repositories, lookup start datetime,
                  data points lookup interval - fixed at one day)
        '''
        return (options['frameworks'],
                options['repositories'],
                datetime.now() - timedelta(minutes=options['time_window']),
                timedelta(days=1))

    def _validate_args(self, frameworks: List[str], repositories: List[str]):
        '''
        Reject framework/repository names the command doesn't know about.

        :raises ArgumentError: if any framework isn't registered or any
                repository is outside `Command.repos_to_retrigger_on`.
        '''
        # ArgumentError's signature is (argument, message); these checks are not
        # tied to a specific argparse action, so None is passed as the argument.
        if frameworks:
            available_frameworks = set(PerformanceFramework.objects.
                                       values_list('name', flat=True))
            if not set(frameworks).issubset(available_frameworks):
                raise ArgumentError(None, 'Unknown framework provided.')
        if repositories:
            if not set(repositories).issubset(set(Command.repos_to_retrigger_on)):
                raise ArgumentError(None, 'Unknown repository provided.')