зеркало из https://github.com/mozilla/treeherder.git
Bug 1571364 - Synthesize backfill report
This commit is contained in:
Родитель
9b095eb04b
Коммит
185ac4e43f
|
@ -5,7 +5,7 @@ from operator import itemgetter as get_key
|
|||
import pytest
|
||||
|
||||
from treeherder.model.models import Job
|
||||
from treeherder.perf.alerts import IdentifyingRetriggerables
|
||||
from treeherder.perf.alerts import IdentifyAlertRetriggerables
|
||||
from treeherder.perf.models import PerformanceDatum
|
||||
|
||||
NON_RETRIGGERABLE_JOB_ID = 9
|
||||
|
@ -125,18 +125,18 @@ def prepare_graph_data_scenario(push_ids_to_keep, highlighted_push_id, perf_aler
|
|||
|
||||
|
||||
# Unit tests
|
||||
def test_identifying_retriggerables_as_unit():
|
||||
def test_identify_retriggerables_as_unit():
|
||||
# basic instantiation & usage
|
||||
one_day = datetime.timedelta(days=1)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
_ = IdentifyingRetriggerables(range_width=0, time_interval=one_day)
|
||||
_ = IdentifyAlertRetriggerables(max_data_points=0, time_interval=one_day)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
_ = IdentifyingRetriggerables(range_width=4, time_interval=one_day)
|
||||
_ = IdentifyAlertRetriggerables(max_data_points=4, time_interval=one_day)
|
||||
|
||||
with pytest.raises(TypeError):
|
||||
_ = IdentifyingRetriggerables(range_width=5, time_interval=1)
|
||||
_ = IdentifyAlertRetriggerables(max_data_points=5, time_interval=1)
|
||||
|
||||
# its small private methods
|
||||
annotated_data_points = [
|
||||
|
@ -147,7 +147,7 @@ def test_identifying_retriggerables_as_unit():
|
|||
{'job_id': 5, 'push_id': 3},
|
||||
{'job_id': 6, 'push_id': 3},
|
||||
]
|
||||
operation = IdentifyingRetriggerables(range_width=5, time_interval=one_day)
|
||||
operation = IdentifyAlertRetriggerables(max_data_points=5, time_interval=one_day)
|
||||
flattened_data_points = operation._one_data_point_per_push(annotated_data_points)
|
||||
push_counter = Counter([data_point['push_id'] for data_point in flattened_data_points])
|
||||
|
||||
|
@ -165,8 +165,8 @@ def test_identifying_retriggerables_as_unit():
|
|||
|
||||
|
||||
# Component tests
|
||||
def test_identifying_retriggerables_selects_all_data_points(gapped_performance_data, test_perf_alert):
|
||||
identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL)
|
||||
def test_identify_retriggerables_selects_all_data_points(gapped_performance_data, test_perf_alert):
|
||||
identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL)
|
||||
data_points_to_retrigger = identify_retriggerables(test_perf_alert)
|
||||
|
||||
assert len(data_points_to_retrigger) == 5
|
||||
|
@ -181,17 +181,17 @@ def test_identifying_retriggerables_selects_all_data_points(gapped_performance_d
|
|||
assert max_push_timestamp <= datetime.datetime(year=2013, month=11, day=14)
|
||||
|
||||
|
||||
def test_identifying_retriggerables_selects_even_single_data_point(single_performance_datum, test_perf_alert):
|
||||
identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL)
|
||||
def test_identify_retriggerables_selects_even_single_data_point(single_performance_datum, test_perf_alert):
|
||||
identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL)
|
||||
data_points_to_retrigger = identify_retriggerables(test_perf_alert)
|
||||
|
||||
assert len(data_points_to_retrigger) == 1
|
||||
assert {4} == set(map(get_key("job_id"), data_points_to_retrigger))
|
||||
|
||||
|
||||
def test_identifying_retriggerables_doesnt_select_out_of_range_data_points(
|
||||
def test_identify_retriggerables_doesnt_select_out_of_range_data_points(
|
||||
retriggerable_and_nonretriggerable_performance_data, test_perf_alert):
|
||||
identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL)
|
||||
identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL)
|
||||
data_points_to_retrigger = identify_retriggerables(test_perf_alert)
|
||||
|
||||
job_ids_to_retrigger = set(map(get_key("job_id"), data_points_to_retrigger))
|
|
@ -1,7 +1,8 @@
|
|||
import datetime
|
||||
import logging
|
||||
import time
|
||||
from collections import namedtuple
|
||||
from datetime import (datetime,
|
||||
timedelta)
|
||||
from itertools import zip_longest
|
||||
from typing import (List,
|
||||
Tuple)
|
||||
|
@ -41,7 +42,7 @@ def generate_new_alerts_in_series(signature):
|
|||
# (1) the last alert, if there is one
|
||||
# (2) the alerts max age
|
||||
# (use whichever is newer)
|
||||
max_alert_age = (datetime.datetime.now() -
|
||||
max_alert_age = (datetime.now() -
|
||||
settings.PERFHERDER_ALERTS_MAX_AGE)
|
||||
series = PerformanceDatum.objects.filter(signature=signature).filter(
|
||||
push_timestamp__gte=max_alert_age).order_by('push_timestamp')
|
||||
|
@ -104,7 +105,7 @@ def generate_new_alerts_in_series(signature):
|
|||
prev_push_id=prev.push_id,
|
||||
defaults={
|
||||
'manually_created': False,
|
||||
'created': datetime.datetime.utcfromtimestamp(
|
||||
'created': datetime.utcfromtimestamp(
|
||||
cur.push_timestamp)
|
||||
})
|
||||
|
||||
|
@ -133,7 +134,7 @@ class AlertsPicker:
|
|||
For this algorithm, regressions are considered the most important, followed by improvements.
|
||||
'''
|
||||
|
||||
def __init__(self, max_alerts: int, max_improvements: int, platforms_of_interest: Tuple[str]):
|
||||
def __init__(self, max_alerts: int, max_improvements: int, platforms_of_interest: Tuple[str, ...]):
|
||||
'''
|
||||
:param max_alerts: the maximum number of selected alerts
|
||||
:param max_improvements: max when handling only improvements
|
||||
|
@ -157,7 +158,7 @@ class AlertsPicker:
|
|||
self.max_improvements = max_improvements
|
||||
self.ordered_platforms_of_interest = platforms_of_interest
|
||||
|
||||
def extract_important_alerts(self, alerts: Tuple[PerformanceAlert]):
|
||||
def extract_important_alerts(self, alerts: Tuple[PerformanceAlert, ...]):
|
||||
if any(not isinstance(alert, PerformanceAlert) for alert in alerts):
|
||||
raise ValueError('Provided parameter does not contain only PerformanceAlert objects.')
|
||||
relevant_alerts = self._extract_by_relevant_platforms(alerts)
|
||||
|
@ -247,18 +248,18 @@ class AlertsPicker:
|
|||
return sorted_alerts
|
||||
|
||||
|
||||
class IdentifyingRetriggerables:
|
||||
def __init__(self, range_width: int, time_interval: datetime.timedelta, logger=None):
|
||||
if range_width < 1:
|
||||
class IdentifyAlertRetriggerables:
|
||||
def __init__(self, max_data_points: int, time_interval: timedelta, logger=None):
|
||||
if max_data_points < 1:
|
||||
raise ValueError('Cannot set range width less than 1')
|
||||
if range_width % 2 == 0:
|
||||
if max_data_points % 2 == 0:
|
||||
raise ValueError('Must provide odd range width')
|
||||
if not isinstance(time_interval, datetime.timedelta):
|
||||
if not isinstance(time_interval, timedelta):
|
||||
raise TypeError('Must provide time interval as timedelta')
|
||||
|
||||
self._range_width = range_width
|
||||
self._range_width = max_data_points
|
||||
self._time_interval = time_interval
|
||||
self.log = logger or logging.getLogger(self.__call__.__name__)
|
||||
self.log = logger or logging.getLogger(self.__class__.__name__)
|
||||
|
||||
def __call__(self, alert: PerformanceAlert) -> List[dict]:
|
||||
"""
|
||||
|
@ -279,10 +280,10 @@ class IdentifyingRetriggerables:
|
|||
|
||||
return data_points_to_retrigger
|
||||
|
||||
def min_timestamp(self, alert_push_time: datetime.datetime) -> datetime.datetime:
|
||||
def min_timestamp(self, alert_push_time: datetime) -> datetime:
|
||||
return alert_push_time - self._time_interval
|
||||
|
||||
def max_timestamp(self, alert_push_time: datetime.datetime) -> datetime.datetime:
|
||||
def max_timestamp(self, alert_push_time: datetime) -> datetime:
|
||||
return alert_push_time + self._time_interval
|
||||
|
||||
def _fetch_suspect_data_points(self, alert: PerformanceAlert) -> QuerySet:
|
||||
|
@ -331,3 +332,54 @@ class IdentifyingRetriggerables:
|
|||
if retrigger_range < self._range_width:
|
||||
self.log.warning('Found small backfill range (of size {} instead of {})'
|
||||
.format(retrigger_range, self._range_width))
|
||||
|
||||
|
||||
class IdentifyLatestRetriggerables:
    '''
    Identify the data points to retrigger for recently created alert summaries.

    Alert summaries created at or after `since` are narrowed down by framework
    & repository. For each remaining summary, the most important alerts are
    picked and the data points to retrigger are looked up for every picked alert.
    '''

    def __init__(self, since: datetime, data_points_lookup_interval: timedelta):
        '''
        Acquire/instantiate data used for finding alerts.
        :param since: only alert summaries created at or after this datetime are looked up.
        :param data_points_lookup_interval: time range before/after a data point
               in which the search for neighboring data points occurs.
        '''
        self.since = since
        # selection limits & platform ordering are fixed for this report
        self.picker = AlertsPicker(
            max_alerts=5,
            max_improvements=2,
            platforms_of_interest=('windows10', 'windows7', 'linux', 'osx', 'android'))
        self.find_alert_retriggerables = IdentifyAlertRetriggerables(
            max_data_points=5,
            time_interval=data_points_lookup_interval)

    def __call__(self, frameworks: List[str], repositories: List[str]) -> List[dict]:
        '''
        :param frameworks: framework names to restrict the lookup to;
               a falsy value (None/empty) applies no framework restriction
        :param repositories: repository names to restrict the lookup to
        :return: one record per matching alert summary, shaped as
                 {"alert_summary_id": ..., "alerts": [{"id": ..., "data_points_to_retrigger": ...}]}
        '''
        summaries_to_retrigger = self._fetch_by(
            self._summaries(self.since),
            frameworks,
            repositories
        )
        return self._identify_retriggerables(summaries_to_retrigger)

    def _summaries(self, timestamp: datetime) -> QuerySet:
        '''Alert summaries created at or after `timestamp`, with framework & repository pre-joined.'''
        return (PerformanceAlertSummary.objects
                .select_related('framework', 'repository')
                .filter(created__gte=timestamp))

    def _fetch_by(self, summaries_to_retrigger: QuerySet, frameworks: List[str], repositories: List[str]) -> QuerySet:
        '''
        Narrow the summaries down by framework names (only when any were provided)
        and by repository names (always applied).
        '''
        if frameworks:
            summaries_to_retrigger = summaries_to_retrigger.filter(framework__name__in=frameworks)
        return summaries_to_retrigger.filter(repository__name__in=repositories)

    def _identify_retriggerables(self, summaries_to_retrigger: QuerySet) -> List[dict]:
        '''
        Build the JSON-ready report: for every summary, the important alerts
        together with the data points to retrigger for each of them.
        '''
        json_output = []
        for summary in summaries_to_retrigger:
            important_alerts = self.picker.extract_important_alerts(
                summary.alerts.all())

            json_output.append({
                "alert_summary_id": summary.id,
                "alerts": [
                    {"id": alert.id,
                     "data_points_to_retrigger": self.find_alert_retriggerables(alert)}
                    for alert in important_alerts
                ],
            })

        return json_output
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
from argparse import ArgumentError
|
||||
from datetime import (datetime,
|
||||
timedelta)
|
||||
from typing import (List,
|
||||
Tuple)
|
||||
|
||||
import simplejson
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from treeherder.perf.alerts import IdentifyLatestRetriggerables
|
||||
from treeherder.perf.models import PerformanceFramework
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Management command which reports, as JSON, the data points to retrigger
    for the most relevant alerts created within a recent time window.
    """
    # repositories the report may be generated for; also the `--repositories` default
    repos_to_retrigger_on = ['autoland', 'mozilla-inbound', 'mozilla-beta']
    help = "Select most relevant alerts and identify jobs to retrigger."

    def add_arguments(self, parser):
        """Register the `--time-window`, `--frameworks` & `--repositories` options."""
        parser.add_argument(
            '--time-window',
            action='store',
            type=int,
            default=60,
            help="How far back to look for alerts to retrigger (expressed in minutes)."
        )

        parser.add_argument(
            '--frameworks',
            nargs='+',
            default=None,
            help="Defaults to all registered performance frameworks."
        )

        parser.add_argument(
            '--repositories',
            nargs='+',
            default=Command.repos_to_retrigger_on,
            help=f"Defaults to {Command.repos_to_retrigger_on}."
        )

    def handle(self, *args, **options):
        """
        Validate the provided options, then identify the latest retriggerables.

        :return: the report serialized as a JSON string; values that are not
                 natively JSON serializable are converted using `str`
        :raises ArgumentError: if an unknown framework or repository was provided
        """
        frameworks, repositories, since, days_to_lookup = self._parse_args(**options)
        self._validate_args(frameworks, repositories)

        identify_latest = IdentifyLatestRetriggerables(since, days_to_lookup)
        latest_retriggerables = identify_latest(frameworks, repositories)

        return simplejson.dumps(latest_retriggerables, default=str)

    def _parse_args(self, **options) -> Tuple[List, List, datetime, timedelta]:
        """
        :return: (frameworks, repositories, datetime since alerts are looked up,
                  interval in which neighboring data points are looked up)
        """
        # NOTE(review): naive local time is used here - confirm it matches
        # how alert summaries' `created` timestamps are stored
        since = datetime.now() - timedelta(minutes=options['time_window'])
        data_points_lookup_interval = timedelta(days=1)

        return (options['frameworks'],
                options['repositories'],
                since,
                data_points_lookup_interval)

    def _validate_args(self, frameworks: List[str], repositories: List[str]):
        """
        Ensure only registered frameworks & supported repositories were provided.
        Falsy (None/empty) parameters are not checked.

        :raises ArgumentError: if an unknown framework or repository was provided
        """
        # ArgumentError's signature is (argument, message); as these errors
        # aren't tied to a specific argparse action, None is passed as argument
        if frameworks:
            available_frameworks = set(PerformanceFramework.objects.
                                       values_list('name', flat=True))
            if not set(frameworks).issubset(available_frameworks):
                raise ArgumentError(None, 'Unknown framework provided.')
        if repositories:
            if not set(repositories).issubset(Command.repos_to_retrigger_on):
                raise ArgumentError(None, 'Unknown repository provided.')
|
Загрузка…
Ссылка в новой задаче