From 185ac4e43f49049d9c8d15ef932c0a2dd74dcf04 Mon Sep 17 00:00:00 2001 From: octavian-negru <53253211+octavian-negru@users.noreply.github.com> Date: Mon, 16 Sep 2019 16:53:05 +0300 Subject: [PATCH] Bug 1571364 - Synthesize backfill report --- ...les.py => test_identify_retriggerables.py} | 24 +++--- treeherder/perf/alerts.py | 80 +++++++++++++++---- .../commands/synthesize_backfill_report.py | 61 ++++++++++++++ 3 files changed, 139 insertions(+), 26 deletions(-) rename tests/perfalert/test_alerts/{test_identifying_retriggerables.py => test_identify_retriggerables.py} (85%) create mode 100644 treeherder/perf/management/commands/synthesize_backfill_report.py diff --git a/tests/perfalert/test_alerts/test_identifying_retriggerables.py b/tests/perfalert/test_alerts/test_identify_retriggerables.py similarity index 85% rename from tests/perfalert/test_alerts/test_identifying_retriggerables.py rename to tests/perfalert/test_alerts/test_identify_retriggerables.py index 217fd1f5f..d625b336a 100644 --- a/tests/perfalert/test_alerts/test_identifying_retriggerables.py +++ b/tests/perfalert/test_alerts/test_identify_retriggerables.py @@ -5,7 +5,7 @@ from operator import itemgetter as get_key import pytest from treeherder.model.models import Job -from treeherder.perf.alerts import IdentifyingRetriggerables +from treeherder.perf.alerts import IdentifyAlertRetriggerables from treeherder.perf.models import PerformanceDatum NON_RETRIGGERABLE_JOB_ID = 9 @@ -125,18 +125,18 @@ def prepare_graph_data_scenario(push_ids_to_keep, highlighted_push_id, perf_aler # Unit tests -def test_identifying_retriggerables_as_unit(): +def test_identify_retriggerables_as_unit(): # basic instantiation & usage one_day = datetime.timedelta(days=1) with pytest.raises(ValueError): - _ = IdentifyingRetriggerables(range_width=0, time_interval=one_day) + _ = IdentifyAlertRetriggerables(max_data_points=0, time_interval=one_day) with pytest.raises(ValueError): - _ = IdentifyingRetriggerables(range_width=4, time_interval=one_day) + _ = IdentifyAlertRetriggerables(max_data_points=4, time_interval=one_day) with pytest.raises(TypeError): - _ = IdentifyingRetriggerables(range_width=5, time_interval=1) + _ = IdentifyAlertRetriggerables(max_data_points=5, time_interval=1) # its small private methods annotated_data_points = [ @@ -147,7 +147,7 @@ def test_identifying_retriggerables_as_unit(): {'job_id': 5, 'push_id': 3}, {'job_id': 6, 'push_id': 3}, ] - operation = IdentifyingRetriggerables(range_width=5, time_interval=one_day) + operation = IdentifyAlertRetriggerables(max_data_points=5, time_interval=one_day) flattened_data_points = operation._one_data_point_per_push(annotated_data_points) push_counter = Counter([data_point['push_id'] for data_point in flattened_data_points]) @@ -165,8 +165,8 @@ def test_identifying_retriggerables_as_unit(): # Component tests -def test_identifying_retriggerables_selects_all_data_points(gapped_performance_data, test_perf_alert): - identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL) +def test_identify_retriggerables_selects_all_data_points(gapped_performance_data, test_perf_alert): + identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL) data_points_to_retrigger = identify_retriggerables(test_perf_alert) assert len(data_points_to_retrigger) == 5 @@ -181,17 +181,17 @@ def test_identifying_retriggerables_selects_all_data_points(gapped_performance_d assert max_push_timestamp <= datetime.datetime(year=2013, month=11, day=14) -def test_identifying_retriggerables_selects_even_single_data_point(single_performance_datum, test_perf_alert): - identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL) +def test_identify_retriggerables_selects_even_single_data_point(single_performance_datum, test_perf_alert): + identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL) data_points_to_retrigger = identify_retriggerables(test_perf_alert) assert len(data_points_to_retrigger) == 1 assert {4} == set(map(get_key("job_id"), data_points_to_retrigger)) -def test_identifying_retriggerables_doesnt_select_out_of_range_data_points( +def test_identify_retriggerables_doesnt_select_out_of_range_data_points( retriggerable_and_nonretriggerable_performance_data, test_perf_alert): - identify_retriggerables = IdentifyingRetriggerables(range_width=5, time_interval=ONE_DAY_INTERVAL) + identify_retriggerables = IdentifyAlertRetriggerables(max_data_points=5, time_interval=ONE_DAY_INTERVAL) data_points_to_retrigger = identify_retriggerables(test_perf_alert) job_ids_to_retrigger = set(map(get_key("job_id"), data_points_to_retrigger)) diff --git a/treeherder/perf/alerts.py b/treeherder/perf/alerts.py index 741297518..480d46fef 100644 --- a/treeherder/perf/alerts.py +++ b/treeherder/perf/alerts.py @@ -1,7 +1,8 @@ -import datetime import logging import time from collections import namedtuple +from datetime import (datetime, + timedelta) from itertools import zip_longest from typing import (List, Tuple) @@ -41,7 +42,7 @@ def generate_new_alerts_in_series(signature): # (1) the last alert, if there is one # (2) the alerts max age # (use whichever is newer) - max_alert_age = (datetime.datetime.now() - + max_alert_age = (datetime.now() - settings.PERFHERDER_ALERTS_MAX_AGE) series = PerformanceDatum.objects.filter(signature=signature).filter( push_timestamp__gte=max_alert_age).order_by('push_timestamp') @@ -104,7 +105,7 @@ def generate_new_alerts_in_series(signature): prev_push_id=prev.push_id, defaults={ 'manually_created': False, - 'created': datetime.datetime.utcfromtimestamp( + 'created': datetime.utcfromtimestamp( cur.push_timestamp) }) @@ -133,7 +134,7 @@ class AlertsPicker: For this algorithm, regressions are considered the most important, followed by improvements. ''' - def __init__(self, max_alerts: int, max_improvements: int, platforms_of_interest: Tuple[str]): + def __init__(self, max_alerts: int, max_improvements: int, platforms_of_interest: Tuple[str, ...]): ''' :param max_alerts: the maximum number of selected alerts :param max_improvements: max when handling only improvements @@ -157,7 +158,7 @@ class AlertsPicker: self.max_improvements = max_improvements self.ordered_platforms_of_interest = platforms_of_interest - def extract_important_alerts(self, alerts: Tuple[PerformanceAlert]): + def extract_important_alerts(self, alerts: Tuple[PerformanceAlert, ...]): if any(not isinstance(alert, PerformanceAlert) for alert in alerts): raise ValueError('Provided parameter does not contain only PerformanceAlert objects.') relevant_alerts = self._extract_by_relevant_platforms(alerts) @@ -247,18 +248,18 @@ class AlertsPicker: return sorted_alerts -class IdentifyingRetriggerables: - def __init__(self, range_width: int, time_interval: datetime.timedelta, logger=None): - if range_width < 1: +class IdentifyAlertRetriggerables: + def __init__(self, max_data_points: int, time_interval: timedelta, logger=None): + if max_data_points < 1: raise ValueError('Cannot set range width less than 1') - if range_width % 2 == 0: + if max_data_points % 2 == 0: raise ValueError('Must provide odd range width') - if not isinstance(time_interval, datetime.timedelta): + if not isinstance(time_interval, timedelta): raise TypeError('Must provide time interval as timedelta') - self._range_width = range_width + self._range_width = max_data_points self._time_interval = time_interval - self.log = logger or logging.getLogger(self.__call__.__name__) + self.log = logger or logging.getLogger(self.__class__.__name__) def __call__(self, alert: PerformanceAlert) -> List[dict]: """ @@ -279,10 +280,10 @@ class IdentifyingRetriggerables: return data_points_to_retrigger - def min_timestamp(self, alert_push_time: datetime.datetime) -> datetime.datetime: + def min_timestamp(self, alert_push_time: datetime) -> datetime: return alert_push_time - self._time_interval - def max_timestamp(self, alert_push_time: datetime.datetime) -> datetime.datetime: + def max_timestamp(self, alert_push_time: datetime) -> datetime: return alert_push_time + self._time_interval def _fetch_suspect_data_points(self, alert: PerformanceAlert) -> QuerySet: @@ -331,3 +332,54 @@ class IdentifyingRetriggerables: if retrigger_range < self._range_width: self.log.warning('Found small backfill range (of size {} instead of {})' .format(retrigger_range, self._range_width)) + + +class IdentifyLatestRetriggerables: + def __init__(self, since: datetime, data_points_lookup_interval: timedelta): + ''' + Acquire/instantiate data used for finding alerts. + :param since: datetime since the lookup will occur. + :param data_points_lookup_interval: time range before/after data point in which search neighboring data points occurs. + ''' + self.since = since + self.picker = AlertsPicker( + max_alerts=5, + max_improvements=2, + platforms_of_interest=('windows10', 'windows7', 'linux', 'osx', 'android')) + self.find_alert_retriggerables = IdentifyAlertRetriggerables( + max_data_points=5, + time_interval=data_points_lookup_interval) + + def __call__(self, frameworks: List[str], repositories: List[str]) -> dict: + summaries_to_retrigger = self._fetch_by( + self._summaries(self.since), + frameworks, + repositories + ) + return self._identify_retriggerables(summaries_to_retrigger) + + def _summaries(self, timestamp: datetime) -> QuerySet: + return (PerformanceAlertSummary.objects + .select_related('framework', 'repository') + .filter(created__gte=timestamp)) + + def _fetch_by(self, summaries_to_retrigger: QuerySet, frameworks: List[str], repositories: List[str]) -> QuerySet: + if frameworks: + summaries_to_retrigger = summaries_to_retrigger.filter(framework__name__in=frameworks) + return summaries_to_retrigger.filter(repository__name__in=repositories) + + def _identify_retriggerables(self, summaries_to_retrigger: QuerySet) -> dict: + json_output = [] + for summary in summaries_to_retrigger: + important_alerts = self.picker.extract_important_alerts( + summary.alerts.all()) + + summary_record = {"alert_summary_id": summary.id, "alerts": []} + for alert in important_alerts: + data_points = self.find_alert_retriggerables(alert) + + summary_record["alerts"].append( + {"id": alert.id, "data_points_to_retrigger": data_points}) + json_output.append(summary_record) + + return json_output diff --git a/treeherder/perf/management/commands/synthesize_backfill_report.py b/treeherder/perf/management/commands/synthesize_backfill_report.py new file mode 100644 index 000000000..1926de98e --- /dev/null +++ b/treeherder/perf/management/commands/synthesize_backfill_report.py @@ -0,0 +1,61 @@ +from argparse import ArgumentError +from datetime import (datetime, + timedelta) +from typing import (List, + Tuple) + +import simplejson +from django.core.management.base import BaseCommand + +from treeherder.perf.alerts import IdentifyLatestRetriggerables +from treeherder.perf.models import PerformanceFramework + + +class Command(BaseCommand): + repos_to_retrigger_on = ['autoland', 'mozilla-inbound', 'mozilla-beta'] + help = "Select most relevant alerts and identify jobs to retrigger." + + def add_arguments(self, parser): + parser.add_argument( + '--time-window', + action='store', + type=int, + default=60, + help="How far back to look for alerts to retrigger (expressed in minutes)." + ) + + parser.add_argument( + '--frameworks', + nargs='+', + default=None, + help="Defaults to all registered performance frameworks." + ) + + parser.add_argument( + '--repositories', + nargs='+', + default=Command.repos_to_retrigger_on, + help=f"Defaults to {Command.repos_to_retrigger_on}." + ) + + def handle(self, *args, **options): + frameworks, repositories, since, days_to_lookup = self._parse_args(**options) + self._validate_args(frameworks, repositories) + latest_retriggerables = IdentifyLatestRetriggerables(since, days_to_lookup)(frameworks, repositories) + return simplejson.dumps(latest_retriggerables, default=str) + + def _parse_args(self, **options) -> Tuple[List, List, datetime, timedelta]: + return (options['frameworks'], + options['repositories'], + datetime.now() - timedelta(minutes=options['time_window']), + timedelta(days=1)) + + def _validate_args(self, frameworks: List[str], repositories: List[str]): + if frameworks: + available_frameworks = set(PerformanceFramework.objects. + values_list('name', flat=True)) + if not set(frameworks).issubset(available_frameworks): + raise ArgumentError('Unknown framework provided.') + if repositories: + if not set(repositories).issubset(set(Command.repos_to_retrigger_on)): + raise ArgumentError('Unknown repository provided.')