Bug 1571369 - Store backfill reports per alert summary (#5539)

This commit is contained in:
ionutgoldan 2019-11-14 15:29:55 +02:00 коммит произвёл Armen Zambrano
Родитель 464ffe8ffe
Коммит 419c2650a1
8 изменённых файлов: 511 добавлений и 43 удалений

Просмотреть файл

@ -720,3 +720,49 @@ def sample_option_collections(transactional_db):
OptionCollection.objects.create(
option_collection_hash='option_hash2',
option=option2)
@pytest.fixture
def backfill_record_context():
    """Sample JSON-serializable backfill context.

    Mirrors the real-world payload stored on a BackfillRecord: the
    data points that should be retriggered for one alert.
    """
    fields = ('perf_datum_id', 'value', 'job_id', 'push_id',
              'push_timestamp', 'push__revision')
    rows = [
        (933219901, 0.8714208119774209, 269034923, 565159,
         "2019-10-02 02:22:28", "04e8766a29242d4deae31b5b04e6ac61ebf61ffd"),
        (933219962, 0.9160434865973892, 269034920, 565160,
         "2019-10-02 02:23:29", "9b42bdc4889fe7782df9b2a0aa990ed5e62cb04c"),
        (931772364, 0.9508247997807697, 268828343, 565161,
         "2019-10-02 02:24:35", "057b59fdadad75e888a739e85a683b2ff7bfc62e"),
        (931924904, 0.9829230628232519, 268840223, 565188,
         "2019-10-02 04:03:09", "49ef9afb62bb909389b105a1751e9b46e6f1688d"),
        (931927300, 0.9873498499464002, 268840309, 565193,
         "2019-10-02 04:08:06", "f5cce52461bac31945b083e51a085fb429a36f04"),
    ]
    return {"data_points_to_retrigger": [dict(zip(fields, row)) for row in rows]}

Просмотреть файл

@ -0,0 +1,48 @@
import json
from django.utils.timezone import now as django_now
from treeherder.perf.models import (BackfillRecord,
BackfillReport)
class TestBackfillReportClass:
    """Unit tests for BackfillReport's timestamp bookkeeping."""

    def test_is_outdated_is_synced_with_related_summary(self, test_perf_alert_summary):
        # A freshly created report is in sync with its summary.
        backfill_record = BackfillReport.objects.create(summary=test_perf_alert_summary)
        assert backfill_record.is_outdated is False

        # now change summary, so it's more recent than its report
        test_perf_alert_summary.last_updated = django_now()
        test_perf_alert_summary.save()
        assert backfill_record.is_outdated is True

    def test_last_updated_is_synced_with_child_records(self, test_perf_alert,
                                                       backfill_record_context):
        test_summary = test_perf_alert.summary
        context_dump = json.dumps(backfill_record_context)
        backfill_report = BackfillReport.objects.create(summary=test_summary)
        last_updated_before_new_record = backfill_report.last_updated

        # this should re update the report
        BackfillRecord.objects.create(alert=test_perf_alert,
                                      report=backfill_report,
                                      context=context_dump)
        assert last_updated_before_new_record < backfill_report.last_updated

        # record bulk deletes count as report updates too
        last_updated_before_expelling_records = backfill_report.last_updated
        backfill_report.expel_records()
        assert last_updated_before_expelling_records < backfill_report.last_updated

        # deleting a single record counts as a report update too
        new_backfill_record = BackfillRecord.objects.create(alert=test_perf_alert,
                                                            report=backfill_report,
                                                            context=context_dump)
        last_updated_before_single_record_delete = backfill_report.last_updated
        new_backfill_record.delete()
        assert last_updated_before_single_record_delete < backfill_report.last_updated

Просмотреть файл

@ -0,0 +1,219 @@
import datetime
import random
import string
from typing import Tuple
from unittest.mock import Mock
import pytest
from treeherder.model.models import (MachinePlatform,
Option,
OptionCollection)
from treeherder.perf.alerts import (AlertsPicker,
BackfillReportMaintainer)
from treeherder.perf.models import (BackfillRecord,
BackfillReport,
PerformanceAlert,
PerformanceSignature)
LETTERS = string.ascii_lowercase
EPOCH = datetime.datetime.utcfromtimestamp(0)
RANDOM_STRINGS = set()
@pytest.fixture(scope='module')
def alerts_picker():
    """Module-scoped AlertsPicker, configured like the production one."""
    # real-world instance
    interesting_platforms = ('windows10', 'windows7', 'linux', 'osx', 'android')
    return AlertsPicker(max_alerts=5, max_improvements=2,
                        platforms_of_interest=interesting_platforms)
@pytest.fixture
def mock_backfill_context_fetcher(backfill_record_context):
    """Stand-in context fetcher returning a canned payload.

    Used because underlying data is hard to provide (performance
    datum, pushes, jobs etc).
    """
    fetcher = Mock()
    fetcher.return_value = backfill_record_context
    return fetcher
@pytest.fixture
def option_collection():
    """An OptionCollection holding a single 'opt' option."""
    return OptionCollection.objects.create(
        option_collection_hash='my_option_hash',
        option=Option.objects.create(name='opt'))
@pytest.fixture
def relevant_platform():
    """A windows10 platform — one the AlertsPicker cares about."""
    return MachinePlatform.objects.create(os_name='win',
                                          platform='windows10',
                                          architecture='x86')
@pytest.fixture
def irrelevant_platform():
    """A platform outside the AlertsPicker's platforms of interest."""
    return MachinePlatform.objects.create(os_name='OS_OF_NO_INTEREST',
                                          platform='PLATFORM_OF_NO_INTEREST',
                                          architecture='x86')
@pytest.fixture
def unique_random_string():
    """Provide a factory for random lowercase strings that are unique
    across the whole test module (tracked via RANDOM_STRINGS).
    """
    # NOTE: the original `global RANDOM_STRINGS, LETTERS` declaration was
    # unnecessary — LETTERS is only read and RANDOM_STRINGS is mutated in
    # place (.add), never rebound, so module-level lookup already applies.
    def _unique_random_string(length=14):
        while True:
            candidate = ''.join(random.choice(LETTERS) for _ in range(length))
            if candidate not in RANDOM_STRINGS:
                RANDOM_STRINGS.add(candidate)
                return candidate

    return _unique_random_string
@pytest.fixture
def create_perf_signature(test_repository, test_perf_framework, option_collection, relevant_platform,
                          irrelevant_platform, unique_random_string):
    """Factory fixture producing PerformanceSignatures on demand.

    Pass relevant=False to put the signature on a platform the
    AlertsPicker does not consider interesting.
    """
    def _create_perf_signature(relevant=True):
        chosen_platform = relevant_platform if relevant else irrelevant_platform
        return PerformanceSignature.objects.create(
            repository=test_repository,
            signature_hash=unique_random_string(40),
            framework=test_perf_framework,
            platform=chosen_platform,
            option_collection=option_collection,
            suite=unique_random_string(),
            test=unique_random_string(),
            has_subtests=False,
            last_updated=datetime.datetime.now())

    return _create_perf_signature
@pytest.fixture
def create_alerts(create_perf_signature):
    """Factory fixture producing regression alerts on a given summary."""
    def _create_alerts(summary, relevant=True, amount=3):
        return [PerformanceAlert.objects.create(summary=summary,
                                                series_signature=create_perf_signature(relevant),
                                                is_regression=True,
                                                amount_pct=0.5,
                                                amount_abs=50.0,
                                                prev_value=100.0,
                                                new_value=150.0,
                                                t_value=20.0)
                for _ in range(amount)]

    return _create_alerts
def test_reports_are_generated_for_relevant_alerts_only(test_perf_alert_summary,
                                                        test_perf_framework,
                                                        test_repository,
                                                        create_alerts,
                                                        alerts_picker,
                                                        mock_backfill_context_fetcher):
    """A summary holding only irrelevant alerts must not produce a report."""
    # irrelevant alert only
    create_alerts(test_perf_alert_summary, relevant=False, amount=1)

    maintainer = BackfillReportMaintainer(alerts_picker, mock_backfill_context_fetcher)
    maintainer.provide_updated_reports(since=EPOCH,
                                       frameworks=[test_perf_framework.name],
                                       repositories=[test_repository.name])

    assert not BackfillReport.objects.exists()
def test_running_report_twice_on_unchanged_data_doesnt_change_anything(test_perf_alert_summary,
                                                                       test_perf_framework,
                                                                       test_repository,
                                                                       create_alerts,
                                                                       alerts_picker,
                                                                       mock_backfill_context_fetcher):
    """Re-running the maintainer over untouched data must be a no-op."""
    create_alerts(test_perf_alert_summary, amount=3)  # relevant alerts
    create_alerts(test_perf_alert_summary, relevant=False, amount=1)  # irrelevant alert
    assert not BackfillReport.objects.exists()

    maintainer = BackfillReportMaintainer(alerts_picker, mock_backfill_context_fetcher)

    # run report once
    maintainer.provide_updated_reports(since=EPOCH,
                                       frameworks=[test_perf_framework.name],
                                       repositories=[test_repository.name])
    initial_records_timestamps, initial_report_timestamps = __fetch_report_timestamps(test_perf_alert_summary)

    # run report twice (no changes happened on underlying data)
    maintainer.provide_updated_reports(since=EPOCH,
                                       frameworks=[test_perf_framework.name],
                                       repositories=[test_repository.name])
    records_timestamps, report_timestamps = __fetch_report_timestamps(test_perf_alert_summary)

    assert initial_report_timestamps == report_timestamps
    assert initial_records_timestamps == records_timestamps
def test_reports_are_updated_after_alert_summaries_change(test_perf_alert_summary,
                                                          test_perf_framework,
                                                          test_repository,
                                                          create_alerts,
                                                          alerts_picker,
                                                          mock_backfill_context_fetcher):
    """Reports & their records follow changes of the underlying alert data."""
    relevant_alerts = create_alerts(test_perf_alert_summary, amount=3)  # relevant alerts, all regressions
    create_alerts(test_perf_alert_summary,  # irrelevant alert
                  relevant=False,
                  amount=1)
    assert not BackfillReport.objects.exists()

    report_maintainer = BackfillReportMaintainer(alerts_picker,
                                                 mock_backfill_context_fetcher)
    report_maintainer.provide_updated_reports(since=EPOCH,
                                              frameworks=[test_perf_framework.name],
                                              repositories=[test_repository.name])
    assert BackfillReport.objects.count() == 1
    assert BackfillRecord.objects.count() == 3

    # new alerts will cause report updates
    create_alerts(test_perf_alert_summary, amount=3)  # relevant alerts
    report_maintainer.provide_updated_reports(since=EPOCH,
                                              frameworks=[test_perf_framework.name],
                                              repositories=[test_repository.name])
    assert BackfillRecord.objects.count() == 5

    # any change to a summary's alert will cause report updates
    alert = relevant_alerts[0]
    alert.status = PerformanceAlert.ACKNOWLEDGED
    alert.save()

    # FIX: __fetch_report_timestamps returns (records, report); the original
    # unpacked the pair in the opposite order here (unlike the sibling test),
    # so the two locals referred to the wrong timestamps. The assertions
    # still held, but a failure would have been reported misleadingly.
    # Also fixes the 'records_timestmaps' typo.
    initial_records_timestamps, initial_report_timestamps = __fetch_report_timestamps(test_perf_alert_summary)
    report_maintainer.provide_updated_reports(since=EPOCH,
                                              frameworks=[test_perf_framework.name],
                                              repositories=[test_repository.name])
    records_timestamps, report_timestamps = __fetch_report_timestamps(test_perf_alert_summary)

    assert initial_report_timestamps != report_timestamps
    assert initial_records_timestamps != records_timestamps
def __fetch_report_timestamps(test_perf_alert_summary) -> Tuple:
    """Return (records_timestamps, report_timestamps) for the summary's report.

    records_timestamps: list of `created` times of the report's child records.
    report_timestamps: the report's own (created, last_updated) pair.
    NOTE: records come FIRST in the returned pair — unpack accordingly.
    """
    report = BackfillReport.objects.get(summary=test_perf_alert_summary)
    report_timestamps = report.created, report.last_updated
    records_timestamps = [record.created for record in report.records.all()]
    return records_timestamps, report_timestamps

Просмотреть файл

@ -7,11 +7,17 @@ from itertools import zip_longest
from typing import (List,
Tuple)
import simplejson as json
from django.conf import settings
from django.db import transaction
from django.db.models import (F,
Q)
from django.db.models.query import QuerySet
from treeherder.perf.models import (PerformanceAlert,
from treeherder.perf.exceptions import MissingRecords
from treeherder.perf.models import (BackfillRecord,
BackfillReport,
PerformanceAlert,
PerformanceAlertSummary,
PerformanceDatum,
PerformanceSignature)
@ -268,10 +274,7 @@ class IdentifyAlertRetriggerables:
annotated_data_points = self._fetch_suspect_data_points(alert) # in time_interval around alert
flattened_data_points = self._one_data_point_per_push(annotated_data_points)
try:
alert_index = self._find_push_id_index(alert.summary.push_id, flattened_data_points)
except LookupError as ex:
raise RuntimeError("Unexpected lookup failure") from ex
alert_index = self._find_push_id_index(alert.summary.push_id, flattened_data_points)
retrigger_window = self.__compute_window_slices(alert_index)
data_points_to_retrigger = flattened_data_points[retrigger_window]
@ -334,52 +337,95 @@ class IdentifyAlertRetriggerables:
.format(retrigger_range, self._range_width))
class IdentifyLatestRetriggerables:
def __init__(self, since: datetime, data_points_lookup_interval: timedelta):
class BackfillReportMaintainer:
def __init__(self, alerts_picker: AlertsPicker, backfill_context_fetcher: IdentifyAlertRetriggerables, logger=None):
'''
Acquire/instantiate data used for finding alerts.
:param since: datetime since the lookup will occur.
:param data_points_lookup_interval: time range before/after data point in which search neighboring data points occurs.
'''
self.since = since
self.picker = AlertsPicker(
max_alerts=5,
max_improvements=2,
platforms_of_interest=('windows10', 'windows7', 'linux', 'osx', 'android'))
self.find_alert_retriggerables = IdentifyAlertRetriggerables(
max_data_points=5,
time_interval=data_points_lookup_interval)
self.alerts_picker = alerts_picker
self.fetch_backfill_context = backfill_context_fetcher
self.log = logger or logging.getLogger(self.__class__.__name__)
def __call__(self, frameworks: List[str], repositories: List[str]) -> dict:
def provide_updated_reports(self, since: datetime,
frameworks: List[str],
repositories: List[str]) -> List[BackfillReport]:
summaries_to_retrigger = self._fetch_by(
self._summaries(self.since),
self._summaries_requiring_reports(since),
frameworks,
repositories
)
return self._identify_retriggerables(summaries_to_retrigger)
return self.compile_reports_for(summaries_to_retrigger)
def compile_reports_for(self, summaries_to_retrigger: QuerySet) -> List[BackfillReport]:
reports = []
for summary in summaries_to_retrigger:
important_alerts = self._pick_important_alerts(summary)
if len(important_alerts) == 0 and self._doesnt_have_report(summary):
continue # won't create blank reports
# but will update if case
try:
alert_context_map = self._associate_retrigger_context(important_alerts)
except MissingRecords as ex:
self.log.warning(f"Failed to compute report for alert summary {summary}. {ex}")
continue
backfill_report, created = BackfillReport.objects.get_or_create(summary_id=summary.id)
if created or backfill_report.is_outdated:
backfill_report.expel_records() # associated records are outdated & irrelevant
self._provide_records(backfill_report, alert_context_map)
reports.append(backfill_report)
return reports
def _pick_important_alerts(self, from_summary: PerformanceAlertSummary) -> List[PerformanceAlert]:
return self.alerts_picker.extract_important_alerts(from_summary.alerts.all())
def _provide_records(self, backfill_report: BackfillReport,
alert_context_map: List[Tuple]):
for alert, retrigger_context in alert_context_map:
BackfillRecord.objects.create(alert=alert,
report=backfill_report,
context=json.dumps(retrigger_context))
def _summaries_requiring_reports(self, timestamp: datetime) -> QuerySet:
recent_summaries_with_no_reports = Q(last_updated__gte=timestamp,
backfill_report__isnull=True)
summaries_with_outdated_reports = Q(last_updated__gt=F('backfill_report__last_updated'))
def _summaries(self, timestamp: datetime) -> QuerySet:
return (PerformanceAlertSummary.objects
.prefetch_related('backfill_report')
.select_related('framework', 'repository')
.filter(created__gte=timestamp))
.filter(recent_summaries_with_no_reports | summaries_with_outdated_reports))
def _fetch_by(self, summaries_to_retrigger: QuerySet, frameworks: List[str], repositories: List[str]) -> QuerySet:
if frameworks:
summaries_to_retrigger = summaries_to_retrigger.filter(framework__name__in=frameworks)
return summaries_to_retrigger.filter(repository__name__in=repositories)
if repositories:
summaries_to_retrigger = summaries_to_retrigger.filter(repository__name__in=repositories)
return summaries_to_retrigger
def _identify_retriggerables(self, summaries_to_retrigger: QuerySet) -> dict:
json_output = []
for summary in summaries_to_retrigger:
important_alerts = self.picker.extract_important_alerts(
summary.alerts.all())
def _associate_retrigger_context(self, important_alerts: List[PerformanceAlert]) -> List[Tuple]:
retrigger_map = []
incomplete_mapping = False
summary_record = {"alert_summary_id": summary.id, "alerts": []}
for alert in important_alerts:
data_points = self.find_alert_retriggerables(alert)
for alert in important_alerts:
try:
data_points = self.fetch_backfill_context(alert)
except LookupError as ex:
incomplete_mapping = True
self.log.debug(f"Couldn't identify retrigger context for alert {alert}. (Exception: {ex})")
continue
summary_record["alerts"].append(
{"id": alert.id, "data_points_to_retrigger": data_points})
json_output.append(summary_record)
retrigger_map.append((alert, data_points))
return json_output
if incomplete_mapping:
expected = len(important_alerts)
missing = expected - len(retrigger_map)
raise MissingRecords(f'{missing} out of {expected} records are missing!')
return retrigger_map
def _doesnt_have_report(self, summary):
return not hasattr(summary, 'backfill_report')

Просмотреть файл

@ -4,3 +4,7 @@ class NoDataCyclingAtAll(Exception):
class MaxRuntimeExceeded(Exception):
    """Raised when an operation exceeds its maximum allowed runtime."""
    pass
class MissingRecords(Exception):
    """Raised when some backfill records couldn't be computed for a report
    (e.g. retrigger context lookup failed for one or more alerts)."""
    pass

Просмотреть файл

@ -4,10 +4,11 @@ from datetime import (datetime,
from typing import (List,
Tuple)
import simplejson
from django.core.management.base import BaseCommand
from treeherder.perf.alerts import IdentifyLatestRetriggerables
from treeherder.perf.alerts import (AlertsPicker,
BackfillReportMaintainer,
IdentifyAlertRetriggerables)
from treeherder.perf.models import PerformanceFramework
@ -41,8 +42,15 @@ class Command(BaseCommand):
def handle(self, *args, **options):
frameworks, repositories, since, days_to_lookup = self._parse_args(**options)
self._validate_args(frameworks, repositories)
latest_retriggerables = IdentifyLatestRetriggerables(since, days_to_lookup)(frameworks, repositories)
return simplejson.dumps(latest_retriggerables, default=str)
alerts_picker = AlertsPicker(max_alerts=5,
max_improvements=2,
platforms_of_interest=('windows10', 'windows7', 'linux', 'osx', 'android'))
backfill_context_fetcher = IdentifyAlertRetriggerables(max_data_points=5,
time_interval=days_to_lookup)
reporter = BackfillReportMaintainer(alerts_picker, backfill_context_fetcher)
reporter.provide_updated_reports(since, frameworks, repositories)
def _parse_args(self, **options) -> Tuple[List, List, datetime, timedelta]:
return (options['frameworks'],

Просмотреть файл

@ -0,0 +1,37 @@
# Generated by Django 2.2.6 on 2019-11-14 08:51
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    # Introduces the BackfillReport & BackfillRecord tables
    # (bug 1571369: store backfill reports per alert summary).

    dependencies = [
        ('perf', '0023_increase_extra_options_size'),
    ]

    operations = [
        migrations.CreateModel(
            name='BackfillReport',
            fields=[
                # One report per alert summary; the summary doubles as primary key.
                ('summary', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='backfill_report', serialize=False, to='perf.PerformanceAlertSummary')),
                ('created', models.DateTimeField(auto_now_add=True)),
                ('last_updated', models.DateTimeField(auto_now=True)),
            ],
            options={
                'db_table': 'backfill_report',
            },
        ),
        migrations.CreateModel(
            name='BackfillRecord',
            fields=[
                # One record per alert; the alert doubles as primary key.
                ('alert', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='backfill_record', serialize=False, to='perf.PerformanceAlert')),
                ('context', models.TextField()),
                ('created', models.DateTimeField(auto_now_add=True)),
                ('report', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='records', to='perf.BackfillReport')),
            ],
            options={
                'db_table': 'backfill_record',
            },
        ),
    ]

Просмотреть файл

@ -324,10 +324,8 @@ class PerformanceAlertSummary(models.Model):
self.__prev_bug_number = self.bug_number
def update_status(self, using=None):
autodetermined_status = self.autodetermine_status()
if autodetermined_status != self.status:
self.status = autodetermined_status
self.save(using=using)
self.status = self.autodetermine_status()
self.save(using=using)
def autodetermine_status(self):
alerts = (PerformanceAlert.objects.filter(summary=self) | PerformanceAlert.objects.filter(related_summary=self))
@ -514,3 +512,65 @@ class PerformanceBugTemplate(models.Model):
def __str__(self):
return '{} bug template'.format(self.framework.name)
class BackfillReport(models.Model):
    """
    Groups & stores all context required to retrigger/backfill
    relevant alerts from a performance alert summary.
    """
    # One report per summary; the summary doubles as this model's primary key.
    summary = models.OneToOneField(PerformanceAlertSummary,
                                   on_delete=models.CASCADE,
                                   primary_key=True,
                                   related_name='backfill_report')
    created = models.DateTimeField(auto_now_add=True)
    last_updated = models.DateTimeField(auto_now=True)  # bumped on every save()

    @property
    def is_outdated(self):
        # alert summary updated since last report was made
        return self.summary.last_updated > self.last_updated

    def expel_records(self):
        # Bulk-delete all child records, then bump last_updated.
        # NOTE: QuerySet.delete() bypasses BackfillRecord.delete(), which is
        # why the explicit save() is needed here.
        BackfillRecord.objects.filter(report=self).delete()
        self.save()  # refresh last_updated

    class Meta:
        db_table = "backfill_report"

    def __str__(self):
        return "BackfillReport(summary #{}, last update {})".format(self.summary.id, self.last_updated)
class BackfillRecord(models.Model):
    """Retrigger/backfill context for a single performance alert.

    Child of a BackfillReport; saving or deleting a record refreshes the
    parent report's last_updated timestamp.
    """
    # One record per alert; the alert doubles as this model's primary key.
    alert = models.OneToOneField(PerformanceAlert,
                                 on_delete=models.CASCADE,
                                 primary_key=True,
                                 related_name='backfill_record')
    report = models.ForeignKey(BackfillReport,
                               on_delete=models.CASCADE,
                               related_name='records')

    # all data required to retrigger/backfill
    # associated perf alert, as JSON dump
    # TODO-igoldan: could we employ a JSONField?
    context = models.TextField()
    created = models.DateTimeField(auto_now_add=True)

    def save(self, *args, **kwargs):
        # refresh parent's latest update time
        super().save(*args, **kwargs)
        self.report.save(using=kwargs.get('using'))

    def delete(self, using=None, keep_parents=False):
        super().delete(using, keep_parents)
        self.report.save()  # refresh last_updated

    class Meta:
        db_table = "backfill_record"

    def __str__(self):
        return "BackfillRecord(alert #{}, from {})".format(self.alert.id, self.report)