Bug 1725577 - Aggregate auto backfill emails to daily frequency (#7244)

* Bug 1725577 - WIP

* Bug 1725577 - Temporary tests
This commit is contained in:
beatrice-acasandrei 2021-09-01 09:53:11 +03:00 коммит произвёл GitHub
Родитель 43d44d1bb8
Коммит 5f14ee55c4
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 256 добавлений и 167 удалений

Просмотреть файл

@ -228,6 +228,27 @@ def notify_client_mock() -> taskcluster.Notify:
)
class Response:
    """Bare-bones response stub whose only attribute is a `status_code` of 200."""

    def __init__(self):
        self.status_code = 200
@pytest.fixture
def tc_notify_mock(monkeypatch):
    """
    Swap `treeherder.services.taskcluster.notify_client_factory` for a factory
    that always hands back one shared `MagicMock`, and return that mock.

    The mock's `email()` returns `{'response': <Response>}`.
    """
    from treeherder.services import taskcluster as tc_services

    notify_client = MagicMock()
    notify_client.email.return_value = {'response': Response()}

    def fake_factory(*arg, **kwargs):
        # Arguments are accepted and ignored; every call yields the same mock.
        return notify_client

    monkeypatch.setattr(tc_services, 'notify_client_factory', fake_factory)
    return notify_client
@pytest.fixture
def job_from_try(eleven_job_blobs, create_jobs):
job_blob = eleven_job_blobs[0]

Просмотреть файл

@ -0,0 +1,121 @@
from datetime import datetime, timedelta
import pytest
from tests import settings as test_settings
from django.core.management import call_command
from treeherder.perf.auto_perf_sheriffing.sherlock import Sherlock
from treeherder.perf.models import BackfillNotificationRecord
from treeherder.perf.exceptions import MaxRuntimeExceeded
# Naive datetime for the Unix epoch (1970-01-01 00:00:00), used as the `since`
# argument by the tests in this module. Built explicitly because
# `datetime.utcfromtimestamp` is deprecated as of Python 3.12.
EPOCH = datetime(1970, 1, 1)
# TODO: Update tests so that the mock 'tc_notify_mock' works as expected
def test_email_is_sent_after_successful_backfills(
    report_maintainer_mock,
    backfill_tool_mock,
    secretary,
    record_ready_for_processing,
    sherlock_settings,
    tc_notify_mock,
):
    """
    Sheriffing the targeted framework/repository leaves exactly one
    notification record, and `report_backfill_outcome` removes it again.
    """
    sheriff_bot = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary)
    targeted_repositories = [test_settings.TREEHERDER_TEST_REPOSITORY_NAME]

    sheriff_bot.sheriff(
        since=EPOCH, frameworks=['test_talos'], repositories=targeted_repositories
    )
    record_ready_for_processing.refresh_from_db()

    # One row is waiting to be reported...
    assert BackfillNotificationRecord.objects.count() == 1

    # ...and none are left once the reporting command has run.
    call_command('report_backfill_outcome')
    assert BackfillNotificationRecord.objects.count() == 0
def test_email_is_still_sent_if_context_is_too_corrupt_to_be_actionable(
    report_maintainer_mock,
    backfill_tool_mock,
    secretary,
    record_ready_for_processing,
    sherlock_settings,
    broken_context_str,  # Note: parametrizes the test
    tc_notify_mock,
):
    """
    A record saved with a broken context string still produces one
    notification record, which `report_backfill_outcome` then removes.
    """
    # Corrupt the stored context before Sherlock gets to see the record.
    record_ready_for_processing.context = broken_context_str
    record_ready_for_processing.save()

    sheriff_bot = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary)
    targeted_repositories = [test_settings.TREEHERDER_TEST_REPOSITORY_NAME]
    sheriff_bot.sheriff(
        since=EPOCH, frameworks=['test_talos'], repositories=targeted_repositories
    )

    assert BackfillNotificationRecord.objects.count() == 1

    call_command('report_backfill_outcome')
    assert BackfillNotificationRecord.objects.count() == 0
def test_no_email_is_sent_if_runtime_exceeded(
    report_maintainer_mock,
    backfill_tool_mock,
    secretary,
    record_ready_for_processing,
    sherlock_settings,
    tc_notify_mock,
):
    """
    With a zero-second runtime budget no notification record exists,
    neither before nor after `report_backfill_outcome` runs.
    """
    exhausted_budget = timedelta(seconds=0)
    sheriff_bot = Sherlock(
        report_maintainer_mock, backfill_tool_mock, secretary, exhausted_budget
    )

    try:
        sheriff_bot.sheriff(
            since=EPOCH, frameworks=['raptor', 'talos'], repositories=['autoland']
        )
    except MaxRuntimeExceeded:
        # Running out of time is tolerated; only the stored records are checked.
        pass

    assert BackfillNotificationRecord.objects.count() == 0

    call_command('report_backfill_outcome')
    assert BackfillNotificationRecord.objects.count() == 0
@pytest.mark.parametrize(
    'framework, repository',
    [
        ('non_existent_framework', test_settings.TREEHERDER_TEST_REPOSITORY_NAME),
        ('test_talos', 'non_existent_repository'),
        ('non_existent_framework', 'non_existent_repository'),
    ],
)
def test_no_email_is_sent_for_untargeted_alerts(
    report_maintainer_mock,
    backfill_tool_mock,
    secretary,
    record_ready_for_processing,
    sherlock_settings,
    framework,
    repository,
    tc_notify_mock,
):
    """
    Sheriffing a framework and/or repository that does not exist stores no
    notification record, before or after `report_backfill_outcome` runs.
    """
    sheriff_bot = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary)
    sheriff_bot.sheriff(since=EPOCH, frameworks=[framework], repositories=[repository])
    record_ready_for_processing.refresh_from_db()

    assert BackfillNotificationRecord.objects.count() == 0

    call_command('report_backfill_outcome')
    assert BackfillNotificationRecord.objects.count() == 0

Просмотреть файл

@ -1,7 +1,6 @@
from datetime import datetime, timedelta
import simplejson as json
from json import JSONDecodeError
from unittest.mock import MagicMock
import pytest
from django.db import models
@ -10,7 +9,6 @@ from tests import settings as test_settings
from tests.perf.auto_perf_sheriffing.conftest import prepare_record_with_search_str
from treeherder.model.models import Job, Push
from treeherder.perf.auto_perf_sheriffing.sherlock import Sherlock
from treeherder.perf.email import BackfillNotificationWriter
from treeherder.perf.exceptions import MaxRuntimeExceeded
from treeherder.perf.models import BackfillRecord, BackfillReport
@ -29,121 +27,6 @@ def has_changed(orm_object: models.Model) -> bool:
return False
class TestEmailIntegration:
    """Checks how often Sherlock calls `email` on the notify client after sheriffing."""

    @staticmethod
    def email_writer_mock():
        """Build a `MagicMock` specced on a `BackfillNotificationWriter` instance."""
        return MagicMock(spec=BackfillNotificationWriter())

    def test_email_is_sent_after_successful_backfills(
        self,
        report_maintainer_mock,
        backfill_tool_mock,
        secretary,
        record_ready_for_processing,
        sherlock_settings,
        notify_client_mock,
    ):
        """Sheriffing the targeted framework/repository sends exactly one email."""
        sheriff_bot = Sherlock(
            report_maintainer_mock,
            backfill_tool_mock,
            secretary,
            notify_client_mock,
            email_writer=self.email_writer_mock(),
        )
        targeted_repositories = [test_settings.TREEHERDER_TEST_REPOSITORY_NAME]
        sheriff_bot.sheriff(
            since=EPOCH, frameworks=['test_talos'], repositories=targeted_repositories
        )
        record_ready_for_processing.refresh_from_db()

        assert notify_client_mock.email.call_count == 1

    def test_email_is_still_sent_if_context_is_too_corrupt_to_be_actionable(
        self,
        report_maintainer_mock,
        backfill_tool_mock,
        secretary,
        record_ready_for_processing,
        sherlock_settings,
        notify_client_mock,
        broken_context_str,  # Note: parametrizes the test
    ):
        """A record saved with a broken context string still results in one email."""
        record_ready_for_processing.context = broken_context_str
        record_ready_for_processing.save()

        # This case uses the default email writer rather than the mock.
        sheriff_bot = Sherlock(
            report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock
        )
        targeted_repositories = [test_settings.TREEHERDER_TEST_REPOSITORY_NAME]
        sheriff_bot.sheriff(
            since=EPOCH, frameworks=['test_talos'], repositories=targeted_repositories
        )

        assert notify_client_mock.email.call_count == 1

    def test_no_email_is_sent_if_runtime_exceeded(
        self,
        report_maintainer_mock,
        backfill_tool_mock,
        secretary,
        record_ready_for_processing,
        sherlock_settings,
        notify_client_mock,
    ):
        """With a zero-second runtime budget the notify client is never called."""
        exhausted_budget = timedelta(seconds=0)
        sheriff_bot = Sherlock(
            report_maintainer_mock,
            backfill_tool_mock,
            secretary,
            notify_client_mock,
            exhausted_budget,
        )

        try:
            sheriff_bot.sheriff(
                since=EPOCH, frameworks=['raptor', 'talos'], repositories=['autoland']
            )
        except MaxRuntimeExceeded:
            # Running out of time is tolerated; only the email count is checked.
            pass

        assert notify_client_mock.email.call_count == 0

    @pytest.mark.parametrize(
        'framework, repository',
        [
            ('non_existent_framework', test_settings.TREEHERDER_TEST_REPOSITORY_NAME),
            ('test_talos', 'non_existent_repository'),
            ('non_existent_framework', 'non_existent_repository'),
        ],
    )
    def test_no_email_is_sent_for_untargeted_alerts(
        self,
        report_maintainer_mock,
        backfill_tool_mock,
        secretary,
        record_ready_for_processing,
        sherlock_settings,
        notify_client_mock,
        framework,
        repository,
    ):
        """A non-existent framework and/or repository leads to no email at all."""
        sheriff_bot = Sherlock(
            report_maintainer_mock,
            backfill_tool_mock,
            secretary,
            notify_client_mock,
            email_writer=self.email_writer_mock(),
        )
        sheriff_bot.sheriff(since=EPOCH, frameworks=[framework], repositories=[repository])
        record_ready_for_processing.refresh_from_db()

        assert notify_client_mock.email.call_count == 0
def test_record_job_symbol_is_none_if_component_misses(record_with_missing_job_symbol_components):
job_symbol = record_with_missing_job_symbol_components.job_symbol
@ -179,7 +62,6 @@ def test_records_change_to_ready_for_processing(
backfill_tool_mock,
secretary,
sherlock_settings,
notify_client_mock,
):
# create new report with records - the report will not be mature
create_record(test_perf_alert)
@ -195,7 +77,6 @@ def test_records_change_to_ready_for_processing(
report_maintainer_mock,
backfill_tool_mock,
secretary,
notify_client_mock,
)
sherlock.sheriff(since=EPOCH, frameworks=['raptor', 'talos'], repositories=['autoland'])
@ -210,12 +91,9 @@ def test_assert_can_run_throws_exception_when_runtime_exceeded(
secretary,
record_ready_for_processing,
sherlock_settings,
notify_client_mock,
):
no_time_left = timedelta(seconds=0)
sherlock_bot = Sherlock(
report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock, no_time_left
)
sherlock_bot = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary, no_time_left)
with pytest.raises(MaxRuntimeExceeded):
sherlock_bot.assert_can_run()
@ -225,14 +103,11 @@ def test_assert_can_run_doesnt_throw_exception_when_enough_time_left(
report_maintainer_mock,
backfill_tool_mock,
secretary,
notify_client_mock,
record_ready_for_processing,
sherlock_settings,
):
enough_time_left = timedelta(minutes=10)
sherlock = Sherlock(
report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock, enough_time_left
)
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary, enough_time_left)
try:
sherlock.assert_can_run()
@ -244,11 +119,10 @@ def test_records_and_db_limits_remain_unchanged_if_no_records_suitable_for_backf
report_maintainer_mock,
backfill_tool_mock,
secretary,
notify_client_mock,
sherlock_settings,
record_unsuited_for_backfill,
):
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock)
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary)
sherlock._backfill(['test_talos'], [test_settings.TREEHERDER_TEST_REPOSITORY_NAME])
assert not has_changed(record_unsuited_for_backfill)
@ -259,11 +133,10 @@ def test_records_remain_unchanged_if_no_backfills_left(
report_maintainer_mock,
backfill_tool_mock,
secretary,
notify_client_mock,
record_ready_for_processing,
empty_sheriff_settings,
):
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock)
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary)
sherlock._backfill(['test_talos'], [test_settings.TREEHERDER_TEST_REPOSITORY_NAME])
assert not has_changed(record_ready_for_processing)
@ -275,12 +148,9 @@ def test_records_and_db_limits_remain_unchanged_if_runtime_exceeded(
secretary,
record_ready_for_processing,
sherlock_settings,
notify_client_mock,
):
no_time_left = timedelta(seconds=0)
sherlock = Sherlock(
report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock, no_time_left
)
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary, no_time_left)
try:
sherlock.sheriff(since=EPOCH, frameworks=['raptor', 'talos'], repositories=['autoland'])
except MaxRuntimeExceeded:
@ -296,13 +166,12 @@ def test_db_limits_update_if_backfills_left(
secretary,
record_ready_for_processing,
sherlock_settings,
notify_client_mock,
):
targeted_platform = record_ready_for_processing.platform.platform
initial_backfills = secretary.backfills_left(on_platform=targeted_platform)
assert initial_backfills == json.loads(sherlock_settings.settings)['limits'][targeted_platform]
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock)
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary)
sherlock.sheriff(
since=EPOCH,
frameworks=['test_talos'],
@ -320,13 +189,12 @@ def test_backfilling_gracefully_handles_invalid_json_contexts_without_blowing_up
secretary,
record_ready_for_processing,
sherlock_settings,
notify_client_mock,
broken_context_str, # Note: parametrizes the test
):
record_ready_for_processing.context = broken_context_str
record_ready_for_processing.save()
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary, notify_client_mock)
sherlock = Sherlock(report_maintainer_mock, backfill_tool_mock, secretary)
try:
sherlock.sheriff(
since=EPOCH,

Просмотреть файл

@ -8,7 +8,6 @@ from treeherder.perf.auto_perf_sheriffing.backfill_reports import (
from treeherder.perf.auto_perf_sheriffing.backfill_tool import BackfillTool
from treeherder.perf.auto_perf_sheriffing.sherlock import Sherlock
from treeherder.perf.auto_perf_sheriffing.secretary import Secretary
from treeherder.services.taskcluster import notify_client_factory
from treeherder.services.taskcluster import taskcluster_model_factory
@ -16,9 +15,8 @@ def sherlock_factory(days_to_lookup: timedelta) -> Sherlock:
report_maintainer = __report_maintainer_factory(days_to_lookup)
backfill_tool = backfill_tool_factory()
secretary = Secretary()
notify_client = notify_client_factory()
return Sherlock(report_maintainer, backfill_tool, secretary, notify_client)
return Sherlock(report_maintainer, backfill_tool, secretary)
def __report_maintainer_factory(days_to_lookup: timedelta) -> BackfillReportMaintainer:

Просмотреть файл

@ -4,7 +4,6 @@ from json import JSONDecodeError
from logging import INFO, WARNING
from typing import List, Tuple
import taskcluster
from django.conf import settings
from django.db.models import QuerySet
from taskcluster.helper import TaskclusterConfig
@ -12,9 +11,8 @@ from taskcluster.helper import TaskclusterConfig
from treeherder.perf.auto_perf_sheriffing.backfill_reports import BackfillReportMaintainer
from treeherder.perf.auto_perf_sheriffing.backfill_tool import BackfillTool
from treeherder.perf.auto_perf_sheriffing.secretary import Secretary
from treeherder.perf.email import BackfillNotificationWriter, EmailWriter
from treeherder.perf.exceptions import CannotBackfill, MaxRuntimeExceeded
from treeherder.perf.models import BackfillRecord, BackfillReport
from treeherder.perf.models import BackfillRecord, BackfillReport, BackfillNotificationRecord
logger = logging.getLogger(__name__)
@ -36,21 +34,16 @@ class Sherlock:
report_maintainer: BackfillReportMaintainer,
backfill_tool: BackfillTool,
secretary: Secretary,
notify_client: taskcluster.Notify,
max_runtime: timedelta = None,
email_writer: EmailWriter = None,
supported_platforms: List[str] = None,
):
self.report_maintainer = report_maintainer
self.backfill_tool = backfill_tool
self.secretary = secretary
self._notify = notify_client
self._max_runtime = self.DEFAULT_MAX_RUNTIME if max_runtime is None else max_runtime
self._email_writer = email_writer or BackfillNotificationWriter()
self.supported_platforms = supported_platforms or settings.SUPPORTED_PLATFORMS
self._wake_up_time = datetime.now()
self.backfilled_records = [] # useful for reporting backfill outcome
def sheriff(self, since: datetime, frameworks: List[str], repositories: List[str]):
logger.info("Sherlock: Validating settings...")
@ -74,9 +67,6 @@ class Sherlock:
self._backfill(frameworks, repositories)
self.assert_can_run()
logger.info("Sherlock: Notifying backfill outcome...")
self._notify_backfill_outcome()
def runtime_exceeded(self) -> bool:
    """Tell whether the time elapsed since `_wake_up_time` has reached `_max_runtime`."""
    return datetime.now() - self._wake_up_time >= self._max_runtime
@ -110,7 +100,8 @@ class Sherlock:
break
left, consumed = self._backfill_record(record, left)
logger.info(f"Sherlock: Backfilled record with id {record.alert.id}.")
self.backfilled_records.append(record)
# Model used for reporting backfill outcome
BackfillNotificationRecord.objects.create(record=record)
total_consumed += consumed
self.secretary.consume_backfills(platform, total_consumed)
@ -208,19 +199,6 @@ class Sherlock:
return pending_tasks_count > acceptable_limit
def _notify_backfill_outcome(self):
    """
    Email a summary of `self.backfilled_records` through the notify client,
    or just log that there is nothing to report when that list is empty.
    """
    if not self.backfilled_records:
        logger.info("Sherlock: Nothing to report via email.")
        return

    email_payload = self._email_writer.prepare_new_email(self.backfilled_records)
    logger.debug(f"Sherlock: Composed email notification payload `{email_payload}`.")

    # send email
    service_reply = self._notify.email(email_payload)
    logger.debug(f"Sherlock: Email notification service replied with `{service_reply}`.")
@staticmethod
def __get_data_points_to_backfill(context: List[dict]) -> List[dict]:
context_len = len(context)

Просмотреть файл

@ -0,0 +1,44 @@
import logging
from django.core.management.base import BaseCommand
from treeherder.perf.models import BackfillNotificationRecord
from treeherder.perf.email import BackfillNotificationWriter
from treeherder.services.taskcluster import notify_client_factory
SUCCESS_STATUS = 200
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """
    Management command that emails one aggregated summary of all queued
    `BackfillNotificationRecord` rows and clears them once the email
    service reports success.
    """

    help = (
        'Command used for reporting the outcome of the automatic backfilling process once per day.'
    )

    def handle(self, *args, **options):
        """
        Compose and send the summary email, then delete the rows it covered.

        Rows are deleted only when the notify service answers with
        `SUCCESS_STATUS`; otherwise they are kept so that a later run can
        report them. Nothing is sent when no rows are queued.
        """
        logger.info("Sherlock Notify Service: Notifying backfill outcome...")
        notify = notify_client_factory()
        email_writer = BackfillNotificationWriter()

        # Evaluate the queryset once and fetch each related BackfillRecord in
        # the same query, instead of issuing one extra query per row.
        backfills_to_email = list(BackfillNotificationRecord.objects.select_related('record'))
        if not backfills_to_email:
            logger.info("Sherlock Notify Service: Nothing to report via email.")
            return

        backfilled_records = [backfill.record for backfill in backfills_to_email]
        backfill_notification = email_writer.prepare_new_email(backfilled_records)
        logger.debug(
            f"Sherlock Notify Service: Composed email notification payload `{backfill_notification}`."
        )

        # send email
        notification_outcome = notify.email(backfill_notification)
        logger.debug(
            f"Sherlock Notify Service: Email notification service replied with `{notification_outcome}`."
        )

        if notification_outcome['response'].status_code != SUCCESS_STATUS:
            # A failed delivery must be visible in normal logs, not only at DEBUG.
            logger.warning("Sherlock Notify Service: Email notification service failed.")
            return

        logger.debug("Sherlock Notify Service: Removing notified records from helper table.")
        # Delete in a single query, restricted to the rows that were actually
        # included in the email; rows created in the meantime are left alone.
        notified_ids = [backfill.pk for backfill in backfills_to_email]
        BackfillNotificationRecord.objects.filter(pk__in=notified_ids).delete()

Просмотреть файл

@ -0,0 +1,38 @@
# Generated by Django 3.1.12 on 2021-08-18 08:06
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Creates the `backfill_notification_record` table backing `BackfillNotificationRecord`."""

    dependencies = [('perf', '0040_performancealert_noise_profile')]

    operations = [
        migrations.CreateModel(
            name='BackfillNotificationRecord',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('created', models.DateTimeField(auto_now_add=True)),
                ('last_updated', models.DateTimeField(auto_now=True)),
                (
                    'record',
                    # One notification row per backfill record; removed along with it.
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='backfill_notification_record',
                        to='perf.backfillrecord',
                    ),
                ),
            ],
            options={'db_table': 'backfill_notification_record'},
        ),
    ]

Просмотреть файл

@ -764,6 +764,27 @@ class BackfillRecord(models.Model):
return "BackfillRecord(alert #{}, from {})".format(self.alert.id, self.report)
class BackfillNotificationRecord(models.Model):
    """
    Holding table for backfilled records that still await notification,
    so that they can all be sent together in one daily email summary.
    """

    # The backfilled record to report; at most one notification row per record,
    # removed together with it (CASCADE).
    record = models.OneToOneField(
        BackfillRecord, on_delete=models.CASCADE, related_name='backfill_notification_record'
    )
    created = models.DateTimeField(auto_now_add=True)
    last_updated = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "backfill_notification_record"

    def __str__(self):
        return "Backfill Notification Record"
class PerformanceSettings(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=255)