Bug 1671564 - Email summary of signatures prior to deleting them

This commit is contained in:
beatrice-acasandrei 2020-12-09 15:28:51 +02:00 committed by GitHub
Parent 6717c92aa9
Commit 17dbc80b33
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
9 changed files: 330 additions and 35 deletions

Просмотреть файл

@ -8,9 +8,11 @@ from os.path import join, dirname
import kombu
import pytest
import responses
import taskcluster
from _pytest.monkeypatch import MonkeyPatch
from django.conf import settings
from rest_framework.test import APIClient
from unittest.mock import MagicMock
from treeherder.autoclassify.autoclassify import mark_best_classification
from treeherder.etl.jobs import store_job_data
@ -255,6 +257,17 @@ def mock_log_parser(monkeypatch):
monkeypatch.setattr(tasks, 'parse_logs', task_mock)
@pytest.fixture
def mock_taskcluster_notify(monkeypatch):
    # Replace the Taskcluster Notify service class with a MagicMock so tests
    # never reach the real notification API; calls can be inspected via
    # taskcluster.Notify().ping / .email.
    monkeypatch.setattr(taskcluster, 'Notify', MagicMock())
@pytest.fixture
def mock_tc_prod_credentials(monkeypatch):
    # Provide fake production Notify credentials so code paths gated on
    # settings.NOTIFY_CLIENT_ID / NOTIFY_ACCESS_TOKEN are exercised.
    monkeypatch.setattr(settings, 'NOTIFY_CLIENT_ID', "client_id")
    monkeypatch.setattr(settings, 'NOTIFY_ACCESS_TOKEN', "access_token")
@pytest.fixture
def push_stored(test_repository, sample_push):
store_push_data(test_repository, sample_push)

Просмотреть файл

@ -1,6 +1,9 @@
import math
import taskcluster
from datetime import datetime, timedelta
import pytest
from unittest.mock import MagicMock
from django.core.management import call_command
from django.db.models import Max
@ -23,6 +26,8 @@ from treeherder.perf.models import (
PerformanceAlertSummary,
PerformanceAlert,
)
from treeherder.perf.data_cycling.signature_remover import PublicSignatureRemover
from treeherder.perf.data_cycling.max_runtime import MaxRuntime
@pytest.mark.parametrize(
@ -216,6 +221,7 @@ def test_cycle_performance_data(
repository_name,
push_stored,
test_perf_signature,
mock_taskcluster_notify,
):
test_repository.name = repository_name
test_repository.save()
@ -276,7 +282,7 @@ def test_cycle_performance_data(
]
def test_performance_signatures_are_deleted(test_perf_signature):
def test_performance_signatures_are_deleted(test_perf_signature, mock_taskcluster_notify):
cycler = PerfherderCycler(chunk_size=100, sleep_time=0)
expired_timestamp = cycler.max_timestamp
@ -315,7 +321,12 @@ def test_performance_signatures_are_deleted(test_perf_signature):
def test_try_data_removal(
try_repository, test_repository, try_push_stored, test_perf_signature, test_perf_signature_2
try_repository,
test_repository,
try_push_stored,
test_perf_signature,
test_perf_signature_2,
mock_taskcluster_notify,
):
total_removals = 3
test_perf_signature.repository = try_repository
@ -373,6 +384,7 @@ def test_irrelevant_repos_data_removal(
repository_name,
push_stored,
test_perf_signature,
mock_taskcluster_notify,
):
# test_repository is considered irrelevant repositories
@ -427,7 +439,88 @@ def test_irrelevant_repos_data_removal(
).exists()
def test_performance_cycler_quit_indicator():
def test_signature_remover(
    test_perf_signature,
    test_perf_signature_2,
    test_perf_data,
    mock_taskcluster_notify,
    mock_tc_prod_credentials,
):
    """An expired signature is deleted and a summary email is sent to both
    receiver addresses (team + individual)."""
    cycler = PerfherderCycler(chunk_size=100, sleep_time=0)
    expired_timestamp = cycler.max_timestamp
    test_perf_signature_2.last_updated = expired_timestamp
    test_perf_signature_2.save()

    assert len(PerformanceSignature.objects.all()) == 2

    call_command('cycle_data', 'from:perfherder')

    # `mock.called_once` is just an auto-created MagicMock attribute and is
    # always truthy; assert on call_count instead.  email.call_count == 2
    # implies exactly one flush, so ping must have run exactly once.
    assert taskcluster.Notify().ping.call_count == 1
    assert taskcluster.Notify().email.call_count == 2  # one email per receiver address
    assert len(PerformanceSignature.objects.all()) == 1
    assert PerformanceSignature.objects.first() == test_perf_signature
def test_signature_remover_when_notify_service_is_down(
    test_perf_signature,
    test_perf_signature_2,
    test_perf_data,
    mock_taskcluster_notify,
    mock_tc_prod_credentials,
):
    # Simulate the Taskcluster Notify service being unreachable: no email must
    # be sent and no signature deleted, so removals never go unreported.
    taskcluster.Notify().ping.side_effect = Exception('Email Service is down.')

    call_command('cycle_data', 'from:perfherder')

    # NOTE(review): `called_once` is an auto-created MagicMock attribute and is
    # always truthy, so this assertion can never fail — consider asserting on
    # `ping.call_count` instead (after confirming ping is actually reached here).
    assert taskcluster.Notify().ping.called_once
    assert not taskcluster.Notify().email.called
    # both signatures survive because nothing may be deleted without an email
    assert len(PerformanceSignature.objects.all()) == 2
@pytest.mark.parametrize('total_signatures', [3, 4, 8, 10])
def test_total_emails_sent(test_perf_signature, total_signatures, mock_tc_prod_credentials):
    """One summary email batch per chunk of deleted signatures, sent to two
    addresses each, never exceeding the configured email budget."""
    notify_model = MagicMock()
    runtime_guard = MaxRuntime()
    runtime_guard.start_timer()

    total_rows = 2
    total_emails = 4
    remover = PublicSignatureRemover(
        timer=runtime_guard,
        taskcluster_model=notify_model,
        max_rows_allowed=total_rows,
        max_emails_allowed=total_emails,
    )

    # create `total_signatures` extra signatures, all without data points
    for idx in range(total_signatures):
        PerformanceSignature.objects.create(
            repository=test_perf_signature.repository,
            signature_hash=(20 * ('t%s' % idx)),
            framework=test_perf_signature.framework,
            platform=test_perf_signature.platform,
            option_collection=test_perf_signature.option_collection,
            suite='mysuite%s' % idx,
            test='mytest%s' % idx,
            application='firefox',
            has_subtests=test_perf_signature.has_subtests,
            extra_options=test_perf_signature.extra_options,
            last_updated=datetime.now(),
        )

    total_signatures += 1  # is incremented because of test_perf_signature
    expected_call_count = min(math.ceil(total_signatures / total_rows), total_emails)

    expired_signatures = PerformanceSignature.objects.filter(last_updated__lte=datetime.now())
    remover.remove_in_chunks(expired_signatures)

    assert notify_model.notify.ping.call_count == expected_call_count
    assert (
        notify_model.notify.email.call_count == expected_call_count * 2
    )  # the email is sent to two email scopes
def test_performance_cycler_quit_indicator(mock_taskcluster_notify):
ten_minutes_ago = datetime.now() - timedelta(minutes=10)
one_second = timedelta(seconds=1)
@ -435,16 +528,17 @@ def test_performance_cycler_quit_indicator():
five_minutes = timedelta(minutes=5)
with pytest.raises(MaxRuntimeExceeded):
cycler = PerfherderCycler(chunk_size=100, sleep_time=0, max_runtime=one_second)
cycler.started_at = ten_minutes_ago
cycler._quit_on_timeout()
PerfherderCycler(chunk_size=100, sleep_time=0)
max_runtime = MaxRuntime(max_runtime=one_second)
max_runtime.started_at = ten_minutes_ago
max_runtime.quit_on_timeout()
try:
cycler = PerfherderCycler(chunk_size=100, sleep_time=0, max_runtime=five_minutes)
cycler.started_at = two_seconds_ago
PerfherderCycler(chunk_size=100, sleep_time=0)
cycler._quit_on_timeout()
max_runtime = MaxRuntime(max_runtime=five_minutes)
max_runtime.started_at = two_seconds_ago
max_runtime.quit_on_timeout()
except MaxRuntimeExceeded:
pytest.fail('Performance cycling shouldn\'t have timed out')
@ -470,7 +564,9 @@ def empty_alert_summary(
datetime.now() - timedelta(days=180, hours=1),
],
)
def test_summary_without_any_kind_of_alerts_is_deleted(expired_time, empty_alert_summary):
def test_summary_without_any_kind_of_alerts_is_deleted(
expired_time, empty_alert_summary, mock_taskcluster_notify
):
empty_alert_summary.created = expired_time
empty_alert_summary.save()
@ -491,7 +587,9 @@ def test_summary_without_any_kind_of_alerts_is_deleted(expired_time, empty_alert
datetime.now() - timedelta(days=179, hours=23),
],
)
def test_summary_without_any_kind_of_alerts_isnt_deleted(recently, empty_alert_summary):
def test_summary_without_any_kind_of_alerts_isnt_deleted(
recently, empty_alert_summary, mock_taskcluster_notify
):
empty_alert_summary.created = recently
empty_alert_summary.save()
@ -518,7 +616,12 @@ def test_summary_without_any_kind_of_alerts_isnt_deleted(recently, empty_alert_s
],
)
def test_summary_with_alerts_isnt_deleted(
creation_time, empty_alert_summary, test_perf_alert, test_perf_alert_2, test_perf_data
creation_time,
empty_alert_summary,
test_perf_alert,
test_perf_alert_2,
test_perf_data,
mock_taskcluster_notify,
):
empty_alert_summary.created = creation_time
empty_alert_summary.save()

Просмотреть файл

@ -482,6 +482,10 @@ RESET_BACKFILL_LIMITS = timedelta(hours=24)
PERF_SHERIFF_BOT_CLIENT_ID = env('PERF_SHERIFF_BOT_CLIENT_ID', default=None)
PERF_SHERIFF_BOT_ACCESS_TOKEN = env('PERF_SHERIFF_BOT_ACCESS_TOKEN', default=None)
# Taskcluster credentials for Notification Service
NOTIFY_CLIENT_ID = env('NOTIFY_CLIENT_ID', default=None)
NOTIFY_ACCESS_TOKEN = env('NOTIFY_ACCESS_TOKEN', default=None)
# This is only used for removing the rate limiting. You can create your own here:
# https://github.com/settings/tokens
GITHUB_TOKEN = env("GITHUB_TOKEN", default=None)

Просмотреть файл

@ -9,11 +9,15 @@ from django.db import connection
from django.db.backends.utils import CursorWrapper
from django.db.models import Count
from django.db.utils import OperationalError
from django.conf import settings
from typing import List
from treeherder.model.models import Job, JobGroup, JobType, Machine, Repository
from treeherder.perf.exceptions import MaxRuntimeExceeded, NoDataCyclingAtAll
from treeherder.perf.models import PerformanceDatum, PerformanceSignature, PerformanceAlertSummary
from treeherder.services.taskcluster import TaskclusterModel, DEFAULT_ROOT_URL as root_url
from treeherder.perf.data_cycling.signature_remover import PublicSignatureRemover
from treeherder.perf.data_cycling.max_runtime import MaxRuntime
logging.basicConfig(format='%(levelname)s:%(message)s')
@ -85,22 +89,19 @@ class TreeherderCycler(DataCycler):
class PerfherderCycler(DataCycler):
DEFAULT_MAX_RUNTIME = timedelta(hours=23)
def __init__(
self,
chunk_size: int,
sleep_time: int,
is_debug: bool = None,
max_runtime: timedelta = None,
strategies: List[RemovalStrategy] = None,
**kwargs,
):
super().__init__(chunk_size, sleep_time, is_debug)
self.started_at = None
self.max_runtime = max_runtime or PerfherderCycler.DEFAULT_MAX_RUNTIME
self.strategies = strategies or RemovalStrategy.fabricate_all_strategies(chunk_size)
self.timer = MaxRuntime()
@property
def max_timestamp(self):
"""
@ -115,7 +116,7 @@ class PerfherderCycler(DataCycler):
into chunks of chunk_size size.
"""
logger.warning(f"Cycling {PERFHERDER.title()} data...")
self.started_at = datetime.now()
self.timer.start_timer()
try:
for strategy in self.strategies:
@ -132,15 +133,12 @@ class PerfherderCycler(DataCycler):
def _remove_leftovers(self):
# remove any signatures which are
# no longer associated with a job
logger.warning('Removing performance signatures with missing jobs...')
for signature in PerformanceSignature.objects.filter(last_updated__lte=self.max_timestamp):
self._quit_on_timeout()
if not PerformanceDatum.objects.filter(
repository_id=signature.repository_id, # leverages (repository, signature) compound index
signature_id=signature.id,
).exists():
signature.delete()
signatures = PerformanceSignature.objects.filter(last_updated__lte=self.max_timestamp)
tc_model = TaskclusterModel(
root_url, client_id=settings.NOTIFY_CLIENT_ID, access_token=settings.NOTIFY_ACCESS_TOKEN
)
signatures_remover = PublicSignatureRemover(timer=self.timer, taskcluster_model=tc_model)
signatures_remover.remove_in_chunks(signatures)
# remove empty alert summaries
logger.warning('Removing alert summaries which no longer have any alerts...')
@ -162,18 +160,12 @@ class PerfherderCycler(DataCycler):
.delete()
)
def _quit_on_timeout(self):
elapsed_runtime = datetime.now() - self.started_at
if self.max_runtime < elapsed_runtime:
raise MaxRuntimeExceeded('Max runtime for performance data cycling exceeded')
def _delete_in_chunks(self, strategy: RemovalStrategy):
any_successful_attempt = False
with connection.cursor() as cursor:
while True:
self._quit_on_timeout()
self.timer.quit_on_timeout()
try:
strategy.remove(using=cursor)

Просмотреть файл

@ -0,0 +1 @@
from . import max_runtime, signature_remover # noqa

Просмотреть файл

@ -0,0 +1,22 @@
from datetime import datetime, timedelta
from treeherder.perf.exceptions import MaxRuntimeExceeded
class MaxRuntime:
    """Wall-clock watchdog for long-running jobs.

    Call start_timer() once, then quit_on_timeout() periodically; the latter
    raises MaxRuntimeExceeded once the allowed runtime has elapsed.
    """

    DEFAULT_MAX_RUNTIME = timedelta(hours=23)

    def __init__(
        self,
        max_runtime: timedelta = None,
    ):
        # started_at stays None until start_timer() is called
        self.started_at = None
        self.max_runtime = max_runtime or MaxRuntime.DEFAULT_MAX_RUNTIME

    def start_timer(self):
        """Record the instant from which elapsed runtime is measured."""
        self.started_at = datetime.now()

    def quit_on_timeout(self):
        """Raise MaxRuntimeExceeded when the runtime budget is spent."""
        if datetime.now() - self.started_at > self.max_runtime:
            raise MaxRuntimeExceeded('Max runtime for performance data cycling exceeded')

Просмотреть файл

@ -0,0 +1,153 @@
import logging
from typing import List
from django.conf import settings
from treeherder.services.taskcluster import TaskclusterModel
from treeherder.perf.data_cycling.max_runtime import MaxRuntime
logger = logging.getLogger(__name__)
RECEIVER_TEAM_EMAIL = "perftest-alerts@mozilla.com"
RECEIVER_EMAIL = "dhunt@mozilla.com"
class PublicSignatureRemover:
    """
    This class handles the removal of signatures which are (no longer)
    associated to any data point and sends email notifications to the entire team.

    Signatures are deleted in batches of at most ``max_rows_allowed``; each
    deleted batch is summarized in one email sent to both receiver addresses,
    capped at ``max_emails_allowed`` emails per run.
    """

    TABLE_DESCRIPTION = """Perfherder removes performance data that is older than one year and in some cases even sooner, leaving behind performance signatures that aren't associated to any data point. These as well need to be removed.
> __Here's a summary of recently deleted performance signatures:__
---
    """

    TABLE_HEADERS = """
| Repository | Framework | Platform | Suite | Application |
| :---: | :---: | :---: | :---: | :---: |
    """

    def __init__(
        self,
        timer: MaxRuntime,
        taskcluster_model: TaskclusterModel,
        max_rows_allowed=None,
        max_emails_allowed=None,
    ):
        self.tc_model = taskcluster_model
        self._subject = "Summary of deleted Performance Signatures"
        self._content = None  # (re)built per email by __set_content
        self._max_rows_allowed = max_rows_allowed or 50
        self._max_emails_allowed = max_emails_allowed or 10
        self.timer = timer

    def remove_in_chunks(self, signatures):
        """Delete data-less signatures from `signatures`, batch by batch.

        Each batch is reported by email BEFORE the deletion counts as done:
        if the Notify service cannot be pinged, the batch is not deleted (and,
        mid-iteration, the whole run stops) so removals never go unreported.
        """
        emails_sent = 0
        rows_left = self._max_rows_allowed
        chunk_of_signatures = []

        logger.warning("Removing performance signatures which don't have any data points...")
        for perf_signature in signatures:
            self.timer.quit_on_timeout()

            if emails_sent < self._max_emails_allowed and (
                not perf_signature.has_performance_data()
            ):
                rows_left -= 1
                chunk_of_signatures.append(perf_signature)

                if rows_left == 0:
                    if not self._delete_and_notify(chunk_of_signatures):
                        # Notify service unavailable: stop entirely rather
                        # than delete signatures without emailing a summary.
                        break
                    emails_sent += 1
                    chunk_of_signatures = []
                    rows_left = self._max_rows_allowed

        # flush the final, partial batch (if the email budget allows it)
        if emails_sent < self._max_emails_allowed and chunk_of_signatures != []:
            self._delete_and_notify(chunk_of_signatures)

    def _delete_and_notify(self, chunk_of_signatures) -> bool:
        """Delete one batch of signatures and email its summary.

        Returns False (deleting nothing) when the Notify service is down.
        """
        # extract the properties of interest from signatures in a list of dictionaries
        email_data = self.__extract_properties(chunk_of_signatures)

        # check if Taskcluster Notify Service is up
        try:
            self._ping_notify_service()
        except Exception:
            logger.warning(
                "Failed to delete signatures because the Notify Service is not available"
            )
            return False

        self._delete(chunk_of_signatures)
        self._send_notification(email_data)
        return True

    def _send_notification(self, email_data):
        # should only run on one instance at a time
        if settings.NOTIFY_CLIENT_ID and settings.NOTIFY_ACCESS_TOKEN:
            logger.info("Sending email with summary of deleted perf signatures to team...")
            self._send_email(RECEIVER_TEAM_EMAIL, email_data)
            self._send_email(RECEIVER_EMAIL, email_data)
        else:
            logger.warning("Failed to send notification because deployment is NOT production")

    def _ping_notify_service(self):
        # should only run on one instance at a time
        if settings.NOTIFY_CLIENT_ID and settings.NOTIFY_ACCESS_TOKEN:
            self.tc_model.notify.ping()
        else:
            logger.warning("Failed to ping Notify service because deployment is NOT production")

    @staticmethod
    def _delete(chunk_of_signatures):
        # hard-delete every signature in the batch
        for signature in chunk_of_signatures:
            signature.delete()

    def _send_email(self, address: str, signatures: List[dict]):
        # build the markdown table body, then hand the payload to Notify
        self.__set_content(signatures)
        payload = {
            "address": address,
            "content": self._content,
            "subject": self._subject,
        }
        self.tc_model.notify.email(payload)

    def __set_content(self, signatures: List[dict]):
        self._content = self.TABLE_DESCRIPTION + self.TABLE_HEADERS
        for signature in signatures:
            self.__add_new_row(signature)

    def __add_new_row(self, signature: dict):
        # one markdown table row per deleted signature
        signature_row = (
            """| {repository} | {framework} | {platform} | {suite} | {application} |""".format(
                repository=signature["repository"],
                framework=signature["framework"],
                platform=signature["platform"],
                suite=signature["suite"],
                application=signature["application"],
            )
        )
        self._content += signature_row
        self._content += "\n"

    @staticmethod
    def __extract_properties(signatures) -> List[dict]:
        """Snapshot the email-relevant fields before the rows are deleted."""
        properties = []
        for signature in signatures:
            signature_properties = {
                "repository": signature.repository,
                "framework": signature.framework,
                "platform": signature.platform,
                "suite": signature.suite,
                "application": signature.application,
            }
            properties.append(signature_properties)
        return properties

Просмотреть файл

@ -94,6 +94,12 @@ class PerformanceSignature(models.Model):
return idx
return None
def has_performance_data(self):
    """Return True if at least one PerformanceDatum row still references
    this signature (same repository and signature id)."""
    return PerformanceDatum.objects.filter(
        repository_id=self.repository_id,  # leverages (repository, signature) compound index
        signature_id=self.id,
    ).exists()
class Meta:
db_table = 'performance_signature'

Просмотреть файл

@ -25,6 +25,7 @@ class TaskclusterModel:
# Taskcluster APIs
self.hooks = taskcluster.Hooks({**options, 'credentials': credentials})
self.notify = taskcluster.Notify({**options, 'credentials': credentials})
# Following least-privilege principle, as services
# bellow don't really need authorization credentials.