Bug 1700606 - Expire garbage backfill reports

* also remove alerts older than a year
* slightly refactor main garbage collection method
* fix ORM filter
* provide test coverage
* reduce ambiguity around signature garbage collection
This commit is contained in:
ionutgoldan 2021-03-31 15:33:55 +03:00 коммит произвёл GitHub
Родитель b24b958722
Коммит fde027cf3c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 84 добавлений и 14 удалений

Просмотреть файл

@ -24,9 +24,15 @@ from treeherder.perf.models import (
PerformanceAlertSummary,
PerformanceAlert,
MultiCommitDatum,
BackfillReport,
)
@pytest.fixture
def empty_backfill_report(test_perf_alert_summary) -> BackfillReport:
    """Create and return a backfill report tied to the test alert summary.

    Only the ``summary`` field is supplied on creation; no records are
    attached to the report here.
    """
    report = BackfillReport.objects.create(summary=test_perf_alert_summary)
    return report
@pytest.mark.parametrize(
'repository_name',
[
@ -840,3 +846,34 @@ def test_deleting_performance_data_cascades_to_perf_multicomit_data(test_perf_da
cursor.close()
assert MultiCommitDatum.objects.count() == 0
def test_alerts_older_than_a_year_are_removed_even_if_signature_is_active(test_perf_alert):
    """An alert back-dated by 365 days must be gone after a cycling run."""
    # alert (fixture) comes pre linked to an active signature
    one_year_ago = datetime.now() - timedelta(days=365)
    test_perf_alert.created = one_year_ago
    test_perf_alert.save()

    cycler = PerfherderCycler(10_000, 0)
    cycler.cycle()

    alert_survived = PerformanceAlert.objects.filter(id=test_perf_alert.id).exists()
    assert not alert_survived
def test_empty_backfill_reports_get_removed(empty_backfill_report):
    """A backfill report back-dated by 120 days must be gone after cycling."""
    backdated_creation = datetime.now() - timedelta(days=120)
    empty_backfill_report.created = backdated_creation
    empty_backfill_report.save()

    cycler = PerfherderCycler(10_000, 0)
    cycler.cycle()

    remaining_reports = BackfillReport.objects.count()
    assert remaining_reports == 0
@pytest.mark.parametrize('days_since_created', [0, 30, 100])
def test_empty_backfill_reports_arent_removed_if_not_enough_time_passed(
    empty_backfill_report, days_since_created
):
    """A backfill report aged 0, 30 or 100 days must survive a cycling run."""
    report_age = timedelta(days=days_since_created)
    empty_backfill_report.created = datetime.now() - report_age
    empty_backfill_report.save()

    PerfherderCycler(10_000, 0).cycle()

    surviving_reports = BackfillReport.objects.filter(
        summary_id=empty_backfill_report.summary_id
    )
    assert surviving_reports.exists()

Просмотреть файл

@ -10,7 +10,12 @@ from django.db.models import Count
from treeherder.model.data_cycling.removal_strategies import RemovalStrategy
from treeherder.model.models import Job, JobType, JobGroup, Machine
from treeherder.perf.exceptions import NoDataCyclingAtAll, MaxRuntimeExceeded
from treeherder.perf.models import PerformanceSignature, PerformanceAlertSummary
from treeherder.perf.models import (
PerformanceSignature,
PerformanceAlertSummary,
PerformanceAlert,
BackfillReport,
)
from treeherder.services import taskcluster
from .max_runtime import MaxRuntime
from .signature_remover import PublicSignatureRemover
@ -130,15 +135,35 @@ class PerfherderCycler(DataCycler):
logger.warning(ex)
def _remove_leftovers(self):
# NOTE(review): this span comes from a rendered diff whose +/- markers were
# lost. It appears to interleave the old inline body (the signature query
# and the alert-summary log line) with the new calls to the private
# helpers -- confirm against the repository which lines are still live.
# remove any signatures which are
# no longer associated with a job
signatures = PerformanceSignature.objects.filter(last_updated__lte=self.max_timestamp)
notify_client = taskcluster.notify_client_factory()
signatures_remover = PublicSignatureRemover(timer=self.timer, notify_client=notify_client)
signatures_remover.remove_in_chunks(signatures)
self.__remove_empty_signatures()
# remove empty alert summaries
logger.warning('Removing alert summaries which no longer have any alerts...')
# Helper calls, in order: too old alerts, then empty alert summaries,
# then empty backfill reports.
self.__remove_too_old_alerts()
self.__remove_empty_alert_summaries()
self.__remove_empty_backfill_reports()
def __remove_empty_signatures(self):
    """Pass signatures not updated since ``self.max_timestamp`` to the remover.

    The queryset selects on ``last_updated`` only; it is handed to
    ``PublicSignatureRemover.remove_in_chunks`` for the actual removal.
    """
    logger.warning("Removing performance signatures which don't have any data points...")
    stale_signatures = PerformanceSignature.objects.filter(last_updated__lte=self.max_timestamp)
    remover = PublicSignatureRemover(
        timer=self.timer,
        notify_client=taskcluster.notify_client_factory(),
    )
    remover.remove_in_chunks(stale_signatures)
def __remove_too_old_alerts(self):
    """Delete every performance alert created more than 365 days ago."""
    logger.warning("Removing alerts older than a year...")
    #################################################################
    # WARNING! Don't change this threshold without proper approval! #
    # Otherwise we risk deleting data that's actively investigated  #
    # and cripple the perf sheriffing process!                      #
    #################################################################
    a_year_ago = datetime.now() - timedelta(days=365)
    expired_alerts = PerformanceAlert.objects.filter(created__lt=a_year_ago)
    expired_alerts.delete()
def __remove_empty_alert_summaries(self):
logger.warning("Removing alert summaries which no longer have any alerts...")
(
PerformanceAlertSummary.objects.prefetch_related('alerts', 'related_alerts')
.annotate(
@ -157,6 +182,14 @@ class PerfherderCycler(DataCycler):
.delete()
)
def __remove_empty_backfill_reports(self):
    """Delete backfill reports created over 120 days ago that have zero records."""
    logger.warning("Removing backfill reports which no longer have any records...")
    cutoff = datetime.now() - timedelta(days=120)
    # Annotate each report with its number of related records so the
    # emptiness check runs in the same query as the age check.
    counted_reports = BackfillReport.objects.annotate(total_records=Count('records'))
    stale_empty_reports = counted_reports.filter(created__lt=cutoff, total_records=0)
    stale_empty_reports.delete()
def _delete_in_chunks(self, strategy: RemovalStrategy):
any_successful_attempt = False

Просмотреть файл

@ -4,6 +4,7 @@ from typing import List
import taskcluster
from django.conf import settings
from django.db import transaction
from django.db.models import QuerySet
from taskcluster.exceptions import TaskclusterRestFailure
from treeherder.perf.models import PerformanceSignature
@ -34,14 +35,13 @@ class PublicSignatureRemover:
self._email_writer = email_writer or DeletionNotificationWriter()
self.timer = timer
def remove_in_chunks(self, signatures):
def remove_in_chunks(self, potentially_empty_signatures: QuerySet):
emails_sent = 0
rows_left = self._max_rows_allowed
chunk_of_signatures = []
logger.warning("Removing performance signatures which don't have any data points...")
self._remove_empty_try_signatures(signatures)
for perf_signature in signatures:
self._remove_empty_try_signatures(potentially_empty_signatures)
for perf_signature in potentially_empty_signatures:
self.timer.quit_on_timeout()
if emails_sent < self._max_emails_allowed and (
@ -63,7 +63,7 @@ class PublicSignatureRemover:
self.__delete_and_notify(chunk_of_signatures)
@staticmethod
def _remove_empty_try_signatures(signatures):
def _remove_empty_try_signatures(signatures: QuerySet):
try_signatures = signatures.filter(repository__name='try')
for perf_signature in try_signatures:
if not perf_signature.has_performance_data():