зеркало из https://github.com/mozilla/treeherder.git
Bug 1700606 - Expire garbage backfill reports
* also remove alerts older than a year
* slightly refactor main garbage collection method
* fix ORM filter
* provide test coverage
* reduce ambiguity around signature garbage collection
This commit is contained in:
Родитель
b24b958722
Коммит
fde027cf3c
|
@ -24,9 +24,15 @@ from treeherder.perf.models import (
|
|||
PerformanceAlertSummary,
|
||||
PerformanceAlert,
|
||||
MultiCommitDatum,
|
||||
BackfillReport,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_backfill_report(test_perf_alert_summary) -> BackfillReport:
    """Create and return a backfill report tied to the alert summary fixture.

    Only the summary is provided at creation time; nothing else is attached
    to the report here.
    """
    report = BackfillReport.objects.create(summary=test_perf_alert_summary)
    return report
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'repository_name',
|
||||
[
|
||||
|
@ -840,3 +846,34 @@ def test_deleting_performance_data_cascades_to_perf_multicomit_data(test_perf_da
|
|||
cursor.close()
|
||||
|
||||
assert MultiCommitDatum.objects.count() == 0
|
||||
|
||||
|
||||
def test_alerts_older_than_a_year_are_removed_even_if_signature_is_active(test_perf_alert):
    """Alerts created more than a year ago are deleted when the cycler runs."""
    # alert (fixture) comes pre linked to an active signature
    #
    # The cycler deletes alerts with a strict `created < now - 365 days`
    # comparison. Dating the alert exactly 365 days back would only pass
    # because the clock ticks between this line and the cycler's own `now()`
    # call, so place it unambiguously past the cutoff instead.
    test_perf_alert.created = datetime.now() - timedelta(days=366)
    test_perf_alert.save()

    PerfherderCycler(10_000, 0).cycle()

    assert not PerformanceAlert.objects.filter(id=test_perf_alert.id).exists()
|
||||
|
||||
|
||||
def test_empty_backfill_reports_get_removed(empty_backfill_report):
    """Backfill reports without records are deleted once they are old enough."""
    # The cycler uses a strict `created < now - 120 days` cutoff. A report
    # dated exactly 120 days back sits on that boundary and is removed only
    # because time elapses before the cycler computes its own `now()`, so
    # date the report clearly beyond the cutoff.
    empty_backfill_report.created = datetime.now() - timedelta(days=121)
    empty_backfill_report.save()

    PerfherderCycler(10_000, 0).cycle()

    assert BackfillReport.objects.count() == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize('days_since_created', [0, 30, 100])
def test_empty_backfill_reports_arent_removed_if_not_enough_time_passed(
    empty_backfill_report, days_since_created
):
    """Backfill reports younger than the removal cutoff survive a cycler run."""
    report_age = timedelta(days=days_since_created)
    empty_backfill_report.created = datetime.now() - report_age
    empty_backfill_report.save()

    cycler = PerfherderCycler(10_000, 0)
    cycler.cycle()

    # The report is looked up through the summary it was created for.
    remaining = BackfillReport.objects.filter(summary_id=empty_backfill_report.summary_id)
    assert remaining.exists()
|
||||
|
|
|
@ -10,7 +10,12 @@ from django.db.models import Count
|
|||
from treeherder.model.data_cycling.removal_strategies import RemovalStrategy
|
||||
from treeherder.model.models import Job, JobType, JobGroup, Machine
|
||||
from treeherder.perf.exceptions import NoDataCyclingAtAll, MaxRuntimeExceeded
|
||||
from treeherder.perf.models import PerformanceSignature, PerformanceAlertSummary
|
||||
from treeherder.perf.models import (
|
||||
PerformanceSignature,
|
||||
PerformanceAlertSummary,
|
||||
PerformanceAlert,
|
||||
BackfillReport,
|
||||
)
|
||||
from treeherder.services import taskcluster
|
||||
from .max_runtime import MaxRuntime
|
||||
from .signature_remover import PublicSignatureRemover
|
||||
|
@ -130,15 +135,35 @@ class PerfherderCycler(DataCycler):
|
|||
logger.warning(ex)
|
||||
|
||||
def _remove_leftovers(self):
    """Run the follow-up clean-up steps of the Perfherder cycler.

    Every step is delegated to a dedicated private helper, which logs its
    own progress message. The signature removal and the alert-summary log
    message are therefore not repeated inline here; doing so would run the
    signature remover twice and emit the same warning twice.
    """
    self.__remove_empty_signatures()

    # Old alerts go first, then alert summaries are cleaned up.
    self.__remove_too_old_alerts()
    self.__remove_empty_alert_summaries()

    self.__remove_empty_backfill_reports()
|
||||
|
||||
def __remove_empty_signatures(self):
    """Pass signatures not updated since ``self.max_timestamp`` to the signature remover.

    The queryset only narrows the candidates by ``last_updated``; the
    removal itself is carried out by ``PublicSignatureRemover``.
    """
    logger.warning("Removing performance signatures which don't have any data points...")
    candidates = PerformanceSignature.objects.filter(last_updated__lte=self.max_timestamp)

    remover = PublicSignatureRemover(
        timer=self.timer,
        notify_client=taskcluster.notify_client_factory(),
    )
    remover.remove_in_chunks(candidates)
|
||||
|
||||
def __remove_too_old_alerts(self):
    """Delete every performance alert created more than 365 days ago."""
    logger.warning("Removing alerts older than a year...")

    #################################################################
    # WARNING! Don't change this without proper approval!           #
    # Otherwise we risk deleting data that's actively investigated  #
    # and cripple the perf sheriffing process!                      #
    #################################################################
    one_year_ago = datetime.now() - timedelta(days=365)

    outdated_alerts = PerformanceAlert.objects.filter(created__lt=one_year_ago)
    outdated_alerts.delete()
|
||||
|
||||
def __remove_empty_alert_summaries(self):
|
||||
logger.warning("Removing alert summaries which no longer have any alerts...")
|
||||
(
|
||||
PerformanceAlertSummary.objects.prefetch_related('alerts', 'related_alerts')
|
||||
.annotate(
|
||||
|
@ -157,6 +182,14 @@ class PerfherderCycler(DataCycler):
|
|||
.delete()
|
||||
)
|
||||
|
||||
def __remove_empty_backfill_reports(self):
    """Delete backfill reports created more than 120 days ago that have no records."""
    logger.warning("Removing backfill reports which no longer have any records...")
    cutoff = datetime.now() - timedelta(days=120)

    # Annotate each report with its number of records so empty ones can be filtered.
    counted_reports = BackfillReport.objects.annotate(total_records=Count('records'))
    counted_reports.filter(created__lt=cutoff, total_records=0).delete()
|
||||
|
||||
def _delete_in_chunks(self, strategy: RemovalStrategy):
|
||||
any_successful_attempt = False
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ from typing import List
|
|||
import taskcluster
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.db.models import QuerySet
|
||||
from taskcluster.exceptions import TaskclusterRestFailure
|
||||
|
||||
from treeherder.perf.models import PerformanceSignature
|
||||
|
@ -34,14 +35,13 @@ class PublicSignatureRemover:
|
|||
self._email_writer = email_writer or DeletionNotificationWriter()
|
||||
self.timer = timer
|
||||
|
||||
def remove_in_chunks(self, signatures):
|
||||
def remove_in_chunks(self, potentially_empty_signatures: QuerySet):
|
||||
emails_sent = 0
|
||||
rows_left = self._max_rows_allowed
|
||||
chunk_of_signatures = []
|
||||
|
||||
logger.warning("Removing performance signatures which don't have any data points...")
|
||||
self._remove_empty_try_signatures(signatures)
|
||||
for perf_signature in signatures:
|
||||
self._remove_empty_try_signatures(potentially_empty_signatures)
|
||||
for perf_signature in potentially_empty_signatures:
|
||||
self.timer.quit_on_timeout()
|
||||
|
||||
if emails_sent < self._max_emails_allowed and (
|
||||
|
@ -63,7 +63,7 @@ class PublicSignatureRemover:
|
|||
self.__delete_and_notify(chunk_of_signatures)
|
||||
|
||||
@staticmethod
|
||||
def _remove_empty_try_signatures(signatures):
|
||||
def _remove_empty_try_signatures(signatures: QuerySet):
|
||||
try_signatures = signatures.filter(repository__name='try')
|
||||
for perf_signature in try_signatures:
|
||||
if not perf_signature.has_performance_data():
|
||||
|
|
Загрузка…
Ссылка в новой задаче