Mirror of https://github.com/mozilla/treeherder.git
Bug 1673911 - Remove more data from treeherder-prototype2
* overwrite cycle interval in strategies (if legal)
* DRY days validation & split test coverage
* provide test coverage & refactor strategies
This commit is contained in:
Parent: 08f29dd4d9
Commit: fee6da9a74
@@ -5,20 +5,16 @@ from datetime import datetime, timedelta
 import pytest
 from unittest.mock import MagicMock
 from django.core.management import call_command
-from django.db.models import Max
 from unittest.mock import patch

-from tests import test_utils
-from tests.autoclassify.utils import create_failure_lines, test_line
-from treeherder.model.management.commands.cycle_data import PerfherderCycler
-from treeherder.model.models import (
-    FailureLine,
-    Job,
-    JobGroup,
-    JobLog,
-    JobType,
-    Machine,
-    Push,
+from treeherder.model.management.commands.cycle_data import (
+    PerfherderCycler,
+    MainRemovalStrategy,
+    TryDataRemoval,
+    IrrelevantDataRemoval,
+    StalledDataRemoval,
 )
+from treeherder.model.models import Push
 from treeherder.perf.exceptions import MaxRuntimeExceeded
 from treeherder.perf.models import (
     PerformanceDatum,
@@ -30,182 +26,6 @@ from treeherder.perf.data_cycling.signature_remover import PublicSignatureRemove
 from treeherder.perf.data_cycling.max_runtime import MaxRuntime


-@pytest.mark.parametrize(
-    'days, expected_jobs, expected_failure_lines, expected_job_logs, cmd_args, cmd_kwargs',
-    [
-        (7, 0, 0, 0, ('cycle_data', 'from:treeherder'), {'sleep_time': 0, 'days': 1}),
-        # also check default '--days' param from treeherder
-        (119, 20, 2, 22, ('cycle_data',), {'sleep_time': 0}),
-        (120, 0, 0, 0, ('cycle_data',), {'sleep_time': 0}),
-        (150, 0, 0, 0, ('cycle_data',), {'sleep_time': 0}),
-    ],
-)
-def test_cycle_all_data(
-    test_repository,
-    failure_classifications,
-    sample_data,
-    sample_push,
-    mock_log_parser,
-    failure_lines,
-    days,
-    expected_jobs,
-    expected_failure_lines,
-    expected_job_logs,
-    cmd_args,
-    cmd_kwargs,
-):
-    """
-    Test cycling the sample data
-    """
-    job_data = sample_data.job_data[:20]
-    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
-
-    cycle_date_ts = datetime.now() - timedelta(days=days)
-    for job in Job.objects.all():
-        job.submit_time = cycle_date_ts
-        job.save()
-
-    call_command(*cmd_args, **cmd_kwargs)
-
-    # There should be no jobs or failure lines after cycling
-    assert Job.objects.count() == expected_jobs
-    assert FailureLine.objects.count() == expected_failure_lines
-    assert JobLog.objects.count() == expected_job_logs
-
-
-def test_cycle_all_but_one_job(
-    test_repository,
-    failure_classifications,
-    sample_data,
-    sample_push,
-    mock_log_parser,
-    failure_lines,
-):
-    """
-    Test cycling all but one job in a group of jobs to confirm there are no
-    unexpected deletions
-    """
-
-    job_data = sample_data.job_data[:20]
-    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
-
-    # one job should not be deleted, set its submit time to now
-    job_not_deleted = Job.objects.get(id=Job.objects.aggregate(Max("id"))["id__max"])
-    job_not_deleted.submit_time = datetime.now()
-    job_not_deleted.save()
-
-    extra_objects = {
-        'failure_lines': (
-            FailureLine,
-            create_failure_lines(
-                job_not_deleted, [(test_line, {}), (test_line, {"subtest": "subtest2"})]
-            ),
-        ),
-    }
-
-    # set other job's submit time to be a week ago from now
-    cycle_date_ts = datetime.now() - timedelta(weeks=1)
-    for job in Job.objects.all().exclude(id=job_not_deleted.id):
-        job.submit_time = cycle_date_ts
-        job.save()
-    num_job_logs_to_be_deleted = JobLog.objects.all().exclude(job__id=job_not_deleted.id).count()
-    num_job_logs_before = JobLog.objects.count()
-
-    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, debug=True, chunk_size=1)
-
-    assert Job.objects.count() == 1
-    assert JobLog.objects.count() == (num_job_logs_before - num_job_logs_to_be_deleted)
-
-    for (object_type, objects) in extra_objects.values():
-        actual = set(item.id for item in object_type.objects.all())
-        expected = set(item.id for item in objects)
-        assert actual == expected
-
-
-def test_cycle_all_data_in_chunks(
-    test_repository, failure_classifications, sample_data, sample_push, mock_log_parser
-):
-    """
-    Test cycling the sample data in chunks.
-    """
-    job_data = sample_data.job_data[:20]
-    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
-
-    # build a date that will cause the data to be cycled
-    cycle_date_ts = datetime.now() - timedelta(weeks=1)
-    for job in Job.objects.all():
-        job.submit_time = cycle_date_ts
-        job.save()
-
-    create_failure_lines(Job.objects.get(id=1), [(test_line, {})] * 7)
-
-    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, chunk_size=3)
-
-    # There should be no jobs after cycling
-    assert Job.objects.count() == 0
-    assert FailureLine.objects.count() == 0
-
-
-def test_cycle_job_model_reference_data(
-    test_repository, failure_classifications, sample_data, sample_push, mock_log_parser
-):
-    job_data = sample_data.job_data[:20]
-    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
-
-    # get a list of ids of original reference data
-    original_job_type_ids = JobType.objects.values_list('id', flat=True)
-    original_job_group_ids = JobGroup.objects.values_list('id', flat=True)
-    original_machine_ids = Machine.objects.values_list('id', flat=True)
-
-    # create a bunch of job model data that should be cycled, since they don't
-    # reference any current jobs
-    jg = JobGroup.objects.create(symbol='moo', name='moo')
-    jt = JobType.objects.create(symbol='mu', name='mu')
-    m = Machine.objects.create(name='machine_with_no_job')
-    (jg_id, jt_id, m_id) = (jg.id, jt.id, m.id)
-    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, chunk_size=3)
-
-    # assert that reference data that should have been cycled, was cycled
-    assert JobGroup.objects.filter(id=jg_id).count() == 0
-    assert JobType.objects.filter(id=jt_id).count() == 0
-    assert Machine.objects.filter(id=m_id).count() == 0
-
-    # assert that we still have everything that shouldn't have been cycled
-    assert JobType.objects.filter(id__in=original_job_type_ids).count() == len(
-        original_job_type_ids
-    )
-    assert JobGroup.objects.filter(id__in=original_job_group_ids).count() == len(
-        original_job_group_ids
-    )
-    assert Machine.objects.filter(id__in=original_machine_ids).count() == len(original_machine_ids)
-
-
-def test_cycle_job_with_performance_data(
-    test_repository, failure_classifications, test_job, mock_log_parser, test_perf_signature
-):
-    # build a date that will cause the data to be cycled
-    test_job.submit_time = datetime.now() - timedelta(weeks=1)
-    test_job.save()
-
-    PerformanceDatum.objects.create(
-        repository=test_repository,
-        push=test_job.push,
-        job=test_job,
-        signature=test_perf_signature,
-        push_timestamp=test_job.push.time,
-        value=1.0,
-    )
-
-    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, chunk_size=3)
-
-    # assert that the job got cycled
-    assert Job.objects.count() == 0
-
-    # assert that the perf object is still there, but the job reference is None
-    p = PerformanceDatum.objects.get(id=1)
-    assert p.job is None
-
-
 @pytest.mark.parametrize(
     'repository_name',
     [
@@ -695,3 +515,51 @@ def test_stalled_data_removal(
     assert test_perf_alert not in PerformanceAlert.objects.all()
     assert test_perf_signature_2 in PerformanceSignature.objects.all()
     assert seg2_data in PerformanceDatum.objects.all()
+
+
+@patch('treeherder.config.settings.SITE_HOSTNAME', 'treeherder-prototype2.herokuapp.com')
+@pytest.mark.parametrize('days', [None, 5, 30, 100])
+def test_explicit_days_validation_on_treeherder_prototype2_environment(days):
+    try:
+        _ = PerfherderCycler(10_000, 0, days=days)
+    except ValueError:
+        pytest.fail()
+
+    try:
+        _ = MainRemovalStrategy(10_000, days=days)
+    except ValueError:
+        pytest.fail()
+
+    try:
+        _ = TryDataRemoval(10_000, days=days)
+    except ValueError:
+        pytest.fail()
+
+    try:
+        _ = IrrelevantDataRemoval(10_000, days=days)
+    except ValueError:
+        pytest.fail()
+
+    try:
+        _ = StalledDataRemoval(10_000, days=days)
+    except ValueError:
+        pytest.fail()
+
+
+@patch('treeherder.config.settings.SITE_HOSTNAME', 'treeherder-production.com')
+@pytest.mark.parametrize('days', [5, 30, 100, 364])
+def test_explicit_days_validation_on_envs_other_than_treeherder_prototype2(days):
+    with pytest.raises(ValueError):
+        _ = PerfherderCycler(10_000, 0, days=days)
+
+    with pytest.raises(ValueError):
+        _ = MainRemovalStrategy(10_000, days=days)
+
+    with pytest.raises(ValueError):
+        _ = TryDataRemoval(10_000, days=days)
+
+    with pytest.raises(ValueError):
+        _ = IrrelevantDataRemoval(10_000, days=days)
+
+    with pytest.raises(ValueError):
+        _ = StalledDataRemoval(10_000, days=days)
@@ -0,0 +1,196 @@
+from datetime import datetime, timedelta
+
+import pytest
+from django.core.management import call_command
+from django.db.models import Max
+
+from tests import test_utils
+from tests.autoclassify.utils import create_failure_lines, test_line
+from treeherder.model.models import Job, FailureLine, JobLog, JobType, Machine, JobGroup
+from treeherder.perf.models import PerformanceDatum
+
+
+@pytest.mark.parametrize(
+    'days, expected_jobs, expected_failure_lines, expected_job_logs, cmd_args, cmd_kwargs',
+    [
+        (7, 0, 0, 0, ('cycle_data', 'from:treeherder'), {'sleep_time': 0, 'days': 1}),
+        # also check default '--days' param from treeherder
+        (119, 20, 2, 22, ('cycle_data',), {'sleep_time': 0}),
+        (120, 0, 0, 0, ('cycle_data',), {'sleep_time': 0}),
+        (150, 0, 0, 0, ('cycle_data',), {'sleep_time': 0}),
+    ],
+)
+def test_cycle_all_data(
+    test_repository,
+    failure_classifications,
+    sample_data,
+    sample_push,
+    mock_log_parser,
+    failure_lines,
+    days,
+    expected_jobs,
+    expected_failure_lines,
+    expected_job_logs,
+    cmd_args,
+    cmd_kwargs,
+):
+    """
+    Test cycling the sample data
+    """
+    job_data = sample_data.job_data[:20]
+    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
+
+    cycle_date_ts = datetime.now() - timedelta(days=days)
+    for job in Job.objects.all():
+        job.submit_time = cycle_date_ts
+        job.save()
+
+    call_command(*cmd_args, **cmd_kwargs)
+
+    # There should be no jobs or failure lines after cycling
+    assert Job.objects.count() == expected_jobs
+    assert FailureLine.objects.count() == expected_failure_lines
+    assert JobLog.objects.count() == expected_job_logs
+
+
+def test_cycle_all_but_one_job(
+    test_repository,
+    failure_classifications,
+    sample_data,
+    sample_push,
+    mock_log_parser,
+    failure_lines,
+):
+    """
+    Test cycling all but one job in a group of jobs to confirm there are no
+    unexpected deletions
+    """
+
+    job_data = sample_data.job_data[:20]
+    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
+
+    # one job should not be deleted, set its submit time to now
+    job_not_deleted = Job.objects.get(id=Job.objects.aggregate(Max("id"))["id__max"])
+    job_not_deleted.submit_time = datetime.now()
+    job_not_deleted.save()
+
+    extra_objects = {
+        'failure_lines': (
+            FailureLine,
+            create_failure_lines(
+                job_not_deleted, [(test_line, {}), (test_line, {"subtest": "subtest2"})]
+            ),
+        ),
+    }
+
+    # set other job's submit time to be a week ago from now
+    cycle_date_ts = datetime.now() - timedelta(weeks=1)
+    for job in Job.objects.all().exclude(id=job_not_deleted.id):
+        job.submit_time = cycle_date_ts
+        job.save()
+    num_job_logs_to_be_deleted = JobLog.objects.all().exclude(job__id=job_not_deleted.id).count()
+    num_job_logs_before = JobLog.objects.count()
+
+    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, debug=True, chunk_size=1)
+
+    assert Job.objects.count() == 1
+    assert JobLog.objects.count() == (num_job_logs_before - num_job_logs_to_be_deleted)
+
+    for (object_type, objects) in extra_objects.values():
+        actual = set(item.id for item in object_type.objects.all())
+        expected = set(item.id for item in objects)
+        assert actual == expected
+
+
+def test_cycle_all_data_in_chunks(
+    test_repository, failure_classifications, sample_data, sample_push, mock_log_parser
+):
+    """
+    Test cycling the sample data in chunks.
+    """
+    job_data = sample_data.job_data[:20]
+    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
+
+    # build a date that will cause the data to be cycled
+    cycle_date_ts = datetime.now() - timedelta(weeks=1)
+    for job in Job.objects.all():
+        job.submit_time = cycle_date_ts
+        job.save()
+
+    create_failure_lines(Job.objects.get(id=1), [(test_line, {})] * 7)
+
+    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, chunk_size=3)
+
+    # There should be no jobs after cycling
+    assert Job.objects.count() == 0
+    assert FailureLine.objects.count() == 0
+
+
+def test_cycle_job_model_reference_data(
+    test_repository, failure_classifications, sample_data, sample_push, mock_log_parser
+):
+    job_data = sample_data.job_data[:20]
+    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)
+
+    # get a list of ids of original reference data
+    original_job_type_ids = JobType.objects.values_list('id', flat=True)
+    original_job_group_ids = JobGroup.objects.values_list('id', flat=True)
+    original_machine_ids = Machine.objects.values_list('id', flat=True)
+
+    # create a bunch of job model data that should be cycled, since they don't
+    # reference any current jobs
+    jg = JobGroup.objects.create(symbol='moo', name='moo')
+    jt = JobType.objects.create(symbol='mu', name='mu')
+    m = Machine.objects.create(name='machine_with_no_job')
+    (jg_id, jt_id, m_id) = (jg.id, jt.id, m.id)
+    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, chunk_size=3)
+
+    # assert that reference data that should have been cycled, was cycled
+    assert JobGroup.objects.filter(id=jg_id).count() == 0
+    assert JobType.objects.filter(id=jt_id).count() == 0
+    assert Machine.objects.filter(id=m_id).count() == 0
+
+    # assert that we still have everything that shouldn't have been cycled
+    assert JobType.objects.filter(id__in=original_job_type_ids).count() == len(
+        original_job_type_ids
+    )
+    assert JobGroup.objects.filter(id__in=original_job_group_ids).count() == len(
+        original_job_group_ids
+    )
+    assert Machine.objects.filter(id__in=original_machine_ids).count() == len(original_machine_ids)
+
+
+# Treeherder's data cycling can have some impact upon
+# Perfherder data. Test cases touching this aspect
+# should be defined below.
+
+
+def test_cycle_job_with_performance_data(
+    test_repository, failure_classifications, test_job, mock_log_parser, test_perf_signature
+):
+    """
+    Ensure that removing Treeherder jobs won't CASCADE DELETE to
+    `performance_datum` rows, as this would have dire consequences.
+    Rather the perf rows remain, but with their `job` foreign key set to NULL.
+    """
+    # build a date that will cause the data to be cycled
+    test_job.submit_time = datetime.now() - timedelta(weeks=1)
+    test_job.save()
+
+    PerformanceDatum.objects.create(
+        repository=test_repository,
+        push=test_job.push,
+        job=test_job,
+        signature=test_perf_signature,
+        push_timestamp=test_job.push.time,
+        value=1.0,
+    )
+
+    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1, chunk_size=3)
+
+    # assert that the job got cycled
+    assert Job.objects.count() == 0
+
+    # assert that the perf object is still there, but the job reference is None
+    p = PerformanceDatum.objects.get(id=1)
+    assert p.job is None
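Not part of the commit, for orientation: the no-CASCADE behavior asserted in `test_cycle_job_with_performance_data` comes from how the `job` foreign key is declared on `PerformanceDatum`. A minimal sketch of that shape; the class name, relation label, and extra field here are illustrative, not Treeherder's actual model definition:

from django.db import models


class PerformanceDatumSketch(models.Model):
    # Nullable FK with SET_NULL: cycling a Job detaches its perf rows
    # instead of cascading the delete through performance_datum.
    job = models.ForeignKey('model.Job', null=True, on_delete=models.SET_NULL)
    value = models.FloatField()  # illustrative payload column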
@@ -9,9 +9,9 @@ from django.db import connection
 from django.db.backends.utils import CursorWrapper
 from django.db.models import Count
 from django.db.utils import OperationalError
-from django.conf import settings
 from typing import List

+from treeherder.config import settings
 from treeherder.model.models import Job, JobGroup, JobType, Machine, Repository
 from treeherder.perf.exceptions import MaxRuntimeExceeded, NoDataCyclingAtAll
 from treeherder.perf.models import PerformanceDatum, PerformanceSignature, PerformanceAlertSummary
@@ -30,8 +30,22 @@ MINIMUM_PERFHERDER_EXPIRE_INTERVAL = 365
 logger = logging.getLogger(__name__)


+def has_valid_explicit_days(func):
+    def wrapper(*args, **kwargs):
+        days = kwargs.get('days')
+        if (days is not None) and settings.SITE_HOSTNAME != 'treeherder-prototype2.herokuapp.com':
+            raise ValueError(
+                'Cannot override perf data retention parameters on projects other than treeherder-prototype2'
+            )
+        func(*args, **kwargs)
+
+    return wrapper
+
+
 class DataCycler(ABC):
-    def __init__(self, chunk_size: int, sleep_time: int, is_debug: bool = None, **kwargs):
+    def __init__(
+        self, chunk_size: int, sleep_time: int, is_debug: bool = None, days: int = None, **kwargs
+    ):
         self.chunk_size = chunk_size
         self.sleep_time = sleep_time
         self.is_debug = is_debug or False
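Not part of the commit: a minimal sketch of the contract `has_valid_explicit_days` enforces, using a made-up `DummyCycler` and the same hostnames the tests above patch:

class DummyCycler:  # hypothetical consumer of the decorator
    @has_valid_explicit_days
    def __init__(self, chunk_size: int, days: int = None):
        self.days = days


settings.SITE_HOSTNAME = 'treeherder-prototype2.herokuapp.com'
DummyCycler(10_000, days=30)  # accepted: prototype2 may override retention

settings.SITE_HOSTNAME = 'treeherder-production.com'
DummyCycler(10_000)           # fine: no explicit override requested
DummyCycler(10_000, days=30)  # raises ValueError

Note that the wrapper reads `days` via `kwargs.get('days')`, so the guard only fires for keyword usage, which is how both the management command and the tests pass it.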
@@ -89,17 +103,22 @@ class TreeherderCycler(DataCycler):


 class PerfherderCycler(DataCycler):
     DEFAULT_MAX_RUNTIME = timedelta(hours=23)

+    @has_valid_explicit_days
     def __init__(
         self,
         chunk_size: int,
         sleep_time: int,
         is_debug: bool = None,
+        days: int = None,
         strategies: List[RemovalStrategy] = None,
         **kwargs,
     ):
         super().__init__(chunk_size, sleep_time, is_debug)
-        self.strategies = strategies or RemovalStrategy.fabricate_all_strategies(chunk_size)
-
+        self.strategies = strategies or RemovalStrategy.fabricate_all_strategies(
+            chunk_size, days=days
+        )
         self.timer = MaxRuntime()

     @property
@@ -199,6 +218,22 @@ class PerfherderCycler(DataCycler):


 class RemovalStrategy(ABC):
+    @property
+    @abstractmethod
+    def CYCLE_INTERVAL(self) -> int:
+        """
+        expressed in days
+        """
+        pass
+
+    @has_valid_explicit_days
+    def __init__(self, chunk_size: int, days: int = None):
+        days = days or self.CYCLE_INTERVAL
+
+        self._cycle_interval = timedelta(days=days)
+        self._chunk_size = chunk_size
+        self._max_timestamp = datetime.now() - self._cycle_interval
+
     @abstractmethod
     def remove(self, using: CursorWrapper):
         pass
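Not part of the commit: with `__init__` hoisted into the base class, a concrete strategy reduces to a default interval plus a removal query. A hypothetical subclass for illustration; the interval, table, and SQL are assumptions modeled on the strategies below:

class ExampleDataRemoval(RemovalStrategy):
    @property
    def CYCLE_INTERVAL(self) -> int:
        return 90  # days, used when no explicit `days` override is supplied

    def remove(self, using: CursorWrapper):
        # delete one chunk of expired rows per call, as the real strategies do
        using.execute(
            'DELETE FROM performance_datum WHERE push_timestamp <= %s LIMIT %s',
            [self._max_timestamp, self._chunk_size],
        )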
@@ -231,15 +266,14 @@ class MainRemovalStrategy(RemovalStrategy):
     that are at least 1 year old.
     """

-    # WARNING!! Don't override this without proper approval!
-    CYCLE_INTERVAL = 365  # in days #
-    ########################################################
+    @property
+    def CYCLE_INTERVAL(self) -> int:
+        # WARNING!! Don't override this without proper approval!
+        return 365  # days #
+    ########################################################

-    def __init__(self, chunk_size: int):
-        self._cycle_interval = timedelta(days=self.CYCLE_INTERVAL)
-        self._chunk_size = chunk_size
-        self._max_timestamp = datetime.now() - self._cycle_interval
+    def __init__(self, chunk_size: int, days: int = None):
+        super().__init__(chunk_size, days=days)
         self._manager = PerformanceDatum.objects

     @property
@@ -279,11 +313,14 @@ class TryDataRemoval(RemovalStrategy):

     SIGNATURE_BULK_SIZE = 10

-    def __init__(self, chunk_size: int):
-        self._cycle_interval = timedelta(weeks=6)
-        self._chunk_size = chunk_size
-        self._max_timestamp = datetime.now() - self._cycle_interval
-        self._manager = PerformanceDatum.objects
+    @property
+    def CYCLE_INTERVAL(self) -> int:
+        # WARNING!! Don't override this without proper approval!
+        return 42  # days #
+    ########################################################
+
+    def __init__(self, chunk_size: int, days: int = None):
+        super().__init__(chunk_size, days=days)

         self.__try_repo_id = None
         self.__target_signatures = None
@@ -375,10 +412,15 @@ class IrrelevantDataRemoval(RemovalStrategy):
         'reference-browser',
     ]

-    def __init__(self, chunk_size: int):
-        self._cycle_interval = timedelta(days=(6 * 30))
-        self._chunk_size = chunk_size
-        self._max_timestamp = datetime.now() - self._cycle_interval
+    @property
+    def CYCLE_INTERVAL(self) -> int:
+        # WARNING!! Don't override this without proper approval!
+        return 180  # days #
+    ########################################################
+
+    def __init__(self, chunk_size: int, days: int = None):
+        super().__init__(chunk_size, days=days)

         self._manager = PerformanceDatum.objects
         self.__relevant_repos = None
@@ -442,10 +484,15 @@ class StalledDataRemoval(RemovalStrategy):
     that haven't been updated in the last 4 months.
     """

-    def __init__(self, chunk_size: int):
-        self._cycle_interval = timedelta(days=120)
-        self._chunk_size = chunk_size
-        self._max_timestamp = datetime.now() - self._cycle_interval
+    @property
+    def CYCLE_INTERVAL(self) -> int:
+        # WARNING!! Don't override this without proper approval!
+        return 120  # days #
+    ########################################################
+
+    def __init__(self, chunk_size: int, days: int = None):
+        super().__init__(chunk_size, days=days)

         self._target_signature = None
         self._removable_signatures = None
@@ -531,6 +578,17 @@ class Command(BaseCommand):
             default=False,
             help='Write debug messages to stdout',
         )
+        parser.add_argument(
+            '--days',
+            action='store',
+            dest='days',
+            type=int,
+            help=(
+                "Data cycle interval expressed in days. "
+                "On Perfherder specifically, this only applies for `treeherder-prototype2` "
+                "environment; supplying it for other environments is illegal."
+            ),
+        )
        parser.add_argument(
            '--chunk-size',
            action='store',
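Not part of the commit: with `--days` promoted to the top-level parser, an explicit retention override can be driven the same way the tests above do it via `call_command`. The Perfherder subcommand name is an assumption here, inferred from the `from:treeherder` pattern:

from django.core.management import call_command

# For Treeherder data, --days is an ordinary retention override.
call_command('cycle_data', 'from:treeherder', sleep_time=0, days=30, chunk_size=100)

# For Perfherder data the @has_valid_explicit_days guard applies: legal on
# treeherder-prototype2 only, ValueError on any other SITE_HOSTNAME.
call_command('cycle_data', 'from:perfherder', sleep_time=0, days=30)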
@@ -552,17 +610,7 @@ class Command(BaseCommand):
         subparsers = parser.add_subparsers(
             description='Data producers from which to expire data', dest='data_source'
         )
-        treeherder_subcommand = subparsers.add_parser(
-            TREEHERDER_SUBCOMMAND
-        )  # default subcommand even if not provided
-        treeherder_subcommand.add_argument(
-            '--days',
-            action='store',
-            dest='days',
-            type=int,
-            help='Data cycle interval expressed in days. '
-            'Only relevant for Treeherder specific data.',
-        )
+        subparsers.add_parser(TREEHERDER_SUBCOMMAND)  # default subcommand even if not provided

         # Perfherder will have its own specifics
         subparsers.add_parser(PERFHERDER_SUBCOMMAND)