Bug 1329742 - Expire performance data (with option to override) (#2084)

In most cases, but not all, we want to expire performance data on the
same cadence as job data. Add code to do this, with an optional
per-repository override to keep the data around indefinitely.
This commit is contained in:
William Lachance 2017-03-10 15:35:25 -05:00 коммит произвёл GitHub
Родитель 2e1d1539a4
Коммит 73670ac626
7 изменённых файлов: 148 добавлений и 11 удалений

Просмотреть файл

@ -106,7 +106,8 @@ def test_repository(transactional_db):
codebase="gecko",
repository_group_id=1,
description="",
performance_alerts_enabled=True
performance_alerts_enabled=True,
expire_performance_data=False
)
return r

Просмотреть файл

@ -1,5 +1,6 @@
import datetime
import pytest
from django.core.management import call_command
from tests import test_utils
@ -11,10 +12,12 @@ from treeherder.model.models import (FailureLine,
JobGroup,
JobLog,
JobType,
Machine)
Machine,
Push)
from treeherder.model.search import (TestFailureLine,
refresh_all)
from treeherder.perf.models import PerformanceDatum
from treeherder.perf.models import (PerformanceDatum,
PerformanceSignature)
def test_cycle_all_data(test_repository, failure_classifications, sample_data,
@ -179,3 +182,64 @@ def test_cycle_job_with_performance_data(test_repository, failure_classification
# assert that the perf object is still there, but the job reference is None
p = PerformanceDatum.objects.get(id=1)
assert p.job is None
@pytest.mark.parametrize("test_repository_expire_data", [False, True])
def test_cycle_performance_data(test_repository, result_set_stored,
                                test_perf_signature,
                                test_repository_expire_data):
    """Check that ``cycle_data`` honours ``expire_performance_data``.

    Two performance datums are created: one attached to a push timestamped
    "now" and one attached to a push timestamped a week ago.  After running
    ``cycle_data`` with a one day interval, the week-old datum (and its
    signature, which then has no datums left) must be gone only when the
    repository is flagged as having expirable performance data.
    """
    test_repository.expire_performance_data = test_repository_expire_data
    test_repository.save()

    expired_timestamp = datetime.datetime.now() - datetime.timedelta(weeks=1)

    test_perf_signature_2 = PerformanceSignature.objects.create(
        signature_hash='b'*40,
        repository=test_perf_signature.repository,
        framework=test_perf_signature.framework,
        platform=test_perf_signature.platform,
        option_collection=test_perf_signature.option_collection,
        suite=test_perf_signature.suite,
        test='test 2',
        last_updated=expired_timestamp,
        has_subtests=False)

    push1 = Push.objects.get(id=1)
    push1.time = datetime.datetime.now()
    push1.save()

    push2 = Push.objects.get(id=2)
    push2.time = expired_timestamp
    push2.save()

    # a performance datum that *should not* be deleted
    PerformanceDatum.objects.create(
        id=1,
        repository=test_repository,
        push=push1,
        job=None,
        signature=test_perf_signature,
        push_timestamp=push1.time,
        value=1.0)

    # a performance datum that *should* be deleted (but only if the
    # repository is marked as having expirable performance data)
    PerformanceDatum.objects.create(
        id=2,
        repository=test_repository,
        push=push2,
        job=None,
        signature=test_perf_signature_2,
        push_timestamp=push2.time,
        value=1.0)

    call_command('cycle_data', sleep_time=0, days=1)

    # Order explicitly: without order_by() the database is free to return
    # rows in any order, which would make the list comparisons flaky.
    remaining_datum_ids = list(
        PerformanceDatum.objects.order_by('id').values_list('id', flat=True))
    remaining_signature_ids = list(
        PerformanceSignature.objects.order_by('id').values_list('id', flat=True))

    if test_repository_expire_data:
        assert remaining_datum_ids == [1]
        assert remaining_signature_ids == [test_perf_signature.id]
    else:
        assert remaining_datum_ids == [1, 2]
        assert remaining_signature_ids == sorted(
            [test_perf_signature.id, test_perf_signature_2.id])

Просмотреть файл

@ -9,7 +9,8 @@
"active_status": "active",
"codebase": "gecko",
"repository_group": 1,
"description": ""
"description": "",
"expire_performance_data": false
}
},
{
@ -23,7 +24,8 @@
"codebase": "gecko",
"repository_group": 1,
"description": "",
"performance_alerts_enabled": true
"performance_alerts_enabled": true,
"expire_performance_data": false
}
},
{
@ -63,7 +65,8 @@
"codebase": "gecko",
"repository_group": 2,
"description": "",
"performance_alerts_enabled": true
"performance_alerts_enabled": true,
"expire_performance_data": false
}
},
{
@ -77,7 +80,8 @@
"codebase": "gecko",
"repository_group": 2,
"description": "",
"performance_alerts_enabled": true
"performance_alerts_enabled": true,
"expire_performance_data": false
}
},
{
@ -91,7 +95,8 @@
"codebase": "gecko",
"repository_group": 2,
"description": "",
"performance_alerts_enabled": true
"performance_alerts_enabled": true,
"expire_performance_data": false
}
},
{
@ -183,7 +188,8 @@
"codebase": "gecko",
"repository_group": 1,
"description": "",
"performance_alerts_enabled": false
"performance_alerts_enabled": false,
"expire_performance_data": false
}
},
{
@ -947,7 +953,8 @@
"active_status": "active",
"codebase": "servo",
"repository_group": 7,
"description": "The Servo Parallel Browser Engine."
"description": "The Servo Parallel Browser Engine.",
"expire_performance_data": false
}
},
{
@ -974,7 +981,8 @@
"codebase": "gecko",
"repository_group": 1,
"description": "The destination for automatically landed Firefox commits.",
"performance_alerts_enabled": true
"performance_alerts_enabled": true,
"expire_performance_data": false
}
},
{

Просмотреть файл

@ -8,6 +8,7 @@ from treeherder.model.models import (Job,
JobType,
Machine,
Repository)
from treeherder.perf.models import PerformanceDatum
class Command(BaseCommand):
@ -62,6 +63,11 @@ class Command(BaseCommand):
options['sleep_time'])
self.debug("Deleted {} jobs from {}".format(rs_deleted,
repository.name))
if repository.expire_performance_data:
PerformanceDatum.objects.cycle_data(repository,
cycle_interval,
options['chunk_size'],
options['sleep_time'])
self.cycle_non_job_data(options['chunk_size'], options['sleep_time'])

Просмотреть файл

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.5 on 2017-03-09 18:59
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the boolean ``expire_performance_data`` field to ``repository``."""

    dependencies = [('model', '0006_jobs_submit_time_repository_index')]

    operations = [
        migrations.AddField(
            model_name='repository',
            name='expire_performance_data',
            # The new field is declared with a default of True.
            field=models.BooleanField(default=True),
        ),
    ]

Просмотреть файл

@ -89,6 +89,7 @@ class Repository(models.Model):
description = models.TextField(blank=True)
active_status = models.CharField(max_length=7, blank=True, default='active', db_index=True)
performance_alerts_enabled = models.BooleanField(default=False)
expire_performance_data = models.BooleanField(default=True)
class Meta:
db_table = 'repository'

Просмотреть файл

@ -1,3 +1,6 @@
import datetime
import time
from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.core.validators import MinLengthValidator
@ -80,9 +83,43 @@ class PerformanceSignature(models.Model):
self.last_updated)
class PerformanceDatumManager(models.Manager):
    """
    Manager providing bulk maintenance helpers for performance datums
    """

    def cycle_data(self, repository, cycle_interval, chunk_size, sleep_time):
        """Remove expired performance datums belonging to ``repository``.

        A datum is expired when its ``push_timestamp`` is earlier than
        ``datetime.datetime.now() - cycle_interval``.  Rows are deleted at
        most ``chunk_size`` ids at a time, pausing ``sleep_time`` seconds
        after each batch when ``sleep_time`` is truthy.  Afterwards, every
        performance signature of the repository that has no datum pointing
        at it is deleted as well.
        """
        cutoff = datetime.datetime.now() - cycle_interval

        # Lazy base queryset; slicing it below issues a fresh query each time.
        expired = self.filter(repository=repository,
                              push_timestamp__lt=cutoff)

        # Work through the expired rows one batch of ids at a time.
        batch_ids = list(expired.values_list('id', flat=True)[:chunk_size])
        while batch_ids:
            self.filter(id__in=batch_ids).delete()
            if sleep_time:
                # Give other queries a chance to run between batches
                time.sleep(sleep_time)
            batch_ids = list(expired.values_list('id', flat=True)[:chunk_size])

        # Finally drop signatures of this repository that no datum
        # references (any more).
        for sig in PerformanceSignature.objects.filter(repository=repository):
            if self.filter(signature=sig).exists():
                continue
            sig.delete()
@python_2_unicode_compatible
class PerformanceDatum(models.Model):
objects = PerformanceDatumManager()
repository = models.ForeignKey(Repository)
signature = models.ForeignKey(PerformanceSignature)
value = models.FloatField()