added populate performance series

Jonathan Eads 2014-06-20 11:32:20 -07:00
Parent ea8c0c240c
Commit eba52934ee
11 changed files with 305 additions and 19 deletions

View file

@@ -2,6 +2,8 @@ import pytest
 import simplejson as json
 from mock import MagicMock
+from treeherder.log_parser.artifactbuildercollection import ArtifactBuilderCollection
+from ..sampledata import SampleData
 from treeherder.model.derived import JobData
 from treeherder.log_parser.parsers import ErrorParser
@@ -54,3 +56,54 @@ def test_parse_log(jm, initial_data, jobs_with_local_log, sample_resultset, mock_send_request):
     # for the job artifact panel
     assert len(job_artifacts) >= 2
+
+
+@pytest.fixture
+def jobs_with_talos_log(initial_data):
+    log = "talos-data"
+    sample_data = SampleData()
+    url = "file://{0}".format(
+        sample_data.get_log_path("{0}.txt.gz".format(log)))
+
+    job = sample_data.job_data[0]
+
+    # substitute the log url with a local url
+    job['job']['log_references'][0]['url'] = url
+
+    # make this a successful job, so no error log processing
+    job['job']['result'] = "success"
+    return [job]
+
+
+def test_parse_talos_log(jm, initial_data, jobs_with_talos_log, sample_resultset, mock_send_request):
+    """
+    check that at least 2 job_artifacts get inserted when running
+    a parse_log task
+    """
+    jm.store_result_set_data(sample_resultset)
+
+    jobs = jobs_with_talos_log
+    for job in jobs:
+        print job
+        job['revision_hash'] = sample_resultset[0]['revision_hash']
+
+    jm.store_job_data(jobs)
+    jm.process_objects(1, raise_errors=True)
+
+    job_id = jm.get_jobs_dhub().execute(
+        proc="jobs_test.selects.row_by_guid",
+        placeholders=[jobs[0]['job']['job_guid']]
+    )[0]['id']
+
+    job_artifacts = jm.get_jobs_dhub().execute(
+        proc="jobs_test.selects.job_artifact",
+        placeholders=[job_id]
+    )
+
+    for j in job_artifacts:
+        print j['name']
+
+    jm.disconnect()
+
+    # we must have at least 2 artifacts: one for the log viewer and another one
+    # for the job artifact panel
+    assert len(job_artifacts) >= 2

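A sketch of the job structure the new fixture returns, showing only the fields the fixture touches (the guid and path values are illustrative, not taken from SampleData verbatim):

    # Hedged sketch of jobs_with_talos_log's return value; values illustrative
    jobs = [{
        'job': {
            'log_references': [
                {'url': 'file:///.../sample_data/logs/talos-data.txt.gz'}
            ],
            'result': 'success',   # forced so no error-log processing runs
            'job_guid': 'abc123',  # hypothetical; real value comes from SampleData
        },
        'revision_hash': '...'     # set by the test from sample_resultset
    }]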
View file

@@ -117,10 +117,11 @@ class TalosDataAdapter(PerformanceDataAdapter):

         self.signatures = {}
         self.performance_artifact_placeholders = []
         self.signature_property_placeholders = []
+        self.series_signature_data = []

     def adapt_and_load(self, reference_data, job_data, datum):

         datum['blob'] = json.loads(datum['blob'])
         validate(datum['blob'], self.datazilla_schema)

         _job_guid = datum["job_guid"]

@@ -181,7 +182,7 @@ class TalosDataAdapter(PerformanceDataAdapter):

             if series_signature not in self.signatures:
-                self.signatures[series_signature] = True
+                self.signatures[series_signature] = []

                 for signature_property in signature_properties:
                     self.signature_property_placeholders.append([

@@ -201,7 +202,7 @@ class TalosDataAdapter(PerformanceDataAdapter):
                     json.dumps(obj)
                 ])

-            self.series_signature_data.append(series_data)
+            self.signatures[series_signature].append(series_data)

     def get_series_signature(self, signature_values):

@@ -213,5 +214,10 @@ class TalosDataAdapter(PerformanceDataAdapter):

         return signature

-    def submit_tasks(self):
-        pass
+    def submit_tasks(self, project):
+
+        from treeherder.model.tasks import populate_performance_series
+        populate_performance_series.apply_async(
+            args=[project, 'talos_data', self.signatures]
+        )

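After this change TalosDataAdapter.signatures maps each series signature hash to the list of datapoints collected for it, and submit_tasks ships that whole mapping to the populate_performance_series task. A sketch of the shape (the key and fields below are taken from this commit's default data; values are illustrative):

    # Illustrative: adapter.signatures after adapt_and_load has run
    signatures = {
        '4aa266824dbe623696a767d59de310a05bef21f5': [
            {'job_id': 1121, 'result_set_id': 11, 'push_timestamp': 1402944547,
             'mean': '2524.7', 'median': '2525.000000', 'std': '50.2',
             'min': 2474.0, 'max': 2642.0, 'total_replicates': 10}
        ],
        # ...one entry per series signature
    }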
View file

@@ -4,7 +4,8 @@ import io
 import logging
 from contextlib import closing

 from .artifactbuilders import (BuildbotLogViewArtifactBuilder,
-                               BuildbotJobArtifactBuilder)
+                               BuildbotJobArtifactBuilder,
+                               BuildbotPerformanceDataArtifactBuilder)


 class ArtifactBuilderCollection(object):

@@ -46,6 +47,12 @@ BuildbotJobArtifactBuilder
     * Parsers:
         * ErrorParser
         * TinderboxPrintParser

+BuildbotPerformanceDataArtifactBuilder
+-------------
+    * Builds an artifact from talos data
+    * Parsers:
+        * TalosParser
+
 """

     def __init__(self, url, builders=None, check_errors=True):

@@ -72,7 +79,8 @@ If omitted, use defaults.
                 url=self.url,
                 check_errors=check_errors,
             ),
-            BuildbotJobArtifactBuilder(self.url)
+            BuildbotJobArtifactBuilder(self.url),
+            BuildbotPerformanceDataArtifactBuilder(self.url)
         ]

     def get_log_handle(self, url):

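A short usage sketch of the collection with the new builder wired in (the URL is illustrative; the artifact names come from the builders listed above):

    # Hedged usage sketch, assuming a reachable log URL
    from treeherder.log_parser.artifactbuildercollection import ArtifactBuilderCollection

    collection = ArtifactBuilderCollection("http://example.com/talos-log.txt.gz")
    collection.parse()
    # collection.artifacts should now carry a "talos_data" entry alongside
    # "Structured Log" and "Job Info"
    for name in collection.artifacts:
        print name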
View file

@@ -57,8 +57,7 @@ class BuildbotJobArtifactBuilder(ArtifactBuilderBase):
         """Construct a job artifact builder."""
         super(BuildbotJobArtifactBuilder, self).__init__(url)
         self.parsers = [
-            TinderboxPrintParser(),
-            TalosParser()
+            TinderboxPrintParser()
         ]
         self.name = "Job Info"

@@ -71,7 +70,17 @@ class BuildbotLogViewArtifactBuilder(ArtifactBuilderBase):
         super(BuildbotLogViewArtifactBuilder, self).__init__(url)
         self.parsers = [
             HeaderParser(),
-            StepParser(check_errors=check_errors),
-            TalosParser()
+            StepParser(check_errors=check_errors)
         ]
         self.name = "Structured Log"

+
+class BuildbotPerformanceDataArtifactBuilder(ArtifactBuilderBase):
+    """Makes the artifact for performance data."""
+
+    def __init__(self, url=None, check_errors=True):
+        """Construct an artifact builder for performance data."""
+        super(BuildbotPerformanceDataArtifactBuilder, self).__init__(url)
+        self.parsers = [
+            TalosParser()
+        ]
+        self.name = "talos_data"

View file

@@ -57,12 +57,17 @@ def parse_log(project, log_url, job_guid, resultset, check_errors=False):
         check_errors=check_errors,
     )
     artifact_bc.parse()

     artifact_list = []
     for name, artifact in artifact_bc.artifacts.items():
-        data_type = 'performance' if name == 'talos_data' else 'json'
-        artifact_list.append((job_guid, name, data_type, json.dumps(artifact)))
+        if name == 'talos_data':
+            data_type = 'performance'
+            if artifact[name]:
+                artifact_list.append(
+                    (job_guid, name, data_type, json.dumps(artifact[name][0]))
+                )
+        else:
+            data_type = 'json'
+            artifact_list.append((job_guid, name, data_type, json.dumps(artifact)))

     if check_errors:
         all_errors = artifact_bc.artifacts.get(

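With the branch above, artifact_list can carry two row shapes; a sketch (the job_guid value is illustrative):

    # talos artifacts store only the first datum, typed 'performance'
    ('<job_guid>', 'talos_data', 'performance', json.dumps(artifact['talos_data'][0]))
    # every other artifact stores the whole blob, typed 'json'
    ('<job_guid>', 'Job Info', 'json', json.dumps(artifact))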
View file

@@ -2,6 +2,8 @@ import json
 import MySQLdb
 import time
+from operator import itemgetter
+
 from _mysql_exceptions import IntegrityError
 from warnings import filterwarnings, resetwarnings

@@ -1812,8 +1814,52 @@ class JobsModel(TreeherderModelBase):
                 debug_show=self.DEBUG,
                 placeholders=tda.signature_property_placeholders,
                 executemany=True)

-        tda.submit_tasks()
+        print "CALLING SUBMIT_TASKS {0}".format(self.project)
+        tda.submit_tasks(self.project)
+
+    def store_performance_series(self, t_range, series_type, signature, series_data):
+
+        now_timestamp = int(time.time())
+
+        # If we don't have this t_range/signature combination create it
+        series_data_json = json.dumps(series_data)
+        insert_placeholders = [
+            t_range, signature, series_type, now_timestamp,
+            series_data_json, t_range, signature
+        ]
+        self.get_jobs_dhub().execute(
+            proc='jobs.inserts.set_performance_series',
+            debug_show=self.DEBUG,
+            placeholders=insert_placeholders)
+
+        # Retrieve and update the series
+        performance_series = self.get_jobs_dhub().execute(
+            proc='jobs.selects.get_performance_series',
+            debug_show=self.DEBUG,
+            placeholders=[t_range, signature])
+
+        db_series_json = performance_series[0]['blob']
+
+        # If they're equal this was the first time the t_range
+        # and signature combination was stored, so there's nothing to
+        # do
+        if series_data_json != db_series_json:
+
+            series = json.loads(db_series_json)
+            push_timestamp_limit = now_timestamp - int(t_range)
+
+            series.extend(series_data)
+            sorted_series = sorted(
+                series, key=itemgetter('result_set_id')
+            )
+            filtered_series = filter(
+                lambda d: d['push_timestamp'] > push_timestamp_limit,
+                sorted_series
+            )
+
+            print [t_range, push_timestamp_limit, signature]
+            print filtered_series
+
     def _load_job_artifacts(self, artifact_placeholders, job_id_lookup):
         """

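store_performance_series relies on the insert proc to create the interval/signature row only when it is missing, then reads the row back: if the stored blob differs from what was just offered, the row already existed, so the new datapoints are merged in, ordered by result_set_id, and trimmed to the interval. A condensed, standalone sketch of that merge step (it mirrors the code above; it is not the stored procedure itself):

    import json
    import time
    from operator import itemgetter

    def merge_series(db_series_json, series_data, t_range):
        # combine stored and new datapoints, keep result_set_id order,
        # and drop anything pushed before the interval's cutoff
        series = json.loads(db_series_json)
        series.extend(series_data)
        push_timestamp_limit = int(time.time()) - int(t_range)
        sorted_series = sorted(series, key=itemgetter('result_set_id'))
        return [d for d in sorted_series
                if d['push_timestamp'] > push_timestamp_limit]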
View file

@@ -0,0 +1,141 @@
+import json
+import sys
+
+from optparse import make_option
+
+from django.core.management.base import BaseCommand
+
+from treeherder.model.models import Repository
+from treeherder.model.derived import JobsModel
+from treeherder.model.tasks import populate_performance_series
+
+
+class Command(BaseCommand):
+
+    help = """Populate performance series data"""
+
+    option_list = BaseCommand.option_list + (
+
+        make_option('--debug',
+                    action='store_true',
+                    dest='debug',
+                    default=None,
+                    help='Write debug messages to stdout'),
+
+        make_option('--result_set_id',
+                    dest='result_set_id',
+                    default=None,
+                    help='Increment result_set id to test adding to one set of structure'),
+
+        make_option('--push_timestamp',
+                    dest='push_timestamp',
+                    default=None,
+                    help='Specify the push_timestamp for the default data structure'),
+
+        make_option('--project',
+                    dest='project',
+                    default='mozilla-inbound',
+                    help='Project name to store data in.'),
+
+        make_option('--data_file',
+                    dest='data_file',
+                    default=None,
+                    help='File to use for input data, otherwise uses default'),
+
+        make_option('--show_default_data',
+                    action='store_true',
+                    dest='show_default_data',
+                    default=None,
+                    help='Write default data structure to stdout'),
+    )
+
+    default_data = {
+        '4aa266824dbe623696a767d59de310a05bef21f5': [
+            {'std': '50.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 2474.0, 'max': 2642.0, 'median': '2525.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '2524.7'}
+        ],
+        '930295ab2dedff613f888c002cc471e055cf89ee': [
+            {'std': '35.0', 'result_set_id': 11, 'job_id': 1121,
+             'min': 1156.0, 'max': 1359.0, 'median': '1228.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '1228.0'}
+        ],
+        '843a5600f1d4b964526fde76e073c50a8e69497f': [
+            {'std': '29.3', 'result_set_id': 11, 'job_id': 1121,
+             'min': 827.0, 'max': 880.0, 'median': '861.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '860.6'}
+        ],
+        'e07ff2a8e874e066ad680a4c61d65486877ef5a3': [
+            {'std': '44.9', 'result_set_id': 11, 'job_id': 1121,
+             'min': 1935.0, 'max': 2540.0, 'median': '2020.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '2020.1'}
+        ],
+        'b8d07a1a082d93f87cde13cbd9c571259d5b30cb': [
+            {'std': '26.0', 'result_set_id': 11, 'job_id': 1121,
+             'min': 636.0, 'max': 934.0, 'median': '675.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '675.1'}
+        ],
+        '3b7fc925aeedffaecd8344b52e551c36833ee426': [
+            {'std': '54.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 2876.0, 'max': 3022.0, 'median': '2932.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '2932.4'}
+        ],
+        '824dbda856e33a97a85213c1e02f39b7e31103f1': [
+            {'std': '43.8', 'result_set_id': 11, 'job_id': 1121,
+             'min': 1879.0, 'max': 1976.0, 'median': '1916.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '1916.4'}
+        ],
+        '619479c35f8dd4bb2dff79a22e02b6577824e5a3': [
+            {'std': '196.6', 'result_set_id': 11, 'job_id': 1121,
+             'min': 38627.0, 'max': 38720.0, 'median': '38660.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '38660.2'}
+        ],
+        '191dc90def70a5bcf6024392e33794e109240e41': [
+            {'std': '115.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 13163.0, 'max': 13571.0, 'median': '13274.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '13273.8'}
+        ],
+        '63044a1201d428b519ae82131b6bd1892493d19b': [
+            {'std': '202.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 40726.0, 'max': 41059.0, 'median': '40881.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '40880.9'}
+        ],
+        '69a1cb436d2e96f7da12171da96ed32ac90e013c': [
+            {'std': '89.8', 'result_set_id': 11, 'job_id': 1121,
+             'min': 8000.0, 'max': 8235.0, 'median': '8056.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '8055.9'}
+        ],
+        '325a288c74ee5f59853e31ee6461dd804d09572e': [
+            {'std': '115.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 13129.0, 'max': 13557.0, 'median': '13267.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '13266.5'}
+        ]}
+
+    def handle(self, *args, **options):
+
+        show_default_data = options.get('show_default_data', None)
+        result_set_id = options.get('result_set_id', None)
+        push_timestamp = options.get('push_timestamp', None)
+
+        if show_default_data:
+            print json.dumps(self.default_data)
+            sys.exit()
+
+        if result_set_id or push_timestamp:
+            for sig in self.default_data:
+                if result_set_id:
+                    self.default_data[sig][0]['result_set_id'] = result_set_id
+                if push_timestamp:
+                    self.default_data[sig][0]['push_timestamp'] = push_timestamp
+
+        project = options.get('project')
+
+        populate_performance_series(project, 'talos_data', self.default_data)

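An invocation sketch via Django's call_command; the command name is assumed from the module's purpose, since the filename is not shown in this view:

    # Hedged sketch; 'populate_performance_series' as a command name is assumed
    from django.core.management import call_command

    call_command('populate_performance_series', show_default_data=True)
    call_command('populate_performance_series',
                 project='mozilla-inbound',
                 result_set_id=12, push_timestamp=1403000000)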
View file

@@ -315,9 +315,9 @@
         },
         "get_performance_series": {
-            "sql":"SELECT *
+            "sql":"SELECT `interval_seconds`, `series_signature`, `type`, `last_updated`, `blob`
                    FROM `performance_series`
-                   WHERE `id` IN (REP0)",
+                   WHERE `interval_seconds` = ? AND `series_signature` = ?",
             "host":"read_host"

View file

@@ -85,4 +85,14 @@ def calculate_eta(sample_window_seconds=21600, debug=False):

     jm.disconnect()

+
+@task(name='populate-performance-series')
+def populate_performance_series(project, series_type, series_data):
+
+    jm = JobsModel(project)
+    for t_range in settings.TREEHERDER_PERF_SERIES_TIME_RANGES:
+        for signature in series_data:
+            jm.store_performance_series(
+                t_range['seconds'], series_type, signature,
+                series_data[signature]
+            )
+    jm.disconnect()

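A sketch of dispatching the task asynchronously, mirroring TalosDataAdapter.submit_tasks above (the signature key and datapoint values are illustrative):

    # Hedged sketch; data shape as produced by TalosDataAdapter
    from treeherder.model.tasks import populate_performance_series

    signatures = {
        '4aa266824dbe623696a767d59de310a05bef21f5': [  # illustrative
            {'result_set_id': 11, 'job_id': 1121,
             'push_timestamp': 1402944547, 'mean': '2524.7'}
        ]
    }
    populate_performance_series.apply_async(
        args=['mozilla-inbound', 'talos_data', signatures]
    )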
View file

@@ -25,6 +25,15 @@ DEBUG = os.environ.get("TREEHERDER_DEBUG", False)

 TREEHERDER_REQUEST_PROTOCOL = os.environ.get("TREEHERDER_REQUEST_PROTOCOL", "http")
 TREEHERDER_REQUEST_HOST = os.environ.get("TREEHERDER_REQUEST_HOST", "local.treeherder.mozilla.org")

+TREEHERDER_PERF_SERIES_TIME_RANGES = [
+    { "seconds":86400, "days":1 },
+    { "seconds":604800, "days":7 },
+    { "seconds":1209600, "days":14 },
+    { "seconds":2592000, "days":30 },
+    { "seconds":5184000, "days":60 },
+    { "seconds":7776000, "days":90 },
+]
+
 RABBITMQ_USER = os.environ.get("TREEHERDER_RABBITMQ_USER", "")
 RABBITMQ_PASSWORD = os.environ.get("TREEHERDER_RABBITMQ_PASSWORD", "")
 RABBITMQ_VHOST = os.environ.get("TREEHERDER_RABBITMQ_VHOST", "")

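Each entry pairs a day count with the same span in seconds (days * 86400). An equivalent construction, shown only to make the arithmetic explicit:

    # every "seconds" value is days * 86400
    TREEHERDER_PERF_SERIES_TIME_RANGES = [
        {"seconds": days * 86400, "days": days}
        for days in (1, 7, 14, 30, 60, 90)
    ]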
View file

@@ -52,7 +52,6 @@ class ArtifactViewSet(viewsets.ViewSet):
             if job_id:
                 if datum['type'] in PerformanceDataAdapter.performance_types:
                     job_id_list.append(job_id)
-                    performance_artifact_data.append(datum)
                 else:
                     artifact_data.append((