diff --git a/tests/log_parser/test_tasks.py b/tests/log_parser/test_tasks.py
index 57e14651d..d21ed9615 100644
--- a/tests/log_parser/test_tasks.py
+++ b/tests/log_parser/test_tasks.py
@@ -2,6 +2,8 @@ import pytest
 import simplejson as json
 from mock import MagicMock

+from treeherder.log_parser.artifactbuildercollection import ArtifactBuilderCollection
+
 from ..sampledata import SampleData
 from treeherder.model.derived import JobData
 from treeherder.log_parser.parsers import ErrorParser
@@ -54,3 +56,54 @@ def test_parse_log(jm, initial_data, jobs_with_local_log, sample_resultset, mock
     # for the job artifact panel
     assert len(job_artifacts) >= 2
+
+
+@pytest.fixture
+def jobs_with_talos_log(initial_data):
+    log = "talos-data"
+    sample_data = SampleData()
+    url = "file://{0}".format(
+        sample_data.get_log_path("{0}.txt.gz".format(log)))
+
+    job = sample_data.job_data[0]
+
+    # substitute the log url with a local url
+    job['job']['log_references'][0]['url'] = url
+    # make this a successful job, so no error log processing
+    job['job']['result'] = "success"
+    return [job]
+
+
+def test_parse_talos_log(jm, initial_data, jobs_with_talos_log, sample_resultset, mock_send_request):
+    """
+    check that at least 2 job_artifacts get inserted when running
+    a parse_log task against a talos log
+    """
+
+    jm.store_result_set_data(sample_resultset)
+
+    jobs = jobs_with_talos_log
+    for job in jobs:
+        job['revision_hash'] = sample_resultset[0]['revision_hash']
+
+    jm.store_job_data(jobs)
+    jm.process_objects(1, raise_errors=True)
+
+    job_id = jm.get_jobs_dhub().execute(
+        proc="jobs_test.selects.row_by_guid",
+        placeholders=[jobs[0]['job']['job_guid']]
+    )[0]['id']
+
+    job_artifacts = jm.get_jobs_dhub().execute(
+        proc="jobs_test.selects.job_artifact",
+        placeholders=[job_id]
+    )
+
+    jm.disconnect()
+
+    # we must have at least 2 artifacts: one for the log viewer and another one
+    # for the job artifact panel
+    assert len(job_artifacts) >= 2
diff --git a/treeherder/etl/perf_data_adapters.py b/treeherder/etl/perf_data_adapters.py
index 63070eca1..db130af44 100644
--- a/treeherder/etl/perf_data_adapters.py
+++ b/treeherder/etl/perf_data_adapters.py
@@ -117,10 +117,11 @@ class TalosDataAdapter(PerformanceDataAdapter):
         self.signatures = {}
         self.performance_artifact_placeholders = []
         self.signature_property_placeholders = []
-        self.series_signature_data = []

     def adapt_and_load(self, reference_data, job_data, datum):

+        datum['blob'] = json.loads(datum['blob'])
+
         validate(datum['blob'], self.datazilla_schema)

         _job_guid = datum["job_guid"]
@@ -181,7 +182,7 @@ class TalosDataAdapter(PerformanceDataAdapter):

         if series_signature not in self.signatures:

-            self.signatures[series_signature] = True
+            self.signatures[series_signature] = []

             for signature_property in signature_properties:
                 self.signature_property_placeholders.append([
@@ -201,7 +202,7 @@ class TalosDataAdapter(PerformanceDataAdapter):
                 json.dumps(obj)
             ])

-        self.series_signature_data.append(series_data)
+        self.signatures[series_signature].append(series_data)

     def get_series_signature(self, signature_values):

@@ -213,5 +214,10 @@ class TalosDataAdapter(PerformanceDataAdapter):

         return signature

-    def submit_tasks(self):
-        pass
+    def submit_tasks(self, project):
+
+        from treeherder.model.tasks import populate_performance_series
+
+        populate_performance_series.apply_async(
+            args=[project, 'talos_data', self.signatures]
+        )
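
Note on the adapter change above: self.signatures switches from a seen-flag dict to a
mapping of series signature to the list of datapoints collected for it, so one structure
both dedupes signature metadata and accumulates the series that submit_tasks() hands to
the populate-performance-series task. A minimal standalone sketch of that shape (the
signature string and datum fields here are illustrative, not real talos output):

    signatures = {}

    def collect(series_signature, series_data):
        # the first sighting of a signature starts an empty series ...
        if series_signature not in signatures:
            signatures[series_signature] = []
        # ... and every datum for it, first or not, is appended
        signatures[series_signature].append(series_data)

    collect('4aa26682...', {'result_set_id': 11, 'mean': '2524.7'})
    collect('4aa26682...', {'result_set_id': 12, 'mean': '2531.0'})
    # signatures == {'4aa26682...': [<datum for 11>, <datum for 12>]}
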
diff --git a/treeherder/log_parser/artifactbuildercollection.pyx b/treeherder/log_parser/artifactbuildercollection.pyx
index aa986df6d..98ab29009 100644
--- a/treeherder/log_parser/artifactbuildercollection.pyx
+++ b/treeherder/log_parser/artifactbuildercollection.pyx
@@ -4,7 +4,8 @@ import io
 import logging
 from contextlib import closing
 from .artifactbuilders import (BuildbotLogViewArtifactBuilder,
-                               BuildbotJobArtifactBuilder)
+                               BuildbotJobArtifactBuilder,
+                               BuildbotPerformanceDataArtifactBuilder)


 class ArtifactBuilderCollection(object):
@@ -46,6 +47,12 @@ BuildbotJobArtifactBuilder
 * Parsers:
     * ErrorParser
     * TinderboxPrintParser
+
+BuildbotPerformanceDataArtifactBuilder
+--------------------------------------
+* Builds an artifact from talos data
+* Parsers:
+    * TalosParser
 """

     def __init__(self, url, builders=None, check_errors=True):
@@ -72,7 +79,8 @@ If omitted, use defaults.
                 url=self.url,
                 check_errors=check_errors,
             ),
-            BuildbotJobArtifactBuilder(self.url)
+            BuildbotJobArtifactBuilder(self.url),
+            BuildbotPerformanceDataArtifactBuilder(self.url)
         ]

     def get_log_handle(self, url):
diff --git a/treeherder/log_parser/artifactbuilders.pyx b/treeherder/log_parser/artifactbuilders.pyx
index 2fd9bd377..f6ade3b3d 100644
--- a/treeherder/log_parser/artifactbuilders.pyx
+++ b/treeherder/log_parser/artifactbuilders.pyx
@@ -57,8 +57,7 @@ class BuildbotJobArtifactBuilder(ArtifactBuilderBase):
         """Construct a job artifact builder."""
         super(BuildbotJobArtifactBuilder, self).__init__(url)
         self.parsers = [
-            TinderboxPrintParser(),
-            TalosParser()
+            TinderboxPrintParser()
         ]
         self.name = "Job Info"

@@ -71,7 +70,17 @@ class BuildbotLogViewArtifactBuilder(ArtifactBuilderBase):
         super(BuildbotLogViewArtifactBuilder, self).__init__(url)
         self.parsers = [
             HeaderParser(),
-            StepParser(check_errors=check_errors),
-            TalosParser()
+            StepParser(check_errors=check_errors)
         ]
         self.name = "Structured Log"
+
+
+class BuildbotPerformanceDataArtifactBuilder(ArtifactBuilderBase):
+    """Makes the artifact for performance data."""
+
+    def __init__(self, url=None, check_errors=True):
+        """Construct an artifact builder for performance data."""
+        super(BuildbotPerformanceDataArtifactBuilder, self).__init__(url)
+        self.parsers = [
+            TalosParser()
+        ]
+        self.name = "talos_data"
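
The collection keys its output by each builder's name, so the new builder surfaces as a
separate talos_data entry alongside Structured Log and Job Info; the parse_log change
below branches on that key and unwraps the first blob. A sketch of the artifact layout
parse_log now receives (the blob fields are assumptions for illustration, not the actual
TalosParser output):

    artifacts = {
        'Structured Log': {'step_data': {'steps': []}},
        'Job Info': {'job_details': []},
        # the performance builder nests its parser output under its own name,
        # which is why parse_log reads artifact['talos_data'][0]
        'talos_data': {'talos_data': [{'results': {'tsvgr': [2474.0, 2642.0]}}]},
    }

    for name, artifact in artifacts.items():
        if name == 'talos_data' and artifact[name]:
            print artifact[name][0]  # only the first blob becomes the artifact
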
diff --git a/treeherder/log_parser/tasks.py b/treeherder/log_parser/tasks.py
index fe72ba825..fcccdd95b 100644
--- a/treeherder/log_parser/tasks.py
+++ b/treeherder/log_parser/tasks.py
@@ -57,12 +57,17 @@ def parse_log(project, log_url, job_guid, resultset, check_errors=False):
             check_errors=check_errors,
         )
         artifact_bc.parse()
-
         artifact_list = []
         for name, artifact in artifact_bc.artifacts.items():
-            data_type = 'performance' if name == 'talos_data' else 'json'
-
-            artifact_list.append((job_guid, name, data_type, json.dumps(artifact)))
+            if name == 'talos_data':
+                data_type = 'performance'
+                if artifact[name]:
+                    artifact_list.append(
+                        (job_guid, name, data_type, json.dumps(artifact[name][0]))
+                    )
+            else:
+                data_type = 'json'
+                artifact_list.append((job_guid, name, data_type, json.dumps(artifact)))

         if check_errors:
             all_errors = artifact_bc.artifacts.get(
diff --git a/treeherder/model/derived/jobs.py b/treeherder/model/derived/jobs.py
index 4482fbff0..a2334889e 100644
--- a/treeherder/model/derived/jobs.py
+++ b/treeherder/model/derived/jobs.py
@@ -2,6 +2,8 @@ import json
 import MySQLdb
 import time

+from operator import itemgetter
+
 from _mysql_exceptions import IntegrityError
 from warnings import filterwarnings, resetwarnings

@@ -1812,8 +1814,52 @@ class JobsModel(TreeherderModelBase):
             debug_show=self.DEBUG,
             placeholders=tda.signature_property_placeholders,
             executemany=True)

-        tda.submit_tasks()
+        tda.submit_tasks(self.project)
+
+    def store_performance_series(self, t_range, series_type, signature, series_data):
+
+        now_timestamp = int(time.time())
+
+        # If we don't have this t_range/signature combination, create it
+        series_data_json = json.dumps(series_data)
+        insert_placeholders = [
+            t_range, signature, series_type, now_timestamp,
+            series_data_json, t_range, signature
+        ]
+        self.get_jobs_dhub().execute(
+            proc='jobs.inserts.set_performance_series',
+            debug_show=self.DEBUG,
+            placeholders=insert_placeholders)
+
+        # Retrieve and update the series
+        performance_series = self.get_jobs_dhub().execute(
+            proc='jobs.selects.get_performance_series',
+            debug_show=self.DEBUG,
+            placeholders=[t_range, signature])
+
+        db_series_json = performance_series[0]['blob']
+
+        # If they're equal, this was the first time this t_range and
+        # signature combination was stored, so there's nothing to do
+        if series_data_json != db_series_json:
+
+            series = json.loads(db_series_json)
+            push_timestamp_limit = now_timestamp - int(t_range)
+
+            series.extend(series_data)
+
+            sorted_series = sorted(
+                series, key=itemgetter('result_set_id')
+            )
+
+            filtered_series = filter(
+                lambda d: d['push_timestamp'] > push_timestamp_limit,
+                sorted_series
+            )
+
+            # TODO: write filtered_series back to the performance_series
+            # table; the update proc is not part of this patch

     def _load_job_artifacts(self, artifact_placeholders, job_id_lookup):
         """
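
The merge logic in store_performance_series above is worth isolating: the incoming
datapoints are appended to the stored blob, ordered by result_set_id, and anything whose
push_timestamp falls outside the interval is dropped. A self-contained sketch of the same
steps with made-up datapoints:

    import time
    from operator import itemgetter

    def merge_series(stored, incoming, interval_seconds, now=None):
        now = now or int(time.time())
        limit = now - int(interval_seconds)
        merged = sorted(stored + incoming, key=itemgetter('result_set_id'))
        # keep only datapoints still inside the sliding window
        return [d for d in merged if d['push_timestamp'] > limit]

    stored = [{'result_set_id': 10, 'push_timestamp': 1402858147, 'mean': '2520.0'}]
    incoming = [{'result_set_id': 11, 'push_timestamp': 1402944547, 'mean': '2524.7'}]
    print merge_series(stored, incoming, 86400, now=1402944600)
    # only the result_set 11 datapoint survives the one-day window
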
diff --git a/treeherder/model/management/commands/populate_performance_series.py b/treeherder/model/management/commands/populate_performance_series.py
new file mode 100644
index 000000000..eba315c12
--- /dev/null
+++ b/treeherder/model/management/commands/populate_performance_series.py
@@ -0,0 +1,141 @@
+import json
+import sys
+
+from optparse import make_option
+from django.core.management.base import BaseCommand
+from treeherder.model.models import Repository
+from treeherder.model.derived import JobsModel
+from treeherder.model.tasks import populate_performance_series
+
+
+class Command(BaseCommand):
+    help = """Populate performance series data"""
+
+    option_list = BaseCommand.option_list + (
+
+        make_option('--debug',
+                    action='store_true',
+                    dest='debug',
+                    default=None,
+                    help='Write debug messages to stdout'),
+
+        make_option('--result_set_id',
+                    dest='result_set_id',
+                    default=None,
+                    help='Set the result_set_id on the default data, to '
+                         'test appending to an existing series'),
+
+        make_option('--push_timestamp',
+                    dest='push_timestamp',
+                    default=None,
+                    help='Specify the push_timestamp for the default data structure'),
+
+        make_option('--project',
+                    dest='project',
+                    default='mozilla-inbound',
+                    help='Project name to store data in.'),
+
+        make_option('--data_file',
+                    dest='data_file',
+                    default=None,
+                    help='File to use for input data, otherwise uses default'),
+
+        make_option('--show_default_data',
+                    action='store_true',
+                    dest='show_default_data',
+                    default=None,
+                    help='Write default data structure to stdout'),
+
+    )
+
+    default_data = {
+        '4aa266824dbe623696a767d59de310a05bef21f5': [
+            {'std': '50.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 2474.0, 'max': 2642.0, 'median': '2525.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '2524.7'}
+        ],
+        '930295ab2dedff613f888c002cc471e055cf89ee': [
+            {'std': '35.0', 'result_set_id': 11, 'job_id': 1121,
+             'min': 1156.0, 'max': 1359.0, 'median': '1228.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '1228.0'}
+        ],
+        '843a5600f1d4b964526fde76e073c50a8e69497f': [
+            {'std': '29.3', 'result_set_id': 11, 'job_id': 1121,
+             'min': 827.0, 'max': 880.0, 'median': '861.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '860.6'}
+        ],
+        'e07ff2a8e874e066ad680a4c61d65486877ef5a3': [
+            {'std': '44.9', 'result_set_id': 11, 'job_id': 1121,
+             'min': 1935.0, 'max': 2540.0, 'median': '2020.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '2020.1'}
+        ],
+        'b8d07a1a082d93f87cde13cbd9c571259d5b30cb': [
+            {'std': '26.0', 'result_set_id': 11, 'job_id': 1121,
+             'min': 636.0, 'max': 934.0, 'median': '675.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '675.1'}
+        ],
+        '3b7fc925aeedffaecd8344b52e551c36833ee426': [
+            {'std': '54.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 2876.0, 'max': 3022.0, 'median': '2932.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '2932.4'}
+        ],
+        '824dbda856e33a97a85213c1e02f39b7e31103f1': [
+            {'std': '43.8', 'result_set_id': 11, 'job_id': 1121,
+             'min': 1879.0, 'max': 1976.0, 'median': '1916.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '1916.4'}
+        ],
+        '619479c35f8dd4bb2dff79a22e02b6577824e5a3': [
+            {'std': '196.6', 'result_set_id': 11, 'job_id': 1121,
+             'min': 38627.0, 'max': 38720.0, 'median': '38660.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '38660.2'}
+        ],
+        '191dc90def70a5bcf6024392e33794e109240e41': [
+            {'std': '115.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 13163.0, 'max': 13571.0, 'median': '13274.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '13273.8'}
+        ],
+        '63044a1201d428b519ae82131b6bd1892493d19b': [
+            {'std': '202.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 40726.0, 'max': 41059.0, 'median': '40881.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '40880.9'}
+        ],
+        '69a1cb436d2e96f7da12171da96ed32ac90e013c': [
+            {'std': '89.8', 'result_set_id': 11, 'job_id': 1121,
+             'min': 8000.0, 'max': 8235.0, 'median': '8056.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '8055.9'}
+        ],
+        '325a288c74ee5f59853e31ee6461dd804d09572e': [
+            {'std': '115.2', 'result_set_id': 11, 'job_id': 1121,
+             'min': 13129.0, 'max': 13557.0, 'median': '13267.000000',
+             'total_replicates': 10, 'push_timestamp': 1402944547,
+             'mean': '13266.5'}
+        ]}
+
+    def handle(self, *args, **options):
+
+        show_default_data = options.get('show_default_data', None)
+        result_set_id = options.get('result_set_id', None)
+        push_timestamp = options.get('push_timestamp', None)
+
+        if show_default_data:
+            print json.dumps(self.default_data)
+            sys.exit()
+
+        if result_set_id or push_timestamp:
+            for sig in self.default_data:
+                if result_set_id:
+                    self.default_data[sig][0]['result_set_id'] = int(result_set_id)
+                if push_timestamp:
+                    self.default_data[sig][0]['push_timestamp'] = int(push_timestamp)
+
+        project = options.get('project')
+        populate_performance_series(project, 'talos_data', self.default_data)
diff --git a/treeherder/model/sql/jobs.json b/treeherder/model/sql/jobs.json
index eeae16f68..25d8ec0a4 100644
--- a/treeherder/model/sql/jobs.json
+++ b/treeherder/model/sql/jobs.json
@@ -315,9 +315,9 @@
     },
     "get_performance_series": {
-        "sql":"SELECT *
+        "sql":"SELECT `interval_seconds`, `series_signature`, `type`, `last_updated`, `blob`
                 FROM `performance_series`
-               WHERE `id` IN (REP0)",
+               WHERE `interval_seconds` = ? AND `series_signature` = ?",
         "host":"read_host"
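
store_performance_series hands jobs.inserts.set_performance_series seven placeholders,
with t_range and signature repeated at the tail. The proc body is not part of this patch;
a conditional insert of roughly the following shape would consume the placeholders in
that order (this SQL is an assumption, shown only to document the placeholder contract,
not the actual proc):

    # hypothetical body for set_performance_series, matching the placeholder
    # list [t_range, signature, series_type, now_timestamp,
    #       series_data_json, t_range, signature]
    SET_PERFORMANCE_SERIES = """
        INSERT INTO performance_series
            (interval_seconds, series_signature, type, last_updated, blob)
        SELECT ?, ?, ?, ?, ?
        FROM DUAL
        WHERE NOT EXISTS (
            SELECT 1 FROM performance_series
            WHERE interval_seconds = ? AND series_signature = ?
        )
    """
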
AND `series_signature` = ?", "host":"read_host" diff --git a/treeherder/model/tasks.py b/treeherder/model/tasks.py index ad968a4fe..7244238cc 100644 --- a/treeherder/model/tasks.py +++ b/treeherder/model/tasks.py @@ -85,4 +85,14 @@ def calculate_eta(sample_window_seconds=21600, debug=False): jm.disconnect() +@task(name='populate-performance-series') +def populate_performance_series(project, series_type, series_data): + jm = JobsModel(project) + for t_range in settings.TREEHERDER_PERF_SERIES_TIME_RANGES: + for signature in series_data: + jm.store_performance_series( + t_range['seconds'], series_type, signature, + series_data[signature] + ) + jm.disconnect() diff --git a/treeherder/settings/base.py b/treeherder/settings/base.py index 714435aa6..1032b1d17 100644 --- a/treeherder/settings/base.py +++ b/treeherder/settings/base.py @@ -25,6 +25,15 @@ DEBUG = os.environ.get("TREEHERDER_DEBUG", False) TREEHERDER_REQUEST_PROTOCOL = os.environ.get("TREEHERDER_REQUEST_PROTOCOL", "http") TREEHERDER_REQUEST_HOST = os.environ.get("TREEHERDER_REQUEST_HOST", "local.treeherder.mozilla.org") +TREEHERDER_PERF_SERIES_TIME_RANGES = [ + { "seconds":86400, "days":1 }, + { "seconds":604800, "days":7 }, + { "seconds":1209600, "days":14 }, + { "seconds":2592000, "days":30 }, + { "seconds":5184000, "days":60 }, + { "seconds":7776000, "days":90 }, +] + RABBITMQ_USER = os.environ.get("TREEHERDER_RABBITMQ_USER", "") RABBITMQ_PASSWORD = os.environ.get("TREEHERDER_RABBITMQ_PASSWORD", "") RABBITMQ_VHOST = os.environ.get("TREEHERDER_RABBITMQ_VHOST", "") diff --git a/treeherder/webapp/api/artifact.py b/treeherder/webapp/api/artifact.py index ce0cb1757..5898c4f68 100644 --- a/treeherder/webapp/api/artifact.py +++ b/treeherder/webapp/api/artifact.py @@ -52,7 +52,6 @@ class ArtifactViewSet(viewsets.ViewSet): if job_id: if datum['type'] in PerformanceDataAdapter.performance_types: job_id_list.append(job_id) - performance_artifact_data.append(datum) else: artifact_data.append((