From f6ac262dd07b8cac14c5fe38c1f3bd6c642e5af7 Mon Sep 17 00:00:00 2001 From: Rob Wood Date: Wed, 8 Feb 2017 08:50:52 -0500 Subject: [PATCH] Bug 1330652 - fix job priorities endpoint so it returns data from correct repo (#2128) * Bug 1330677 - Allow calling runnable jobs API without having to pass the Gecko decision task id. This is useful if all you care about is determining the most up-to-date list of tasks that can be scheduled. In the future, this will allow for determining the "current set of runnable jobs" on a schedule (caching the latest values) rather than on a per-API-call basis. * Bug 1330652 - SETA - Fix job priorities endpoint We were not passing the project name down to the functionality that retrieves runnable jobs and were thus using 'mozilla-inbound' by default. This change starts using the simplified ref_data_names() function, which also takes the project name. This also paves the way to drop Treecodes from the code. --- tests/etl/test_runnable_jobs.py | 71 +++++++++++++++++- tests/seta/test_job_priorities.py | 25 +++++-- tests/webapp/api/test_runnable_jobs_api.py | 4 +- treeherder/config/settings.py | 1 + treeherder/etl/runnable_jobs.py | 86 ++++++++++++---------- treeherder/etl/seta.py | 57 +++++++++++--- treeherder/seta/job_priorities.py | 52 +++---------- treeherder/seta/runnable_jobs.py | 5 +- 8 files changed, 202 insertions(+), 99 deletions(-) diff --git a/tests/etl/test_runnable_jobs.py b/tests/etl/test_runnable_jobs.py index c2eb93169..028d4dc55 100644 --- a/tests/etl/test_runnable_jobs.py +++ b/tests/etl/test_runnable_jobs.py @@ -1,5 +1,10 @@ +import responses + +from treeherder.config.settings import (TASKCLUSTER_INDEX_URL, + TASKCLUSTER_TASKGRAPH_URL) from treeherder.etl.buildbot import get_symbols_and_platforms -from treeherder.etl.runnable_jobs import RunnableJobsProcess +from treeherder.etl.runnable_jobs import (RunnableJobsProcess, + _taskcluster_runnable_jobs) from treeherder.model.models import (BuildPlatform, JobType, MachinePlatform, @@ -29,3 
+34,67 @@ def test_prune_old_runnable_job(test_repository, eleven_jobs_stored): assert len(rj) == 1 assert rj[0].ref_data_name == buildername + + +@responses.activate +def test_taskcluster_runnable_jobs(test_repository): + + """ + Test getting runnable jobs without providing decision task id + """ + repo = test_repository.name + task_id = 'AFq3FRt4TyiTwIN7fUqOQg' + tc_index_url = TASKCLUSTER_INDEX_URL % repo + tc_graph_url = TASKCLUSTER_TASKGRAPH_URL.format(task_id=task_id) + platform = 'plaform name' + grp_symbol = 'GRP' + grp_name = 'Group Name' + symbol = 'sym' + collection = {'opt': True} + name = 'job name' + description = 'job description' + + content1 = { + 'taskId': task_id + } + + content2 = { + 'node': { + 'task': { + 'extra': { + 'treeherder': { + 'groupSymbol': grp_symbol, + 'groupName': grp_name, + 'symbol': symbol, + 'machine': { + 'platform': platform + }, + 'collection': collection + } + }, + 'metadata': { + 'name': name, + 'description': description + } + } + } + } + + responses.add(responses.GET, tc_index_url, json=content1, match_querystring=True, status=200) + responses.add(responses.GET, tc_graph_url, json=content2, match_querystring=True, status=200) + + jobs_ret = _taskcluster_runnable_jobs(repo, None) + + assert len(jobs_ret) == 1 + test_job = jobs_ret[0] + + assert test_job['build_platform'] == platform + assert test_job['build_system_type'] == 'taskcluster' + assert test_job['job_group_name'] == grp_name + assert test_job['job_group_symbol'] == grp_symbol + assert test_job['job_type_name'] == name + assert test_job['job_type_description'] == description + assert test_job['job_type_symbol'] == symbol + assert test_job['platform'] == platform + assert test_job['state'] == 'runnable' + assert test_job['result'] == 'runnable' diff --git a/tests/seta/test_job_priorities.py b/tests/seta/test_job_priorities.py index 7a89be875..9e1048e67 100644 --- a/tests/seta/test_job_priorities.py +++ b/tests/seta/test_job_priorities.py @@ -4,20 +4,33 @@ import 
pytest from mock import patch from treeherder.config.settings import SETA_LOW_VALUE_PRIORITY -from treeherder.seta.job_priorities import seta_job_scheduling -from treeherder.seta.runnable_jobs import RunnableJobsClient +from treeherder.seta.job_priorities import (SetaError, + seta_job_scheduling) @pytest.mark.django_db() @patch('treeherder.seta.job_priorities._validate_request', return_value=None) # Prevent checking the repository name -@patch.object(RunnableJobsClient, 'query_runnable_jobs') -def test_gecko_decision_task(query_runnable_jobs, validate_request, - test_repository, runnable_jobs_data, all_job_priorities_stored): +@patch('treeherder.etl.seta.list_runnable_jobs') +def test_gecko_decision_task(runnable_jobs_list, validate_request, + test_repository, runnable_jobs_data, + all_job_priorities_stored): ''' When the Gecko decision task calls SETA it will return all jobs that are less likely to catch a regression (low value jobs). ''' - query_runnable_jobs.return_value = runnable_jobs_data + runnable_jobs_list.return_value = runnable_jobs_data jobs = seta_job_scheduling(project=test_repository.name, build_system_type='taskcluster') assert len(jobs['jobtypes'][str(datetime.date.today())]) == 1 + + +def test_gecko_decision_task_invalid_repo(): + ''' + When the Gecko decision task calls SETA it will return all jobs that are less likely to catch + a regression (low value jobs). + ''' + with pytest.raises(SetaError) as exception_info: + seta_job_scheduling(project='mozilla-repo-x', build_system_type='taskcluster') + + assert exception_info.value.message == "The specified project repo 'mozilla-repo-x' " \ + "is not supported by SETA." 
diff --git a/tests/webapp/api/test_runnable_jobs_api.py b/tests/webapp/api/test_runnable_jobs_api.py index 4bccdff8f..b11315090 100644 --- a/tests/webapp/api/test_runnable_jobs_api.py +++ b/tests/webapp/api/test_runnable_jobs_api.py @@ -1,9 +1,11 @@ from django.core.urlresolvers import reverse +from mock import patch from treeherder.model.models import RunnableJob -def test_runnable_jobs_api(webapp, test_job): +@patch('treeherder.etl.runnable_jobs._taskcluster_runnable_jobs', return_value=[]) +def test_runnable_jobs_api(taskcluster_runnable_jobs, webapp, test_job): RunnableJob.objects.create( build_platform=test_job.build_platform, machine_platform=test_job.machine_platform, diff --git a/treeherder/config/settings.py b/treeherder/config/settings.py index 9a17bd9fa..cacb2faf6 100644 --- a/treeherder/config/settings.py +++ b/treeherder/config/settings.py @@ -393,6 +393,7 @@ BUILDAPI_RUNNING_URL = "https://secure.pub.build.mozilla.org/builddata/buildjson BUILDAPI_BUILDS4H_URL = "https://secure.pub.build.mozilla.org/builddata/buildjson/builds-4hr.js.gz" ALLTHETHINGS_URL = "https://secure.pub.build.mozilla.org/builddata/reports/allthethings.json" TASKCLUSTER_TASKGRAPH_URL = 'https://queue.taskcluster.net/v1/task/{task_id}/artifacts/public/full-task-graph.json' +TASKCLUSTER_INDEX_URL = 'https://index.taskcluster.net/v1/task/gecko.v2.%s.latest.firefox.decision' # the amount of time we cache bug suggestion lookups (to speed up loading the bug # suggestions or autoclassify panels for recently finished jobs) diff --git a/treeherder/etl/runnable_jobs.py b/treeherder/etl/runnable_jobs.py index 3b8bc94c1..ca5c2dc6d 100644 --- a/treeherder/etl/runnable_jobs.py +++ b/treeherder/etl/runnable_jobs.py @@ -7,6 +7,7 @@ from django.conf import settings from django.core.exceptions import ValidationError from django.core.validators import URLValidator +from treeherder.config.settings import TASKCLUSTER_INDEX_URL from treeherder.etl.buildbot import get_symbols_and_platforms from 
treeherder.etl.common import fetch_json from treeherder.model.models import (BuildPlatform, @@ -117,49 +118,49 @@ class RunnableJobsProcess(AllthethingsTransformerMixin): self.update_runnable_jobs_table(jobs_per_branch) -def _taskcluster_runnable_jobs(decision_task_id): +def _taskcluster_runnable_jobs(project, decision_task_id): + ret = [] + tc_graph = {} if not decision_task_id: + decision_task_id = _query_latest_gecko_decision_task_id(project) + + tc_graph_url = settings.TASKCLUSTER_TASKGRAPH_URL.format(task_id=decision_task_id) + validate = URLValidator() + try: + validate(tc_graph_url) + tc_graph = fetch_json(tc_graph_url) + except ValidationError: + logger.warning('Failed to validate {}'.format(tc_graph_url)) return [] - else: - ret = [] - tc_graph = {} - tc_graph_url = settings.TASKCLUSTER_TASKGRAPH_URL.format(task_id=decision_task_id) - validate = URLValidator() - try: - validate(tc_graph_url) - tc_graph = fetch_json(tc_graph_url) - except ValidationError: - logger.warning('Failed to validate {}'.format(tc_graph_url)) - return [] - for label, node in tc_graph.iteritems(): - if not ('extra' in node['task'] and 'treeherder' in node['task']['extra']): - # some tasks don't have the treeherder information we need - # to be able to display them (and are not intended to be - # displayed). skip. - continue + for label, node in tc_graph.iteritems(): + if not ('extra' in node['task'] and 'treeherder' in node['task']['extra']): + # some tasks don't have the treeherder information we need + # to be able to display them (and are not intended to be + # displayed). skip. 
+ continue - treeherder_options = node['task']['extra']['treeherder'] - task_metadata = node['task']['metadata'] - platform_option = ' '.join(treeherder_options.get('collection', {}).keys()) + treeherder_options = node['task']['extra']['treeherder'] + task_metadata = node['task']['metadata'] + platform_option = ' '.join(treeherder_options.get('collection', {}).keys()) - ret.append({ - 'build_platform': treeherder_options.get('machine', {}).get('platform', ''), - 'build_system_type': 'taskcluster', - 'job_group_name': treeherder_options.get('groupName', ''), - 'job_group_symbol': treeherder_options.get('groupSymbol', ''), - 'job_type_description': task_metadata['description'], - 'job_type_name': task_metadata['name'], - 'job_type_symbol': treeherder_options['symbol'], - 'platform': treeherder_options.get('machine', {}).get('platform', ''), - 'platform_option': platform_option, - 'ref_data_name': label, - 'state': 'runnable', - 'result': 'runnable', - 'job_coalesced_to_guid': None - }) + ret.append({ + 'build_platform': treeherder_options.get('machine', {}).get('platform', ''), + 'build_system_type': 'taskcluster', + 'job_group_name': treeherder_options.get('groupName', ''), + 'job_group_symbol': treeherder_options.get('groupSymbol', ''), + 'job_type_description': task_metadata['description'], + 'job_type_name': task_metadata['name'], + 'job_type_symbol': treeherder_options['symbol'], + 'platform': treeherder_options.get('machine', {}).get('platform', ''), + 'platform_option': platform_option, + 'ref_data_name': label, + 'state': 'runnable', + 'result': 'runnable', + 'job_coalesced_to_guid': None + }) - return ret + return ret def _buildbot_runnable_jobs(project): @@ -209,6 +210,15 @@ def _buildbot_runnable_jobs(project): def list_runnable_jobs(project, decision_task_id=None): ret = _buildbot_runnable_jobs(project) - ret = ret + _taskcluster_runnable_jobs(decision_task_id) + ret = ret + _taskcluster_runnable_jobs(project, decision_task_id) return 
dict(meta={"repository": project, "offset": 0, "count": len(ret)}, results=ret) + + +def _query_latest_gecko_decision_task_id(project): + url = TASKCLUSTER_INDEX_URL % project + logger.info('Fetching {}'.format(url)) + latest_task = fetch_json(url) + task_id = latest_task['taskId'] + logger.info('For {} we found the task id: {}'.format(project, task_id)) + return task_id diff --git a/treeherder/etl/seta.py b/treeherder/etl/seta.py index da7812a1d..250d31374 100644 --- a/treeherder/etl/seta.py +++ b/treeherder/etl/seta.py @@ -1,5 +1,6 @@ import logging +from treeherder.etl.runnable_jobs import list_runnable_jobs from treeherder.seta.common import unique_key from treeherder.seta.models import JobPriority from treeherder.seta.runnable_jobs import RunnableJobsClient @@ -21,7 +22,7 @@ class Treecodes: # e.g. Ubuntu VM 12.04 x64 mozilla-inbound opt test web-platform-tests-4 OR # test-linux64/opt-web-platform-tests-4 testtype = job_testtype(job) - if self._ignore(testtype): + if _ignore(testtype): ignored_jobs.append(job['ref_data_name']) continue @@ -40,16 +41,6 @@ class Treecodes: for ref_data_name in sorted(ignored_jobs): LOG.info('Ignoring {}'.format(ref_data_name)) - def _ignore(self, testtype): - if not testtype: - return True - - # XXX: This has the danger of falling out of date - # https://bugzilla.mozilla.org/show_bug.cgi?id=1325369 - for i in ('dep', 'nightly', 'non-unified', 'valgrind', 'build'): - if testtype.find(i) != -1: - return True - def query_jobtypes(self): """Query all available jobtypes and return it as list""" return self.jobtypes @@ -59,6 +50,17 @@ class Treecodes: return self.jobnames +def _ignore(testtype): + if not testtype: + return True + + # XXX: This has the danger of falling out of date + # https://bugzilla.mozilla.org/show_bug.cgi?id=1325369 + for i in ('dep', 'nightly', 'non-unified', 'valgrind', 'build'): + if testtype.find(i) != -1: + return True + + def job_testtype(job): return parse_testtype( 
build_system_type=job['build_system_type'], @@ -164,3 +166,36 @@ def job_priorities_to_jobtypes(): jobtypes.append(jp.unique_identifier()) return jobtypes + + +def ref_data_names(project, build_system): + ''' + We want all reference data names for every task that runs on a specific project. + + For example: + * Buildbot - "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1" + * TaskCluster = "test-linux64/opt-mochitest-webgl-e10s-1" + ''' + ignored_jobs = [] + ref_data_names = {} + runnable_jobs = list_runnable_jobs(project)['results'] + + for job in runnable_jobs: + testtype = job_testtype(job) # e.g. web-platform-tests-4 + if _ignore(testtype): + ignored_jobs.append(job['ref_data_name']) + continue + + key = unique_key(testtype=testtype, + buildtype=job['platform_option'], + platform=job['platform']) + + if build_system == '*': + ref_data_names[key] = job['ref_data_name'] + elif job['build_system_type'] == build_system: + ref_data_names[key] = job['ref_data_name'] + + for ref_data_name in sorted(ignored_jobs): + LOG.info('Ignoring {}'.format(ref_data_name)) + + return ref_data_names diff --git a/treeherder/seta/job_priorities.py b/treeherder/seta/job_priorities.py index 36179bde3..d8b553100 100644 --- a/treeherder/seta/job_priorities.py +++ b/treeherder/seta/job_priorities.py @@ -2,54 +2,24 @@ import datetime import logging from treeherder.config.settings import SETA_LOW_VALUE_PRIORITY -from treeherder.etl.seta import Treecodes +from treeherder.etl.seta import ref_data_names from treeherder.model.models import Repository -from treeherder.seta.common import unique_key from treeherder.seta.models import JobPriority logger = logging.getLogger(__name__) +SETA_PROJECTS = ['mozilla-inbound', 'autoland'] + class SetaError(Exception): pass -# XXX: We can get rid of this function if Treecodes took care of it -def _ref_data_names(build_system): - ''' - Sample data from Treecodes().query_jobnames() (skipping irrelevant fields) - { - "buildplatform": 
"buildbot", - "buildtype": "debug", - "platform": "windows8-64", - "ref_data_name": "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1", - "testtype": "web-platform-tests-1", - },{ - "buildplatform": "taskcluster", - "buildtype": "opt", - "platform": "linux64", - "ref_data_name": "test-linux64/opt-mochitest-webgl-e10s-1", - "testtype": "mochitest-webgl-e10s-1", - } - ''' - ref_data_names = {} - for job in Treecodes().query_jobnames(): - key = unique_key(testtype=job['testtype'], - buildtype=job['buildtype'], - platform=job['platform']) - if build_system == '*': - ref_data_names[key] = job['ref_data_name'] - elif job['buildplatform'] == build_system: - ref_data_names[key] = job['ref_data_name'] - - return ref_data_names - - -def _process(build_system, job_priorities): +def _process(project, build_system, job_priorities): '''Return list of ref_data_name for job_priorities''' jobs = [] - # This map contains the ref_data_name of every Treeherder job - ref_data_names = _ref_data_names(build_system) + # This map contains the ref_data_name of every Treeherder *test* job for this project + ref_data_names_map = ref_data_names(project, build_system) for jp in job_priorities: if build_system == 'taskcluster': @@ -60,9 +30,9 @@ def _process(build_system, job_priorities): jp.testtype = jp.testtype.replace('gl-', 'webgl-') key = jp.unique_identifier() - if key in ref_data_names: + if key in ref_data_names_map: # e.g. 
desktop-test-linux64-pgo/opt-reftest-13 or builder name - jobs.append(ref_data_names[key]) + jobs.append(ref_data_names_map[key]) else: logger.warning('We did not find job priority ({}) in the list of accepted jobs'.format(jp)) @@ -76,7 +46,7 @@ def _gecko_decision_task_request(project): if jp.has_expired(): job_priorities.append(jp) - return _process(build_system='taskcluster', job_priorities=job_priorities) + return _process(project, build_system='taskcluster', job_priorities=job_priorities) def _query_job_priorities(priority, excluded_build_system_type): @@ -91,6 +61,8 @@ def _query_job_priorities(priority, excluded_build_system_type): def _validate_request(build_system_type, project): if build_system_type not in ('buildbot', 'taskcluster', '*'): raise SetaError('Valid build_system_type values are buildbot or taskcluster.') + if project not in SETA_PROJECTS: + raise SetaError("The specified project repo '%s' is not supported by SETA." % project) def seta_job_scheduling(project, build_system_type, priority=None): @@ -102,7 +74,7 @@ def seta_job_scheduling(project, build_system_type, priority=None): if build_system_type != '*': excluded_build_system_type = 'taskcluster' if build_system_type == 'buildbot' else 'buildbot' job_priorities = _query_job_priorities(priority, excluded_build_system_type) - ref_data_names = _process(build_system_type, job_priorities) + ref_data_names = _process(project, build_system_type, job_priorities) # We don't really need 'jobtypes' and today's date in the returning data # Getting rid of it will require the consumers to not expect it. 
diff --git a/treeherder/seta/runnable_jobs.py b/treeherder/seta/runnable_jobs.py index c9d2946b8..6ef005dd9 100644 --- a/treeherder/seta/runnable_jobs.py +++ b/treeherder/seta/runnable_jobs.py @@ -1,5 +1,7 @@ +# TODO: Get rid of this module (bug 1330649) import logging +from treeherder.config.settings import TASKCLUSTER_INDEX_URL from treeherder.etl.common import fetch_json from treeherder.etl.runnable_jobs import list_runnable_jobs @@ -7,8 +9,7 @@ logger = logging.getLogger(__name__) class RunnableJobsClient(): - def __init__(self, treeherder_host='https://treeherder.mozilla.org', - tc_index_url='https://index.taskcluster.net/v1/task/gecko.v2.%s.latest.firefox.decision'): + def __init__(self, tc_index_url=TASKCLUSTER_INDEX_URL): self.tc_index_url = tc_index_url self.cache = {}