Bug 1330652 - fix job priorities endpoint so it returns data from correct repo (#2128)

* Bug 1330677 - Allow calling runnable jobs API without having to pass the Gecko decision task id.

This is useful if all you care about is to determine what is most up-to-date list of tasks that can be scheduled.

In the future, this will allow for determining the "current set of runnable jobs" on a schedule
(caching the latest values) rather on per API call.

* Bug 1330652 - SETA - Fix job priorities endpoint

We were not passing the project name down to the functionality that
retrieves runnable jobs, thus, using 'mozilla-inbound' by default.

This change starts using the simplified ref_data_names() method which also
takes the project name.

This also paves the way to drop Treecodes from the code.
This commit is contained in:
Rob Wood 2017-02-08 08:50:52 -05:00 committed by GitHub
Parent f9cd47c16b
Commit f6ac262dd0
8 changed files with 202 additions and 99 deletions

Просмотреть файл

@ -1,5 +1,10 @@
import responses
from treeherder.config.settings import (TASKCLUSTER_INDEX_URL,
TASKCLUSTER_TASKGRAPH_URL)
from treeherder.etl.buildbot import get_symbols_and_platforms
from treeherder.etl.runnable_jobs import RunnableJobsProcess
from treeherder.etl.runnable_jobs import (RunnableJobsProcess,
_taskcluster_runnable_jobs)
from treeherder.model.models import (BuildPlatform,
JobType,
MachinePlatform,
@ -29,3 +34,67 @@ def test_prune_old_runnable_job(test_repository, eleven_jobs_stored):
assert len(rj) == 1
assert rj[0].ref_data_name == buildername
@responses.activate
def test_taskcluster_runnable_jobs(test_repository):
    """
    Test getting runnable jobs without providing a decision task id.

    Mocks the Taskcluster index endpoint (which resolves the latest Gecko
    decision task id for the repo) and the task-graph endpoint, then checks
    that _taskcluster_runnable_jobs() turns the single graph node into the
    expected runnable-job dict.
    """
    repo = test_repository.name
    task_id = 'AFq3FRt4TyiTwIN7fUqOQg'
    tc_index_url = TASKCLUSTER_INDEX_URL % repo
    tc_graph_url = TASKCLUSTER_TASKGRAPH_URL.format(task_id=task_id)
    platform = 'platform name'  # fixed typo: was 'plaform name'
    grp_symbol = 'GRP'
    grp_name = 'Group Name'
    symbol = 'sym'
    collection = {'opt': True}
    name = 'job name'
    description = 'job description'
    # Response for the index lookup: just the latest decision task id.
    content1 = {
        'taskId': task_id
    }
    # Response for the full task graph: one node carrying the treeherder
    # metadata that _taskcluster_runnable_jobs() extracts.
    content2 = {
        'node': {
            'task': {
                'extra': {
                    'treeherder': {
                        'groupSymbol': grp_symbol,
                        'groupName': grp_name,
                        'symbol': symbol,
                        'machine': {
                            'platform': platform
                        },
                        'collection': collection
                    }
                },
                'metadata': {
                    'name': name,
                    'description': description
                }
            }
        }
    }
    responses.add(responses.GET, tc_index_url, json=content1, match_querystring=True, status=200)
    responses.add(responses.GET, tc_graph_url, json=content2, match_querystring=True, status=200)

    # decision_task_id is None, so the index endpoint must be queried.
    jobs_ret = _taskcluster_runnable_jobs(repo, None)

    assert len(jobs_ret) == 1
    test_job = jobs_ret[0]
    assert test_job['build_platform'] == platform
    assert test_job['build_system_type'] == 'taskcluster'
    assert test_job['job_group_name'] == grp_name
    assert test_job['job_group_symbol'] == grp_symbol
    assert test_job['job_type_name'] == name
    assert test_job['job_type_description'] == description
    assert test_job['job_type_symbol'] == symbol
    assert test_job['platform'] == platform
    assert test_job['state'] == 'runnable'
    assert test_job['result'] == 'runnable'

Просмотреть файл

@ -4,20 +4,33 @@ import pytest
from mock import patch
from treeherder.config.settings import SETA_LOW_VALUE_PRIORITY
from treeherder.seta.job_priorities import seta_job_scheduling
from treeherder.seta.runnable_jobs import RunnableJobsClient
from treeherder.seta.job_priorities import (SetaError,
seta_job_scheduling)
@pytest.mark.django_db()
@patch('treeherder.seta.job_priorities._validate_request', return_value=None)  # Prevent checking the repository name
@patch('treeherder.etl.seta.list_runnable_jobs')
def test_gecko_decision_task(runnable_jobs_list, validate_request,
                             test_repository, runnable_jobs_data,
                             all_job_priorities_stored):
    '''
    When the Gecko decision task calls SETA it will return all jobs that are less likely to catch
    a regression (low value jobs).
    '''
    runnable_jobs_list.return_value = runnable_jobs_data
    jobs = seta_job_scheduling(project=test_repository.name,
                               build_system_type='taskcluster')
    assert len(jobs['jobtypes'][str(datetime.date.today())]) == 1
def test_gecko_decision_task_invalid_repo():
    '''
    Requesting SETA data for a project that is not supported by SETA
    raises SetaError with an explanatory message.
    '''
    with pytest.raises(SetaError) as exception_info:
        seta_job_scheduling(project='mozilla-repo-x', build_system_type='taskcluster')

    assert exception_info.value.message == "The specified project repo 'mozilla-repo-x' " \
                                           "is not supported by SETA."

Просмотреть файл

@ -1,9 +1,11 @@
from django.core.urlresolvers import reverse
from mock import patch
from treeherder.model.models import RunnableJob
def test_runnable_jobs_api(webapp, test_job):
@patch('treeherder.etl.runnable_jobs._taskcluster_runnable_jobs', return_value=[])
def test_runnable_jobs_api(taskcluster_runnable_jobs, webapp, test_job):
RunnableJob.objects.create(
build_platform=test_job.build_platform,
machine_platform=test_job.machine_platform,

Просмотреть файл

@ -393,6 +393,7 @@ BUILDAPI_RUNNING_URL = "https://secure.pub.build.mozilla.org/builddata/buildjson
BUILDAPI_BUILDS4H_URL = "https://secure.pub.build.mozilla.org/builddata/buildjson/builds-4hr.js.gz"
ALLTHETHINGS_URL = "https://secure.pub.build.mozilla.org/builddata/reports/allthethings.json"
TASKCLUSTER_TASKGRAPH_URL = 'https://queue.taskcluster.net/v1/task/{task_id}/artifacts/public/full-task-graph.json'
TASKCLUSTER_INDEX_URL = 'https://index.taskcluster.net/v1/task/gecko.v2.%s.latest.firefox.decision'
# the amount of time we cache bug suggestion lookups (to speed up loading the bug
# suggestions or autoclassify panels for recently finished jobs)

Просмотреть файл

@ -7,6 +7,7 @@ from django.conf import settings
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator
from treeherder.config.settings import TASKCLUSTER_INDEX_URL
from treeherder.etl.buildbot import get_symbols_and_platforms
from treeherder.etl.common import fetch_json
from treeherder.model.models import (BuildPlatform,
@ -117,49 +118,49 @@ class RunnableJobsProcess(AllthethingsTransformerMixin):
self.update_runnable_jobs_table(jobs_per_branch)
def _taskcluster_runnable_jobs(project, decision_task_id):
    """
    Return a list of runnable-job dicts built from the Taskcluster full task graph.

    If decision_task_id is falsy, the latest Gecko decision task for *project*
    is looked up via the Taskcluster index, so callers no longer need to pass
    a task id explicitly.

    Returns [] when the task-graph URL fails validation.
    """
    ret = []
    tc_graph = {}
    if not decision_task_id:
        decision_task_id = _query_latest_gecko_decision_task_id(project)

    tc_graph_url = settings.TASKCLUSTER_TASKGRAPH_URL.format(task_id=decision_task_id)
    validate = URLValidator()
    try:
        validate(tc_graph_url)
        tc_graph = fetch_json(tc_graph_url)
    except ValidationError:
        logger.warning('Failed to validate {}'.format(tc_graph_url))
        return []

    for label, node in tc_graph.iteritems():
        if not ('extra' in node['task'] and 'treeherder' in node['task']['extra']):
            # some tasks don't have the treeherder information we need
            # to be able to display them (and are not intended to be
            # displayed). skip.
            continue

        treeherder_options = node['task']['extra']['treeherder']
        task_metadata = node['task']['metadata']
        # e.g. 'opt' or 'debug'; joined in case of multiple collection keys
        platform_option = ' '.join(treeherder_options.get('collection', {}).keys())

        ret.append({
            'build_platform': treeherder_options.get('machine', {}).get('platform', ''),
            'build_system_type': 'taskcluster',
            'job_group_name': treeherder_options.get('groupName', ''),
            'job_group_symbol': treeherder_options.get('groupSymbol', ''),
            'job_type_description': task_metadata['description'],
            'job_type_name': task_metadata['name'],
            'job_type_symbol': treeherder_options['symbol'],
            'platform': treeherder_options.get('machine', {}).get('platform', ''),
            'platform_option': platform_option,
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
            'job_coalesced_to_guid': None
        })

    return ret
def _buildbot_runnable_jobs(project):
@ -209,6 +210,15 @@ def _buildbot_runnable_jobs(project):
def list_runnable_jobs(project, decision_task_id=None):
    """
    Return every runnable job (buildbot + taskcluster) for *project*.

    The project name is passed down to the taskcluster lookup so that the
    correct repository's decision task is used (bug 1330652); previously
    only the decision task id was forwarded.
    """
    ret = _buildbot_runnable_jobs(project)
    ret = ret + _taskcluster_runnable_jobs(project, decision_task_id)

    return dict(meta={"repository": project, "offset": 0, "count": len(ret)}, results=ret)
def _query_latest_gecko_decision_task_id(project):
    """Fetch the id of the most recent Gecko decision task for *project*
    from the Taskcluster index."""
    index_url = TASKCLUSTER_INDEX_URL % project
    logger.info('Fetching {}'.format(index_url))
    task_id = fetch_json(index_url)['taskId']
    logger.info('For {} we found the task id: {}'.format(project, task_id))
    return task_id

Просмотреть файл

@ -1,5 +1,6 @@
import logging
from treeherder.etl.runnable_jobs import list_runnable_jobs
from treeherder.seta.common import unique_key
from treeherder.seta.models import JobPriority
from treeherder.seta.runnable_jobs import RunnableJobsClient
@ -21,7 +22,7 @@ class Treecodes:
# e.g. Ubuntu VM 12.04 x64 mozilla-inbound opt test web-platform-tests-4 OR
# test-linux64/opt-web-platform-tests-4
testtype = job_testtype(job)
if self._ignore(testtype):
if _ignore(testtype):
ignored_jobs.append(job['ref_data_name'])
continue
@ -40,16 +41,6 @@ class Treecodes:
for ref_data_name in sorted(ignored_jobs):
LOG.info('Ignoring {}'.format(ref_data_name))
def _ignore(self, testtype):
if not testtype:
return True
# XXX: This has the danger of falling out of date
# https://bugzilla.mozilla.org/show_bug.cgi?id=1325369
for i in ('dep', 'nightly', 'non-unified', 'valgrind', 'build'):
if testtype.find(i) != -1:
return True
def query_jobtypes(self):
"""Query all available jobtypes and return it as list"""
return self.jobtypes
@ -59,6 +50,17 @@ class Treecodes:
return self.jobnames
def _ignore(testtype):
if not testtype:
return True
# XXX: This has the danger of falling out of date
# https://bugzilla.mozilla.org/show_bug.cgi?id=1325369
for i in ('dep', 'nightly', 'non-unified', 'valgrind', 'build'):
if testtype.find(i) != -1:
return True
def job_testtype(job):
return parse_testtype(
build_system_type=job['build_system_type'],
@ -164,3 +166,36 @@ def job_priorities_to_jobtypes():
jobtypes.append(jp.unique_identifier())
return jobtypes
def ref_data_names(project, build_system):
    '''
    Return a map of unique_key -> ref_data_name for every runnable *test* job
    on the given project.

    We want all reference data names for every task that runs on a specific project.

    For example:
        * Buildbot - "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1"
        * TaskCluster = "test-linux64/opt-mochitest-webgl-e10s-1"
    '''
    ignored_jobs = []
    # Renamed from ref_data_names: the original local shadowed this function's
    # own name.
    names = {}

    runnable_jobs = list_runnable_jobs(project)['results']

    for job in runnable_jobs:
        testtype = job_testtype(job)  # e.g. web-platform-tests-4
        if _ignore(testtype):
            ignored_jobs.append(job['ref_data_name'])
            continue

        key = unique_key(testtype=testtype,
                         buildtype=job['platform_option'],
                         platform=job['platform'])
        # '*' matches every build system type; merged the two original
        # branches that stored the identical value.
        if build_system == '*' or job['build_system_type'] == build_system:
            names[key] = job['ref_data_name']

    for ref_data_name in sorted(ignored_jobs):
        LOG.info('Ignoring {}'.format(ref_data_name))

    return names

Просмотреть файл

@ -2,54 +2,24 @@ import datetime
import logging
from treeherder.config.settings import SETA_LOW_VALUE_PRIORITY
from treeherder.etl.seta import Treecodes
from treeherder.etl.seta import ref_data_names
from treeherder.model.models import Repository
from treeherder.seta.common import unique_key
from treeherder.seta.models import JobPriority
logger = logging.getLogger(__name__)
SETA_PROJECTS = ['mozilla-inbound', 'autoland']
class SetaError(Exception):
    """Raised for invalid SETA scheduling requests, e.g. an unsupported
    build_system_type or a project not handled by SETA (see _validate_request)."""
    pass
# XXX: We can get rid of this function if Treecodes took care of it
def _ref_data_names(build_system):
'''
Sample data from Treecodes().query_jobnames() (skipping irrelevant fields)
{
"buildplatform": "buildbot",
"buildtype": "debug",
"platform": "windows8-64",
"ref_data_name": "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1",
"testtype": "web-platform-tests-1",
},{
"buildplatform": "taskcluster",
"buildtype": "opt",
"platform": "linux64",
"ref_data_name": "test-linux64/opt-mochitest-webgl-e10s-1",
"testtype": "mochitest-webgl-e10s-1",
}
'''
ref_data_names = {}
for job in Treecodes().query_jobnames():
key = unique_key(testtype=job['testtype'],
buildtype=job['buildtype'],
platform=job['platform'])
if build_system == '*':
ref_data_names[key] = job['ref_data_name']
elif job['buildplatform'] == build_system:
ref_data_names[key] = job['ref_data_name']
return ref_data_names
def _process(build_system, job_priorities):
def _process(project, build_system, job_priorities):
'''Return list of ref_data_name for job_priorities'''
jobs = []
# This map contains the ref_data_name of every Treeherder job
ref_data_names = _ref_data_names(build_system)
# This map contains the ref_data_name of every Treeherder *test* job for this project
ref_data_names_map = ref_data_names(project, build_system)
for jp in job_priorities:
if build_system == 'taskcluster':
@ -60,9 +30,9 @@ def _process(build_system, job_priorities):
jp.testtype = jp.testtype.replace('gl-', 'webgl-')
key = jp.unique_identifier()
if key in ref_data_names:
if key in ref_data_names_map:
# e.g. desktop-test-linux64-pgo/opt-reftest-13 or builder name
jobs.append(ref_data_names[key])
jobs.append(ref_data_names_map[key])
else:
logger.warning('We did not find job priority ({}) in the list of accepted jobs'.format(jp))
@ -76,7 +46,7 @@ def _gecko_decision_task_request(project):
if jp.has_expired():
job_priorities.append(jp)
return _process(build_system='taskcluster', job_priorities=job_priorities)
return _process(project, build_system='taskcluster', job_priorities=job_priorities)
def _query_job_priorities(priority, excluded_build_system_type):
@ -91,6 +61,8 @@ def _query_job_priorities(priority, excluded_build_system_type):
def _validate_request(build_system_type, project):
    """Raise SetaError if the build system type or project is not one SETA supports."""
    # NOTE(review): '*' is also accepted here even though the error message
    # only mentions buildbot/taskcluster — message kept as-is.
    if build_system_type not in {'buildbot', 'taskcluster', '*'}:
        raise SetaError('Valid build_system_type values are buildbot or taskcluster.')
    if project not in SETA_PROJECTS:
        raise SetaError("The specified project repo '%s' is not supported by SETA." % project)
def seta_job_scheduling(project, build_system_type, priority=None):
@ -102,7 +74,7 @@ def seta_job_scheduling(project, build_system_type, priority=None):
if build_system_type != '*':
excluded_build_system_type = 'taskcluster' if build_system_type == 'buildbot' else 'buildbot'
job_priorities = _query_job_priorities(priority, excluded_build_system_type)
ref_data_names = _process(build_system_type, job_priorities)
ref_data_names = _process(project, build_system_type, job_priorities)
# We don't really need 'jobtypes' and today's date in the returning data
# Getting rid of it will require the consumers to not expect it.

Просмотреть файл

@ -1,5 +1,7 @@
# TODO: Get rid of this module (bug 1330649)
import logging
from treeherder.config.settings import TASKCLUSTER_INDEX_URL
from treeherder.etl.common import fetch_json
from treeherder.etl.runnable_jobs import list_runnable_jobs
@ -7,8 +9,7 @@ logger = logging.getLogger(__name__)
class RunnableJobsClient():
def __init__(self, treeherder_host='https://treeherder.mozilla.org',
tc_index_url='https://index.taskcluster.net/v1/task/gecko.v2.%s.latest.firefox.decision'):
def __init__(self, tc_index_url=TASKCLUSTER_INDEX_URL):
self.tc_index_url = tc_index_url
self.cache = {}