Bug 1097090 - jobs endpoint refactoring

Main changes:
- removed the full parameter on the jobs endpoint, since in both cases the data returned had a similar shape/size but a slightly different set of attributes.
- removed the exclusion_state parameter in favour of exclusion_profile. The latter lets the caller specify which profile to apply to the jobs; by default the default profile is used, and exclusion can be disabled with exclusion_profile=false
- the data is now returned as a flat list instead of a triple-nested structure. As a result the jobs endpoint returns data much faster and makes basic operations like filtering, sorting, and pagination easy. It will also make it possible to implement attribute selection with minimal effort.
- removed the debug parameter in favour of a more explicit return_type (dict|list) that allows a consumer to specify the type of structure expected for the results (dict by default)
- the resultset endpoint doesn't return jobs anymore but only resultsets.
This commit is contained in:
mdoglio 2015-01-12 17:23:28 +00:00
Родитель 4064b07b70
Коммит 119407ad8e
7 изменённых файлов: 83 добавлений и 452 удалений

Просмотреть файл

@ -14,9 +14,9 @@ def test_pending_job_available(jm, initial_data, pending_jobs_stored):
)
jobs = resp.json
assert len(jobs) ==1
assert len(jobs['results']) ==1
assert jobs[0]['state'] == 'pending'
assert jobs['results'][0]['state'] == 'pending'
def test_running_job_available(jm, initial_data, running_jobs_stored):
@ -26,9 +26,9 @@ def test_running_job_available(jm, initial_data, running_jobs_stored):
)
jobs = resp.json
assert len(jobs) ==1
assert len(jobs['results']) == 1
assert jobs[0]['state'] == 'running'
assert jobs['results'][0]['state'] == 'running'
def test_completed_job_available(jm, initial_data, completed_jobs_loaded):
@ -38,8 +38,8 @@ def test_completed_job_available(jm, initial_data, completed_jobs_loaded):
)
jobs = resp.json
assert len(jobs) == 1
assert jobs[0]['state'] == 'completed'
assert len(jobs['results']) == 1
assert jobs['results'][0]['state'] == 'completed'
def test_pending_stored_to_running_loaded(jm, initial_data, pending_jobs_stored, running_jobs_stored):
@ -55,8 +55,8 @@ def test_pending_stored_to_running_loaded(jm, initial_data, pending_jobs_stored,
)
jobs = resp.json
assert len(jobs) == 1
assert jobs[0]['state'] == 'running'
assert len(jobs['results']) == 1
assert jobs['results'][0]['state'] == 'running'
def test_finished_job_to_running(jm, initial_data, completed_jobs_loaded, running_jobs_stored):
@ -69,8 +69,8 @@ def test_finished_job_to_running(jm, initial_data, completed_jobs_loaded, runnin
)
jobs = resp.json
assert len(jobs) == 1
assert jobs[0]['state'] == 'completed'
assert len(jobs['results']) == 1
assert jobs['results'][0]['state'] == 'completed'
def test_running_job_to_pending(jm, initial_data, running_jobs_stored, pending_jobs_stored):
@ -84,5 +84,5 @@ def test_running_job_to_pending(jm, initial_data, running_jobs_stored, pending_j
)
jobs = resp.json
assert len(jobs) == 1
assert jobs[0]['state'] == 'running'
assert len(jobs['results']) == 1
assert jobs['results'][0]['state'] == 'running'

Просмотреть файл

@ -15,9 +15,10 @@ def test_job_list(webapp, eleven_jobs_processed, jm):
kwargs={"project": jm.project})
)
assert resp.status_int == 200
assert isinstance(resp.json, list)
assert len(resp.json) == 10
jobs = resp.json
response_dict = resp.json
jobs = response_dict["results"]
assert isinstance(jobs, list)
assert len(jobs) == 10
exp_keys = [
"submit_timestamp",
"start_timestamp",
@ -75,8 +76,9 @@ def test_job_list_equals_filter(webapp, eleven_jobs_processed, jm):
kwargs={"project": jm.project})
final_url = url + "?job_guid=f1c75261017c7c5ce3000931dce4c442fe0a1297"
resp = webapp.get(final_url)
assert len(resp.json) == 1
resp = webapp.get(final_url).json
assert len(resp['results']) == 1
def test_job_list_in_filter(webapp, eleven_jobs_processed, jm):
@ -89,9 +91,8 @@ def test_job_list_in_filter(webapp, eleven_jobs_processed, jm):
"f1c75261017c7c5ce3000931dce4c442fe0a1297,"
"9abb6f7d54a49d763c584926377f09835c5e1a32")
resp = webapp.get(final_url)
assert len(resp.json) == 2
resp = webapp.get(final_url).json
assert len(resp['results']) == 2
def test_job_detail(webapp, eleven_jobs_processed, sample_artifacts, jm):

Просмотреть файл

@ -10,14 +10,13 @@ from tests import test_utils
from treeherder.webapp.api import utils
def test_resultset_list_full_true(webapp, eleven_jobs_processed, jm):
def test_resultset_list(webapp, eleven_jobs_processed, jm):
"""
test retrieving a list of ten json blobs from the jobs-list
endpoint. Defaults to returning revisions.
endpoint. ``full`` set to false, so it doesn't return revisions.
"""
resp = webapp.get(
reverse("resultset-list", kwargs={"project": jm.project})
)
reverse("resultset-list", kwargs={"project": jm.project}))
results = resp.json['results']
meta = resp.json['meta']
@ -29,7 +28,6 @@ def test_resultset_list_full_true(webapp, eleven_jobs_processed, jm):
exp_keys = set([
u'id',
u'repository_id',
u'push_timestamp',
u'author',
u'comments',
u'revision_hash',
@ -37,8 +35,7 @@ def test_resultset_list_full_true(webapp, eleven_jobs_processed, jm):
u'revisions',
u'revision_count',
u'revisions_uri',
u'job_counts',
u'platforms'
u'push_timestamp',
])
for rs in results:
assert set(rs.keys()) == exp_keys
@ -51,46 +48,6 @@ def test_resultset_list_full_true(webapp, eleven_jobs_processed, jm):
})
def test_resultset_list_full_false(webapp, eleven_jobs_processed, jm):
"""
test retrieving a list of ten json blobs from the jobs-list
endpoint. ``full`` set to false, so it doesn't return revisions.
"""
resp = webapp.get(
reverse("resultset-list", kwargs={"project": jm.project}),
{"full": False, "debug": True}
)
results = resp.json['results']
meta = resp.json['meta']
assert resp.status_int == 200
assert isinstance(results, list)
assert len(results) == 10
exp_keys = set([
u'id',
u'repository_id',
u'push_timestamp',
u'author',
u'revision_hash',
u'revision',
u'revision_count',
u'revisions_uri',
u'job_counts',
u'platforms'
])
for rs in results:
assert set(rs.keys()) == exp_keys
assert(meta == {
u'count': 10,
u'filter_params': {u'full': u'False', u'debug': u'True'},
u'repository':
u'test_treeherder'
})
def test_resultset_list_bad_project(webapp, jm):
"""
test retrieving a list of ten json blobs from the jobs-list

Просмотреть файл

@ -246,7 +246,8 @@ class JobsModel(TreeherderModelBase):
)
return data
def get_job_list(self, offset, limit, full=True, conditions=None):
def get_job_list(self, offset, limit,
conditions=None, exclusion_profile=None):
"""
Retrieve a list of jobs. It's mainly used by the restful api to list
the jobs. The conditions parameter is a dict containing a set of
@ -261,14 +262,32 @@ class JobsModel(TreeherderModelBase):
conditions, self.INDEXED_COLUMNS['job']
)
repl = [self.refdata_model.get_db_name(), replace_str]
if full:
proc = "jobs.selects.get_job_list_full"
if exclusion_profile:
try:
if exclusion_profile is "default":
profile = ExclusionProfile.objects.get(
is_default=True
)
else:
proc = "jobs.selects.get_job_list"
profile = ExclusionProfile.objects.get(
name=exclusion_profile
)
signatures = profile.flat_exclusion[self.project]
replace_str += " AND j.signature NOT IN ({0})".format(
",".join(["%s"] * len(signatures))
)
placeholders += signatures
except KeyError:
# this repo/project has no hidden signatures
pass
except ExclusionProfile.DoesNotExist:
# Either there's no default profile setup or the profile
# specified is not available
pass
repl = [self.refdata_model.get_db_name(), replace_str]
data = self.jobs_execute(
proc=proc,
proc="jobs.selects.get_job_list",
replace=repl,
placeholders=placeholders,
limit="{0},{1}".format(offset, limit),
@ -1084,72 +1103,6 @@ into chunks of chunk_size size. Returns the number of result sets deleted"""
return aggregate_details
def get_result_set_job_list(self,
result_set_ids,
full=True,
exclusion_state='included',
**kwargs):
"""
Retrieve a list of ``jobs`` and results for a result_set.
Mainly used by the restful api to list the job results in the UI
"""
if not result_set_ids:
# No result sets provided
return {}
repl = [self.refdata_model.get_db_name()]
# Generate a list of result_set_ids
id_placeholders = []
for data in result_set_ids:
id_placeholders.append('%s')
repl.append(','.join(id_placeholders))
# filter by job_type if specified
if "job_type_name" in kwargs:
repl.append(" AND jt.`name` = '{0}'".format(kwargs["job_type_name"]))
if exclusion_state != 'all':
def_excl = ExclusionProfile.objects.filter(is_default=True)
if len(def_excl):
try:
signatures = def_excl[0].flat_exclusion[self.project]
# NOT IN if it's 'included' so we don't see excluded jobs
# just IN if it's 'excluded' so we ONLY see excluded jobs
negation = "NOT " if exclusion_state == 'included' else ''
repl.append(" AND j.signature {0}IN ('{1}')".format(
negation,
"', '".join(signatures)
))
except KeyError:
# this repo/project has no hidden signatures
pass
if full:
proc = "jobs.selects.get_result_set_job_list_full"
else:
proc = "jobs.selects.get_result_set_job_list"
data = self.jobs_execute(
proc=proc,
placeholders=result_set_ids,
debug_show=self.DEBUG,
replace=repl,
)
signatures = set()
for job in data:
signatures.add(job['signature'])
reference_signature_names = self.refdata_model.get_reference_data_signature_names(
list(signatures))
return {
'job_list':data,
'reference_signature_names':reference_signature_names
}
def get_push_timestamp_lookup(self, result_set_ids):
"""Get the push timestamp for a list of result_set."""

Просмотреть файл

@ -564,48 +564,6 @@
"host_type":"read_host"
},
"get_job_list":{
"sql":"SELECT
j.id,
j.`option_collection_hash`,
j.job_coalesced_to_guid,
j.failure_classification_id,
m.`name` as machine_name,
d.`name` as device_name,
mp.`platform` as platform,
jt.`name` as job_type_name,
jt.`symbol` as job_type_symbol,
jg.`name` as job_group_name,
jg.`symbol` as job_group_symbol,
j.`result_set_id`,
j.`result`,
j.`state`,
rs.push_timestamp as push_timestamp,
j.`pending_eta`,
j.`running_eta`,
j.`last_modified`
FROM `job` as j
LEFT JOIN `REP0`.`machine` as m
ON j.`machine_id` = m.id
LEFT JOIN `REP0`.`machine_platform` as mp
ON j.`machine_platform_id` = mp.id
LEFT JOIN `REP0`.`job_type` as jt
ON j.`job_type_id` = jt.id
LEFT JOIN `REP0`.`job_group` as jg
ON jt.`job_group_id` = jg.id
LEFT JOIN result_set rs
ON rs.id = j.result_set_id
LEFT JOIN `REP0`.`device` as d
ON j.device_id = d.id
WHERE 1
REP1
ORDER BY
rs.push_timestamp DESC,
mp.platform ASC
",
"host_type":"read_host"
},
"get_job_list_full":{
"sql":"SELECT
j.id,
j.`job_guid`,

Просмотреть файл

@ -54,22 +54,36 @@ class JobsViewSet(viewsets.ViewSet):
def list(self, request, project, jm):
"""
GET method implementation for list view
Optional parameters (default):
- offset (0)
- count (10)
- return_type (dict)
"""
filter = UrlQueryFilter(request.QUERY_PARAMS)
offset = filter.pop("offset", 0)
count = min(int(filter.pop("count", 10)), 1000)
count = min(int(filter.pop("count", 10)), 2000)
return_type = filter.pop("return_type", "dict").lower()
exclusion_profile = filter.pop("exclusion_profile", "default")
if exclusion_profile in ('false', 'null'):
exclusion_profile = None
results = jm.get_job_list(offset, count, conditions=filter.conditions,
exclusion_profile=exclusion_profile)
full = filter.pop('full', 'true').lower() == 'true'
objs = jm.get_job_list(offset, count, full, filter.conditions)
if objs:
if results:
option_collections = jm.refdata_model.get_all_option_collections()
for job in objs:
for job in results:
job["platform_option"] = get_option(job, option_collections)
return Response(objs)
response_body = dict(meta={"repository": project}, results=[])
if results and return_type == "list":
response_body["job_property_names"] = results[0].keys()
results = [job.values() for job in results]
response_body["results"] = results
response_body["meta"].update(offset=offset, count=count)
return Response(response_body)
@action(permission_classes=[IsAuthenticated])
@with_jobs
@ -125,4 +139,3 @@ class JobsViewSet(viewsets.ViewSet):
jm.load_job_data(request.DATA)
return Response({'message': 'Job successfully updated'})

Просмотреть файл

@ -1,10 +1,6 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
import itertools
import re
from rest_framework import viewsets
from rest_framework.response import Response
from rest_framework.decorators import link, action
@ -12,59 +8,8 @@ from rest_framework.reverse import reverse
from rest_framework.permissions import IsAuthenticated
from treeherder.model.derived import DatasetNotFoundError
from treeherder.webapp.api.utils import (UrlQueryFilter, with_jobs,
oauth_required, get_option,
to_timestamp, get_job_value_list,
JOB_PROPERTY_RETURN_KEY)
PLATFORM_ORDER = {
"linux32": 0,
"linux64": 1,
"osx-10-6": 2,
"osx-10-8": 3,
"osx-10-10": 4,
"windowsxp": 5,
"windows7-32": 6,
"windows8-32": 7,
"windows8-64": 8,
"windows2012-64": 9,
"android-2-2-armv6": 10,
"android-2-2": 11,
"android-2-3-armv6": 12,
"android-2-3": 13,
"android-2-3-armv7-api9": 14,
"android-4-0": 15,
"android-4-0-armv7-api10": 16,
"android-4-0-armv7-api11": 17,
"android-4-2-x86": 18,
"android-4-2": 19,
"android-4-2-armv7-api11": 20,
"android-4-3": 21,
"android-4-3-armv7-api11": 22,
"android-4-4": 23,
"android-4-4-armv7-api11": 24,
"android-5-0-armv7-api11": 25,
"b2g-linux32": 26,
"b2g-linux64": 27,
"b2g-osx": 28,
"b2g-win32": 29,
"b2g-emu-ics": 30,
"b2g-emu-jb": 31,
"b2g-emu-kk": 32,
"b2g-device-image" : 33,
"mulet-linux32" : 34,
"mulet-linux64" : 35,
"mulet-osx": 36,
"mulet-win32": 37,
"other": 38
}
OPT_ORDER = {
"opt": 0,
"pgo": 1,
"asan": 2,
"debug": 3,
"cc": 4,
}
oauth_required,
to_timestamp)
class ResultSetViewSet(viewsets.ViewSet):
"""
@ -122,39 +67,26 @@ class ResultSetViewSet(viewsets.ViewSet):
count = min(int(filter.pop("count", 10)), 1000)
full = filter.pop('full', 'true').lower() == 'true'
with_jobs = filter.pop('with_jobs', 'true').lower() == 'true'
debug = request.QUERY_PARAMS.get('debug', None)
objs = jm.get_result_set_list(
results = jm.get_result_set_list(
offset_id,
count,
full,
filter.conditions
)
if with_jobs:
results = self.get_resultsets_with_jobs(
jm, objs, full, {}, debug)
else:
for rs in objs:
for rs in results:
rs["revisions_uri"] = reverse("resultset-revisions",
kwargs={"project": jm.project, "pk": rs["id"]})
results = objs
meta['count'] = len(results)
meta['repository'] = project
resp = {
'meta': meta,
'results': results,
'results': results
}
if with_jobs:
resp['job_property_names'] = JOB_PROPERTY_RETURN_KEY
return Response(resp)
@with_jobs
@ -166,11 +98,9 @@ class ResultSetViewSet(viewsets.ViewSet):
full = filter.pop('full', 'true').lower() == 'true'
objs = jm.get_result_set_list(0, 1, full, filter.conditions)
if objs:
debug = request.QUERY_PARAMS.get('debug', None)
rs = self.get_resultsets_with_jobs(jm, objs, full, {}, debug)
return Response(rs[0])
result_set_list = jm.get_result_set_list(0, 1, full, filter.conditions)
if result_set_list:
return Response(result_set_list[0])
else:
return Response("No resultset with id: {0}".format(pk), 404)
@ -183,187 +113,6 @@ class ResultSetViewSet(viewsets.ViewSet):
objs = jm.get_resultset_revisions_list(pk)
return Response(objs)
@link()
@with_jobs
def get_resultset_jobs(self, request, project, jm, pk=None):
result_set_ids = request.QUERY_PARAMS.getlist('result_set_ids') or []
exclusion_state = request.QUERY_PARAMS.get('exclusion_state', 'included')
# default to only included, if an invalid value passed in
if exclusion_state not in jm.EXCLUSION_STATES:
exclusion_state = 'included'
debug = request.QUERY_PARAMS.get('debug', None)
filter_params = request.QUERY_PARAMS.copy()
if 'result_set_ids' in filter_params:
del filter_params['result_set_ids']
if 'exclusion_state' in filter_params:
del filter_params['exclusion_state']
# adapt the result_set_ids to the get_result_set_list
# return structure
objs = []
map(lambda r_id:objs.append({'id':int(r_id)}), result_set_ids)
results = self.get_resultsets_with_jobs(
jm, objs, True, filter_params, debug, exclusion_state, 'id')
meta = {}
meta['count'] = len(results)
meta['repository'] = project
return Response({
'meta': meta,
'results': results,
'job_property_names': JOB_PROPERTY_RETURN_KEY
})
@staticmethod
def get_resultsets_with_jobs(
jm, rs_list, full, filter_kwargs, debug, exclusion_state='included',
sort_key='push_timestamp'):
"""Convert db result of resultsets in a list to JSON"""
rs_map = {}
for rs in rs_list:
rs_map[rs["id"]] = rs
# all rs should have the revisions_uri, so add it here
rs["revisions_uri"] = reverse("resultset-revisions",
kwargs={"project": jm.project, "pk": rs["id"]})
job_list = jm.get_result_set_job_list(
rs_map.keys(),
full,
exclusion_state,
**filter_kwargs
)
jobs_ungrouped = job_list.get('job_list', [])
reference_signature_names = job_list.get('reference_signature_names', {})
option_collections = jm.refdata_model.get_all_option_collections()
rs_grouper = lambda rsg: rsg["result_set_id"]
# the main grouper for a result set is the combination of
# platform and options
platform_grouper = lambda pg: (
PLATFORM_ORDER.get(pg["platform"], 100),
OPT_ORDER.get(get_option(pg, option_collections), 100)
)
def get_sortable_job_symbol(symbol):
"""
Sort jobs by symbol.
Symbol could be something like 1, 2 or 3. Or A, B, C or R1, R2, R10.
So this will pad the numeric portion with 0s like R001, R010, etc.
"""
newsymbol = symbol
if symbol.isdigit():
newsymbol = symbol.zfill(3)
else:
x = re.split(r'(\d+)', symbol)
newsymbol = x[0]
if len(x) > 1:
newsymbol += x[1].zfill(3)
return newsymbol
job_group_grouper = lambda jgg: jgg["job_group_symbol"]
job_type_grouper = lambda jtg: (get_sortable_job_symbol(jtg['job_type_symbol']))
rs_sorted = sorted(jobs_ungrouped, key=rs_grouper)
resultsets = []
for rs_id, resultset_group in itertools.groupby(rs_sorted, key=rs_grouper):
resultset = rs_map[rs_id]
resultsets.append(resultset)
# we found jobs for this resultset, so remove it from the map
# now that it's in the ``resultsets`` list.
# after we are done with all these jobs, whatever is in the map are
# resultsets with no jobs yet, which we add back in to the list
# of resultsets to be returned.
del(rs_map[rs_id])
job_counts = dict.fromkeys(
jm.RESULTS + jm.INCOMPLETE_STATES + ["total"], 0)
#itertools needs the elements to be sorted by the grouper
by_platform = sorted(list(resultset_group), key=platform_grouper)
platforms = []
for platform_group_name, platform_group in itertools.groupby(
by_platform,
key=platform_grouper):
by_job_group = sorted(list(platform_group), key=job_group_grouper)
platform_name = by_job_group[0]["platform"]
platform_option = option_collections[
by_job_group[0]["option_collection_hash"]]['opt']
groups = []
for jg_symbol, jg_group in itertools.groupby(
by_job_group,
job_group_grouper):
by_job_type = sorted(list(jg_group), key=job_type_grouper)
job_list = []
groups.append({
"symbol": jg_symbol,
"name": by_job_type[0]["job_group_name"],
"jobs": job_list
})
# build the uri ref for each job
for job in by_job_type:
job_list.append(
get_job_value_list(
job, reference_signature_names,
platform_option, jm.project, debug
)
)
if job["state"] == "completed":
job_counts[job["result"]] += 1
else:
job_counts[job["state"]] += 1
job_counts["total"] += 1
platforms.append({
"name": platform_name,
"option": platform_option,
"groups": groups,
})
#the unique set of results that are contained in this resultset
#can be used to determine the resultset's severity
resultset.update({
"platforms": platforms,
"job_counts": job_counts,
})
# the resultsets left in the map have no jobs, so fill in the fields
# with blanks that WOULD otherwise have been filled.
for rs in rs_map.values():
rs.update({
"platforms": [],
"job_counts": dict.fromkeys(
jm.RESULTS + jm.INCOMPLETE_STATES + ["total"], 0),
})
resultsets.append(rs)
return sorted(
resultsets,
key=lambda x: x[sort_key],
reverse=True)
@action(permission_classes=[IsAuthenticated])
@with_jobs
def cancel_all(self, request, project, jm, pk=None):