This commit is contained in:
mdoglio 2013-11-07 21:00:15 +00:00
Родитель 42c7be37d6
Коммит ab9a2e7268
11 изменённых файлов: 144 добавлений и 57 удалений

Просмотреть файл

@ -232,15 +232,20 @@ def result_set_stored(jm, initial_data, sample_resultset):
@pytest.fixture(scope='function') @pytest.fixture(scope='function')
def mock_get_resultset(monkeypatch, result_set_stored): def mock_get_resultset(monkeypatch, result_set_stored):
from treeherder.etl import common from treeherder.etl import common
def _get_resultset(project, revision): def _get_resultset(params):
return { for k in params:
rev = params[k][0]
params[k] = {
rev: {
'id': 1, 'id': 1,
'revision_hash': result_set_stored[0]['revision_hash'] 'revision_hash': result_set_stored[0]['revision_hash']
} }
monkeypatch.setattr(common, 'get_resultset', _get_resultset) }
return params
monkeypatch.setattr(common, 'lookup_revisions', _get_resultset)
@pytest.fixture() @pytest.fixture()
def refdata(): def refdata():

Просмотреть файл

@ -3,14 +3,13 @@ from django.conf import settings
def test_get_revision_hash(jm, initial_data, def test_get_revision_hash(jm, initial_data,
result_set_stored, mock_get_resultset): result_set_stored, mock_get_remote_content):
"""That the correct revision_hash is retrieved is the revision exists""" """That the correct revision_hash is retrieved is the revision exists"""
from treeherder.etl import common from treeherder.etl import common
project = settings.DATABASES["default"]["TEST_NAME"] project = result_set_stored[0]['revisions'][0]['repository']
revision = result_set_stored[0]['revisions'][0]['revision'] revision = result_set_stored[0]['revisions'][0]['revision']
resultset = common.get_resultset(project, revision) resultset = common.lookup_revisions({project: [revision]})
assert resultset[project][revision]['revision_hash'] == result_set_stored[0]['revision_hash']
assert resultset['revision_hash'] == result_set_stored[0]['revision_hash']
def test_get_revision_hash_none(jm, mock_get_remote_content, def test_get_revision_hash_none(jm, mock_get_remote_content,
@ -19,5 +18,5 @@ def test_get_revision_hash_none(jm, mock_get_remote_content,
from treeherder.etl import common from treeherder.etl import common
project = settings.DATABASES["default"]["TEST_NAME"] project = settings.DATABASES["default"]["TEST_NAME"]
revision = "fakerevision" revision = "fakerevision"
resultset = common.get_resultset(project, revision) resultset = common.lookup_revisions({project: [revision]})
assert resultset == None assert len(resultset) == 0

Просмотреть файл

@ -55,7 +55,7 @@ def test_ingest_all_sample_jobs(jm, refdata, sample_data, initial_data, sample_r
job_data = sample_data.job_data job_data = sample_data.job_data
test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset) test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)
def test_bad_date_value_ingestion(jm, initial_data): def test_bad_date_value_ingestion(jm, initial_data, mock_log_parser):
""" """
Test ingesting an blob with bad date value Test ingesting an blob with bad date value

Просмотреть файл

@ -110,7 +110,7 @@ def test_process_objects_unknown_error(jm):
assert row_id == 0 assert row_id == 0
assert response == exp_resp assert response == exp_resp
def test_ingest_sample_data(jm, sample_data, sample_resultset): def test_ingest_sample_data(jm, sample_data, sample_resultset, mock_log_parser):
"""Process all job structures in the job_data.txt file""" """Process all job structures in the job_data.txt file"""
resultset_count = len(sample_resultset) resultset_count = len(sample_resultset)

Просмотреть файл

@ -1,5 +1,5 @@
import logging import logging
from collections import defaultdict
from django.conf import settings from django.conf import settings
from . import buildbot from . import buildbot
@ -44,19 +44,27 @@ class Builds4hTransformerMixin(object):
our restful api our restful api
""" """
job_list = [] job_list = []
revisions = defaultdict(list)
for build in data['builds']:
prop = build['properties']
prop['revision'] = prop.get('revision',
prop.get('got_revision',
prop.get('sourcestamp', None)))
if not prop['revision']:
continue
revisions[prop['branch']].append(prop['revision'][0:12])
revisions_lookup = common.lookup_revisions(revisions)
for build in data['builds']: for build in data['builds']:
prop = build['properties'] prop = build['properties']
revision = prop.get('revision',
prop.get('got_revision',
prop.get('sourcestamp', None)))
if not revision:
continue
try: if not prop['revision'] in revisions_lookup[prop['branch']]:
resultset = common.get_resultset(prop['branch'], revision[0:12]) continue
except Exception: # the resultset object is in the nested dict
resultset = None resultset = revisions_lookup[prop['branch']][prop['revision']]
if not resultset: if not resultset:
continue continue
@ -133,14 +141,29 @@ class PendingTransformerMixin(object):
job_list = [] job_list = []
projects = set(x.project for x in Datasource.objects.cached()) projects = set(x.project for x in Datasource.objects.cached())
revision_dict = defaultdict(list)
# loop to catch all the revisions
for project, revisions in data['pending'].items(): for project, revisions in data['pending'].items():
# this skips those projects we don't care about
if not project in projects: if not project in projects:
continue continue
for rev, jobs in revisions.items(): for rev, jobs in revisions.items():
resultset = common.get_resultset(project, rev) revision_dict[project].append(rev)
if not resultset:
continue # retrieving the revision->resultset lookups
for job in jobs: revisions_lookup = common.lookup_revisions(revision_dict)
for project, revisions in revisions_lookup.items():
for revision in revisions:
resultset = revisions[revision]
# using project and revision form the revision lookups
# to filter those jobs with unmatched revision
for job in data['pending'][project][revision]:
treeherder_data = { treeherder_data = {
'revision_hash': resultset['revision_hash'], 'revision_hash': resultset['revision_hash'],
'resultset_id': resultset['id'], 'resultset_id': resultset['id'],
@ -190,15 +213,28 @@ class RunningTransformerMixin(object):
""" """
job_list = [] job_list = []
projects = set(x.project for x in Datasource.objects.cached()) projects = set(x.project for x in Datasource.objects.cached())
revisions = defaultdict(list)
# loop to catch all the revisions
for project, revisions in data['running'].items(): for project, revisions in data['running'].items():
# this skips those projects we don't care about
if not project in projects: if not project in projects:
continue continue
for rev, jobs in revisions.items():
resultset = common.get_resultset(project, rev)
if not resultset:
continue
for job in jobs: for rev, jobs in revisions.items():
revisions[project].append(rev)
# retrieving the revision->resultset lookups
revisions_lookup = common.lookup_revisions(revisions)
for project, revisions in revisions_lookup.items():
for revision in revisions:
resultset = revisions[revision]
# using project and revision form the revision lookups
# to filter those jobs with unmatched revision
for job in data['running'][project][revision]:
treeherder_data = { treeherder_data = {
'revision_hash': resultset['revision_hash'], 'revision_hash': resultset['revision_hash'],
'resultset_id': resultset['id'], 'resultset_id': resultset['id'],

Просмотреть файл

@ -1,3 +1,4 @@
from collections import defaultdict
import hashlib import hashlib
import urllib2 import urllib2
import simplejson as json import simplejson as json
@ -76,28 +77,25 @@ def get_remote_content(url):
return content return content
def lookup_revisions(revision_dict):
    """
    Retrieve revision->resultset lookups for a set of projects.

    ``revision_dict`` maps a project name to an iterable of revision
    strings.  For every project with at least one revision, a single
    request is made to that project's ``revision-lookup-list`` endpoint,
    passing the revisions as a comma separated ``revision`` query
    parameter.

    Returns a dict mapping each project to the content returned by the
    endpoint.  Projects with no revisions, or for which the endpoint
    returned nothing, are left out of the result.
    """
    lookup = dict()
    for project, revisions in revision_dict.items():
        # de-duplicate so each revision appears once in the query string
        revision_set = set(revisions)
        if not revision_set:
            # the endpoint requires a non-empty ``revision`` parameter,
            # so there is nothing useful to ask for
            continue

        endpoint = reverse('revision-lookup-list', kwargs={"project": project})
        # build the query string as a comma separated list of revisions;
        # sorted so the same input always yields the same url
        q = ','.join(sorted(revision_set))
        url = "{0}/{1}/?revision={2}".format(
            settings.API_HOSTNAME.strip('/'),
            endpoint.strip('/'),
            q
        )

        content = get_remote_content(url)
        if content:
            # reuse the response already fetched instead of requesting
            # the same url a second time
            lookup[project] = content
    return lookup
def generate_revision_hash(revisions): def generate_revision_hash(revisions):

Просмотреть файл

@ -357,9 +357,12 @@ class TreeherderPulseDataAdapter(PulseDataAdapter, ObjectstoreLoaderMixin):
def adapt_data(self, data): def adapt_data(self, data):
"""Adapts the PulseDataAdapter into the treeherder input data structure""" """Adapts the PulseDataAdapter into the treeherder input data structure"""
resultset = common.get_resultset(data['branch'], data['revision']) resultset = common.lookup_revisions({data['branch']: [data['revision']]})
if not resultset:
return {}
treeherder_data = resultset or {} del resultset[data['branch']][data['revision']]['id']
treeherder_data = resultset[data['branch']][data['revision']]
treeherder_data['project'] = data['branch'] treeherder_data['project'] = data['branch']
#### ####
#TODO: This is a temporary fix, this data will not be located #TODO: This is a temporary fix, this data will not be located
@ -378,7 +381,7 @@ class TreeherderPulseDataAdapter(PulseDataAdapter, ObjectstoreLoaderMixin):
#modified #modified
request_id, data['request_times'][unicode(request_id)] request_id, data['request_times'][unicode(request_id)]
), ),
'revision_hash': resultset['revision_hash'], 'revision_hash': treeherder_data.pop('revision_hash'),
'name': data['test_name'], 'name': data['test_name'],
'product_name': data['product'], 'product_name': data['product'],
'state': 'completed', 'state': 'completed',

Просмотреть файл

@ -300,6 +300,26 @@ class JobsModel(TreeherderModelBase):
return self.as_list(return_list, "result_set", **kwargs) return self.as_list(return_list, "result_set", **kwargs)
def get_revision_resultset_lookup(self, revision_list):
    """
    Create a revision->resultset lookup from a collection of revisions.

    Runs the ``jobs.selects.get_result_set_list`` proc with an extra
    ``revision IN (...)`` filter and returns whatever the datahub
    produces for ``return_type="dict"`` keyed on the ``revision`` column.

    ``revision_list`` may be any iterable of revision strings.  When it
    is empty an empty dict is returned without touching the database,
    because an empty ``IN ()`` clause is rejected as a syntax error by
    MySQL and most other SQL dialects.
    """
    # materialise once so tuples, sets and generators are accepted too
    revision_list = list(revision_list)
    if not revision_list:
        return {}

    # one "%s" placeholder per revision inside the IN (...) clause
    replacement = ",".join(["%s"] * len(revision_list))
    replacement = " AND revision IN (" + replacement + ") "

    proc = "jobs.selects.get_result_set_list"
    lookups = self.get_jobs_dhub().execute(
        proc=proc,
        # NOTE(review): the trailing pair is presumably the proc's
        # offset/limit placeholders -- confirm against the proc's SQL
        placeholders=revision_list + [0, len(revision_list)],
        debug_show=self.DEBUG,
        replace=[replacement],
        return_type="dict",
        key_column="revision"
    )
    return lookups
def get_result_set_details(self, result_set_ids): def get_result_set_details(self, result_set_ids):
""" """
Retrieve all revisions associated with a set of ``result_set`` (also known as ``pushes``) Retrieve all revisions associated with a set of ``result_set`` (also known as ``pushes``)

Просмотреть файл

@ -239,7 +239,9 @@
"sql":"SELECT DISTINCT "sql":"SELECT DISTINCT
rs.id, rs.id,
rs.revision_hash, rs.revision_hash,
rs.push_timestamp rs.push_timestamp,
revision.id as revision_id,
revision.revision
FROM result_set AS rs FROM result_set AS rs
INNER JOIN revision_map INNER JOIN revision_map
ON rs.id = revision_map.result_set_id ON rs.id = revision_map.result_set_id

Просмотреть файл

@ -34,6 +34,13 @@ project_bound_router.register(
base_name='note', base_name='note',
) )
# Expose RevisionLookupSetViewSet on the project-bound router under the
# 'revision-lookup' url prefix / base name.
project_bound_router.register(
r'revision-lookup',
views.RevisionLookupSetViewSet,
base_name='revision-lookup',
)
# this is the default router for plain restful endpoints # this is the default router for plain restful endpoints
# refdata endpoints: # refdata endpoints:

Просмотреть файл

@ -6,8 +6,9 @@ from rest_framework import viewsets
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.decorators import action from rest_framework.decorators import action
from rest_framework.reverse import reverse from rest_framework.reverse import reverse
from treeherder.model import models from rest_framework.exceptions import ParseError
from treeherder.model import models
from treeherder.model.derived import (JobsModel, DatasetNotFoundError, from treeherder.model.derived import (JobsModel, DatasetNotFoundError,
ObjectNotFoundException) ObjectNotFoundException)
@ -364,6 +365,22 @@ class ResultSetViewSet(viewsets.ViewSet):
return Response({"message": "well-formed JSON stored"}) return Response({"message": "well-formed JSON stored"})
class RevisionLookupSetViewSet(viewsets.ViewSet):
    """
    Endpoint returning revision->resultset lookups for a project.
    """

    @with_jobs
    def list(self, request, project, jm):
        """
        Look up the resultsets for a comma separated list of revisions.

        The ``revision`` query parameter is mandatory and must contain
        at least one non-blank revision; otherwise a ``ParseError`` is
        raised.  The response body is the value returned by
        ``jm.get_revision_resultset_lookup``.

        NOTE(review): ``jm`` is presumably supplied by the ``with_jobs``
        decorator for the given ``project`` -- confirm against its
        definition.
        """
        revision_filter = request.QUERY_PARAMS.get('revision', None)

        # drop blank entries produced by stray commas or whitespace
        # (e.g. "abc," or ",") so they never reach the SQL lookup
        revision_list = [
            rev.strip()
            for rev in (revision_filter or "").split(",")
            if rev.strip()
        ]
        if not revision_list:
            raise ParseError(detail="The revision parameter is mandatory for this endpoint")

        return Response(jm.get_revision_resultset_lookup(revision_list))
##################### #####################
# Refdata ViewSets # Refdata ViewSets
##################### #####################