Mirror of https://github.com/mozilla/treeherder.git

Commit: optimized revisions lookups

Parent: 42c7be37d6
Commit: ab9a2e7268
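Summary (inferred from the diff below): the per-revision helper common.get_resultset() is replaced by a batched common.lookup_revisions() that takes a {project: [revision, ...]} dict, issues one query per project against a new revision-lookup endpoint, and returns a nested project -> revision -> resultset mapping. The endpoint is served by a new RevisionLookupSetViewSet backed by JobsModel.get_revision_resultset_lookup(), and the ETL transformers are updated to collect revisions first and look them up in bulk.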
@@ -232,15 +232,20 @@ def result_set_stored(jm, initial_data, sample_resultset):
 @pytest.fixture(scope='function')
 def mock_get_resultset(monkeypatch, result_set_stored):

     from treeherder.etl import common

-    def _get_resultset(project, revision):
-        return {
-            'id': 1,
-            'revision_hash': result_set_stored[0]['revision_hash']
-        }
-    monkeypatch.setattr(common, 'get_resultset', _get_resultset)
+    def _get_resultset(params):
+        for k in params:
+            rev = params[k][0]
+            params[k] = {
+                rev: {
+                    'id': 1,
+                    'revision_hash': result_set_stored[0]['revision_hash']
+                }
+            }
+        return params
+
+    monkeypatch.setattr(common, 'lookup_revisions', _get_resultset)


 @pytest.fixture()
 def refdata():
@@ -3,14 +3,13 @@ from django.conf import settings


 def test_get_revision_hash(jm, initial_data,
-                           result_set_stored, mock_get_resultset):
+                           result_set_stored, mock_get_remote_content):
     """That the correct revision_hash is retrieved is the revision exists"""
     from treeherder.etl import common
-    project = settings.DATABASES["default"]["TEST_NAME"]
+    project = result_set_stored[0]['revisions'][0]['repository']
     revision = result_set_stored[0]['revisions'][0]['revision']
-    resultset = common.get_resultset(project, revision)
-    assert resultset['revision_hash'] == result_set_stored[0]['revision_hash']
+    resultset = common.lookup_revisions({project: [revision]})
+    assert resultset[project][revision]['revision_hash'] == result_set_stored[0]['revision_hash']


 def test_get_revision_hash_none(jm, mock_get_remote_content,
@@ -19,5 +18,5 @@ def test_get_revision_hash_none(jm, mock_get_remote_content,
     from treeherder.etl import common
     project = settings.DATABASES["default"]["TEST_NAME"]
     revision = "fakerevision"
-    resultset = common.get_resultset(project, revision)
-    assert resultset == None
+    resultset = common.lookup_revisions({project: [revision]})
+    assert len(resultset) == 0
@@ -55,7 +55,7 @@ def test_ingest_all_sample_jobs(jm, refdata, sample_data, initial_data, sample_r
     job_data = sample_data.job_data
     test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)


-def test_bad_date_value_ingestion(jm, initial_data):
+def test_bad_date_value_ingestion(jm, initial_data, mock_log_parser):
     """
     Test ingesting an blob with bad date value
@@ -110,7 +110,7 @@ def test_process_objects_unknown_error(jm):
     assert row_id == 0
     assert response == exp_resp


-def test_ingest_sample_data(jm, sample_data, sample_resultset):
+def test_ingest_sample_data(jm, sample_data, sample_resultset, mock_log_parser):
     """Process all job structures in the job_data.txt file"""

     resultset_count = len(sample_resultset)
@@ -1,5 +1,5 @@
 import logging
-
+from collections import defaultdict
 from django.conf import settings

 from . import buildbot
@@ -44,19 +44,27 @@ class Builds4hTransformerMixin(object):
         our restful api
         """
         job_list = []
+        revisions = defaultdict(list)
+        for build in data['builds']:
+            prop = build['properties']
+            prop['revision'] = prop.get('revision',
+                                        prop.get('got_revision',
+                                                 prop.get('sourcestamp', None)))
+
+            if not prop['revision']:
+                continue
+            revisions[prop['branch']].append(prop['revision'][0:12])
+
+        revisions_lookup = common.lookup_revisions(revisions)
+
         for build in data['builds']:

             prop = build['properties']
-            revision = prop.get('revision',
-                                prop.get('got_revision',
-                                         prop.get('sourcestamp', None)))
-            if not revision:
-                continue

-            try:
-                resultset = common.get_resultset(prop['branch'], revision[0:12])
-            except Exception:
-                resultset = None
+            if not prop['revision'] in revisions_lookup[prop['branch']]:
+                continue
+            # the resultset object is in the nested dict
+            resultset = revisions_lookup[prop['branch']][prop['revision']]
             if not resultset:
                 continue
@@ -133,14 +141,29 @@ class PendingTransformerMixin(object):
         job_list = []

         projects = set(x.project for x in Datasource.objects.cached())
+        revision_dict = defaultdict(list)
+
+        # loop to catch all the revisions
         for project, revisions in data['pending'].items():
+            # this skips those projects we don't care about
             if not project in projects:
                 continue

             for rev, jobs in revisions.items():
-                resultset = common.get_resultset(project, rev)
-                if not resultset:
-                    continue
-                for job in jobs:
+                revision_dict[project].append(rev)
+
+        # retrieving the revision->resultset lookups
+        revisions_lookup = common.lookup_revisions(revision_dict)
+
+        for project, revisions in revisions_lookup.items():
+
+            for revision in revisions:
+
+                resultset = revisions[revision]
+                # using project and revision form the revision lookups
+                # to filter those jobs with unmatched revision
+                for job in data['pending'][project][revision]:
                     treeherder_data = {
                         'revision_hash': resultset['revision_hash'],
                         'resultset_id': resultset['id'],
@@ -190,15 +213,28 @@ class RunningTransformerMixin(object):
         """
         job_list = []
         projects = set(x.project for x in Datasource.objects.cached())
+        revisions = defaultdict(list)
+
+        # loop to catch all the revisions
         for project, revisions in data['running'].items():
+            # this skips those projects we don't care about
             if not project in projects:
                 continue
-            for rev, jobs in revisions.items():
-                resultset = common.get_resultset(project, rev)
-                if not resultset:
-                    continue

-                for job in jobs:
+            for rev, jobs in revisions.items():
+                revisions[project].append(rev)
+
+        # retrieving the revision->resultset lookups
+        revisions_lookup = common.lookup_revisions(revisions)
+
+        for project, revisions in revisions_lookup.items():
+
+            for revision in revisions:
+
+                resultset = revisions[revision]
+                # using project and revision form the revision lookups
+                # to filter those jobs with unmatched revision
+                for job in data['running'][project][revision]:
                     treeherder_data = {
                         'revision_hash': resultset['revision_hash'],
                         'resultset_id': resultset['id'],
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import hashlib
 import urllib2
 import simplejson as json
@@ -76,28 +77,25 @@ def get_remote_content(url):
     return content


-def get_resultset(project, revision):
-    """Retrieve a revision hash given a single revision"""
-
-    cache_key = "{0}:{1}".format(project, revision)
-
-    cached_resultset = cache.get(cache_key)
-
-    if not cached_resultset:
-        endpoint = reverse('resultset-list', kwargs={"project": project})
+def lookup_revisions(revision_dict):
+    """
+    Retrieve a list of revision->resultset lookups
+    """
+    lookup = dict()
+    for project, revisions in revision_dict.items():
+        revision_set = set(revisions)
+        endpoint = reverse('revision-lookup-list', kwargs={"project": project})
+        # build the query string as a comma separated list of revisions
+        q = ','.join(revision_set)
         url = "{0}/{1}/?revision={2}".format(
             settings.API_HOSTNAME.strip('/'),
             endpoint.strip('/'),
-            revision
+            q
         )
         content = get_remote_content(url)
         if content:
-            cached_resultset = content[0]
-            cache.set(cache_key, cached_resultset, 60 * 60 * 12)
-        else:
-            return None
-    return cached_resultset
+            lookup[project] = get_remote_content(url)
+    return lookup


 def generate_revision_hash(revisions):
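For orientation, a minimal usage sketch of the new helper (not part of the commit; the project name and revision strings are placeholders):

    from treeherder.etl import common

    # One API round trip per project instead of one per revision.
    lookup = common.lookup_revisions({
        "mozilla-central": ["abcdef123456", "123456abcdef"],
    })

    # Expected nested shape, keyed project -> revision -> resultset row,
    # as the updated tests assert:
    # {
    #     "mozilla-central": {
    #         "abcdef123456": {"id": 1, "revision_hash": "..."},
    #         "123456abcdef": {"id": 2, "revision_hash": "..."},
    #     }
    # }
    revision_hash = lookup["mozilla-central"]["abcdef123456"]["revision_hash"]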
@@ -357,9 +357,12 @@ class TreeherderPulseDataAdapter(PulseDataAdapter, ObjectstoreLoaderMixin):
     def adapt_data(self, data):
         """Adapts the PulseDataAdapter into the treeherder input data structure"""

-        resultset = common.get_resultset(data['branch'], data['revision'])
+        resultset = common.lookup_revisions({data['branch']: [data['revision']]})
+        if not resultset:
+            return {}

-        treeherder_data = resultset or {}
+        del resultset[data['branch']][data['revision']]['id']
+        treeherder_data = resultset[data['branch']][data['revision']]
         treeherder_data['project'] = data['branch']
         ####
         #TODO: This is a temporary fix, this data will not be located
@@ -378,7 +381,7 @@ class TreeherderPulseDataAdapter(PulseDataAdapter, ObjectstoreLoaderMixin):
                 #modified
                 request_id, data['request_times'][unicode(request_id)]
             ),
-            'revision_hash': resultset['revision_hash'],
+            'revision_hash': treeherder_data.pop('revision_hash'),
             'name': data['test_name'],
             'product_name': data['product'],
             'state': 'completed',
@@ -300,6 +300,26 @@ class JobsModel(TreeherderModelBase):

         return self.as_list(return_list, "result_set", **kwargs)

+    def get_revision_resultset_lookup(self, revision_list):
+        """
+        Create a list of revision->resultset lookups from a list of revision
+        """
+
+        replacement = ",".join(["%s"] * len(revision_list))
+        replacement = " AND revision IN ("+replacement+") "
+
+        proc = "jobs.selects.get_result_set_list"
+        lookups = self.get_jobs_dhub().execute(
+            proc=proc,
+            placeholders=revision_list+[0, len(revision_list)],
+            debug_show=self.DEBUG,
+            replace=[replacement],
+            return_type="dict",
+            key_column="revision"
+        )
+        return lookups
+
     def get_result_set_details(self, result_set_ids):
         """
         Retrieve all revisions associated with a set of ``result_set`` (also known as ``pushes``)
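The IN clause above is assembled by string replacement before being handed to the datahub; a standalone sketch of just that step (revision values are placeholders):

    revision_list = ["abcdef123456", "123456abcdef"]
    replacement = ",".join(["%s"] * len(revision_list))
    replacement = " AND revision IN (" + replacement + ") "
    # replacement is now " AND revision IN (%s,%s) "
    # and is executed with placeholders=revision_list + [0, len(revision_list)]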
@@ -239,7 +239,9 @@
     "sql":"SELECT DISTINCT
                rs.id,
                rs.revision_hash,
-               rs.push_timestamp
+               rs.push_timestamp,
+               revision.id as revision_id,
+               revision.revision
            FROM result_set AS rs
            INNER JOIN revision_map
            ON rs.id = revision_map.result_set_id
@@ -34,6 +34,13 @@ project_bound_router.register(
     base_name='note',
 )

+project_bound_router.register(
+    r'revision-lookup',
+    views.RevisionLookupSetViewSet,
+    base_name='revision-lookup',
+)
+
+
 # this is the default router for plain restful endpoints

 # refdata endpoints:
@@ -6,8 +6,9 @@ from rest_framework import viewsets
 from rest_framework.response import Response
 from rest_framework.decorators import action
 from rest_framework.reverse import reverse
-from treeherder.model import models
+from rest_framework.exceptions import ParseError

+from treeherder.model import models
 from treeherder.model.derived import (JobsModel, DatasetNotFoundError,
                                       ObjectNotFoundException)
@@ -364,6 +365,22 @@ class ResultSetViewSet(viewsets.ViewSet):
         return Response({"message": "well-formed JSON stored"})


+class RevisionLookupSetViewSet(viewsets.ViewSet):
+
+    @with_jobs
+    def list(self, request, project, jm):
+
+        revision_filter = request.QUERY_PARAMS.get('revision', None)
+        if not revision_filter:
+            raise ParseError(detail="The revision parameter is mandatory for this endpoint")
+
+        revision_list = revision_filter.split(",")
+
+        return Response(jm.get_revision_resultset_lookup(revision_list))
+
+
 #####################
 # Refdata ViewSets
 #####################
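A sketch of exercising the new endpoint, assuming the project-bound router mounts under /api/project/<project>/ as in Treeherder's API; the hostname and revisions are placeholders:

    import urllib2
    import simplejson as json

    url = ("https://treeherder.example.org/api/project/mozilla-central/"
           "revision-lookup/?revision=abcdef123456,123456abcdef")
    # Returns a dict keyed by revision, carrying the columns selected in
    # get_result_set_list: id, revision_hash, push_timestamp, revision_id, revision.
    lookup = json.loads(urllib2.urlopen(url).read())

    # Omitting ?revision= yields a ParseError response:
    # {"detail": "The revision parameter is mandatory for this endpoint"}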