Bug 1071577 - Cache revision hashes to reduce pushlog load

Jonathan Eads 2014-09-24 15:46:33 -07:00
Parent 24c437d0d6
Commit d7908793a5
4 changed files with 37 additions and 1 deletion
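In outline: when JobsModel inserts new result sets it now writes each revision hash to Django's cache under a "<project>-<revision_hash>" key with a 48-hour timeout, and the Hg pushlog transformer checks that cache first and skips any result set whose hash is already present, so pushes that were already ingested are not rebuilt and re-submitted.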

View file

@@ -5,6 +5,9 @@ import itertools
import pprint
import copy
from django.core.cache import cache
from treeherder.etl.common import generate_result_set_cache_key
from treeherder.model.derived.base import DatasetNotFoundError
from tests.sample_data_generator import job_data, result_set
from tests.sampledata import SampleData
@@ -356,6 +359,12 @@ def test_store_result_set_data(jm, initial_data, sample_resultset):
    assert set(data['result_set_ids'].keys()) == revision_hashes
    assert set(data['revision_ids'].keys()) == revisions

    # confirm all revision_hashes were stored in the cache
    for target_revision_hash in data['result_set_ids'].keys():
        key = generate_result_set_cache_key(jm.project, target_revision_hash)
        revision_hash = cache.get(key)
        assert revision_hash == target_revision_hash

    # Confirm the data structures returned match what's stored in
    # the database
    print '<><>EXP'

View file

@@ -116,6 +116,8 @@ def generate_revision_hash(revisions):
    return sh.hexdigest()

def generate_result_set_cache_key(project, revision_hash):
    return "{0}-{1}".format(project, revision_hash)

def generate_job_guid(request_id, request_time, endtime=None):
    """Converts a request_id and request_time into a guid"""

View file

@@ -3,7 +3,7 @@ from django.core.cache import cache
from thclient import TreeherderRequest, TreeherderResultSetCollection
from .mixins import JsonExtractorMixin, OAuthLoaderMixin
-from treeherder.etl.common import generate_revision_hash
+from treeherder.etl.common import generate_revision_hash, generate_result_set_cache_key
class HgPushlogTransformerMixin(object):
@@ -47,6 +47,15 @@ class HgPushlogTransformerMixin(object):
            result_set['revision_hash'] = generate_revision_hash(rev_hash_components)

            cached_revision_hash = cache.get(
                generate_result_set_cache_key(
                    repository, result_set['revision_hash']
                )
            )

            if cached_revision_hash == result_set['revision_hash']:
                # Result set is already loaded
                continue

            if repository not in th_collections:
                th_collections[repository] = TreeherderResultSetCollection()
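Taken out of the transformer, the skip logic reduces to the pattern below. This is a sketch assuming each result_set dict already carries its computed revision_hash; the helper name filter_new_result_sets is invented, not part of the commit:

from django.core.cache import cache
from treeherder.etl.common import generate_result_set_cache_key

def filter_new_result_sets(repository, result_sets):
    """Yield only result sets whose revision_hash is not already cached (sketch)."""
    for result_set in result_sets:
        key = generate_result_set_cache_key(repository, result_set['revision_hash'])
        if cache.get(key) == result_set['revision_hash']:
            # Already loaded on an earlier pushlog run; skip it.
            continue
        yield result_set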

View file

@@ -26,6 +26,7 @@ from .base import TreeherderModelBase, ObjectNotFoundException
from datasource.DataHub import DataHub
from treeherder.etl.perf_data_adapters import TalosDataAdapter
from treeherder.etl.common import generate_result_set_cache_key
logger = logging.getLogger(__name__)
@@ -60,6 +61,9 @@ class JobsModel(TreeherderModelBase):
    INCOMPLETE_STATES = ["running", "pending"]
    STATES = INCOMPLETE_STATES + ["completed", "coalesced"]

    # Set timeout (seconds) to 48 hours
    RESULT_SET_CACHE_TIMEOUT = (60 * 60) * 48

    # indexes of specific items in the ``job_placeholder`` objects
    JOB_PH_JOB_GUID = 0
    JOB_PH_COALESCED_TO_GUID = 2
@@ -2585,11 +2589,23 @@ class JobsModel(TreeherderModelBase):
        # result_set ids and submit publish to pulse tasks.
        if inserted_result_sets and lastrowid > 0:

            cache_data = {}

            for revision_hash in inserted_result_sets:
                inserted_result_set_ids.append(
                    result_set_id_lookup[revision_hash]['id']
                )
                key = generate_result_set_cache_key(
                    self.project, revision_hash
                )
                cache_data[key] = revision_hash

            cache.set_many(
                cache_data,
                self.RESULT_SET_CACHE_TIMEOUT
            )

        # Insert new revisions
        dhub.execute(
            proc='jobs.inserts.set_revision',
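The write side, reduced to a standalone sketch: after result sets are inserted, their revision hashes are written back to the cache in one cache.set_many call with the 48-hour timeout. The repository name and hashes here are invented for illustration:

from django.core.cache import cache
from treeherder.etl.common import generate_result_set_cache_key

RESULT_SET_CACHE_TIMEOUT = (60 * 60) * 48  # 48 hours, mirroring JobsModel

inserted_result_sets = ["deadbeef1234", "cafebabe5678"]  # invented hashes
cache_data = {
    generate_result_set_cache_key("mozilla-central", revision_hash): revision_hash
    for revision_hash in inserted_result_sets
}
cache.set_many(cache_data, RESULT_SET_CACHE_TIMEOUT)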