From 9e414b8a61d805da7cee70feca8ec782dbfd71fa Mon Sep 17 00:00:00 2001 From: Cameron Dawson Date: Fri, 17 Oct 2014 14:02:30 -0700 Subject: [PATCH] bug 1077136 - added test and fixed when RS didn't get set to 'onhold' --- tests/etl/test_pushlog.py | 38 +++++++++++++++++++++++++++++++- tests/jobs_test.json | 4 ++++ treeherder/etl/common.py | 19 ++++++++++++++++ treeherder/etl/pushlog.py | 35 ++++++++++------------------- treeherder/model/derived/jobs.py | 4 ++-- treeherder/model/sql/jobs.json | 2 +- 6 files changed, 75 insertions(+), 27 deletions(-) diff --git a/tests/etl/test_pushlog.py b/tests/etl/test_pushlog.py index d555fb56c..1bf43fe22 100644 --- a/tests/etl/test_pushlog.py +++ b/tests/etl/test_pushlog.py @@ -1,7 +1,8 @@ import os import json import responses -from treeherder.etl.pushlog import HgPushlogProcess +from treeherder.etl.pushlog import HgPushlogProcess, MissingHgPushlogProcess +from treeherder.etl.common import get_not_found_onhold_push def test_ingest_hg_pushlog(jm, initial_data, test_base_dir, @@ -87,3 +88,38 @@ def test_ingest_hg_pushlog_already_stored(jm, initial_data, test_base_dir, ) assert len(pushes_stored) == 2 + + +def test_ingest_hg_pushlog_not_found_in_json_pushes(jm, initial_data, test_base_dir, + test_repository, mock_post_json_data, activate_responses): + """ + Ingest a pushlog that is not found in json-pushes. 
So we ingest a + resultset that is "onhold" + + """ + + pushlog_fake_url = "http://www.thisismypushlog.com" + responses.add(responses.GET, pushlog_fake_url, + body="foo", status=404, + content_type='application/json') + + process = MissingHgPushlogProcess() + + process.run(pushlog_fake_url, jm.project, "123456789012") + + pushes_stored = jm.get_jobs_dhub().execute( + proc="jobs_test.selects.result_sets", + return_type='tuple' + ) + + assert len(pushes_stored) == 1 + assert pushes_stored[0]['active_status'] == "onhold" + + revisions_stored = jm.get_jobs_dhub().execute( + proc="jobs_test.selects.revision_ids", + return_type='tuple' + ) + + assert len(revisions_stored) == 1 + + diff --git a/tests/jobs_test.json b/tests/jobs_test.json index ab579f660..c63c5c5d8 100644 --- a/tests/jobs_test.json +++ b/tests/jobs_test.json @@ -69,6 +69,10 @@ "result_set_ids": { "sql": "SELECT `id`, `push_timestamp`, `revision_hash` FROM `result_set`", "host": "master_host" + }, + "result_sets": { + "sql": "SELECT * FROM `result_set`", + "host": "master_host" } }, "updates": { diff --git a/treeherder/etl/common.py b/treeherder/etl/common.py index 8610bf6bd..0cf74b176 100644 --- a/treeherder/etl/common.py +++ b/treeherder/etl/common.py @@ -2,6 +2,7 @@ from collections import defaultdict import hashlib import urllib2 import simplejson as json +import time from django.core.urlresolvers import reverse from django.conf import settings @@ -197,3 +198,21 @@ def get_resultset(project, revisions_lookup, revision, missing_resultsets, logge return resultset +def get_not_found_onhold_push(url, revision): + return { + "00001": { + "date": int(time.time()), + "changesets": [ + { + "node": revision, + "files": [], + "tags": [], + "author": "Unknown", + "branch": "default", + "desc": "Pushlog not found at {0}".format(url) + } + ], + "user": "Unknown", + "active_status": "onhold" + } + } \ No newline at end of file diff --git a/treeherder/etl/pushlog.py b/treeherder/etl/pushlog.py index 
d3491186b..1efb0af61 100644 --- a/treeherder/etl/pushlog.py +++ b/treeherder/etl/pushlog.py @@ -7,7 +7,7 @@ import logging from thclient import TreeherderRequest, TreeherderResultSetCollection from .mixins import JsonExtractorMixin, OAuthLoaderMixin -from treeherder.etl.common import generate_revision_hash +from treeherder.etl.common import generate_revision_hash, get_not_found_onhold_push logger = logging.getLogger(__name__) @@ -31,6 +31,8 @@ class HgPushlogTransformerMixin(object): # Author of the push/resultset result_set['author'] = push['user'] + result_set['active_status'] = push.get('active_status', 'active') + rev_hash_components = [] # iterate over the revisions @@ -109,7 +111,7 @@ class HgPushlogProcess(HgPushlogTransformerMixin, class MissingHgPushlogProcess(HgPushlogTransformerMixin, OAuthLoaderMixin): - def extract(self, url, resultset): + def extract(self, url, revision): logger.info("extracting missing resultsets: {0}".format(url)) response = requests.get(url, timeout=settings.TREEHERDER_REQUESTS_TIMEOUT) if response.status_code == 404: @@ -126,33 +128,20 @@ class MissingHgPushlogProcess(HgPushlogTransformerMixin, logger.warn(("no pushlog in json-pushes. generating a dummy" " onhold placeholder: {0}").format(url)) - # we want to make a "fake" resultset, because json-pushes doesn't - # know about it. This is what TBPL does - return { - "00001": { - "date": int(time.time()), - "changesets": [ - { - "node": resultset, - "files": [], - "tags": [], - "author": "Unknown", - "branch": "default", - "desc": "Pushlog not found at {0}".format(url) - } - ], - "user": "Unknown", - "active_status": "onhold" - } - } + # we want to make a "dummy" resultset that is "onhold", + # because json-pushes doesn't know about it. + # This is, in effect, what TBPL does. 
+ # These won't show in the UI, because the UI only fetches "active" + # resultsets + return get_not_found_onhold_push(url, revision) else: response.raise_for_status() return response.json() - def run(self, source_url, repository, resultset): + def run(self, source_url, repository, revision): try: - extracted_content = self.extract(source_url, resultset) + extracted_content = self.extract(source_url, revision) if extracted_content: diff --git a/treeherder/model/derived/jobs.py b/treeherder/model/derived/jobs.py index 1c3cb1d52..9488c2fb0 100644 --- a/treeherder/model/derived/jobs.py +++ b/treeherder/model/derived/jobs.py @@ -2545,7 +2545,7 @@ class JobsModel(TreeherderModelBase): repository_id = self.refdata_model.get_repository_id( rev_datum['repository'] ) - repository_id_lookup[ rev_datum['repository'] ] = repository_id + repository_id_lookup[rev_datum['repository']] = repository_id # We may not have a commit timestamp in the push data commit_timestamp = rev_datum.get( @@ -2563,7 +2563,7 @@ class JobsModel(TreeherderModelBase): ) file_str = ','.join(file_list) - repository_id = repository_id_lookup[ rev_datum['repository'] ] + repository_id = repository_id_lookup[rev_datum['repository']] revision_placeholders.append( [ rev_datum['revision'], rev_datum['author'], diff --git a/treeherder/model/sql/jobs.json b/treeherder/model/sql/jobs.json index 5b8b13f98..c9dd2a734 100644 --- a/treeherder/model/sql/jobs.json +++ b/treeherder/model/sql/jobs.json @@ -706,7 +706,7 @@ "get_result_set_ids":{ "sql":"SELECT `id`, `revision_hash`, `push_timestamp` FROM `result_set` - WHERE `active_status` = 'active' AND `revision_hash` IN (REP0)", + WHERE `active_status` in ('active', 'onhold') AND `revision_hash` IN (REP0)", "host": "read_host" }, "get_all_result_set_revision_hashes":{