bug 1077136 - added test and fixed case where the result set didn't get set to 'onhold'

This commit is contained in:
Cameron Dawson 2014-10-17 14:02:30 -07:00
Parent 5fa23c2aa0
Commit 9e414b8a61
6 changed files: 75 additions and 27 deletions

View File

@@ -1,7 +1,8 @@
import os
import json
import responses
from treeherder.etl.pushlog import HgPushlogProcess
from treeherder.etl.pushlog import HgPushlogProcess, MissingHgPushlogProcess
from treeherder.etl.common import get_not_found_onhold_push
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
@@ -87,3 +88,38 @@ def test_ingest_hg_pushlog_already_stored(jm, initial_data, test_base_dir,
)
assert len(pushes_stored) == 2
def test_ingest_hg_pushlog_not_found_in_json_pushes(jm, initial_data, test_base_dir,
test_repository, mock_post_json_data, activate_responses):
"""
Ingest a pushlog that is not found in json-pushes. So we ingest a
resultset that is "onhold"
"""
pushlog_fake_url = "http://www.thisismypushlog.com"
responses.add(responses.GET, pushlog_fake_url,
body="foo", status=404,
content_type='application/json')
process = MissingHgPushlogProcess()
process.run(pushlog_fake_url, jm.project, "123456789012")
pushes_stored = jm.get_jobs_dhub().execute(
proc="jobs_test.selects.result_sets",
return_type='tuple'
)
assert len(pushes_stored) == 1
assert pushes_stored[0]['active_status'] == "onhold"
revisions_stored = jm.get_jobs_dhub().execute(
proc="jobs_test.selects.revision_ids",
return_type='tuple'
)
assert len(revisions_stored) == 1
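For readers unfamiliar with the mocking above: the `responses` library intercepts HTTP calls made through `requests`, so the 404 that drives the "onhold" path never reaches a real server. A minimal standalone sketch, assuming only `requests` and `responses` are installed (the helper name `fetch_status` is illustrative, not part of this patch):

```python
import requests
import responses

@responses.activate
def fetch_status(url):
    # Register a canned 404 for this URL; the GET below never touches the
    # network, mirroring what the test's activate_responses fixture provides.
    responses.add(responses.GET, url, body="foo", status=404,
                  content_type='application/json')
    return requests.get(url).status_code

assert fetch_status("http://www.thisismypushlog.com") == 404
```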

View File

@@ -69,6 +69,10 @@
"result_set_ids": {
"sql": "SELECT `id`, `push_timestamp`, `revision_hash` FROM `result_set`",
"host": "master_host"
},
"result_sets": {
"sql": "SELECT * FROM `result_set`",
"host": "master_host"
}
},
"updates": {

View File

@@ -2,6 +2,7 @@ from collections import defaultdict
import hashlib
import urllib2
import simplejson as json
import time
from django.core.urlresolvers import reverse
from django.conf import settings
@@ -197,3 +198,21 @@ def get_resultset(project, revisions_lookup, revision, missing_resultsets, logge
return resultset
def get_not_found_onhold_push(url, revision):
return {
"00001": {
"date": int(time.time()),
"changesets": [
{
"node": revision,
"files": [],
"tags": [],
"author": "Unknown",
"branch": "default",
"desc": "Pushlog not found at {0}".format(url)
}
],
"user": "Unknown",
"active_status": "onhold"
}
}
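A rough usage sketch of the new helper; the URL and revision below are placeholders, not values from this commit:

```python
from treeherder.etl.common import get_not_found_onhold_push

# Build the dummy pushlog entry for a revision that json-pushes doesn't know.
push_dict = get_not_found_onhold_push(
    "https://hg.example.org/try/json-pushes", "123456789012")

push = push_dict["00001"]
assert push["active_status"] == "onhold"
assert push["changesets"][0]["node"] == "123456789012"
assert push["changesets"][0]["desc"].startswith("Pushlog not found at")
```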

View File

@@ -7,7 +7,7 @@ import logging
from thclient import TreeherderRequest, TreeherderResultSetCollection
from .mixins import JsonExtractorMixin, OAuthLoaderMixin
from treeherder.etl.common import generate_revision_hash
from treeherder.etl.common import generate_revision_hash, get_not_found_onhold_push
logger = logging.getLogger(__name__)
@@ -31,6 +31,8 @@ class HgPushlogTransformerMixin(object):
# Author of the push/resultset
result_set['author'] = push['user']
result_set['active_status'] = push.get('active_status', 'active')
rev_hash_components = []
# iterate over the revisions
@@ -109,7 +111,7 @@ class HgPushlogProcess(HgPushlogTransformerMixin,
class MissingHgPushlogProcess(HgPushlogTransformerMixin,
OAuthLoaderMixin):
def extract(self, url, resultset):
def extract(self, url, revision):
logger.info("extracting missing resultsets: {0}".format(url))
response = requests.get(url, timeout=settings.TREEHERDER_REQUESTS_TIMEOUT)
if response.status_code == 404:
@@ -126,33 +128,20 @@ class MissingHgPushlogProcess(HgPushlogTransformerMixin,
logger.warn(("no pushlog in json-pushes. generating a dummy"
" onhold placeholder: {0}").format(url))
# we want to make a "fake" resultset, because json-pushes doesn't
# know about it. This is what TBPL does
return {
"00001": {
"date": int(time.time()),
"changesets": [
{
"node": resultset,
"files": [],
"tags": [],
"author": "Unknown",
"branch": "default",
"desc": "Pushlog not found at {0}".format(url)
}
],
"user": "Unknown",
"active_status": "onhold"
}
}
# we want to make a "dummy" resultset that is "onhold",
# because json-pushes doesn't know about it.
# This is, in effect, what TBPL does.
# These won't show in the UI, because the UI only fetches "active"
# resultsets.
return get_not_found_onhold_push(url, revision)
else:
response.raise_for_status()
return response.json()
def run(self, source_url, repository, resultset):
def run(self, source_url, repository, revision):
try:
extracted_content = self.extract(source_url, resultset)
extracted_content = self.extract(source_url, revision)
if extracted_content:
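Taken together, the 404 branch means a push that json-pushes has forgotten can still be ingested. A hedged invocation sketch mirroring the new test (URL, project name, and revision are placeholders):

```python
from treeherder.etl.pushlog import MissingHgPushlogProcess

process = MissingHgPushlogProcess()
# On a 404, extract() falls back to get_not_found_onhold_push(), and the
# transformer stores a resultset whose active_status is "onhold".
process.run("https://hg.example.org/try/json-pushes?changeset=123456789012",
            "mozilla-central",   # repository/project name (illustrative)
            "123456789012")      # the revision json-pushes did not return
```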

View File

@@ -2545,7 +2545,7 @@ class JobsModel(TreeherderModelBase):
repository_id = self.refdata_model.get_repository_id(
rev_datum['repository']
)
repository_id_lookup[ rev_datum['repository'] ] = repository_id
repository_id_lookup[rev_datum['repository']] = repository_id
# We may not have a commit timestamp in the push data
commit_timestamp = rev_datum.get(
@@ -2563,7 +2563,7 @@ class JobsModel(TreeherderModelBase):
)
file_str = ','.join(file_list)
repository_id = repository_id_lookup[ rev_datum['repository'] ]
repository_id = repository_id_lookup[rev_datum['repository']]
revision_placeholders.append(
[ rev_datum['revision'],
rev_datum['author'],

View File

@@ -706,7 +706,7 @@
"get_result_set_ids":{
"sql":"SELECT `id`, `revision_hash`, `push_timestamp`
FROM `result_set`
WHERE `active_status` = 'active' AND `revision_hash` IN (REP0)",
WHERE `active_status` in ('active', 'onhold') AND `revision_hash` IN (REP0)",
"host": "read_host"
},
"get_all_result_set_revision_hashes":{