Bug 1077136 - handle pushes missing from json-pushes gracefully

This commit is contained in:
Cameron Dawson 2014-10-10 17:04:14 -07:00
Родитель e1810cf419
Коммит 06728b9835
4 изменённых файлов: 56 добавлений и 18 удалений

Просмотреть файл

@ -159,4 +159,7 @@ def fetch_missing_resultsets(source, missing_resultsets, logger):
try:
fetch_missing_push_logs.apply_async(args=[missing_resultsets])
except Exception as ex:
logger.error("error fetching missing resultsets: {0}".format(ex))
logger.exception("error fetching missing resultsets: {0}, {1}".format(
missing_resultsets,
ex
))

Просмотреть файл

@ -129,6 +129,7 @@ class OAuthLoaderMixin(object):
oauth_secret=credentials.get('consumer_secret', None)
)
logger.info("collection loading request: {0}".format(th_request.get_uri(th_collections[project].endpoint_base)))
response = th_request.post(th_collections[project])
if not response or response.status != 200:

Просмотреть файл

@ -1,5 +1,6 @@
from django.core.cache import cache
from django.conf import settings
import time
import requests
import logging
@ -108,16 +109,40 @@ class HgPushlogProcess(HgPushlogTransformerMixin,
class MissingHgPushlogProcess(HgPushlogTransformerMixin,
OAuthLoaderMixin):
def extract(self, url):
def extract(self, url, resultset):
# we will sometimes get here because builds4hr/pending/running have a
# job with a resultset that json-pushes doesn't know about. Seems
# odd, but it happens. So we just ingest it anyway, substituting a
# placeholder resultset when json-pushes returns a 404.
logger.info("extracting missing resultsets: {0}".format(url))
response = requests.get(url, timeout=settings.TREEHERDER_REQUESTS_TIMEOUT)
if response.status_code == 404:
# we want to make a "fake" resultset, because json-pushes doesn't
# know about it. This is what TBPL does
return {
"00001": {
"date": int(time.time()),
"changesets": [
{
"node": resultset,
"files": [],
"tags": [],
"author": "Unknown",
"branch": "default",
"desc": "Pushlog not found at {0}".format(url)
}
],
"user": "Unknown"
}
}
else:
response.raise_for_status()
return response.json()
def run(self, source_url, repository):
def run(self, source_url, repository, resultset):
try:
extracted_content = self.extract(source_url)
extracted_content = self.extract(source_url, resultset)
if extracted_content:
@ -125,16 +150,22 @@ class MissingHgPushlogProcess(HgPushlogTransformerMixin,
extracted_content,
repository
)
logger.info("loading missing resultsets: {0}".format(transformed))
for project, coll in transformed.iteritems():
logger.info("loading missing resultsets for {0}: {1}".format(
project,
coll.to_json()))
self.load(transformed)
logger.info("done loading missing resultsets for {0}".format(repository))
else:
assert extracted_content, (
"Got no content response for missing resultsets: {0}".format(
source_url)
)
except Exception as ex:
logger.error("error fetching missing resultsets: {0}".format(
ex
logger.exception("error loading missing resultsets: {0}".format(
source_url
))
class GitPushlogTransformerMixin(object):

Просмотреть файл

@ -16,11 +16,14 @@ def fetch_missing_push_logs(missing_pushlogs):
try:
repos = filter(lambda x: x['url'], rdm.get_all_repository_info())
for repo in repos:
if repo['dvcs_type'] == 'hg':
if repo['dvcs_type'] == 'hg' and repo['name'] in missing_pushlogs:
# we must get them one at a time, because if ANY are missing
# from json-pushes, it'll return a 404 for the group.
for resultset in missing_pushlogs[repo['name']]:
fetch_missing_hg_push_logs.apply_async(args=(
repo['name'],
repo['url'],
missing_pushlogs[repo['name']]
resultset
),
routing_key='pushlog'
)
@ -29,7 +32,7 @@ def fetch_missing_push_logs(missing_pushlogs):
@task(name='fetch-missing-hg-push-logs', time_limit=3*60)
def fetch_missing_hg_push_logs(repo_name, repo_url, resultsets):
def fetch_missing_hg_push_logs(repo_name, repo_url, resultset):
"""
Run a HgPushlog etl process
@ -37,10 +40,10 @@ def fetch_missing_hg_push_logs(repo_name, repo_url, resultsets):
"""
process = MissingHgPushlogProcess()
changesetParam = urllib.urlencode({"changeset": resultsets}, True)
changesetParam = urllib.urlencode({"changeset": resultset}, True)
url_str = repo_url + '/json-pushes/?full=1&' + changesetParam
logger.info("fetching missing resultsets: {0}".format(url_str))
process.run(url_str, repo_name)
process.run(url_str, repo_name, resultset)