Bug 1077136 - handle pushes missing from json-pushes gracefully

This commit is contained in:
Cameron Dawson 2014-10-10 17:04:14 -07:00
Родитель e1810cf419
Коммит 06728b9835
4 изменённых файлов: 56 добавлений и 18 удалений

Просмотреть файл

@ -159,4 +159,7 @@ def fetch_missing_resultsets(source, missing_resultsets, logger):
try: try:
fetch_missing_push_logs.apply_async(args=[missing_resultsets]) fetch_missing_push_logs.apply_async(args=[missing_resultsets])
except Exception as ex: except Exception as ex:
logger.error("error fetching missing resultsets: {0}".format(ex)) logger.exception("error fetching missing resultsets: {0}, {1}".format(
missing_resultsets,
ex
))

Просмотреть файл

@ -129,6 +129,7 @@ class OAuthLoaderMixin(object):
oauth_secret=credentials.get('consumer_secret', None) oauth_secret=credentials.get('consumer_secret', None)
) )
logger.info("collection loading request: {0}".format(th_request.get_uri(th_collections[project].endpoint_base)))
response = th_request.post(th_collections[project]) response = th_request.post(th_collections[project])
if not response or response.status != 200: if not response or response.status != 200:

Просмотреть файл

@ -1,5 +1,6 @@
from django.core.cache import cache from django.core.cache import cache
from django.conf import settings from django.conf import settings
import time
import requests import requests
import logging import logging
@ -108,16 +109,40 @@ class HgPushlogProcess(HgPushlogTransformerMixin,
class MissingHgPushlogProcess(HgPushlogTransformerMixin, class MissingHgPushlogProcess(HgPushlogTransformerMixin,
OAuthLoaderMixin): OAuthLoaderMixin):
def extract(self, url): def extract(self, url, resultset):
# we will sometimes get here because builds4hr/pending/running have a
# job with a resultset that json-pushes doesn't know about. Seems
# odd, but it happens. So we just ingest a "fake" resultset in its place.
logger.info("extracting missing resultsets: {0}".format(url)) logger.info("extracting missing resultsets: {0}".format(url))
response = requests.get(url, timeout=settings.TREEHERDER_REQUESTS_TIMEOUT) response = requests.get(url, timeout=settings.TREEHERDER_REQUESTS_TIMEOUT)
response.raise_for_status() if response.status_code == 404:
# we want to make a "fake" resultset, because json-pushes doesn't
# know about it. This is what TBPL does
return {
"00001": {
"date": int(time.time()),
"changesets": [
{
"node": resultset,
"files": [],
"tags": [],
"author": "Unknown",
"branch": "default",
"desc": "Pushlog not found at {0}".format(url)
}
],
"user": "Unknown"
}
}
else:
response.raise_for_status()
return response.json() return response.json()
def run(self, source_url, repository): def run(self, source_url, repository, resultset):
try: try:
extracted_content = self.extract(source_url) extracted_content = self.extract(source_url, resultset)
if extracted_content: if extracted_content:
@ -125,16 +150,22 @@ class MissingHgPushlogProcess(HgPushlogTransformerMixin,
extracted_content, extracted_content,
repository repository
) )
logger.info("loading missing resultsets: {0}".format(transformed))
for project, coll in transformed.iteritems():
logger.info("loading missing resultsets for {0}: {1}".format(
project,
coll.to_json()))
self.load(transformed) self.load(transformed)
logger.info("done loading missing resultsets for {0}".format(repository))
else: else:
assert extracted_content, ( assert extracted_content, (
"Got no content response for missing resultsets: {0}".format( "Got no content response for missing resultsets: {0}".format(
source_url) source_url)
) )
except Exception as ex: except Exception as ex:
logger.error("error fetching missing resultsets: {0}".format( logger.exception("error loading missing resultsets: {0}".format(
ex source_url
)) ))
class GitPushlogTransformerMixin(object): class GitPushlogTransformerMixin(object):

Просмотреть файл

@ -16,20 +16,23 @@ def fetch_missing_push_logs(missing_pushlogs):
try: try:
repos = filter(lambda x: x['url'], rdm.get_all_repository_info()) repos = filter(lambda x: x['url'], rdm.get_all_repository_info())
for repo in repos: for repo in repos:
if repo['dvcs_type'] == 'hg': if repo['dvcs_type'] == 'hg' and repo['name'] in missing_pushlogs:
fetch_missing_hg_push_logs.apply_async(args=( # we must get them one at a time, because if ANY are missing
repo['name'], # from json-pushes, it'll return a 404 for the group.
repo['url'], for resultset in missing_pushlogs[repo['name']]:
missing_pushlogs[repo['name']] fetch_missing_hg_push_logs.apply_async(args=(
), repo['name'],
routing_key='pushlog' repo['url'],
resultset
),
routing_key='pushlog'
) )
finally: finally:
rdm.disconnect() rdm.disconnect()
@task(name='fetch-missing-hg-push-logs', time_limit=3*60) @task(name='fetch-missing-hg-push-logs', time_limit=3*60)
def fetch_missing_hg_push_logs(repo_name, repo_url, resultsets): def fetch_missing_hg_push_logs(repo_name, repo_url, resultset):
""" """
Run a HgPushlog etl process Run a HgPushlog etl process
@ -37,10 +40,10 @@ def fetch_missing_hg_push_logs(repo_name, repo_url, resultsets):
""" """
process = MissingHgPushlogProcess() process = MissingHgPushlogProcess()
changesetParam = urllib.urlencode({"changeset": resultsets}, True) changesetParam = urllib.urlencode({"changeset": resultset}, True)
url_str = repo_url + '/json-pushes/?full=1&' + changesetParam url_str = repo_url + '/json-pushes/?full=1&' + changesetParam
logger.info("fetching missing resultsets: {0}".format(url_str)) logger.info("fetching missing resultsets: {0}".format(url_str))
process.run(url_str, repo_name) process.run(url_str, repo_name, resultset)