From 49d228a62e7893c2d31eb6cc0adff0657603eb60 Mon Sep 17 00:00:00 2001
From: Cameron Dawson
Date: Thu, 2 Oct 2014 16:45:07 -0700
Subject: [PATCH] Bug 1077136 - new cleanup_tasks file to avoid circular
 imports

tasks.py already imports from buildapi.py, while buildapi.py needed
fetch_missing_push_logs from tasks.py. Moving the missing-pushlog tasks
into their own cleanup_tasks module breaks that import cycle.
---
 treeherder/etl/buildapi.py      | 14 +++++++-------
 treeherder/etl/cleanup_tasks.py | 41 +++++++++++++++++++++++++++++++++
 treeherder/etl/tasks.py         | 37 +----------------------------
 3 files changed, 49 insertions(+), 43 deletions(-)
 create mode 100644 treeherder/etl/cleanup_tasks.py

diff --git a/treeherder/etl/buildapi.py b/treeherder/etl/buildapi.py
index bf5b80d9d..2d56c21db 100644
--- a/treeherder/etl/buildapi.py
+++ b/treeherder/etl/buildapi.py
@@ -11,8 +11,8 @@ from thclient import TreeherderRequest, TreeherderJobCollection
 
 from treeherder.etl import common, buildbot
 from treeherder.etl.mixins import JsonExtractorMixin, OAuthLoaderMixin
-from treeherder.etl.tasks import fetch_missing_push_logs
 from treeherder.model.models import Datasource
+from .cleanup_tasks import fetch_missing_push_logs
 
 logger = logging.getLogger(__name__)
 
@@ -285,7 +285,7 @@ class PendingTransformerMixin(object):
         missing_revisions = defaultdict(list)
 
         # loop to catch all the revisions
-        for project, revisions in data['pending'].items():
+        for project, revisions in data['pending'].iteritems():
             # this skips those projects we don't care about
             if project not in projects:
                 continue
@@ -297,14 +297,14 @@
 
         th_collections = {}
 
-        for project, revisions in data['pending'].items():
+        for project, revisions in data['pending'].iteritems():
             for revision, jobs in revisions.items():
 
                 try:
-                    project = revisions_lookup[project]
+                    branch = revisions_lookup[project]
 
                     try:
-                        resultset = project[revision]
+                        resultset = branch[revision]
                     except KeyError:
                         # we don't have the resultset for this build/job yet
                         # we need to queue fetching that resultset
@@ -428,9 +428,9 @@ class RunningTransformerMixin(object):
             for revision, jobs in revisions.items():
 
                 try:
-                    project = revisions_lookup[project]
+                    branch = revisions_lookup[project]
                     try:
-                        resultset = project[revision]
+                        resultset = branch[revision]
                     except KeyError:
                         # we don't have the resultset for this build/job yet
                         # we need to queue fetching that resultset
diff --git a/treeherder/etl/cleanup_tasks.py b/treeherder/etl/cleanup_tasks.py
new file mode 100644
index 000000000..a13511e26
--- /dev/null
+++ b/treeherder/etl/cleanup_tasks.py
@@ -0,0 +1,41 @@
+import urllib
+from celery import task, group
+from treeherder.model.derived import RefDataManager
+from .pushlog import MissingHgPushlogProcess
+
+
+@task(name='fetch-missing-push-logs')
+def fetch_missing_push_logs(missing_pushlogs):
+    """
+    Run several fetch_missing_hg_push_logs subtasks, one per repository
+    """
+    rdm = RefDataManager()
+    try:
+        repos = filter(lambda x: x['url'], rdm.get_all_repository_info())
+        # create a group of subtasks and apply them
+        g = group(fetch_missing_hg_push_logs.si(
+            repo['name'],
+            repo['url'],
+            missing_pushlogs[repo['name']]
+        )
+            for repo in repos
+            if repo['dvcs_type'] == 'hg' and repo['name'] in missing_pushlogs)
+        g()
+    finally:
+        rdm.disconnect()
+
+
+@task(name='fetch-missing-hg-push-logs', time_limit=3*60)
+def fetch_missing_hg_push_logs(repo_name, repo_url, revisions):
+    """
+    Run a MissingHgPushlog ETL process
+
+    ``revisions`` is a list of changeset values truncated to 12 chars.
+    """
+    process = MissingHgPushlogProcess()
+
+    # doseq=True yields one changeset=<rev> pair per revision
+    changesetParam = urllib.urlencode({"changeset": revisions}, True)
+    urlStr = repo_url + '/json-pushes/?full=1&' + changesetParam
+
+    process.run(urlStr, repo_name)
diff --git a/treeherder/etl/tasks.py b/treeherder/etl/tasks.py
index 5c97b7abd..1c5f28a9f 100644
--- a/treeherder/etl/tasks.py
+++ b/treeherder/etl/tasks.py
@@ -10,7 +10,7 @@ from .buildapi import (RunningJobsProcess,
                        Builds4hAnalyzer)
 from .bugzilla import BzApiBugProcess
 from .tbpl import OrangeFactorBugRequest, TbplBugRequest, BugzillaBugRequest
-from .pushlog import HgPushlogProcess, MissingHgPushlogProcess
+from .pushlog import HgPushlogProcess
 
 
 @task(name='fetch-buildapi-pending', time_limit=3*60)
@@ -67,41 +67,6 @@ def fetch_hg_push_log(repo_name, repo_url):
     process.run(repo_url + '/json-pushes/?full=1', repo_name)
 
 
-@task(name='fetch-missing-push-logs')
-def fetch_missing_push_logs(missing_pushlogs):
-    """
-    Run several fetch_hg_push_log subtasks, one per repository
-    """
-    rdm = RefDataManager()
-    try:
-        repos = filter(lambda x: x['url'], rdm.get_all_repository_info())
-        # create a group of subtasks and apply them
-        g = group(fetch_missing_hg_push_logs.si(
-            repo['name'],
-            repo['url'],
-            missing_pushlogs[repo['name']]
-        )
-            for repo in repos if repo['dvcs_type'] == 'hg' and repo['name'] in missing_pushlogs)
-        g()
-    finally:
-        rdm.disconnect()
-
-
-@task(name='fetch-missing-hg-push-logs', time_limit=3*60)
-def fetch_missing_hg_push_logs(repo_name, repo_url, revisions):
-    """
-    Run a HgPushlog etl process
-
-    ``revisions`` is a list of changeset values truncated to 12 chars.
-    """
-    process = MissingHgPushlogProcess()
-
-    changesetParam = urllib.urlencode({"changeset", revisions})
-    urlStr = repo_url + '/json-pushes/?full=1&' + changesetParam
-
-    process.run(urlStr, repo_name)
-
-
 @task(name='fetch-bugs', time_limit=10 * 60)
 def fetch_bugs():
     """
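
A note on the urlencode call in cleanup_tasks.py: the code as removed from tasks.py passed a set literal, urllib.urlencode({"changeset", revisions}), which raises TypeError before any request is made, because a list is unhashable and cannot go into a set; the new file uses a dict plus doseq=True instead. A minimal Python 2 sketch of what that produces, with made-up 12-char revision values:

    import urllib

    # hypothetical truncated hg changeset ids
    revisions = ['0123456789ab', 'ba9876543210']

    # doseq=True expands the list into one changeset= pair per value
    print urllib.urlencode({'changeset': revisions}, True)
    # changeset=0123456789ab&changeset=ba9876543210

hg's pushlog accepts the changeset parameter repeated, one value per requested push, which is why the repeated-pair form is used here.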
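With the cycle broken, imports flow one way: buildapi.py takes fetch_missing_push_logs from cleanup_tasks.py, while tasks.py keeps importing its processes from buildapi.py. A hedged sketch of driving the moved task from the transformer side, assuming a mapping of project name to missing 12-char revisions as built in buildapi.py (the project name and revision below are invented for illustration):

    from collections import defaultdict

    from treeherder.etl.cleanup_tasks import fetch_missing_push_logs

    # the pending/running transformers accumulate missing revisions per project
    missing_revisions = defaultdict(list)
    missing_revisions['mozilla-central'].append('0123456789ab')  # hypothetical

    # enqueue the celery task; it fans out one fetch_missing_hg_push_logs
    # subtask per hg repository named in the mapping
    fetch_missing_push_logs.delay(dict(missing_revisions))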