diff --git a/docs/installation.md b/docs/installation.md index 6b1474d7d..bf33b45f2 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -166,7 +166,7 @@ Ingest a single Mercurial push or the last N pushes: ```console docker-compose exec backend ./manage.py ingest push -p autoland -r 63f8a47cfdf5 -docker-compose exec backend ./manage.py ingest_push mozilla-central --last-n-pushes 100 +docker-compose exec backend ./manage.py ingest mozilla-central --last-n-pushes 100 ``` Ingest a single Github push or the last 10: diff --git a/treeherder/etl/management/commands/ingest.py b/treeherder/etl/management/commands/ingest.py index a1a5675db..e0a76253b 100644 --- a/treeherder/etl/management/commands/ingest.py +++ b/treeherder/etl/management/commands/ingest.py @@ -11,14 +11,16 @@ import taskcluster import taskcluster.aio import taskcluster_urls as liburls from django.conf import settings -from django.core.management.base import BaseCommand +from django.core.management.base import (BaseCommand, + CommandError) from django.db import connection from treeherder.client.thclient import TreeherderClient from treeherder.config.settings import GITHUB_TOKEN from treeherder.etl.job_loader import JobLoader from treeherder.etl.push_loader import PushLoader -from treeherder.etl.pushlog import HgPushlogProcess +from treeherder.etl.pushlog import (HgPushlogProcess, + last_push_id_from_server) from treeherder.etl.taskcluster_pulse.handler import (EXCHANGE_EVENT_MAP, handleMessage) from treeherder.model.models import Repository @@ -376,6 +378,11 @@ class Command(BaseCommand): action="store_true", help="Do not make changes to the database" ) + parser.add_argument( + "--last-n-pushes", + type=int, + help="fetch the last N pushes from the repository" + ) def handle(self, *args, **options): typeOfIngestion = options["ingestion_type"][0] @@ -425,12 +432,23 @@ class Command(BaseCommand): # get reference to repo and ingest this particular revision for this project project = options["project"] commit = options["commit"] - repo = Repository.objects.get(name=project, active_status="active") - pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url - process = HgPushlogProcess() - process.run(pushlog_url, project, changeset=commit, last_push_id=None) - if options["ingest_all_tasks"]: + if not options['last_n_pushes'] and not commit: + raise CommandError('must specify --last_n_pushes or a positional commit argument') + elif options['last_n_pushes'] and options['ingest_all_tasks']: + raise CommandError('Can\'t specify last_n_pushes and ingest_all_tasks at same time') + elif options['last_n_pushes'] and options['commit']: + raise CommandError('Can\'t specify last_n_pushes and commit/revision at the same time') + # get reference to repo + repo = Repository.objects.get(name=project, active_status="active") + fetch_push_id = None + + if options['last_n_pushes']: + last_push_id = last_push_id_from_server(repo) + fetch_push_id = max(1, last_push_id - options['last_n_pushes']) + logger.info('last server push id: %d; fetching push %d and newer', + last_push_id, fetch_push_id) + elif options["ingest_all_tasks"]: gecko_decision_task = get_decision_task_id(project, commit, repo.tc_root_url) logger.info("## START ##") loop.run_until_complete(processTasks(gecko_decision_task, repo.tc_root_url)) @@ -439,3 +457,11 @@ class Command(BaseCommand): logger.info( "You can ingest all tasks for a push with -a/--ingest-all-tasks." ) + + # get hg pushlog + pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url + # ingest this particular revision for this project + process = HgPushlogProcess() + # Use the actual push SHA, in case the changeset specified was a tag + # or branch name (eg tip). HgPushlogProcess returns the full SHA. + process.run(pushlog_url, project, changeset=commit, last_push_id=fetch_push_id) diff --git a/treeherder/etl/management/commands/ingest_push.py b/treeherder/etl/management/commands/ingest_push.py deleted file mode 100644 index a34110076..000000000 --- a/treeherder/etl/management/commands/ingest_push.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging -from cProfile import Profile - -from django.conf import settings -from django.core.management.base import (BaseCommand, - CommandError) - -from treeherder.etl.pushlog import (HgPushlogProcess, - last_push_id_from_server) -from treeherder.model.models import Repository - -logger = logging.getLogger(__name__) - - -class Command(BaseCommand): - """Management command to ingest data from a single push.""" - help = "Ingests a single push into treeherder" - - def add_arguments(self, parser): - parser.add_argument( - '--profile-file', - help='Profile command and write result to profile file' - ) - parser.add_argument( - '--last-n-pushes', - type=int, - help='fetch the last N pushes from the repository' - ) - parser.add_argument( - 'project', - help='repository to query' - ) - parser.add_argument( - 'changeset', - nargs='?', - help='changeset to import' - ) - - def _handle(self, *args, **options): - project = options['project'] - changeset = options['changeset'] - - if not options['last_n_pushes'] and not changeset: - raise CommandError('must specify --last-n-pushes or a positional ' - 'changeset argument') - - # get reference to repo - repo = Repository.objects.get(name=project, active_status='active') - - if options['last_n_pushes']: - last_push_id = last_push_id_from_server(repo) - fetch_push_id = max(1, last_push_id - options['last_n_pushes']) - logger.info('last server push id: %d; fetching push %d and newer', - last_push_id, fetch_push_id) - else: - fetch_push_id = None - - # make sure all tasks are run synchronously / immediately - settings.CELERY_TASK_ALWAYS_EAGER = True - - # get hg pushlog - pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url - - # ingest this particular revision for this project - process = HgPushlogProcess() - # Use the actual push SHA, in case the changeset specified was a tag - # or branch name (eg tip). HgPushlogProcess returns the full SHA. - process.run(pushlog_url, project, changeset=changeset, last_push_id=fetch_push_id) - - # Only perform additional processing if fetching a single changeset - # because we only have the sha1 if the tip-most push in "last N pushes" - # mode and can't filter appropriately. - if not fetch_push_id: - raise CommandError( - 'This command is not yet able to ingest Taskcluster jobs automatically. ' - 'Please manually configure pulse job ingestion using this guide: ' - 'https://treeherder.readthedocs.io/pulseload.html' - ) - - def handle(self, *args, **options): - - if options['profile_file']: - profiler = Profile() - profiler.runcall(self._handle, *args, **options) - profiler.dump_stats(options['profile_file']) - else: - self._handle(*args, **options)