Merge ingest_push into ingest management command (#6197)

This commit is contained in:
Suyash Salampuria 2020-04-08 02:08:53 +05:30 коммит произвёл GitHub
Родитель e60d301775
Коммит d300d2424f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 34 добавлений и 95 удалений

Просмотреть файл

@ -166,7 +166,7 @@ Ingest a single Mercurial push or the last N pushes:
```console
docker-compose exec backend ./manage.py ingest push -p autoland -r 63f8a47cfdf5
docker-compose exec backend ./manage.py ingest_push mozilla-central --last-n-pushes 100
docker-compose exec backend ./manage.py ingest mozilla-central --last-n-pushes 100
```
Ingest a single Github push or the last 10:

Просмотреть файл

@ -11,14 +11,16 @@ import taskcluster
import taskcluster.aio
import taskcluster_urls as liburls
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import (BaseCommand,
CommandError)
from django.db import connection
from treeherder.client.thclient import TreeherderClient
from treeherder.config.settings import GITHUB_TOKEN
from treeherder.etl.job_loader import JobLoader
from treeherder.etl.push_loader import PushLoader
from treeherder.etl.pushlog import HgPushlogProcess
from treeherder.etl.pushlog import (HgPushlogProcess,
last_push_id_from_server)
from treeherder.etl.taskcluster_pulse.handler import (EXCHANGE_EVENT_MAP,
handleMessage)
from treeherder.model.models import Repository
@ -376,6 +378,11 @@ class Command(BaseCommand):
action="store_true",
help="Do not make changes to the database"
)
parser.add_argument(
"--last-n-pushes",
type=int,
help="fetch the last N pushes from the repository"
)
def handle(self, *args, **options):
typeOfIngestion = options["ingestion_type"][0]
@ -425,12 +432,23 @@ class Command(BaseCommand):
# get reference to repo and ingest this particular revision for this project
project = options["project"]
commit = options["commit"]
repo = Repository.objects.get(name=project, active_status="active")
pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url
process = HgPushlogProcess()
process.run(pushlog_url, project, changeset=commit, last_push_id=None)
if options["ingest_all_tasks"]:
if not options['last_n_pushes'] and not commit:
raise CommandError('must specify --last_n_pushes or a positional commit argument')
elif options['last_n_pushes'] and options['ingest_all_tasks']:
raise CommandError('Can\'t specify last_n_pushes and ingest_all_tasks at same time')
elif options['last_n_pushes'] and options['commit']:
raise CommandError('Can\'t specify last_n_pushes and commit/revision at the same time')
# get reference to repo
repo = Repository.objects.get(name=project, active_status="active")
fetch_push_id = None
if options['last_n_pushes']:
last_push_id = last_push_id_from_server(repo)
fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
logger.info('last server push id: %d; fetching push %d and newer',
last_push_id, fetch_push_id)
elif options["ingest_all_tasks"]:
gecko_decision_task = get_decision_task_id(project, commit, repo.tc_root_url)
logger.info("## START ##")
loop.run_until_complete(processTasks(gecko_decision_task, repo.tc_root_url))
@ -439,3 +457,11 @@ class Command(BaseCommand):
logger.info(
"You can ingest all tasks for a push with -a/--ingest-all-tasks."
)
# get hg pushlog
pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url
# ingest this particular revision for this project
process = HgPushlogProcess()
# Use the actual push SHA, in case the changeset specified was a tag
# or branch name (eg tip). HgPushlogProcess returns the full SHA.
process.run(pushlog_url, project, changeset=commit, last_push_id=fetch_push_id)

Просмотреть файл

@ -1,87 +0,0 @@
import logging
from cProfile import Profile
from django.conf import settings
from django.core.management.base import (BaseCommand,
CommandError)
from treeherder.etl.pushlog import (HgPushlogProcess,
last_push_id_from_server)
from treeherder.model.models import Repository
logger = logging.getLogger(__name__)
class Command(BaseCommand):
"""Management command to ingest data from a single push."""
help = "Ingests a single push into treeherder"
def add_arguments(self, parser):
parser.add_argument(
'--profile-file',
help='Profile command and write result to profile file'
)
parser.add_argument(
'--last-n-pushes',
type=int,
help='fetch the last N pushes from the repository'
)
parser.add_argument(
'project',
help='repository to query'
)
parser.add_argument(
'changeset',
nargs='?',
help='changeset to import'
)
def _handle(self, *args, **options):
project = options['project']
changeset = options['changeset']
if not options['last_n_pushes'] and not changeset:
raise CommandError('must specify --last-n-pushes or a positional '
'changeset argument')
# get reference to repo
repo = Repository.objects.get(name=project, active_status='active')
if options['last_n_pushes']:
last_push_id = last_push_id_from_server(repo)
fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
logger.info('last server push id: %d; fetching push %d and newer',
last_push_id, fetch_push_id)
else:
fetch_push_id = None
# make sure all tasks are run synchronously / immediately
settings.CELERY_TASK_ALWAYS_EAGER = True
# get hg pushlog
pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url
# ingest this particular revision for this project
process = HgPushlogProcess()
# Use the actual push SHA, in case the changeset specified was a tag
# or branch name (eg tip). HgPushlogProcess returns the full SHA.
process.run(pushlog_url, project, changeset=changeset, last_push_id=fetch_push_id)
# Only perform additional processing if fetching a single changeset
# because we only have the sha1 if the tip-most push in "last N pushes"
# mode and can't filter appropriately.
if not fetch_push_id:
raise CommandError(
'This command is not yet able to ingest Taskcluster jobs automatically. '
'Please manually configure pulse job ingestion using this guide: '
'https://treeherder.readthedocs.io/pulseload.html'
)
def handle(self, *args, **options):
if options['profile_file']:
profiler = Profile()
profiler.runcall(self._handle, *args, **options)
profiler.dump_stats(options['profile_file'])
else:
self._handle(*args, **options)