Bug 1194830: Add a runnable_job API endpoint

This creates a 'runnable_job' table in the database, as well as an API
endpoint at /api/project/{branch}/runnable_jobs listing all existing
buildbot jobs and their symbols. A new daily task 'fetch_allthethings' is
added to update this table.
This commit is contained in:
Alice Scarpa 2015-10-15 14:47:22 -03:00
Родитель 225d606bee
Коммит 5d9e430cac
11 изменённых файлов: 304 добавлений и 3 удалений

Просмотреть файл

@ -4,6 +4,6 @@ worker_pushlog: newrelic-admin run-program celery -A treeherder worker -Q pushlo
worker_buildapi_pending: newrelic-admin run-program celery -A treeherder worker -Q buildapi_pending --maxtasksperchild=20 --concurrency=5
worker_buildapi_running: newrelic-admin run-program celery -A treeherder worker -Q buildapi_running --maxtasksperchild=20 --concurrency=5
worker_buildapi_4hr: newrelic-admin run-program celery -A treeherder worker -Q buildapi_4hr --maxtasksperchild=20 --concurrency=1
worker_default: newrelic-admin run-program celery -A treeherder worker -Q default,cycle_data,calculate_eta,fetch_bugs --maxtasksperchild=50 --concurrency=3
worker_default: newrelic-admin run-program celery -A treeherder worker -Q default,cycle_data,calculate_eta,fetch_bugs,fetch_allthethings --maxtasksperchild=50 --concurrency=3
worker_hp: newrelic-admin run-program celery -A treeherder worker -Q classification_mirroring,publish_to_pulse --maxtasksperchild=50 --concurrency=1
worker_log_parser: newrelic-admin run-program celery -A treeherder worker -Q log_parser_fail,log_parser,log_parser_hp,log_parser_json --maxtasksperchild=50 --concurrency=5

Просмотреть файл

@ -19,6 +19,6 @@ if [ ! -f $LOGFILE ]; then
fi
exec $NEWRELIC_ADMIN celery -A treeherder worker -c 3 \
-Q default,cycle_data,calculate_eta,fetch_bugs,autoclassify,detect_intermittents \
-Q default,cycle_data,calculate_eta,fetch_bugs,autoclassify,detect_intermittents,fetch_allthethings \
-E --maxtasksperchild=500 \
--logfile=$LOGFILE -l INFO -n default.%h

Просмотреть файл

@ -184,6 +184,7 @@ CELERY_QUEUES = (
Queue('buildapi_pending', Exchange('default'), routing_key='buildapi_pending'),
Queue('buildapi_running', Exchange('default'), routing_key='buildapi_running'),
Queue('buildapi_4hr', Exchange('default'), routing_key='buildapi_4hr'),
Queue('fetch_allthethings', Exchange('default'), routing_key='fetch_allthethings'),
Queue('cycle_data', Exchange('default'), routing_key='cycle_data'),
Queue('calculate_eta', Exchange('default'), routing_key='calculate_eta'),
Queue('fetch_bugs', Exchange('default'), routing_key='fetch_bugs'),
@ -232,6 +233,14 @@ CELERYBEAT_SCHEDULE = {
"queue": "buildapi_4hr"
}
},
'fetch-allthethings-every-day': {
'task': 'fetch-allthethings',
'schedule': timedelta(days=1),
'relative': True,
'options': {
'queue': "fetch_allthethings"
}
},
'cycle-data-every-day': {
'task': 'cycle-data',
'schedule': timedelta(days=1),
@ -291,6 +300,7 @@ SITE_URL = os.environ.get("SITE_URL", "http://local.treeherder.mozilla.org")
BUILDAPI_PENDING_URL = "https://secure.pub.build.mozilla.org/builddata/buildjson/builds-pending.js"
BUILDAPI_RUNNING_URL = "https://secure.pub.build.mozilla.org/builddata/buildjson/builds-running.js"
BUILDAPI_BUILDS4H_URL = "https://secure.pub.build.mozilla.org/builddata/buildjson/builds-4hr.js.gz"
ALLTHETHINGS_URL = "https://secure.pub.build.mozilla.org/builddata/reports/allthethings.json"
# the max size of a posted request to treeherder client during Buildbot
# data job ingestion.

Просмотреть файл

@ -0,0 +1,109 @@
import collections
import logging
from hashlib import sha1
from django.conf import settings
from treeherder.etl.buildbot import get_symbols_and_platforms
from treeherder.etl.mixins import JsonExtractorMixin
from treeherder.model.models import (BuildPlatform,
JobGroup,
JobType,
MachinePlatform,
Option,
OptionCollection,
Repository,
RunnableJob)
logger = logging.getLogger(__name__)
class AllthethingsTransformerMixin:
    """Mixin that turns allthethings builder data into per-branch job lists."""

    def transform(self, extracted_content):
        """Group the builders of ``extracted_content`` by branch.

        ``extracted_content`` must provide a ``'builders'`` mapping of
        builder name to builder details, where each details entry carries
        ``['properties']['branch']``.

        Returns a ``defaultdict(list)`` mapping each branch name to the job
        dicts produced by ``get_symbols_and_platforms`` for its builders;
        every job dict additionally gets a ``'branch'`` key.
        """
        logger.info('About to import allthethings.json builder data.')

        grouped_jobs = collections.defaultdict(list)
        all_builders = extracted_content['builders']
        for buildername, builder_details in all_builders.iteritems():
            job_info = get_symbols_and_platforms(buildername)
            branch_name = builder_details['properties']['branch']
            job_info['branch'] = branch_name
            grouped_jobs[branch_name].append(job_info)

        return grouped_jobs
class RunnableJobsProcess(JsonExtractorMixin,
                          AllthethingsTransformerMixin):
    """Import the builders listed at ``settings.ALLTHETHINGS_URL`` as RunnableJob rows."""

    # XXX: Copied from refdata.py. What is the best place for this?
    def get_option_collection_hash(self, options):
        """Return the SHA-1 hex digest identifying a collection of option names.

        The names are sorted before hashing, so the digest does not depend
        on the order in which ``options`` is given.
        """
        ordered_names = sorted(options)
        # Hashing the concatenation is equivalent to feeding every name to
        # update() one after the other.
        return sha1(''.join(ordered_names)).hexdigest()

    def load(self, jobs_per_branch):
        """Store the runnable jobs of every active repository.

        ``jobs_per_branch`` maps a branch name to a list of job dicts (the
        shape returned by ``transform``).  Branches that do not match the
        name of an active repository are ignored.
        """
        for repo in Repository.objects.filter(active_status='active'):
            # Some active repositories might not have any buildbot
            # builders.
            if repo.name in jobs_per_branch:
                for datum in jobs_per_branch[repo.name]:
                    self._load_job(repo, datum)

    def _load_job(self, repo, datum):
        """Create the reference data for one job dict and upsert its RunnableJob."""
        # XXX: refdata.py truncates those fields at 25 characters.
        # Should we do the same?
        build_platform = BuildPlatform.objects.get_or_create(
            os_name=datum['build_os'],
            platform=datum['build_platform'],
            architecture=datum['build_architecture'],
        )[0]

        machine_platform = MachinePlatform.objects.get_or_create(
            os_name=datum['machine_platform_os'],
            platform=datum['platform'],
            architecture=datum['machine_platform_architecture'],
        )[0]

        job_group = JobGroup.objects.get_or_create(
            name=datum['job_group_name'],
            symbol=datum['job_group_symbol'],
        )[0]

        job_type = JobType.objects.get_or_create(
            name=datum['job_type_name'],
            symbol=datum['job_type_symbol'],
            job_group=job_group,
        )[0]

        option_names = datum['option_collection'].keys()
        collection_hash = self.get_option_collection_hash(option_names)

        # Make sure every option exists and is linked to the collection hash.
        for option_name in option_names:
            option = Option.objects.get_or_create(name=option_name)[0]
            OptionCollection.objects.get_or_create(
                option_collection_hash=collection_hash,
                option=option)

        # This automatically updates the last_touched field.
        job_fields = {
            'build_platform': build_platform,
            'machine_platform': machine_platform,
            'job_type': job_type,
            'option_collection_hash': collection_hash,
            'repository': repo,
        }
        RunnableJob.objects.update_or_create(
            ref_data_name=datum['ref_data_name'],
            build_system_type=datum['build_system_type'],
            defaults=job_fields)

    def run(self):
        """Extract, transform and load the data found at ``settings.ALLTHETHINGS_URL``."""
        raw_content = self.extract(settings.ALLTHETHINGS_URL)
        self.load(self.transform(raw_content))

Просмотреть файл

@ -1072,3 +1072,29 @@ def get_symbol(name, bn):
return n
return "{0}{1}".format(s, n)
def get_symbols_and_platforms(buildername):
    """Return a dict with all the information we extract from the buildername.

    The job type/group names and symbols come from ``extract_name_info``,
    the platform fields from ``extract_platform_info`` and the single
    option from ``extract_build_type``.  Any piece of information missing
    from those results defaults to an empty string.
    """
    platform = extract_platform_info(buildername)
    names = extract_name_info(buildername)
    build_type = extract_build_type(buildername)

    # The build and machine platform entries are filled from the same values.
    os_name = platform.get('os', '')
    os_platform = platform.get('os_platform', '')
    architecture = platform.get('arch', '')

    return {
        'job_type_name': names.get('name', ''),
        'job_type_symbol': names.get('job_symbol', ''),
        'job_group_name': names.get('group_name', ''),
        'job_group_symbol': names.get('group_symbol', ''),
        'ref_data_name': buildername,
        'build_platform': os_platform,
        'build_os': os_name,
        'build_architecture': architecture,
        'build_system_type': 'buildbot',
        'machine_platform_architecture': architecture,
        'machine_platform_os': os_name,
        'option_collection': {build_type: True},
        'platform': os_platform,
        'job_coalesced_to_guid': None,
    }

Просмотреть файл

@ -3,6 +3,7 @@ This module contains
"""
from celery import task
from treeherder.etl.allthethings import RunnableJobsProcess
from treeherder.etl.buildapi import (Builds4hJobsProcess,
PendingJobsProcess,
RunningJobsProcess)
@ -34,6 +35,14 @@ def fetch_buildapi_build4h():
Builds4hJobsProcess().run()
@task(name='fetch-allthethings', time_limit=10 * 60)
def fetch_allthethings():
    """
    Celery task: run the RunnableJobsProcess import of allthethings data
    """
    runnable_jobs_process = RunnableJobsProcess()
    runnable_jobs_process.run()
@task(name='fetch-push-logs')
def fetch_push_logs():
"""

Просмотреть файл

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the ``runnable_job`` table backing the RunnableJob model."""

    dependencies = [
        ('model', '0003_auto_20151111_0942'),
    ]

    operations = [
        migrations.CreateModel(
            name='RunnableJob',
            fields=[
                ('id', models.AutoField(serialize=False, primary_key=True)),
                # Plain int literals: the Python 2 only ``L`` suffix is a
                # syntax error on Python 3 and the values are equal.
                ('option_collection_hash', models.CharField(max_length=64)),
                ('ref_data_name', models.CharField(max_length=255)),
                ('build_system_type', models.CharField(max_length=25)),
                ('last_touched', models.DateTimeField(auto_now=True)),
                ('build_platform', models.ForeignKey(to='model.BuildPlatform')),
                ('job_type', models.ForeignKey(to='model.JobType')),
                ('machine_platform', models.ForeignKey(to='model.MachinePlatform')),
                ('repository', models.ForeignKey(to='model.Repository')),
            ],
            options={
                'db_table': 'runnable_job',
            },
        ),
        # A builder name may appear only once per build system.
        migrations.AlterUniqueTogether(
            name='runnablejob',
            unique_together=set([('ref_data_name', 'build_system_type')]),
        ),
    ]

Просмотреть файл

@ -671,3 +671,27 @@ class FailureMatch(models.Model):
unique_together = (
('failure_line', 'classified_failure', 'matcher')
)
@python_2_unicode_compatible
class RunnableJob(models.Model):
    """A job that can be run for a repository, stored in ``runnable_job``.

    A row is identified by its ``ref_data_name`` together with its
    ``build_system_type``.
    """
    id = models.AutoField(primary_key=True)
    build_platform = models.ForeignKey(BuildPlatform)
    machine_platform = models.ForeignKey(MachinePlatform)
    job_type = models.ForeignKey(JobType)
    # Plain int literals: the Python 2 only ``L`` suffix is a syntax error
    # on Python 3 and the values are equal.
    option_collection_hash = models.CharField(max_length=64)
    ref_data_name = models.CharField(max_length=255)
    build_system_type = models.CharField(max_length=25)
    repository = models.ForeignKey(Repository)
    # auto_now: refreshed every time the row is saved.
    last_touched = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'runnable_job'
        # A single flat tuple: Django accepts this form for one set of
        # fields that must be unique together.
        unique_together = (
            ('ref_data_name', 'build_system_type')
        )

    def __str__(self):
        return "{0} {1} {2}".format(self.id,
                                    self.ref_data_name,
                                    self.build_system_type)

Просмотреть файл

@ -108,6 +108,22 @@ def publish_resultset_action(project, action, resultset_id, requester, times=1):
)
@task(name='publish-resultset-runnable-job-action')
def publish_resultset_runnable_job_action(project, resultset_id, requester,
                                          buildernames):
    """Publish a runnable-job action for a resultset through the pulse publisher.

    Does nothing when ``pulse_connection.get_publisher()`` yields no
    publisher.
    """
    publisher = pulse_connection.get_publisher()
    if publisher:
        message_fields = dict(
            version=1,
            project=project,
            requester=requester,
            resultset_id=resultset_id,
            buildernames=buildernames,
        )
        publisher.resultset_runnable_job_action(**message_fields)
@task(name='publish-resultset')
def publish_resultset(project, ids):
# If we don't have a publisher (because of missing configs), then we can't

Просмотреть файл

@ -0,0 +1,65 @@
import datetime
from rest_framework import viewsets
from rest_framework.response import Response
from treeherder.model import models
class RunnableJobsViewSet(viewsets.ViewSet):
    """
    This viewset is responsible for the runnable_jobs endpoint.
    """

    def list(self, request, project):
        """
        GET method implementation for list of all runnable buildbot jobs

        Only the jobs of the repository named ``project`` whose
        ``last_touched`` timestamp is at most one week old are listed.
        Responds with a 404 when no repository is named ``project``.
        """
        try:
            repository = models.Repository.objects.get(name=project)
        except models.Repository.DoesNotExist:
            # An unknown project is a client error, not a server failure.
            return Response(
                {"detail": "No project with name {0}".format(project)},
                status=404)

        # Index the option names by collection hash once, instead of
        # scanning every option row again for each runnable job.
        option_rows = models.OptionCollection.objects.all().select_related(
            'option').values_list('option__name', 'option_collection_hash')
        option_names_by_hash = {}
        for option_name, col_hash in option_rows:
            option_names_by_hash.setdefault(col_hash, []).append(option_name)

        one_week_ago = datetime.datetime.now() - datetime.timedelta(weeks=1)
        runnable_jobs = models.RunnableJob.objects.filter(
            repository=repository,
            last_touched__gte=one_week_ago
        ).select_related('build_platform', 'machine_platform',
                         'job_type', 'job_type__job_group')

        ret = []
        for datum in runnable_jobs:
            # Empty string when the hash has no known options.
            options = ' '.join(
                option_names_by_hash.get(datum.option_collection_hash, []))

            ret.append({
                'build_platform_id': datum.build_platform.id,
                'build_platform': datum.build_platform.platform,
                'build_os': datum.build_platform.os_name,
                'build_architecture': datum.build_platform.architecture,
                'machine_platform_id': datum.machine_platform.id,
                'platform': datum.machine_platform.platform,
                'machine_platform_os': datum.machine_platform.os_name,
                'machine_platform_architecture': datum.machine_platform.architecture,
                'job_group_id': datum.job_type.job_group.id,
                'job_group_name': datum.job_type.job_group.name,
                'job_group_symbol': datum.job_type.job_group.symbol,
                'job_group_description': datum.job_type.job_group.description,
                'job_type_id': datum.job_type.id,
                'job_type_name': datum.job_type.name,
                'job_type_symbol': datum.job_type.symbol,
                'job_type_description': datum.job_type.description,
                'option_collection_hash': datum.option_collection_hash,
                'ref_data_name': datum.ref_data_name,
                'build_system_type': datum.build_system_type,
                'platform_option': options,
                'job_coalesced_to_guid': None,
                'state': 'runnable',
                'result': 'runnable'})

        response_body = dict(meta={"repository": project,
                                   "offset": 0,
                                   "count": len(ret)},
                             results=ret)

        return Response(response_body)

Просмотреть файл

@ -11,7 +11,8 @@ from treeherder.webapp.api import (artifact,
note,
performance_data,
refdata,
resultset)
resultset,
runnable_jobs)
# router for views that are bound to a project
# i.e. all those views that don't involve reference data
@ -23,6 +24,12 @@ project_bound_router.register(
base_name='jobs',
)
# Expose RunnableJobsViewSet on the project-bound router under the
# 'runnable_jobs' prefix.
project_bound_router.register(
    r'runnable_jobs',
    runnable_jobs.RunnableJobsViewSet,
    base_name='runnable_jobs',
)
project_bound_router.register(
r'resultset',
resultset.ResultSetViewSet,