Bug 1169320, 1199506 - Adds requirements, settings and JSON schema for Pulse Ingestion

This commit is contained in:
Cameron Dawson 2015-09-25 13:29:35 -07:00
Родитель 834f0d090c
Коммит d23435f8ed
4 изменённых файлов: 328 добавлений и 4 удалений

Просмотреть файл

@ -123,3 +123,6 @@ mohawk==0.3.0
# sha256: 93Oaer_oUL3Wk1F803LwmvJRB-p-t0vr7NuAEWmPA-4
hawkrest==0.0.6
# sha256: KuY89HXwvQSbci-sIIE9Yq7cFJV91aO_ANEg0rVARGA
python-dateutil==2.4.2

282
schemas/pulse-job.yml Normal file
Просмотреть файл

@ -0,0 +1,282 @@
$schema: "http://json-schema.org/draft-04/schema#"
title: "Job Definition"
description: |
Definition of a single job that can be added to Treeherder
Project is determined by the routing key, so we don't need to specify it here.
id: "jobDefinition"
type: "object"
properties:
jobGuid:
title: "jobGuid"
type: "string"
pattern: "^[A-Za-z0-9_/+-]+$"
minLength: 1
maxLength: 50
origin:
anyOf:
- type: "object"
properties:
kind:
type: "string"
enum: ['hg.mozilla.org']
project:
type: "string"
pattern: "^[A-Za-z0-9_-]+$"
minLength: 1
maxLength: 50
revision:
type: "string"
pattern: "^[0-9a-f]+$"
minLength: 12
maxLength: 40
pushLogID:
type: "integer"
required: [kind, project, revision]
- type: "object"
  properties:
    kind:
      type: "string"
      enum: ['github.com']
    project:
      type: "string"
      # Repository names are not hexadecimal strings; accept the same
      # character set as the hg.mozilla.org variant of ``origin``.
      pattern: "^[A-Za-z0-9_-]+$"
      minLength: 1
      maxLength: 50
    revision:
      type: "string"
      # The commit id: exactly 40 lowercase hexadecimal characters.
      pattern: "^[0-9a-f]+$"
      minLength: 40
      maxLength: 40
    pullRequestID:
      type: "integer"
  required: [kind, project, revision]
display:
type: "object"
properties:
jobSymbol:
title: "jobSymbol"
type: "string"
# spaces and "?" are not valid for job symbols
pattern: "^[A-Za-z0-9._-]+$"
minLength: 1
maxLength: 25
groupSymbol:
title: "group symbol"
type: "string"
# spaces not valid for group symbols
pattern: "^[A-Za-z0-9/?_-]+$"
minLength: 1
maxLength: 25
# could do without these if we require job type and group to exist prior
jobName:
title: "job name"
type: "string"
minLength: 1
maxLength: 100
groupName:
title: "group name"
type: "string"
pattern: "^[A-Za-z0-9_-]+$"
minLength: 1
maxLength: 100
required:
- jobSymbol
- groupSymbol
state:
title: "state"
description: |
unscheduled: not yet scheduled
pending: not yet started
running: currently in progress
completed: Job ran through to completion
type: "string"
enum:
- unscheduled
- pending
- running
- completed
result:
title: "result"
description: |
fail: A failure
exception: An infrastructure error/exception
success: Build/Test executed without error or failure
canceled: The job was cancelled by a user
unknown: When the job is not yet completed
type: "string"
enum:
- success
- fail
- exception
- canceled
- unknown
jobKind:
type: "string"
enum:
- build
- test
tier:
type: "integer"
minimum: 1
maximum: 3
isRetried:
description: True indicates this job has been retried.
type: "boolean"
coalesced:
title: "coalesced"
type: "array"
items:
title: "job guid"
type: "string"
pattern: "^[A-Za-z0-9_/+-]+$"
minLength: 1
maxLength: 50
# time data
timeScheduled:
type: "string"
format: "date-time"
timeStarted:
type: "string"
format: "date-time"
timeCompleted:
type: "string"
format: "date-time"
optionCollection:
title: "option collection"
description: |
Options are a dimension of a platform. The values here can vary wildly,
so most strings are valid for this. The list of options that are used
is malleable going forward.
Some examples of options that have been used:
opt Optimize Compiler GCC optimize flags
debug Debug flags passed in
pgo Profile Guided Optimization - Like opt, but runs with profiling, then builds again using that profiling
asan Address Sanitizer
tsan Thread Sanitizer Build
type: "array"
items:
type: "string"
minLength: 1
maxLength: 50
pattern: "^[A-Za-z0-9_-]+$"
who:
title: "who"
format: "email"
type: "string"
minLength: 1
maxLength: 50
reason:
description: |
Examples include:
- scheduled
- scheduler
- Self-serve: Rebuilt by foo@example.com
- Self-serve: Requested by foo@example.com
- The Nightly scheduler named 'b2g_mozilla-inbound periodic' triggered this build
- unknown
type: "string"
minLength: 1
maxLength: 125
productName:
description: |
Examples include:
- 'b2g'
- 'firefox'
- 'taskcluster'
- 'xulrunner'
type: "string"
minLength: 1
maxLength: 125
buildMachine:
$ref: "#/definitions/machine"
runMachine:
$ref: "#/definitions/machine"
artifacts:
type: "array"
items:
type: "object"
properties:
type:
type: "string"
enum: ["json", "text"]
name:
description: |
The artifact name can be anything. But when the name ``text_log_summary`` is used
then treeherder uses that as the summary file for the Log Viewer in the UI.
type: "string"
minLength: 1
maxLength: 50
blob:
type: "string"
minLength: 1
required:
- type
- name
- blob
logs:
type: "array"
items:
type: "object"
properties:
url:
type: "string"
format: "uri"
minLength: 1
maxLength: 255
name:
type: "string"
minLength: 1
maxLength: 50
required: [url, name]
additionalProperties: false
required:
- jobGuid
- origin
- display
- state
- jobKind
definitions:
machine:
type: "object"
properties:
name:
type: "string"
pattern: "^[A-Za-z0-9_-]+$"
minLength: 1
maxLength: 50
platform:
type: "string"
pattern: "^[A-Za-z0-9_-]+$"
minLength: 1
maxLength: 25
os:
type: "string"
pattern: "^[A-Za-z0-9_-]+$"
minLength: 1
maxLength: 25
architecture:
type: "string"
pattern: "^[A-Za-z0-9_-]+$"
minLength: 1
maxLength: 25
required:
- name
- platform
- os
- architecture

15
treeherder/etl/schema.py Normal file
Просмотреть файл

@ -0,0 +1,15 @@
import os
import yaml
def get_json_schema(filename):
    """
    Load a JSON Schema that is stored as a YAML file.

    The file is looked up inside the ``schemas`` directory.  The path is
    relative, so it is resolved against the current working directory of
    the running process.

    :param filename: name of the schema file inside ``schemas``,
        e.g. ``"pulse-job.yml"``.
    :returns: the parsed top-level YAML document.
    :raises IOError: if the file cannot be opened.
    :raises yaml.YAMLError: if the file does not contain valid YAML.
    """
    file_path = os.path.join("schemas", filename)
    with open(file_path) as f:
        # safe_load only builds plain Python types (dict, list, str, ...).
        # yaml.load with the default loader can construct arbitrary objects
        # from tagged nodes, which a schema file never needs.
        return yaml.safe_load(f)
# Load the Pulse job schema once, at module import time.
job_json_schema = get_json_schema("pulse-job.yml")

Просмотреть файл

@ -181,7 +181,8 @@ CELERY_QUEUES = (
Queue('buildapi_4hr', Exchange('default'), routing_key='buildapi_4hr'),
Queue('cycle_data', Exchange('default'), routing_key='cycle_data'),
Queue('calculate_eta', Exchange('default'), routing_key='calculate_eta'),
Queue('fetch_bugs', Exchange('default'), routing_key='fetch_bugs')
Queue('fetch_bugs', Exchange('default'), routing_key='fetch_bugs'),
Queue('store_pulse_jobs', Exchange('default'), routing_key='store_pulse_jobs')
)
CELERY_ACCEPT_CONTENT = ['json']
@ -333,6 +334,10 @@ PULSE_EXCHANGE_NAMESPACE = None
# will be updated as new applications come online that Treeherder supports.
# Can be overridden in local.py to specify fewer or completely different
# exchanges for testing purposes on local machines.
# Treeherder will subscribe with routing keys that are all combinations of
# ``project`` and ``destination`` in the form of:
# <project>.<destination>
# Wildcards such as ``#`` and ``*`` are supported for either field.
PULSE_DATA_INGESTION_EXCHANGES = env.json(
"PULSE_DATA_INGESTION_EXCHANGES",
default=[
@ -342,23 +347,42 @@ PULSE_DATA_INGESTION_EXCHANGES = env.json(
# 'mozilla-central',
# 'mozilla-inbound'
# # other repos TC can submit to
# ],
# "destinations": [
# 'production',
# 'staging'
# ]
# },
# {
# "name": "exchange/treeherder-test/jobs",
# "projects": [
# 'mozilla-inbound'
# ],
# "destinations": [
# 'production',
# 'staging'
# ]
#
# }
# ... other CI systems
])
# Used to specify the PulseGuardian account that will be used to create
# ingestion queues for the exchanges specified in ``PULSE_DATA_INGESTION_EXCHANGES``.
# See https://pulse.mozilla.org/whats_pulse for more info
# Example: "amqp://treeherder-test:thpulsesekrit6@pulse.mozilla.org:5672//"
# See https://pulse.mozilla.org/whats_pulse for more info.
# Example: "amqp://myuserid:mypassword@pulse.mozilla.org:5672/"
PULSE_DATA_INGESTION_CONFIG = env.url("PULSE_DATA_INGESTION_CONFIG", default="")
PULSE_QUEUE_USERID = env("PULSE_QUEUE_USERID", default="")
# Whether the Queues created for pulse ingestion are durable or not.
# For local data ingestion, you probably should set this to False
# Read explicitly as a boolean so that an environment value such as "False"
# is parsed as a boolean rather than relying on an implicit cast.
PULSE_DATA_INGESTION_QUEUES_DURABLE = env.bool(
    "PULSE_DATA_INGESTION_QUEUES_DURABLE", default=True)
# Whether the Queues created for pulse ingestion auto-delete after connections
# are closed.
# For local data ingestion, you probably should set this to True
# Read explicitly as a boolean so that an environment value such as "False"
# is parsed as a boolean rather than relying on an implicit cast.
PULSE_DATA_INGESTION_QUEUES_AUTO_DELETE = env.bool(
    "PULSE_DATA_INGESTION_QUEUES_AUTO_DELETE", default=False)
# Note: All the configs below this import will take precedence over what is
# defined in local.py!