gecko-dev/taskcluster/taskgraph/util/schema.py

228 строки
7.6 KiB
Python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
import re
import pprint
import collections
import voluptuous
from six import text_type, iteritems
import taskgraph
from mozbuild import schedules
from .keyed_by import evaluate_keyed_by
def validate_schema(schema, obj, msg_prefix):
    """
    Check `obj` against `schema` and raise a readable error if it is rejected.

    Validation is skipped entirely when `taskgraph.fast` is set.  When the
    schema raises `voluptuous.MultipleInvalid`, a plain `Exception` is raised
    instead whose message consists of `msg_prefix`, then one line per
    individual validation error, then a pretty-printed dump of `obj`.
    """
    if taskgraph.fast:
        return

    try:
        schema(obj)
    except voluptuous.MultipleInvalid as exc:
        # One line for the prefix, one per error, and finally the offending
        # object itself so the failure can be diagnosed from the message alone.
        report = [msg_prefix] + [str(error) for error in exc.errors]
        report.append(pprint.pformat(obj))
        raise Exception('\n'.join(report))
def optionally_keyed_by(*arguments):
    """
    Build a schema for a value that may optionally be keyed by some fields.

    The last positional argument is the schema of the plain value; every
    argument before it is the name of a field the value may be keyed by.
    For example::

        'some-value': optionally_keyed_by(
            'test-platform', 'build-platform',
            Any('a', 'b', 'c'))

    The resulting schema accepts the plain value as well as nested
    `by-test-platform` / `by-build-platform` dictionaries, in either order.
    """
    fields, schema = arguments[:-1], arguments[-1]

    # Each pass wraps the schema built so far in
    # Any(schema, {by-fld1: ...}, {by-fld2: ...}, ...), adding one more
    # permitted level of nesting.  The loop runs once per argument (the
    # number of fields plus one), so nesting is bounded rather than infinite.
    for _ in range(len(arguments)):
        keyed_alternatives = [
            {'by-' + field: {text_type: schema}} for field in fields
        ]
        schema = voluptuous.Any(schema, *keyed_alternatives)
    return schema
def resolve_keyed_by(item, field, item_name, **extra_values):
    """
    Resolve a possibly keyed-by value inside `item`, in place.

    `field` names the value to resolve, using dotted notation to descend
    through nested dictionaries.  If the value is present it is replaced by
    whatever `evaluate_keyed_by` returns for it; `item` is modified directly
    and also returned.  If the path cannot be followed (a component is
    missing, or an intermediate value is not a dict), `item` is returned
    unchanged.

    For example, given item::

        job:
            test-platform: linux128
            chunks:
                by-test-platform:
                    macosx-10.11/debug: 13
                    win.*: 6
                    default: 12

    a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])`
    would mutate item in-place to::

        job:
            test-platform: linux128
            chunks: 12

    `item_name` is only used to build the description handed to
    `evaluate_keyed_by` for its error messages.

    Any `extra_values` are merged over the top-level keys of `item` to form
    the attributes that by-<field> lookups can refer to.

    Values can be nested as deeply as the schema allows::

        chunks:
            by-test-platform:
                win.*:
                    by-project:
                        ash: ..
                        cedar: ..
                linux: 13
                default: 12
    """
    # Split the dotted path into the dictionaries to descend through and the
    # final key whose value gets resolved.
    path = field.split('.')
    leaf = path.pop()

    node = item
    for key in path:
        if key not in node:
            return item
        node = node[key]
        if not isinstance(node, dict):
            return item

    if leaf not in node:
        return item

    node[leaf] = evaluate_keyed_by(
        value=node[leaf],
        item_name="`{}` in `{}`".format(field, item_name),
        attributes=dict(item, **extra_values),
    )
    return item
# Identifiers in YAML schemas are expected to be dashed by default.  Where
# part of a schema has a good reason to use a different format, it can be
# exempted here.  Each entry is a predicate that receives a schema path string
# and returns True when identifiers at that path are exempt.
WHITELISTED_SCHEMA_IDENTIFIERS = [
    # upstream-artifacts are handed directly to scriptWorker, which expects interCaps
    lambda path: "[{!r}]".format(u'upstream-artifacts') in path,
    # these underscore-style keys are exempt as well (the reason is not recorded here)
    lambda path: any(
        "[{!r}]".format(key) in path
        for key in (u'test_name', u'json_location', u'video_location')
    ),
]
def check_schema(schema):
identifier_re = re.compile('^[a-z][a-z0-9-]*$')
def whitelisted(path):
return any(f(path) for f in WHITELISTED_SCHEMA_IDENTIFIERS)
def iter(path, sch):
def check_identifier(path, k):
if k in (text_type, text_type, voluptuous.Extra):
pass
elif isinstance(k, text_type):
if not identifier_re.match(k) and not whitelisted(path):
raise RuntimeError(
'YAML schemas should use dashed lower-case identifiers, '
'not {!r} @ {}'.format(k, path))
elif isinstance(k, (voluptuous.Optional, voluptuous.Required)):
check_identifier(path, k.schema)
elif isinstance(k, voluptuous.Any):
for v in k.validators:
check_identifier(path, v)
elif not whitelisted(path):
raise RuntimeError(
'Unexpected type in YAML schema: {} @ {}'.format(
type(k).__name__, path))
if isinstance(sch, collections.Mapping):
for k, v in iteritems(sch):
child = "{}[{!r}]".format(path, k)
check_identifier(child, k)
iter(child, v)
elif isinstance(sch, (list, tuple)):
for i, v in enumerate(sch):
iter("{}[{}]".format(path, i), v)
elif isinstance(sch, voluptuous.Any):
for v in sch.validators:
iter(path, v)
iter('schema', schema.schema)
class Schema(voluptuous.Schema):
    """
    A voluptuous.Schema that also applies taskgraph's `check_schema` checks.

    Apart from running `check_schema` on construction and on the result of
    `extend`, it defers to voluptuous.Schema.
    """

    def __init__(self, *args, **kwargs):
        """Build the schema as voluptuous would, then check it."""
        super(Schema, self).__init__(*args, **kwargs)
        check_schema(self)

    def extend(self, *args, **kwargs):
        """Return the extended schema, checked and re-tagged as this class."""
        extended = super(Schema, self).extend(*args, **kwargs)
        check_schema(extended)
        # Force the result to be a `Schema` so that extending it a second
        # time goes through this method, and is therefore checked, as well.
        extended.__class__ = Schema
        return extended

    def __getitem__(self, item):
        """Index straight into the underlying schema definition."""
        return self.schema[item]
# Alternatives accepted for an optimization specification: either `None`, or
# a dict schema for one of the strategy names listed below.  Strategies that
# take components draw the permitted values from `schedules.ALL_COMPONENTS`.
OptimizationSchema = voluptuous.Any(
    # always run this task (default)
    None,
    # always optimize this task
    {'always': None},
    # optimize strategy aliases for build kind
    {'build': list(schedules.ALL_COMPONENTS)},
    {'build-fuzzing': None},
    # search the index for the given index namespaces, and replace this task if found
    # the search occurs in order, with the first match winning
    {'index-search': [text_type]},
    {'push-interval-10': list(schedules.ALL_COMPONENTS)},
    {'push-interval-25': list(schedules.ALL_COMPONENTS)},
    # consult SETA and skip this task if it is low-value
    {'seta': None},
    # skip this task if none of the given file patterns match
    {'skip-unless-changed': [text_type]},
    # skip this task unless the changed files' SCHEDULES contain any of these components
    {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)},
    # optimize strategy aliases for the test kind
    # NOTE(review): the value below is a tuple, not voluptuous.Any(...) --
    # confirm this validates the way it is intended to.
    {'test': (list(schedules.ALL_COMPONENTS), dict)},
    {'test-inclusive': list(schedules.ALL_COMPONENTS)},
    {'test-try': list(schedules.ALL_COMPONENTS)},
)
# Shortcut schema for a string in which task references are allowed: either
# plain text, or a dict carrying the text under a required `task-reference`
# or `artifact-reference` key.
taskref_or_string = voluptuous.Any(
    text_type,
    *[
        {voluptuous.Required(key): text_type}
        for key in ('task-reference', 'artifact-reference')
    ]
)