mlos_bench config json schema validation: optimizers and tunable_values (#340)

This PR introduces initial json schemas for mlos_bench optimizer configs and tunable_values configs, their validation at load time, and tests for both.

Future PRs will handle other config types.
See Also: #331
This commit is contained in:
Brian Kroth 2023-05-15 16:30:36 -05:00 коммит произвёл GitHub
Родитель 899f2150e8
Коммит db1a0e359f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
88 изменённых файлов: 1445 добавлений и 54 удалений

Просмотреть файл

@ -27,6 +27,7 @@
"iloc",
"ipykernel",
"iterrows",
"jsonschema",
"jupyterlab",
"kwargs",
"libmamba",
@ -58,6 +59,8 @@
"sklearn",
"skopt",
"sqlalchemy",
"subschema",
"subschemas",
"tolist",
"tunables",
"xlabel",

Просмотреть файл

@ -2,11 +2,14 @@
[MAIN]
# Specify a score threshold to be exceeded before program exits with error.
fail-under=9.7
fail-under=9.8
# Make sure public methods are documented.
# See Also: https://github.com/PyCQA/pydocstyle/issues/309#issuecomment-1426642147
fail-on=C0116
# Also fail on unused imports.
fail-on=
C0116,
unused-import
# Ignore pylint complaints about an upstream dependency.
ignored-modules=ConfigSpace.hyperparameters

33
.vscode/settings.json поставляемый
Просмотреть файл

@ -13,6 +13,39 @@
"python.linting.mypyEnabled": false, // use the dmypy daemon extension instead - it's faster
// "mypy.runUsingActiveInterpreter": true, // in the devcontainer we override this to use an explicit path
"autoDocstring.docstringFormat": "numpy",
"json.validate.enable": true,
"json.schemas": [
// Note, due to a deficiency in the "url" -> (local) "schema" mapping,
// we have to rely on file matching only for local schema imputation on
// a file when it doesn't explicitly specify it either by local path
// (which we generally avoid) or URL.
// See Also:
// - https://github.com/microsoft/vscode/issues/2809#issuecomment-1544387883
// - mlos_bench/config/schemas/README.md
// TODO: Add more rules for other mlos_bench config schemas.
{
"fileMatch": [
"mlos_bench/mlos_bench/tests/config/schemas/optimizers/test-cases/**/*.jsonc",
"mlos_bench/mlos_bench/tests/config/schemas/optimizers/test-cases/**/*.json",
"mlos_bench/mlos_bench/config/optimizers/**/*.jsonc",
"mlos_bench/mlos_bench/config/optimizers/**/*.json"
],
"url": "./mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json"
},
{
"fileMatch": [
"mlos_bench/mlos_bench/tests/config/schemas/tunable-values/test-cases/**/*.jsonc",
"mlos_bench/mlos_bench/tests/config/schemas/tunable-values/test-cases/**/*.json",
"mlos_bench/mlos_bench/tests/config/tunable-values/**/*.jsonc",
"mlos_bench/mlos_bench/tests/config/tunable-values/**/*.json",
"mlos_bench/mlos_bench/config/tunable-values/**/*.jsonc",
"mlos_bench/mlos_bench/config/tunable-values/**/*.json"
],
"url": "./mlos_bench/mlos_bench/config/schemas/tunables/tunable-values-schema.json"
}
],
"restructuredtext.linter.doc8.extraArgs": [
"--ignore D001"
],

Просмотреть файл

@ -93,7 +93,7 @@ Simply open the project in VSCode and follow the prompts to build and open the d
pip install dist/mlos_bench-0.1.0-py3-none-any.whl
```
> Note exact versions may differ due to automatic versioning.
> Note: exact versions may differ due to automatic versioning.
## See Also

Просмотреть файл

@ -21,12 +21,14 @@ dependencies:
#- gcc_linux-64
- pip:
- bump2version
- check-jsonschema
- licenseheaders
- mypy
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- types-jsonschema
- types-colorama
- types-pygments
- types-requests
- types-setuptools
- "--editable ../mlos_core[full-tests]"
- "--editable ../mlos_bench[full-tests]"

Просмотреть файл

@ -21,12 +21,14 @@ dependencies:
#- gcc_linux-64
- pip:
- bump2version
- check-jsonschema
- licenseheaders
- mypy
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- types-colorama
- types-jsonschema
- types-pygments
- types-requests
- types-setuptools
- "--editable ../mlos_core[full-tests]"
- "--editable ../mlos_bench[full-tests]"

Просмотреть файл

@ -21,12 +21,14 @@ dependencies:
#- gcc_linux-64
- pip:
- bump2version
- check-jsonschema
- licenseheaders
- mypy
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- types-jsonschema
- types-colorama
- types-pygments
- types-requests
- types-setuptools
- "--editable ../mlos_core[full-tests]"
- "--editable ../mlos_bench[full-tests]"

Просмотреть файл

@ -21,12 +21,14 @@ dependencies:
#- gcc_linux-64
- pip:
- bump2version
- check-jsonschema
- licenseheaders
- mypy
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- types-colorama
- types-jsonschema
- types-pygments
- types-requests
- types-setuptools
- "--editable ../mlos_core[full-tests]"
- "--editable ../mlos_bench[full-tests]"

Просмотреть файл

@ -24,12 +24,14 @@ dependencies:
- conda-forge::GPy
- pip:
- bump2version
- check-jsonschema
- licenseheaders
- mypy
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- types-jsonschema
- types-colorama
- types-pygments
- types-requests
- types-setuptools
- "--editable ../mlos_core[full-tests]"
- "--editable ../mlos_bench[full-tests]"

Просмотреть файл

@ -24,12 +24,14 @@ dependencies:
#- gcc_linux-64
- pip:
- bump2version
- check-jsonschema
- licenseheaders
- mypy
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- types-colorama
- types-jsonschema
- types-pygments
- types-requests
- types-setuptools
- "--editable ../mlos_core[full-tests]"
- "--editable ../mlos_bench[full-tests]"

Просмотреть файл

@ -79,4 +79,4 @@ Distributing
.. note::
Note exact versions may differ due to automatic versioning.
Note: exact versions may differ due to automatic versioning.

Просмотреть файл

@ -0,0 +1,7 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
mlos_bench.config
"""

Просмотреть файл

@ -14,7 +14,7 @@
"storage": "storage/sqlite.jsonc",
"tunable_values": [
"tunables/tunable-values-example.jsonc"
"tunable-values/tunable-values-example.jsonc"
],
"globals": [

Просмотреть файл

@ -1,5 +1,7 @@
// MLOS Core optimizer
{
"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json",
"class": "mlos_bench.optimizers.MlosCoreOptimizer",
"config": {

Просмотреть файл

@ -1,5 +1,7 @@
// Mock optimizer to test the benchmarking framework.
{
"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json",
"class": "mlos_bench.optimizers.MockOptimizer",
"config": {

Просмотреть файл

@ -1,5 +1,7 @@
// Mock optimizer that runs a single benchmark with the specified configuration.
{
"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json",
"class": "mlos_bench.optimizers.OneShotOptimizer",
// Define the set of tunable params that the optimizer can work over.
@ -18,14 +20,14 @@
"include_tunable_values": [
// Note: tunable values need not specify a value for all params.
// In which case, unspecified values are governed by the "use_default" config parameter.
"tunables/tunable-values-example.jsonc"
"tunable-values/tunable-values-example.jsonc"
],
// Additional values of the tunable parameters.
"tunables_values": {},
"tunable_values": {},
// Whether to assign the "default" tunable parameter as a value or a
// random value for those parameters not specified.
//"use_defaults": true,
"use_defaults": true,
// Other optimizer configuration parameters.
"minimize": "score"

Просмотреть файл

@ -0,0 +1,73 @@
# Config Schemas
This directory contains [json schemas](https://json-schema.org/) for describing the configuration of the MLOS benchmarking framework.
## Usage
`mlos_bench` `.jsonc` config files can reference these schema files in a couple of ways:
### Internally
If the config file lives in the same source tree as the schema (e.g. when editing within this repository), it can reference the schema by relative path:
```jsonc
{
"$schema": "../schemas/optimizers/optimizer-schema.json",
...
}
```
> Note: we usually avoid this approach since it makes it harder to move the schema files around and just doesn't look very nice.
>
> Instead, we try to rely on `.vscode/settings.json` to map local repo file globs to their schema files and simply omit the `$schema` field from the config files.
### Externally
```jsonc
{
"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json",
...
}
```
> Note: the above URL is not guaranteed to be stable. It is often recommended to use a specific commit hash or tag in the URL rather than `main` if you depend on that.
<!-- intentionally blank line to avoid markdown lint complaints -->
> Note: when doing schema development within the `MLOS` repo, this approach may cause false errors to be reported if the remote schema file is different than the local one (and hence config files don't validate quite right).
>
> There is a [deficiency](https://github.com/microsoft/vscode/issues/2809#issuecomment-1544387883) in the `json.schemas` handling in `.vscode/settings.json` that currently prevents remote URLs from being mapped to local files.
>
> A simple workaround for now is to comment out the `$schema` field in the config file while editing, and then uncomment it when you're ready to commit.
## Validation
Within the codebase we use [`jsonschema`](https://pypi.org/project/jsonschema/) to validate config files against the schemas upon loading.
For manual testing, you can use the [`check-jsonschema`](https://pypi.org/project/check-jsonschema/) command line tool.
For instance:
```shell
check-jsonschema --verbose --default-filetype json5 \
--schemafile mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json \
mlos_bench/mlos_bench/config/optimizers/mlos_core_opt.jsonc
```
## Development
### Editing
Unlike the config files, the schemas are written in plain `json` instead of `jsonc` since some tooling for schema validation doesn't support parsing json files with comments.
You can add comments within an object using the `"$comment"` property to work around this a little.
When referencing a schema in a config file (see above), the `$schema` property will allow for autocomplete in some editors such as [VSCode](https://code.visualstudio.com/).
### Conventions
- We do not typically specify `"default"` values in the schema files, since for most validators those aren't enforced, and it would require additional maintenance effort to keep the defaults in sync with the code.
- We typically specify `"unevaluatedProperties": false` in order to prevent typos in the config files from going unnoticed, however this can be overridden for portions of the schema if necessary.
> Note: It's important to use `"unevaluatedProperties": false` from the [2019-09 draft](https://json-schema.org/understanding-json-schema/reference/object.html?highlight=unevaluated#unevaluated-properties) (or later), and not `"additionalProperties": false` due to the order in which those two rules get processed.
- When specifying "conditions", always pair the property clause `"properties": { "property-name": { "const": "value" } }` with a matching `"required": ["property-name"]` clause to ensure that it is a strict match (otherwise the condition also matches when the property is absent).
- Close all `if-then-else` statements inside a `"oneOf"` block with an `"else": false`, else the clause will implicitly default to `true`.
> As a nice corollary, this should force a full set of matching descriptions in the `"oneOf"` block so we don't accidentally leave off a supported matching value.

Просмотреть файл

@ -0,0 +1,15 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
A module for managing config schemas and their validation.
"""
from mlos_bench.config.schemas.config_schemas import ConfigSchema, CONFIG_SCHEMA_DIR
__all__ = [
'ConfigSchema',
'CONFIG_SCHEMA_DIR',
]

Просмотреть файл

Просмотреть файл

@ -0,0 +1,99 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
A simple class for describing where to find different config schemas and validating configs against them.
"""
from enum import Enum
from os import path, walk
from typing import Dict, Iterator, Mapping
import json # schema files are pure json - no comments
import jsonschema
from mlos_bench.util import path_join
# The path to find all config schemas.
CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True)
# Note: we separate out the SchemaStore from a class method on ConfigSchema
# because of issues with mypy/pylint and non-Enum-member class members.
class SchemaStore(Mapping):
    """
    A simple class for storing schemas and subschemas for the validator to reference.

    Every non-empty ``*.json`` file found under ``CONFIG_SCHEMA_DIR`` is
    registered under two keys: its file path and its ``"$id"``.
    The schemas are loaded lazily on the first access to the store
    (lookup, iteration, or length query).
    """

    # A class member mapping of schema id to schema object.
    # Shared by all instances, so the schema files are only parsed once.
    _SCHEMA_STORE: Dict[str, dict] = {}

    def __len__(self) -> int:
        """Gets the number of keys (file paths and schema ids) in the store."""
        self._ensure_loaded()
        return len(self._SCHEMA_STORE)

    def __iter__(self) -> Iterator:
        """Iterates over the keys (file paths and schema ids) in the store."""
        self._ensure_loaded()
        return iter(self._SCHEMA_STORE)

    def __getitem__(self, key: str) -> dict:
        """Gets the schema object for the given key."""
        self._ensure_loaded()
        return self._SCHEMA_STORE[key]

    @classmethod
    def _ensure_loaded(cls) -> None:
        """Loads the schemas if the store is still empty."""
        # Every accessor goes through here so that len() and iteration see
        # the same content as a key lookup does.
        if not cls._SCHEMA_STORE:
            cls._load_schemas()

    @classmethod
    def _load_schemas(cls) -> None:
        """
        Loads all schemas and subschemas into the schema store for the validator to reference.

        Raises
        ------
        KeyError
            If a schema file does not declare an "$id".
        json.JSONDecodeError
            If a schema file is not valid json.
        """
        # Collect into a local dict first: if any file fails to load, the
        # shared store stays empty and the next access retries the load
        # instead of silently serving a partial set of schemas.
        loaded: Dict[str, dict] = {}
        for root, _, files in walk(CONFIG_SCHEMA_DIR):
            for file_name in files:
                if not file_name.endswith(".json"):
                    continue
                file_path = path_join(root, file_name)
                # Skip empty placeholder files.
                if path.getsize(file_path) == 0:
                    continue
                with open(file_path, mode="r", encoding="utf-8") as schema_file:
                    schema = json.load(schema_file)
                loaded[file_path] = schema
                # Let the schema be referenced by its id as well.
                loaded[schema["$id"]] = schema
        cls._SCHEMA_STORE.update(loaded)
SCHEMA_STORE = SchemaStore()
class ConfigSchema(Enum):
    """
    The known config schema types.

    Each member's value is the path of that type's top level schema file
    under CONFIG_SCHEMA_DIR, which is also the key used to look the schema
    up in SCHEMA_STORE.
    """

    CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json")
    ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json")
    OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json")
    SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json")
    STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json")
    TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json")
    TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json")

    @property
    def schema(self) -> dict:
        """Looks up the schema object for this type in the schema store."""
        found = SCHEMA_STORE[self.value]
        assert found
        return found

    def validate(self, config: dict) -> None:
        """
        Checks the given config against the schema for this type.

        Parameters
        ----------
        config : dict
            The parsed config to validate.

        Raises
        ------
        jsonschema.exceptions.ValidationError
        jsonschema.exceptions.SchemaError
        """
        own_schema = self.schema
        # Hand the whole schema store to the resolver so that references to
        # other schema files can be looked up there.
        ref_resolver: jsonschema.RefResolver = jsonschema.RefResolver.from_schema(own_schema, store=SCHEMA_STORE)
        jsonschema.validate(instance=config, schema=own_schema, resolver=ref_resolver)

Просмотреть файл

Просмотреть файл

@ -0,0 +1,145 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/optimizers/mlos_core-optimizer-subschema.json",
"title": "mlos_core Optimizer config subschema",
"$comment": "Extensions to the allowed 'config' object properties when the optimizer is the mlos_core optimizer.",
"type": "object",
"properties": {
"optimizer_type": {
"description": "The underlying optimizer type to use.",
"$comment": "See Also: mlos_core/optimizers/__init__.py",
"enum": [
null,
"SKOPT",
"EMUKIT",
"RANDOM"
]
},
"space_adapter_type": {
"description": "The type of space adapter to use.",
"$comment": "See Also: mlos_core/spaces/__init__.py",
"enum": [
null,
"IDENTITY",
"LLAMATUNE"
]
},
"space_adapter_config": {
"description": "The space adapter specific config.",
"$comment": "stub for possible space adapter configs based on type (set using conditionals below)",
"type": "object"
}
},
"allOf": [
{
"$comment": "add extra recognized params for SKOPT optimizer type",
"if": {
"properties": {
"optimizer_type": {
"const": "SKOPT"
}
},
"required": [
"optimizer_type"
]
},
"then": {
"properties": {
"base_estimator": {
"description": "The base estimator to use for the SKOPT optimizer.",
"enum": [
"gp",
"et"
]
}
}
}
},
{
"$comment": "a set of rules for the space adapter schema extensions",
"oneOf": [
{
"if": {
"$comment": "disallow any extra space_adapter_configs for IDENTITY/null space_adapter_type",
"anyOf": [
{
"properties": {
"space_adapter_type": {
"const": null
}
},
"required": [
"space_adapter_type"
]
},
{
"properties": {
"space_adapter_type": {
"const": "IDENTITY"
}
},
"required": [
"space_adapter_type"
]
},
{
"$comment": "Match when space_adapter_type is not present",
"not": {
"required": [
"space_adapter_type"
]
}
}
]
},
"then": {
"not": {
"$comment": "space_adapter_config should be omitted in this case",
"required": [
"space_adapter_config"
]
}
},
"else": false
},
{
"if": {
"properties": {
"space_adapter_type": {
"const": "LLAMATUNE"
}
},
"required": [
"space_adapter_type"
]
},
"then": {
"properties": {
"space_adapter_config": {
"$comment": "Properties specific to the llamatune space adapter config.",
"type": "object",
"properties": {
"num_low_dims": {
"description": "Number of dimensions used in the low-dimensional parameter search space.",
"type": "integer",
"minimum": 2
},
"special_param_values": {
"type": "object"
},
"max_unique_values_per_param": {
"type": "integer",
"minimum": 1
}
},
"unevaluatedProperties": false
}
}
},
"else": false
}
]
}
]
}

Просмотреть файл

@ -0,0 +1,204 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json",
"title": "mlos_bench Optimizer config",
"$defs": {
"comment": {
"$comment": "This section contains reusable partial schema bits (or just split out for readability)"
},
"config_base_optimizer": {
"$comment": "config properties common to all optimizer types.",
"type": "object",
"properties": {
"minimize": {
"description": "The name of the metric to minimize.",
"$comment": "In oneOf spec below require one of 'minimize' or 'maximize'.",
"type": "string",
"default": "score"
},
"maximize": {
"description": "The name of the metric to maximize.",
"$comment": "In oneOf spec below require one of 'minimize' or 'maximize'.",
"type": "string"
},
"max_iterations": {
"description": "The maximum number of iterations to run.",
"type": "integer",
"minimum": 0
},
"seed": {
"description": "The seed to use for the random number generator.",
"type": "integer"
},
"use_defaults": {
"description": "Whether to use the ConfigSpace defaults for the first iteration of the optimizer.",
"type": "boolean"
}
},
"not": {
"$comment": "Require either 'minimize' or 'maximize' or neither, but not both.",
"required": ["minimize", "maximize"]
}
},
"config_tunable_values_optimizer": {
"$comment": "Properties specific to optimizers that allow a specific set of tunable values to be used (left here in case there's more than one).",
"type": "object",
"properties": {
"include_tunable_values": {
"$comment": "Optional list of file paths with tunable values to include in the optimization.",
"type": "array",
"items": {
"type": "string",
"pattern": "[.]json[c]?$"
}
},
"tunable_values": {
"$ref": "../tunables/tunable-values-schema.json#/$defs/tunable_values_set"
}
}
}
},
"description": "config for the mlos_bench optimizer",
"$comment": "top level schema document rules",
"type": "object",
"properties": {
"$schema": {
"description": "The schema to use for validating the optimizer config (accepts both URLs and local paths).",
"type": "string",
"$comment": "This is optional, but if provided, should match the name of this file.",
"pattern": "/schemas/optimizers/optimizer-schema.json$"
},
"description": {
"description": "Optional description of the config.",
"type": "string"
},
"class": {
"description": "The name of the optimizer class to use.",
"$comment": "required",
"enum": [
"mlos_bench.optimizers.MlosCoreOptimizer",
"mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"mlos_bench.optimizers.MockOptimizer",
"mlos_bench.optimizers.mock_optimizer.MockOptimizer",
"mlos_bench.optimizers.OneShotOptimizer",
"mlos_bench.optimizers.one_shot_optimizer.OneShotOptimizer"
]
},
"include_tunables": {
"description": "A list of file paths containing tunable parameters definitions to include in the optimization.",
"type": "array",
"items": {
"type": "string",
"$comment": "Paths are expected to be json.",
"pattern": "[.]json[c]?$"
}
},
"config": {
"description": "The optimizer specific config.",
"$comment": "Stub for optimizer specific config appended with condition statements below",
"type": "object"
}
},
"required": ["class"],
"oneOf": [
{
"$comment": "Extensions to the allowed 'config' object properties when the optimizer is the mlos_core optimizer.",
"if": {
"properties": {
"class": {
"enum": [
"mlos_bench.optimizers.MlosCoreOptimizer",
"mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer"
]
}
},
"required": ["class"]
},
"then": {
"properties": {
"config": {
"type": "object",
"allOf": [
{
"$comment": "Allow all base optimizer configs",
"$ref": "#/$defs/config_base_optimizer"
},
{
"$comment": "Extend with properties specific to the mlos_core optimizer type by referencing another subschema file.",
"$ref": "./mlos_core-optimizer-subschema.json"
}
],
"$comment": "set unevaluatedProperties to false to prevent other properties from being allowed outside the combined set of valid documents $ref'ed above",
"unevaluatedProperties": false
}
},
"$comment": "Set 'else' to false to prevent it from defaulting to a valid document match."
},
"else": false
},
{
"$comment": "extensions to the 'config' object properties when the mock optimizer is being used",
"if": {
"properties": {
"class": {
"enum": [
"mlos_bench.optimizers.MockOptimizer",
"mlos_bench.optimizers.mock_optimizer.MockOptimizer"
]
}
},
"required": ["class"]
},
"then": {
"properties": {
"config": {
"type": "object",
"allOf": [{ "$ref": "#/$defs/config_base_optimizer" }],
"$comment": "disallow other properties",
"unevaluatedProperties": false
}
}
},
"else": false
},
{
"$comment": "extensions to the 'config' object properties when the one shot optimizer is being used",
"if": {
"properties": {
"class": {
"enum": [
"mlos_bench.optimizers.OneShotOptimizer",
"mlos_bench.optimizers.one_shot_optimizer.OneShotOptimizer"
]
}
},
"required": ["class"]
},
"then": {
"properties": {
"config": {
"type": "object",
"allOf": [
{ "$ref": "#/$defs/config_base_optimizer" },
{ "$ref": "#/$defs/config_tunable_values_optimizer" }
],
"unevaluatedProperties": false
}
}
},
"else": false
}
],
"unevaluatedProperties": false
}

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,42 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/tunables/tunable-values-schema.json",
"title": "mlos_bench Tunable values config",
"$defs": {
"tunable_values_set": {
"type": "object",
"patternProperties": {
"^[^$]+$": {
"$comment": "Tunable values are either strings, numbers, booleans or nulls.",
"type": ["string", "number", "boolean"]
}
},
"not": {
"required": ["tunable_values"]
}
}
},
"oneOf": [
{
"description": "Allow a simple object that specifies the tunable values directly.",
"allOf": [
{
"type": "object",
"properties": {
"$schema": {
"$comment": "Optionally allow the schema to be specified in the top level of the config, but make sure it matches the expected schema.",
"type": "string",
"pattern": "/schemas/tunables/tunable-values-schema.json$"
}
}
},
{
"$ref": "#/$defs/tunable_values_set"
}
],
"$comment": "TODO: Add support for lists of tunable values as well."
}
]
}

Просмотреть файл

@ -1,5 +1,7 @@
// PostgreSQL storage.
{
"$schema": "../schemas/storage/storage-schema.jsonc",
"class": "mlos_bench.storage.sql.storage.SqlStorage",
"config": {

Просмотреть файл

@ -14,7 +14,9 @@ import logging
import argparse
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.util import BaseTypeVar
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.environments.base_environment import Environment
@ -49,7 +51,7 @@ class Launcher:
# Bootstrap config loader: command line takes priority.
self._config_loader = ConfigPersistenceService({"config_path": args.config_path or []})
if args.config:
config = self._config_loader.load_config(args.config)
config = self._config_loader.load_config(args.config, schema_type=None) # TODO: , schema_type=ConfigSchema.CLI)
assert isinstance(config, Dict)
config_path = config.get("config_path", [])
if config_path and not args.config_path:
@ -189,7 +191,7 @@ class Launcher:
from the specified config files (if any) and command line arguments.
"""
for config_file in (args_globals or []):
conf = self._config_loader.load_config(config_file)
conf = self._config_loader.load_config(config_file, schema_type=None) # FIXME: provide a schema type for globals
assert isinstance(conf, dict)
global_config.update(conf)
global_config.update(Launcher._try_parse_extra_args(args_rest))
@ -199,12 +201,12 @@ class Launcher:
def _load_tunable_values(self, args_tunables: Optional[str]) -> TunableGroups:
"""
Load key/value pairs of the tunable parameters from given JSON files, if any.
Load key/value pairs of the tunable values from given JSON files, if any.
"""
tunables = self.environment.tunable_params
if args_tunables is not None:
for data_file in args_tunables:
values = self._config_loader.load_config(data_file)
values = self._config_loader.load_config(data_file, ConfigSchema.TUNABLE_VALUES)
assert isinstance(values, Dict)
tunables.assign(values)
return tunables
@ -218,7 +220,7 @@ class Launcher:
if args_optimizer is None:
return OneShotOptimizer(
self.tunables, self._parent_service, self.global_config)
optimizer = self._load(Optimizer, args_optimizer) # type: ignore[type-abstract]
optimizer = self._load(Optimizer, args_optimizer, ConfigSchema.OPTIMIZER) # type: ignore[type-abstract]
return optimizer
def _load_storage(self, args_storage: Optional[str]) -> Storage:
@ -232,10 +234,11 @@ class Launcher:
from mlos_bench.storage.sql.storage import SqlStorage
return SqlStorage(self.tunables, self._parent_service,
{"drivername": "sqlite", "database": ":memory:"})
storage = self._load(Storage, args_storage) # type: ignore[type-abstract]
storage = self._load(Storage, args_storage, schema_type=None) # type: ignore[type-abstract]
# TODO: , ConfigSchema.STORAGE)
return storage
def _load(self, cls: Type[BaseTypeVar], json_file_name: str) -> BaseTypeVar:
def _load(self, cls: Type[BaseTypeVar], json_file_name: str, schema_type: Optional[ConfigSchema]) -> BaseTypeVar:
"""
Create a new instance of class `cls` from JSON configuration.
@ -243,7 +246,7 @@ class Launcher:
Use "# type: ignore[type-abstract]" to suppress the warning.
See Also: https://github.com/python/mypy/issues/4717
"""
class_config = self._config_loader.load_config(json_file_name)
class_config = self._config_loader.load_config(json_file_name, schema_type)
assert isinstance(class_config, Dict)
ret = self._config_loader.build_generic(
base_cls=cls,

Просмотреть файл

@ -9,6 +9,7 @@ No-op optimizer for mlos_bench that proposes a single configuration.
import logging
from typing import Dict, Optional, Any
from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.optimizers.mock_optimizer import MockOptimizer
@ -22,7 +23,7 @@ class OneShotOptimizer(MockOptimizer):
Explicit configs (partial or full) are possible using configuration files.
"""
# TODO: Add support for multiple explicit configs (i.e., FewShot or Manual Optimizer).
# TODO: Add support for multiple explicit configs (i.e., FewShot or Manual Optimizer) - #344
def __init__(self, tunables: TunableGroups,
service: Optional[Service], config: Dict[str, Any]):
@ -34,7 +35,8 @@ class OneShotOptimizer(MockOptimizer):
self._tunables = super().suggest()
# Now assign the values we were given in the config.
for data_file in config.get("include_tunable_values", []):
tunable_values = self._service.config_loader_service.load_config(data_file)
tunable_values = self._service.config_loader_service.load_config(data_file,
schema_type=ConfigSchema.TUNABLE_VALUES)
assert isinstance(tunable_values, Dict)
self._tunables.assign(tunable_values)
self._tunables.assign(config.get("tunable_values", {}))

Просмотреть файл

@ -16,8 +16,10 @@ import logging
from typing import Any, Dict, Iterable, List, Optional, Union, Tuple, Type
import json5 # To read configs with comments and other JSON5 syntax features
import json5 # To read configs with comments and other JSON5 syntax features
from jsonschema import ValidationError, SchemaError
from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.base_environment import Environment
from mlos_bench.services.base_service import Service
from mlos_bench.services.types.config_loader_type import SupportsConfigLoading
@ -101,7 +103,10 @@ class ConfigPersistenceService(Service, SupportsConfigLoading):
_LOG.debug("Path not resolved: %s", file_path)
return file_path
def load_config(self, json_file_name: str) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
def load_config(self,
json_file_name: str,
schema_type: Optional[ConfigSchema],
) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
"""
Load JSON config file. Search for a file relative to `_config_path`
if the input path is not absolute.
@ -111,6 +116,8 @@ class ConfigPersistenceService(Service, SupportsConfigLoading):
----------
json_file_name : str
Path to the input config file.
schema_type : Optional[ConfigSchema]
The schema type to validate the config against.
Returns
-------
@ -120,7 +127,24 @@ class ConfigPersistenceService(Service, SupportsConfigLoading):
json_file_name = self.resolve_path(json_file_name)
_LOG.info("Load config: %s", json_file_name)
with open(json_file_name, mode='r', encoding='utf-8') as fh_json:
return json5.load(fh_json) # type: ignore[no-any-return]
config = json5.load(fh_json)
if schema_type is not None:
    # Validate the freshly parsed config against the requested schema type
    # and surface any failure to the caller as a ValueError.
    try:
        schema_type.validate(config)
    except (ValidationError, SchemaError) as ex:
        _LOG.error("Failed to validate config %s against schema type %s at %s",
                   json_file_name, schema_type.name, schema_type.value)
        # Both string pieces need the f-prefix: without it on the second
        # piece the {schema_type...} placeholders are emitted verbatim.
        raise ValueError(f"Failed to validate config {json_file_name} against "
                         f"schema type {schema_type.name} at {schema_type.value}") from ex
    if isinstance(config, dict) and config.get("$schema"):
        # Remove $schema attributes from the config after we've validated
        # them to avoid passing them on to other objects
        # (e.g. SqlAlchemy based storage initializers).
        # NOTE: we only do this for internal schemas.
        # Other configs that get loaded may need the schema field
        # (e.g. Azure ARM templates).
        del config["$schema"]
return config # type: ignore[no-any-return]
def prepare_class_load(self, config: Dict[str, Any],
global_config: Optional[Dict[str, Any]] = None) -> Tuple[str, Dict[str, Any]]:
@ -374,7 +398,7 @@ class ConfigPersistenceService(Service, SupportsConfigLoading):
env : Environment
A new benchmarking environment.
"""
config = self.load_config(json_file_name)
config = self.load_config(json_file_name, schema_type=None) # TODO: , ConfigSchema.ENVIRONMENT)
assert isinstance(config, dict)
return self.build_environment(config, tunables, global_config, service)
@ -402,7 +426,7 @@ class ConfigPersistenceService(Service, SupportsConfigLoading):
env : List[Environment]
A list of new benchmarking environments.
"""
config_list = self.load_config(json_file_name)
config_list = self.load_config(json_file_name, schema_type=None) # TODO: , ConfigSchema.ENVIRONMENT)
if isinstance(config_list, dict):
config_list = [config_list]
return [
@ -435,7 +459,7 @@ class ConfigPersistenceService(Service, SupportsConfigLoading):
json_file_names, parent.__class__.__name__)
service = Service(global_config, parent)
for fname in json_file_names:
config = self.load_config(fname)
config = self.load_config(fname, schema_type=None) # TODO: , ConfigSchema.SERVICE)
service.register(self.build_service(config, global_config, service).export())
return service
@ -463,7 +487,7 @@ class ConfigPersistenceService(Service, SupportsConfigLoading):
"""
_LOG.info("Load tunables: '%s'", json_file_names)
for fname in json_file_names:
config = self.load_config(fname)
config = self.load_config(fname, schema_type=None) # TODO: , ConfigSchema.TUNABLE_PARAMS)
assert isinstance(config, dict)
parent.merge(TunableGroups(config))
return parent

Просмотреть файл

@ -145,7 +145,8 @@ class AzureVMService(Service, SupportsVMOps, SupportsRemoteExec): # pylint: dis
self._poll_timeout = float(config.get("pollTimeout", AzureVMService._POLL_TIMEOUT))
self._request_timeout = float(config.get("requestTimeout", AzureVMService._REQUEST_TIMEOUT))
self._deploy_template = self.config_loader_service.load_config(config['deployTemplatePath'])
# TODO: Provide external schema validation?
self._deploy_template = self.config_loader_service.load_config(config['deployTemplatePath'], schema_type=None)
self._url_deploy = AzureVMService._URL_DEPLOY.format(
subscription=config["subscription"],

Просмотреть файл

@ -8,6 +8,8 @@ Protocol interface for helper functions to lookup and load configs.
from typing import List, Iterable, Optional, Union, Protocol, runtime_checkable, TYPE_CHECKING
from mlos_bench.config.schemas import ConfigSchema
# Avoids circular import issues.
if TYPE_CHECKING:
@ -41,7 +43,7 @@ class SupportsConfigLoading(Protocol):
An actual path to the config or script.
"""
def load_config(self, json_file_name: str) -> Union[dict, List[dict]]:
def load_config(self, json_file_name: str, schema_type: Optional[ConfigSchema]) -> Union[dict, List[dict]]:
"""
Load JSON config file. Search for a file relative to `_config_path`
if the input path is not absolute.
@ -51,6 +53,8 @@ class SupportsConfigLoading(Protocol):
----------
json_file_name : str
Path to the input config file.
schema_type : Optional[ConfigSchema]
The schema type to validate the config against.
Returns
-------

Просмотреть файл

@ -6,3 +6,20 @@
Tests for mlos_bench.
Used to make mypy happy about multiple conftest.py modules.
"""
from typing import Optional
from mlos_bench.util import get_class_from_name
def try_resolve_class_name(class_name: Optional[str]) -> Optional[str]:
    """
    Looks up the class for the given name and returns its "<module>.<name>" string.

    Returns None when no name is given or when the class lookup fails with
    one of the handled import/lookup errors.
    """
    if class_name is not None:
        try:
            resolved_class = get_class_from_name(class_name)
            return resolved_class.__module__ + "." + resolved_class.__name__
        except (ValueError, AttributeError, ModuleNotFoundError, ImportError):
            # Unresolvable names are reported as None rather than as an error.
            pass
    return None

Просмотреть файл

@ -26,6 +26,7 @@ def locate_config_examples(config_examples_dir: str) -> List[str]:
config_examples: List[str]
List of paths to config examples.
"""
assert os.path.isdir(config_examples_dir)
config_examples = []
for root, _, files in os.walk(config_examples_dir):
for file in files:

Просмотреть файл

@ -15,7 +15,7 @@
"storage": "storage/in-memory.jsonc",
"tunable_values": [
"tunables/tunable-values-example.jsonc"
"tunable-values/tunable-values-example.jsonc"
],
// "globals": ["global_config.json"],

Просмотреть файл

@ -11,6 +11,7 @@ from typing import List
import pytest
from mlos_bench.tests.config import locate_config_examples
from mlos_bench.environments.base_environment import Environment
from mlos_bench.environments.composite_env import CompositeEnv
from mlos_bench.services.config_persistence import ConfigPersistenceService
@ -72,7 +73,8 @@ def load_environment_config_examples(config_loader_service: ConfigPersistenceSer
tunable_groups = TunableGroups() # base tunable groups that all others get built on
for mock_service_config_path in mock_service_configs:
mock_service_config = config_loader_service.load_config(mock_service_config_path)
mock_service_config = config_loader_service.load_config(mock_service_config_path, schema_type=None)
# TODO: , ConfigSchema.SERVICE)
config_loader_service.register(config_loader_service.build_service(
config=mock_service_config, parent=config_loader_service).export())

Просмотреть файл

@ -11,6 +11,8 @@ from typing import List
import pytest
from mlos_bench.tests.config import locate_config_examples
from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.services.config_persistence import ConfigPersistenceService
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.tunables.tunable_groups import TunableGroups
@ -37,7 +39,7 @@ assert configs
@pytest.mark.parametrize("config_path", configs)
def test_load_optimizer_config_examples(config_loader_service: ConfigPersistenceService, config_path: str) -> None:
"""Tests loading a config example."""
config = config_loader_service.load_config(config_path)
config = config_loader_service.load_config(config_path, ConfigSchema.OPTIMIZER)
assert isinstance(config, dict)
cls = get_class_from_name(config["class"])
assert issubclass(cls, Optimizer)

Просмотреть файл

@ -0,0 +1,95 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Common tests for config schemas and their validation and test cases.
"""
from dataclasses import dataclass
from typing import Any, Dict, Set, TypedDict
import os
import json5
from mlos_bench.tests.config import locate_config_examples
# The different type of schema test cases we expect to have.
@dataclass
class SchemaTestType:
    """
    Describes one category of schema test case together with the set of
    subcategories that are accepted underneath it.
    """

    # Name of the test case category.
    test_case_type: str
    # Names of the subcategories allowed for this category.
    test_case_subtypes: Set[str]

    def __hash__(self) -> int:
        """Hashes on the category name only (the subtypes set is not hashable)."""
        type_name = self.test_case_type
        return hash(type_name)
# Registry of the schema test case types we expect to have, keyed by type name.
_SCHEMA_TEST_TYPES = {
    schema_test_type.test_case_type: schema_test_type
    for schema_test_type in (
        SchemaTestType(test_case_type='good', test_case_subtypes={'full', 'partial'}),
        SchemaTestType(test_case_type='bad', test_case_subtypes={'invalid', 'unhandled'}),
    )
}
# Some attribute names that we don't expect to be accepted by any schema.
# Used to dynamically check that unexpected extra attributes are rejected.
EXTRA_OUTER_ATTR = "extra_outer_attr"
EXTRA_CONFIG_ATTR = "extra_config_attr"
class SchemaTestCaseInfo(TypedDict):
    """
    Some basic info about a schema test case.
    """

    # The parsed contents of the test case config file.
    config: Dict[str, Any]
    # The path of the test case file (also used as the test case's key).
    test_case: str
    # The top-level category directory the test case was found under.
    test_case_type: str
    # The subcategory directory (within the type) the test case was found under.
    test_case_subtype: str
def check_schema_dir_layout(test_cases_root: str) -> None:
    """
    Verifies that the test cases directory only contains the entries we know
    how to handle, so that no configs or test cases are silently ignored.

    Parameters
    ----------
    test_cases_root : str
        Directory holding one subdirectory per test case type, each of which
        holds one subdirectory per test case subtype.

    Raises
    ------
    NotImplementedError
        If an entry (other than a README.md) is not a known type or subtype.
    """
    readme = 'README.md'
    type_entries = (entry for entry in os.listdir(test_cases_root) if entry != readme)
    for type_entry in type_entries:
        known_type = _SCHEMA_TEST_TYPES.get(type_entry)
        if known_type is None:
            raise NotImplementedError(f"Unhandled test case type: {type_entry}")
        allowed_subtypes = known_type.test_case_subtypes
        for subtype_entry in os.listdir(os.path.join(test_cases_root, type_entry)):
            if subtype_entry != readme and subtype_entry not in allowed_subtypes:
                raise NotImplementedError(
                    f"Unhandled test case subtype {subtype_entry} for test case type {type_entry}")
def get_schema_test_cases(test_cases_root: str) -> Dict[str, SchemaTestCaseInfo]:
    """
    Collects all of the schema test cases found beneath the given root.

    The directory layout is checked first. Then every config file located in
    each known <type>/<subtype> directory is parsed and recorded.

    Parameters
    ----------
    test_cases_root : str
        Root directory of the test cases.

    Returns
    -------
    test_cases : Dict[str, SchemaTestCaseInfo]
        The test case infos keyed by the test case file path.

    Raises
    ------
    RuntimeError
        If a test case file fails to load.
    """
    check_schema_dir_layout(test_cases_root)
    collected: Dict[str, SchemaTestCaseInfo] = {}
    # Note: types and subtypes are visited in sorted order so that the test
    # cases can be deterministically tested in parallel.
    for type_name in sorted(_SCHEMA_TEST_TYPES):
        for subtype_name in sorted(_SCHEMA_TEST_TYPES[type_name].test_case_subtypes):
            subtype_dir = os.path.join(test_cases_root, type_name, subtype_name)
            for case_path in locate_config_examples(subtype_dir):
                with open(case_path, mode='r', encoding='utf-8') as case_fh:
                    try:
                        parsed_config = json5.load(case_fh)
                        collected[case_path] = SchemaTestCaseInfo({
                            "config": parsed_config,
                            "test_case": case_path,
                            "test_case_type": type_name,
                            "test_case_subtype": subtype_name,
                        })
                    except Exception as ex:
                        raise RuntimeError("Failed to load test case: " + case_path) from ex
    return collected

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,5 @@
# Optimizer Config Schema Test Cases
This directory contains test cases for the optimizer config schema.
> Be cautious when using these as examples.

Просмотреть файл

@ -0,0 +1,10 @@
{
"$schema": "bad/path/to/wrong-schema.json",
"class": "mlos_bench.optimizers.MockOptimizer",
"config": {
"minimize": "score"
}
}

Просмотреть файл

@ -0,0 +1,9 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
// space_adapter_type only accepts certain values
"space_adapter_type": "unknown"
}
}

Просмотреть файл

@ -0,0 +1,9 @@
{
"class": "mlos_bench.optimizers.MockOptimizer",
"config": {
"minimize": "score",
"seed": "wrong type"
}
}

Просмотреть файл

@ -0,0 +1,9 @@
{
"class": "mlos_bench.optimizers.MockOptimizer",
"config": {
// Can't specify both min and max - should throw an error.
"minimize": "foo",
"maximize": "bar"
}
}

Просмотреть файл

@ -0,0 +1,9 @@
{
// Bad optimizer class - should be rejected.
"class": "mlos_bench.optimizers.InvalidOptimizer",
"config": {
"minimize": "score"
}
}

Просмотреть файл

@ -0,0 +1,11 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
"space_adapter_type": "IDENTITY",
"space_adapter_config": {
// IDENTITY space adapter doesn't accept any config values
}
}
}

Просмотреть файл

@ -0,0 +1,9 @@
{
"config": {
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false
}
}

Просмотреть файл

@ -0,0 +1,9 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
// optimizer_type only accepts certain values
"optimizer_type": "unknown"
}
}

Просмотреть файл

@ -0,0 +1,10 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
"space_adapter_type": null,
"optimizer_type": "RANDOM",
"random_seed": 0
}
}

Просмотреть файл

@ -0,0 +1,10 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
"optimizer_type": "SKOPT",
// base_estimator only accepts certain values
"base_estimator": "foo"
}
}

Просмотреть файл

@ -0,0 +1,10 @@
{
"class": "mlos_bench.optimizers.OneShotOptimizer",
"config": {
"tunable_values": [
"foo",
"bar"
]
}
}

Просмотреть файл

@ -0,0 +1,10 @@
{
"class": "mlos_bench.optimizers.OneShotOptimizer",
"config": {
"tunable_values": {
"foo": "bar",
"array": [1, 2, 3] // arrays are not allowed
}
}
}

Просмотреть файл

@ -0,0 +1,9 @@
{
"class": "mlos_bench.optimizers.OneShotOptimizer",
"config": {
"tunable_values": [{
"foo": "bar"
}]
}
}

Просмотреть файл

@ -0,0 +1,7 @@
{
"class": "mlos_bench.optimizers.OneShotOptimizer",
"include_tunables": [
"bad/path/to/tunables.xml" // path should end in .json or .jsonc
]
}

Просмотреть файл

@ -0,0 +1,10 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
"optimizer_type": "EMUKIT",
// base_estimator is only valid for optimizer_type == SKOPT
"base_estimator": "gp"
}
}

Просмотреть файл

@ -0,0 +1,21 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false,
"optimizer_type": "SKOPT",
"base_estimator": "gp",
"space_adapter_type": "LLAMATUNE",
"space_adapter_config": {
"num_low_dims": 2,
"max_unique_values_per_param": 100,
"special_param_values": {
"foo": "bar"
},
"extra_param": "should not be here"
}
}
}

Просмотреть файл

@ -0,0 +1,13 @@
{
"class": "mlos_bench.optimizers.MockOptimizer",
// Note: this is such a common test case that we also test for it dynamically.
"extra_outer": "should not be here",
"config": {
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false
}
}

Просмотреть файл

@ -0,0 +1,17 @@
{
"class": "mlos_bench.optimizers.one_shot_optimizer.OneShotOptimizer",
"config": {
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false,
"tunable_values": {
"foo": "bar",
// Values can't be arrays.
"baz": [
"foo"
]
}
}
}

Просмотреть файл

@ -0,0 +1,13 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
// Here we do our best to list the exhaustive set of full configs available for the base optimizer config.
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false,
"optimizer_type": "EMUKIT",
"space_adapter_type": null
}
}

Просмотреть файл

@ -0,0 +1,13 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
// Here we do our best to list the exhaustive set of full configs available for the base optimizer config.
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false,
"optimizer_type": "RANDOM",
"space_adapter_type": "IDENTITY"
}
}

Просмотреть файл

@ -0,0 +1,21 @@
{
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
// Here we do our best to list the exhaustive set of full configs available for the base optimizer config.
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false,
"optimizer_type": "SKOPT",
"base_estimator": "gp",
"space_adapter_type": "LLAMATUNE",
"space_adapter_config": {
"num_low_dims": 2,
"max_unique_values_per_param": 100,
"special_param_values": {
"foo": "bar"
}
}
}
}

Просмотреть файл

@ -0,0 +1,11 @@
{
"class": "mlos_bench.optimizers.MockOptimizer",
"config": {
// Here we do our best to list the exhaustive set of full configs available for the base optimizer config.
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false
}
}

Просмотреть файл

@ -0,0 +1,28 @@
{
"class": "mlos_bench.optimizers.OneShotOptimizer",
"description": "A full config test case for the OneShotOptimizer.",
"include_tunables": [
"some/path/to/tunables.jsonc",
"some/other/path/to/tunables.json"
],
"config": {
// Here we do our best to list the exhaustive set of full configs available for the base optimizer config.
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"use_defaults": false,
"include_tunable_values": [
"path/to/tunable/values.jsonc",
"path/to/tunable/values.json"
],
"tunable_values": {
"tunable": "value",
"knobs": 1
}
}
}

Просмотреть файл

@ -0,0 +1,7 @@
{
// no schema
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
// no config required
}

Просмотреть файл

@ -0,0 +1,14 @@
{
// No schema required.
"class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
"config": {
"minimize": "score",
"max_iterations": 20,
"seed": 12345,
"optimizer_type": "SKOPT",
// no base_estimator required
"space_adapter_type": "LLAMATUNE"
// no space_adapter_config required
}
}

Просмотреть файл

@ -0,0 +1,7 @@
{
// no schema required
"class": "mlos_bench.optimizers.MockOptimizer"
// no config required
}

Просмотреть файл

@ -0,0 +1,6 @@
{
"class": "mlos_bench.optimizers.MockOptimizer",
"config": {}
}

Просмотреть файл

@ -0,0 +1,4 @@
{
"class": "mlos_bench.optimizers.MockOptimizer"
}

Просмотреть файл

@ -0,0 +1,9 @@
{
"class": "mlos_bench.optimizers.MockOptimizer",
"config": {
"minimize": "score",
"seed": 12345
}
}

Просмотреть файл

@ -0,0 +1,16 @@
{
"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json",
"class": "mlos_bench.optimizers.OneShotOptimizer",
"include_tunables": [
// tunable values paths are expected to end in .json or .jsonc
"some/path/to/tunable-params.jsonc"
],
"config": {
"tunable_values": {
"foo": "bar"
}
}
}

Просмотреть файл

@ -0,0 +1,152 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Tests for optimizer schema validation.
"""
from copy import deepcopy
from os import path
from typing import Dict, Optional
import jsonschema
import pytest
from mlos_core.optimizers import OptimizerType
from mlos_core.spaces.adapters import SpaceAdapterType
from mlos_core.tests import get_all_concrete_subclasses
from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.tests import try_resolve_class_name
from mlos_bench.tests.config.schemas import get_schema_test_cases, SchemaTestCaseInfo, EXTRA_CONFIG_ATTR, EXTRA_OUTER_ATTR
# General testing strategy:
# - hand code a set of good/bad configs (useful to test editor schema checking)
# - enumerate and try to check that we've covered all the cases
# - for each config, load and validate against expected schema
# Load every hand-written test case that lives next to this module.
TEST_CASES: Dict[str, SchemaTestCaseInfo] = get_schema_test_cases(
    path.join(path.dirname(__file__), "test-cases"))

# Index the test cases by their type and by their subtype for the checks below.
TEST_CASES_BY_TYPE: Dict[str, Dict[str, SchemaTestCaseInfo]] = {}
TEST_CASES_BY_SUBTYPE: Dict[str, Dict[str, SchemaTestCaseInfo]] = {}
for test_case_info in TEST_CASES.values():
    TEST_CASES_BY_TYPE.setdefault(
        test_case_info["test_case_type"], {})[test_case_info["test_case"]] = test_case_info
    TEST_CASES_BY_SUBTYPE.setdefault(
        test_case_info["test_case_subtype"], {})[test_case_info["test_case"]] = test_case_info

# Sanity check that we actually found test cases of each kind.
assert len(TEST_CASES_BY_TYPE["good"]) > 0
assert len(TEST_CASES_BY_TYPE["bad"]) > 0
assert len(TEST_CASES_BY_SUBTYPE) > 2
# Dynamically enumerate some of the cases we want to make sure we cover.

# The fully qualified names of all of the concrete Optimizer subclasses.
expected_mlos_bench_optimizer_class_names = [
    ".".join((optimizer_class.__module__, optimizer_class.__name__))
    for optimizer_class in get_all_concrete_subclasses(Optimizer)   # type: ignore[type-abstract]
]
assert expected_mlos_bench_optimizer_class_names

# The trailing None makes sure that we also check for configs where the
# optimizer_type or space_adapter_type are left unspecified.
expected_mlos_core_optimizer_types = [*OptimizerType, None]
assert expected_mlos_core_optimizer_types

expected_mlos_core_space_adapter_types = [*SpaceAdapterType, None]
assert expected_mlos_core_space_adapter_types
# Do the full cross product of all the test cases and all the optimizer types.
@pytest.mark.parametrize("test_case_subtype", list(TEST_CASES_BY_SUBTYPE.keys()))
@pytest.mark.parametrize("mlos_bench_optimizer_type", expected_mlos_bench_optimizer_class_names)
def test_case_coverage_mlos_bench_optimizer_type(test_case_subtype: str, mlos_bench_optimizer_type: str) -> None:
    """
    Checks that at least one test case of the given subtype exists whose
    "class" resolves to the given mlos_bench optimizer class name.
    """
    is_covered = any(
        try_resolve_class_name(candidate["config"].get("class")) == mlos_bench_optimizer_type
        for candidate in TEST_CASES_BY_SUBTYPE[test_case_subtype].values()
    )
    if not is_covered:
        raise NotImplementedError(
            f"Missing test case for subtype {test_case_subtype} for Optimizer class {mlos_bench_optimizer_type}")
# Being a little lazy for the moment and relaxing the requirement that we have
# a subtype test case for each optimizer and space adapter combo.
@pytest.mark.parametrize("test_case_type", list(TEST_CASES_BY_TYPE.keys()))
# @pytest.mark.parametrize("test_case_subtype", list(TEST_CASES_BY_SUBTYPE.keys()))
@pytest.mark.parametrize("mlos_core_optimizer_type", expected_mlos_core_optimizer_types)
def test_case_coverage_mlos_core_optimizer_type(test_case_type: str,
                                                mlos_core_optimizer_type: Optional[OptimizerType]) -> None:
    """
    Checks that at least one MlosCoreOptimizer test case of the given type
    exists that uses the given mlos_core optimizer type (None == unspecified).
    """
    wanted_name = mlos_core_optimizer_type.name if mlos_core_optimizer_type is not None else None
    for candidate in TEST_CASES_BY_TYPE[test_case_type].values():
        outer_config = candidate["config"]
        resolved_class = try_resolve_class_name(outer_config.get("class"))
        if resolved_class != "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer":
            continue
        # A missing or empty inner config counts as an unspecified optimizer_type.
        inner_config = outer_config.get("config")
        found_name = inner_config.get("optimizer_type", None) if inner_config else None
        if found_name == wanted_name:
            return
    raise NotImplementedError(
        f"Missing test case for type {test_case_type} for MlosCore Optimizer type {mlos_core_optimizer_type}")
@pytest.mark.parametrize("test_case_type", list(TEST_CASES_BY_TYPE.keys()))
# @pytest.mark.parametrize("test_case_subtype", list(TEST_CASES_BY_SUBTYPE.keys()))
@pytest.mark.parametrize("mlos_core_space_adapter_type", expected_mlos_core_space_adapter_types)
def test_case_coverage_mlos_core_space_adapter_type(test_case_type: str,
                                                    mlos_core_space_adapter_type: Optional[SpaceAdapterType]) -> None:
    """
    Checks that at least one MlosCoreOptimizer test case of the given type
    exists that uses the given mlos_core space adapter type (None == unspecified).
    """
    wanted_name = mlos_core_space_adapter_type.name if mlos_core_space_adapter_type is not None else None
    for candidate in TEST_CASES_BY_TYPE[test_case_type].values():
        outer_config = candidate["config"]
        resolved_class = try_resolve_class_name(outer_config.get("class"))
        if resolved_class != "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer":
            continue
        # A missing or empty inner config counts as an unspecified space_adapter_type.
        inner_config = outer_config.get("config")
        found_name = inner_config.get("space_adapter_type", None) if inner_config else None
        if found_name == wanted_name:
            return
    raise NotImplementedError(
        f"Missing test case for type {test_case_type} for SpaceAdapter type {mlos_core_space_adapter_type}")
# Now we actually perform all of those validation tests.
@pytest.mark.parametrize("test_case_name", list(TEST_CASES.keys()))
def test_optimizer_configs_against_schema(test_case_name: str) -> None:
    """
    Checks that "good" optimizer configs validate against the schema and
    that "bad" ones are rejected with a validation error.
    """
    test_case = TEST_CASES[test_case_name]
    case_type = test_case["test_case_type"]
    if case_type not in ("good", "bad"):
        raise NotImplementedError(f"Unknown test case type: {test_case['test_case_type']}")
    if case_type == "bad":
        with pytest.raises(jsonschema.ValidationError):
            ConfigSchema.OPTIMIZER.validate(test_case["config"])
        return
    ConfigSchema.OPTIMIZER.validate(test_case["config"])
def test_optimizer_configs_with_extra_param() -> None:
    """
    Checks that an otherwise valid optimizer config is rejected once an
    unexpected attribute is added at the outer level or inside its "config".
    """
    good_cases = TEST_CASES_BY_TYPE["good"]
    first_good_case = good_cases[next(iter(good_cases))]
    # Work on a copy so the shared test case data is left untouched.
    config = deepcopy(first_good_case["config"])

    # Baseline: the unmodified config must validate.
    ConfigSchema.OPTIMIZER.validate(config)

    # An extra attribute at the outer level must be rejected.
    config[EXTRA_OUTER_ATTR] = "should not be here"
    with pytest.raises(jsonschema.ValidationError):
        ConfigSchema.OPTIMIZER.validate(config)
    config.pop(EXTRA_OUTER_ATTR)

    # An extra attribute inside the (possibly missing or empty) inner config must be rejected too.
    inner_config = config.get("config")
    if not inner_config:
        inner_config = config["config"] = {}
    inner_config[EXTRA_CONFIG_ATTR] = "should not be here"
    with pytest.raises(jsonschema.ValidationError):
        ConfigSchema.OPTIMIZER.validate(config)

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,7 @@
{
"foo": "bar",
"bar": "baz",
"num": 1,
"bool": true,
"array": ["foo", "bar", "baz"]
}

Просмотреть файл

@ -0,0 +1,7 @@
{
"foo": "bar",
"bar": "baz",
"num": 1,
"bool": true,
"null": null
}

Просмотреть файл

@ -0,0 +1,10 @@
{
"foo": "bar",
"bar": "baz",
"num": 1,
"bool": true,
"object": {
// should be disallowed
"foo": "bar"
}
}

Просмотреть файл

@ -0,0 +1,4 @@
{
"extra": "params are allowed here",
"tunable_values": "but the name tunable_values is not"
}

Просмотреть файл

@ -0,0 +1,6 @@
{
"foo": "bar",
"int": 1,
"float": 1.1,
"bool": true
}

Просмотреть файл

@ -0,0 +1,6 @@
{
"foo": "bar",
"int": 1,
"float": 1.1,
"bool": true
}

Просмотреть файл

@ -0,0 +1,52 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Tests for tunable values schema validation.
"""
from os import path
from typing import Dict
import jsonschema
import pytest
from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.tests.config.schemas import get_schema_test_cases, SchemaTestCaseInfo
# General testing strategy:
# - hand code a set of good/bad configs (useful to test editor schema checking)
# - for each config, load and validate against expected schema
# Load every hand-written test case that lives next to this module.
TEST_CASES: Dict[str, SchemaTestCaseInfo] = get_schema_test_cases(
    path.join(path.dirname(__file__), "test-cases"))

# Index the test cases by their type and by their subtype.
TEST_CASES_BY_TYPE: Dict[str, Dict[str, SchemaTestCaseInfo]] = {}
TEST_CASES_BY_SUBTYPE: Dict[str, Dict[str, SchemaTestCaseInfo]] = {}
for test_case_info in TEST_CASES.values():
    TEST_CASES_BY_TYPE.setdefault(
        test_case_info["test_case_type"], {})[test_case_info["test_case"]] = test_case_info
    TEST_CASES_BY_SUBTYPE.setdefault(
        test_case_info["test_case_subtype"], {})[test_case_info["test_case"]] = test_case_info

# Sanity check that we actually found test cases of each kind.
assert len(TEST_CASES_BY_TYPE["good"]) > 0
assert len(TEST_CASES_BY_TYPE["bad"]) > 0
assert len(TEST_CASES_BY_SUBTYPE) > 2
# Now we actually perform all of those validation tests.
@pytest.mark.parametrize("test_case_name", list(TEST_CASES.keys()))
def test_tunable_values_configs_against_schema(test_case_name: str) -> None:
    """
    Checks that "good" tunable values configs validate against the schema and
    that "bad" ones are rejected with a validation (or schema) error.
    """
    test_case = TEST_CASES[test_case_name]
    case_type = test_case["test_case_type"]
    if case_type not in ("good", "bad"):
        raise NotImplementedError(f"Unknown test case type: {test_case['test_case_type']}")
    if case_type == "bad":
        with pytest.raises((jsonschema.ValidationError, jsonschema.SchemaError)):
            ConfigSchema.TUNABLE_VALUES.validate(test_case["config"])
        return
    ConfigSchema.TUNABLE_VALUES.validate(test_case["config"])

Просмотреть файл

@ -11,6 +11,7 @@ from typing import List
import pytest
from mlos_bench.tests.config import locate_config_examples
from mlos_bench.services.base_service import Service
from mlos_bench.services.config_persistence import ConfigPersistenceService
from mlos_bench.util import path_join
@ -39,7 +40,7 @@ assert configs
@pytest.mark.parametrize("config_path", configs)
def test_load_service_config_examples(config_loader_service: ConfigPersistenceService, config_path: str) -> None:
"""Tests loading a config example."""
config = config_loader_service.load_config(config_path)
config = config_loader_service.load_config(config_path, schema_type=None) # TODO: , ConfigSchema.SERVICE)
# Make an instance of the class based on the config.
service_inst = config_loader_service.build_service(
config=config,

Просмотреть файл

@ -11,6 +11,7 @@ from typing import List
import pytest
from mlos_bench.tests.config import locate_config_examples
from mlos_bench.services.config_persistence import ConfigPersistenceService
from mlos_bench.storage.base_storage import Storage
from mlos_bench.tunables.tunable_groups import TunableGroups
@ -37,7 +38,7 @@ assert configs
@pytest.mark.parametrize("config_path", configs)
def test_load_storage_config_examples(config_loader_service: ConfigPersistenceService, config_path: str) -> None:
"""Tests loading a config example."""
config = config_loader_service.load_config(config_path)
config = config_loader_service.load_config(config_path, schema_type=None) # TODO: ConfigSchema.STORAGE)
assert isinstance(config, dict)
# Skip schema loading that would require a database connection for this test.
config["config"]["lazy_schema_create"] = True

Просмотреть файл

@ -1,5 +1,7 @@
// A simple key-value assignment of a tunables instance.
{
"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/tunables/tunable-values-schema.json",
"vmSize": "Standard_B2s",
"idle": "halt",
"kernel_sched_migration_cost_ns": 40000

Просмотреть файл

@ -10,6 +10,7 @@ import os
import sys
import pytest
from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.services.config_persistence import ConfigPersistenceService
@ -58,7 +59,7 @@ def test_resolve_path(config_persistence_service: ConfigPersistenceService) -> N
"""
Check if we can actually find a file somewhere in `config_path`.
"""
file_path = "tunables/tunable-values-example.jsonc"
file_path = "tunable-values/tunable-values-example.jsonc"
path = config_persistence_service.resolve_path(file_path)
assert path.endswith(file_path)
assert os.path.exists(path)
@ -78,7 +79,8 @@ def test_load_config(config_persistence_service: ConfigPersistenceService) -> No
"""
Check if we can successfully load a config file located relative to `config_path`.
"""
tunables_data = config_persistence_service.load_config("tunables/tunable-values-example.jsonc")
tunables_data = config_persistence_service.load_config("tunable-values/tunable-values-example.jsonc",
ConfigSchema.TUNABLE_VALUES)
assert tunables_data is not None
assert isinstance(tunables_data, dict)
assert len(tunables_data) >= 1

Просмотреть файл

@ -76,6 +76,7 @@ setup(
'mlos-core==' + _VERSION,
'requests',
'json5',
'jsonschema',
'importlib_resources;python_version<"3.10"',
] + extra_requires['storage-sql-sqlite'], # NOTE: For now sqlite is a fallback storage backend, so we always install it.
extras_require=extra_requires,

Просмотреть файл

@ -5,12 +5,11 @@ universal = 1
[pycodestyle]
count = True
# E124: Closing bracket does not match indentation of opening bracket's line
# E128: Continuation line under-indented for visual indent
# E261: At least two spaces before inline comment
# E502: The backslash is redundant between brackets
# W503: Line break occurred before a binary operator
# W504: Line break occurred after a binary operator
ignore = E124,E128,E261,E502,W503,W504
ignore = E124,E261,E502,W503,W504
format = pylint
# See Also: .editorconfig, .pylintrc
max-line-length = 132