зеркало из https://github.com/microsoft/MLOS.git
Merged PR 333: Initial MlosCore Implementation
Adds basic `register` and `suggest` APIs for a couple of optimizer backends: `Skopt`, `Emukit`, `Random`. Includes example notebook for starters. Limited unit testing. Basic function documentation. Azure DevOps CI pipelines for `pylint` and `pytest` via the `mlos_core` `conda` `environment.yml`. Related work items: #274, #279
This commit is contained in:
Родитель
c8e177b95d
Коммит
6b8f34e7d4
|
@ -139,3 +139,7 @@ cython_debug/
|
|||
|
||||
# vim swap files
|
||||
.*.swp
|
||||
|
||||
.conda-env.build-stamp
|
||||
.pylint.build-stamp
|
||||
.pytest.build-stamp
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
# vim: set ft=dosini:
|
||||
|
||||
[MAIN]
|
||||
|
||||
# Specify a score threshold to be exceeded before program exits with error.
|
||||
fail-under=9.5
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=132
|
||||
|
||||
[MESSAGE CONTROL]
|
||||
|
||||
disable=no-else-return
|
|
@ -0,0 +1,30 @@
|
|||
{
|
||||
"python.defaultInterpreterPath": "${env:HOME}/.conda/envs/mlos_core/bin/python",
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.pylintEnabled": true,
|
||||
"python.testing.pytestEnabled": true,
|
||||
"cSpell.ignoreWords": [
|
||||
"Skopt",
|
||||
"conda",
|
||||
"configspace",
|
||||
"emukit",
|
||||
"gpbo",
|
||||
"ipykernel",
|
||||
"iterrows",
|
||||
"jupyterlab",
|
||||
"matplotlib",
|
||||
"mlos",
|
||||
"nsmallest",
|
||||
"numpy",
|
||||
"pylint",
|
||||
"pyplot",
|
||||
"pytest",
|
||||
"scikit",
|
||||
"scipy",
|
||||
"seaborn",
|
||||
"setuptools",
|
||||
"tolist",
|
||||
"xlabel",
|
||||
"ylabel"
|
||||
]
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
# Name of the conda environment in which every lint/test/build command runs.
CONDA_DEFAULT_ENV := mlos_core
# Python sources of the package; used as prerequisites so that the lint, test
# and wheel stamps are refreshed whenever any source file changes.
# Evaluated once (":=") so `find` is not re-run on every reference.
PYTHON_FILES := $(shell find mlos_core/ -type f -name '*.py' 2>/dev/null)

.PHONY: all
all: check test dist doc

.PHONY: conda-env
conda-env: .conda-env.build-stamp

# Create the conda environment if it does not exist yet, then bring it in line
# with environment.yml. The stamp file records the last successful update.
.conda-env.build-stamp: environment.yml setup.py
	conda env list -q | grep -q "^${CONDA_DEFAULT_ENV} " || conda env create -q -f environment.yml
	conda env update -q -n ${CONDA_DEFAULT_ENV} --prune -f environment.yml
	touch $@

.PHONY: check
check: pylint

.PHONY: pylint
pylint: conda-env .pylint.build-stamp

# The conda env stamp is an order-only prerequisite: the environment must exist
# before linting (also under `make -j`), but updating it alone does not force
# a re-lint.
.pylint.build-stamp: $(PYTHON_FILES) .pylintrc | .conda-env.build-stamp
	conda run -n ${CONDA_DEFAULT_ENV} pylint -j0 mlos_core
	touch $@

.PHONY: test
test: pytest

.PHONY: pytest
pytest: conda-env .pytest.build-stamp

# FIXME: There's an issue with pytest-xdist not reaping children when
# pytest-timeout fails which we're currently using because somehow pytest is
# causing module imports to hang.
# pytest -n auto --cov=mlos_core --cov-report=xml mlos_core/
.pytest.build-stamp: $(PYTHON_FILES) pytest.ini | .conda-env.build-stamp
	conda run -n ${CONDA_DEFAULT_ENV} pytest --cov=mlos_core --cov-report=xml mlos_core/
	touch $@

.PHONY: dist
dist: bdist_wheel

.PHONY: bdist_wheel
bdist_wheel: conda-env dist/mlos_core-*-py3-none-any.whl

# The wildcard stands in for the package version, which lives in setup.py.
dist/mlos_core-*-py3-none-any.whl: setup.py $(PYTHON_FILES) | .conda-env.build-stamp
	conda run -n ${CONDA_DEFAULT_ENV} python3 setup.py bdist_wheel

# TODO: documentation build is not implemented yet; the target fails on purpose.
.PHONY: doc
doc:
	@false

.PHONY: clean-check
clean-check:
	rm -f .pylint.build-stamp

.PHONY: clean-test
clean-test:
	rm -f .pytest.build-stamp

.PHONY: dist-clean
dist-clean:
	rm -rf build dist

# Also drop the conda environment stamp so the next build re-validates the
# environment. (The stamp is named .conda-env.build-stamp, see the rule above.)
.PHONY: clean
clean: clean-check clean-test dist-clean
	rm -f .conda-env.build-stamp
	#rm -rf mlos_core.egg-info
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
41
README.md
41
README.md
|
@ -9,6 +9,47 @@ It is intended to provide a simplified, easier to consume (e.g. via `pip`), with
|
|||
|
||||
For both, the design intends to reuse as many OSS libraries as possible.
|
||||
|
||||
## Getting Started
|
||||
|
||||
0. Create the `mlos_core` Conda environment.
|
||||
|
||||
```sh
|
||||
conda env create -f environment.yml
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```sh
|
||||
# This will also ensure the environment is up to date using "conda env update -f environment.yml"
|
||||
make conda-env
|
||||
```
|
||||
|
||||
1. Initialize the shell environment.
|
||||
|
||||
```sh
|
||||
conda activate mlos_core
|
||||
```
|
||||
|
||||
2. Run the [`BayesianOptimization.ipynb`](./Notebooks/BayesianOptimization.ipynb) notebook.
|
||||
|
||||
## Distributing
|
||||
|
||||
1. Build the *wheel* file.
|
||||
|
||||
```sh
|
||||
make dist
|
||||
```
|
||||
|
||||
2. Install it (e.g. after copying it somewhere else).
|
||||
|
||||
```sh
|
||||
# this will install it with emukit support:
|
||||
pip install dist/mlos_core-0.0.1-py3-none-any.whl[emukit]
|
||||
|
||||
# this will install it with skopt support:
|
||||
pip install dist/mlos_core-0.0.1-py3-none-any.whl[skopt]
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
||||
[MlosCoreApiDesign.docx](https://microsoft.sharepoint.com/:w:/t/CISLGSL/ESAS3G9q4P5Hoult9uqTfB4B3xh2v6yUfp3YNgIvoyR_IA?e=B6klWZ)
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
trigger:
|
||||
#- main
|
||||
branches:
|
||||
include:
|
||||
- '*'
|
||||
|
||||
pr:
|
||||
branches:
|
||||
include:
|
||||
- '*'
|
||||
|
||||
pool:
|
||||
vmImage: ubuntu-latest
|
||||
# TODO: test multiple versions of python.
|
||||
#strategy:
|
||||
# matrix:
|
||||
# Python39:
|
||||
# python.version: '3.9'
|
||||
|
||||
steps:
|
||||
- bash: echo "##vso[task.prependpath]$CONDA/bin"
|
||||
displayName: 'Add conda to PATH'
|
||||
#- task: UsePythonVersion@0
|
||||
# inputs:
|
||||
# versionSpec: '$(python.version)'
|
||||
# displayName: 'Use Python $(python.version)'
|
||||
- bash: make conda-env
|
||||
displayName: 'Create mlos_core conda environment'
|
||||
- bash: conda run -n mlos_core pip install pytest-azurepipelines
|
||||
displayName: 'Install pytest-azurepipelines'
|
||||
- bash: make check
|
||||
displayName: 'Run lint checks'
|
||||
- bash: make test
|
||||
displayName: 'Run tests'
|
||||
- bash: make dist
|
||||
displayName: 'Generate binary distribution files'
|
|
@ -0,0 +1,29 @@
|
|||
name: mlos_core
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- scikit-learn
|
||||
- scipy
|
||||
- numpy
|
||||
- pandas
|
||||
- configspace
|
||||
- pip
|
||||
- pylint
|
||||
- pytest
|
||||
- setuptools
|
||||
- jupyterlab
|
||||
- jupyter
|
||||
- ipykernel
|
||||
- nb_conda_kernels
|
||||
- matplotlib
|
||||
- seaborn
|
||||
- python
|
||||
- gcc_linux-64
|
||||
- pip:
|
||||
- scikit-optimize
|
||||
- emukit
|
||||
- "--editable ."
|
||||
- pytest-cov
|
||||
- pytest-forked
|
||||
- pytest-xdist
|
||||
- pytest-timeout
|
|
@ -0,0 +1,5 @@
|
|||
"""
|
||||
Basic initializer module for the mlos_core package.
|
||||
"""
|
||||
|
||||
from mlos_core import optimizers
|
|
@ -0,0 +1,18 @@
|
|||
"""
|
||||
Basic initializer module for the mlos_core optimizers.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
|
||||
import ConfigSpace
|
||||
|
||||
from mlos_core.optimizers.optimizer import BaseOptimizer
|
||||
from mlos_core.optimizers.random_optimizer import RandomOptimizer
|
||||
from mlos_core.optimizers.bayesian_optimizers import EmukitOptimizer, SkoptOptimizer
|
||||
|
||||
__all__ = [
|
||||
'BaseOptimizer',
|
||||
'RandomOptimizer',
|
||||
'EmukitOptimizer',
|
||||
'SkoptOptimizer',
|
||||
]
|
|
@ -0,0 +1,191 @@
|
|||
"""
|
||||
Contains the wrapper classes for different Bayesian optimizers.
|
||||
"""
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
import ConfigSpace
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from mlos_core.optimizers.optimizer import BaseOptimizer
|
||||
from mlos_core.spaces import configspace_to_skopt_space, configspace_to_emukit_space
|
||||
|
||||
# TODO: provide a default optimizer.
|
||||
|
||||
class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta):
    """Interface that every Bayesian optimizer wrapper has to implement."""

    @abstractmethod
    def surrogate_predict(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Query the surrogate model of this optimizer for the given configuration(s).

        Parameters
        ----------
        configurations : pd.DataFrame
            One configuration per row; the columns are the parameter names.

        context : pd.DataFrame
            Not Yet Implemented.
        """

    @abstractmethod
    def acquisition_function(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Evaluate the acquisition function of this optimizer for the given configuration(s).

        Parameters
        ----------
        configurations : pd.DataFrame
            One configuration per row; the columns are the parameter names.

        context : pd.DataFrame
            Not Yet Implemented.
        """
|
||||
|
||||
class EmukitOptimizer(BaseBayesianOptimizer):
    """Wrapper class for Emukit based Bayesian optimization.

    Parameters
    ----------
    parameter_space : ConfigSpace.ConfigurationSpace
        The parameter space to optimize.
    """
    def __init__(self, parameter_space: ConfigSpace.ConfigurationSpace):
        super().__init__(parameter_space)
        self.emukit_parameter_space = configspace_to_emukit_space(parameter_space)
        # The Emukit optimization loop; created lazily by suggest() once the
        # random initialization phase is over.
        self.gpbo = None

    def register(self, configurations: pd.DataFrame, scores: pd.Series, context: pd.DataFrame = None):
        """Registers the given configurations and scores.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.

        Raises
        ------
        NotImplementedError
            If a context is given.
        """
        from emukit.core.loop.user_function_result import UserFunctionResult  # pylint: disable=import-outside-toplevel
        if context is not None:
            # Reject unsupported input *before* recording it, so that a failed
            # call does not leave a half-registered observation behind.
            raise NotImplementedError
        self._observations.append((configurations, scores, context))
        if self.gpbo is None:
            # we're in the random initialization phase
            # just remembering the observation above is enough
            return
        results = [
            UserFunctionResult(config, np.array([score]))
            for (_, config), score in zip(configurations.iterrows(), scores)
        ]
        self.gpbo.loop_state.update(results)
        self.gpbo._update_models()  # pylint: disable=protected-access

    def suggest(self, context: pd.DataFrame = None):
        """Suggests a new configuration.

        While at most 10 register() calls have been recorded the suggestion is
        sampled at random; afterwards it comes from the Emukit model.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        Raises
        ------
        NotImplementedError
            If a context is given.
        """
        from emukit.examples.gp_bayesian_optimization.single_objective_bayesian_optimization import GPBayesianOptimization  # pylint: disable=import-outside-toplevel
        if context is not None:
            raise NotImplementedError()
        if len(self._observations) <= 10:
            from emukit.core.initial_designs import RandomDesign  # pylint: disable=import-outside-toplevel
            config = RandomDesign(self.emukit_parameter_space).get_samples(1)
        else:
            if self.gpbo is None:
                # this should happen exactly once, when leaving the random phase
                observations = self.get_observations()
                self.gpbo = GPBayesianOptimization(
                    variables_list=self.emukit_parameter_space.parameters,
                    X=np.array(observations.drop(columns='score')),
                    Y=np.array(observations[['score']]))
            # this should happen any time after the initial model is created
            config = self.gpbo.get_next_points(results=[])
        return pd.DataFrame(config, columns=self.parameter_space.get_hyperparameter_names())

    def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Not implemented for this optimizer; always raises NotImplementedError."""
        raise NotImplementedError()

    def surrogate_predict(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Returns the surrogate model's mean prediction for the given configurations.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        context : pd.DataFrame
            Not Yet Implemented.

        Raises
        ------
        NotImplementedError
            If a context is given.
        RuntimeError
            If the surrogate model has not been created yet (suggest() creates
            it once the random initialization phase is over).
        """
        if context is not None:
            raise NotImplementedError
        if self.gpbo is None:
            raise RuntimeError("No surrogate model available yet: still in the random initialization phase.")
        # TODO: return variance in some way
        # The model is handed a plain array rather than the DataFrame itself.
        mean_predictions, _ = self.gpbo.model.predict(np.array(configurations))
        return mean_predictions

    def acquisition_function(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Not implemented for this optimizer; always raises NotImplementedError."""
        raise NotImplementedError()
|
||||
|
||||
|
||||
class SkoptOptimizer(BaseBayesianOptimizer):
    """Wrapper class for Skopt based Bayesian optimization.

    Parameters
    ----------
    parameter_space : ConfigSpace.ConfigurationSpace
        The parameter space to optimize.

    base_estimator : str or estimator
        Passed through as ``base_estimator`` to ``skopt.Optimizer``. Defaults to 'gp'.
    """
    def __init__(self, parameter_space: ConfigSpace.ConfigurationSpace, base_estimator = 'gp'):
        from skopt import Optimizer as Optimizer_Skopt  # pylint: disable=import-outside-toplevel
        self.base_optimizer = Optimizer_Skopt(configspace_to_skopt_space(parameter_space), base_estimator=base_estimator)
        super().__init__(parameter_space)

    def register(self, configurations: pd.DataFrame, scores: pd.Series, context: pd.DataFrame = None):
        """Registers the given configurations and scores.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.

        Raises
        ------
        NotImplementedError
            If a context is given.
        """
        if context is not None:
            # Reject unsupported input *before* recording it, so that the local
            # observation log never diverges from what skopt has been told.
            raise NotImplementedError
        self._observations.append((configurations, scores, context))
        self.base_optimizer.tell(np.array(configurations).tolist(), np.array(scores).tolist())

    def suggest(self, context: pd.DataFrame = None):
        """Suggests a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        Raises
        ------
        NotImplementedError
            If a context is given.
        """
        if context is not None:
            raise NotImplementedError
        return pd.DataFrame([self.base_optimizer.ask()], columns=self.parameter_space.get_hyperparameter_names())

    def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Not implemented for this optimizer; always raises NotImplementedError."""
        raise NotImplementedError()

    def surrogate_predict(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Returns the prediction of the most recent skopt model for the given configurations.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        context : pd.DataFrame
            Not Yet Implemented.

        Raises
        ------
        NotImplementedError
            If a context is given.
        RuntimeError
            If skopt has not fitted any model yet.
        """
        if context is not None:
            raise NotImplementedError
        if not self.base_optimizer.models:
            raise RuntimeError("No surrogate model available yet: not enough observations registered.")
        # skopt fits its models on the *transformed* search space, so the
        # configurations must be mapped into that space before predicting.
        transformed = self.base_optimizer.space.transform(np.array(configurations).tolist())
        return self.base_optimizer.models[-1].predict(transformed)

    def acquisition_function(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Not implemented for this optimizer; always raises NotImplementedError."""
        # This seems actually non-trivial to get out of skopt, so maybe we actually shouldn't implement this.
        raise NotImplementedError()
|
|
@ -0,0 +1,105 @@
|
|||
"""
|
||||
Contains the BaseOptimizer abstract class.
|
||||
"""
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
import ConfigSpace
|
||||
import pandas as pd
|
||||
|
||||
class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface.

    Parameters
    ----------
    parameter_space : ConfigSpace.ConfigurationSpace
        The parameter space to optimize.
    """
    def __init__(self, parameter_space: ConfigSpace.ConfigurationSpace):
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        # List of (configurations, scores, context) tuples, one per register() call.
        self._observations = []
        # List of (configurations, context) tuples for trials that were started
        # but have not reported a score yet.
        self._pending_observations = []

    def __repr__(self):
        return f"{self.__class__.__name__}(parameter_space={self.parameter_space})"

    @abstractmethod
    def register(self, configurations: pd.DataFrame, scores: pd.Series, context: pd.DataFrame = None):
        """Registers the given configurations and scores.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.
        """
        pass    # pylint: disable=unnecessary-pass

    @abstractmethod
    def suggest(self, context: pd.DataFrame = None):
        """Suggests a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        """
        pass    # pylint: disable=unnecessary-pass

    @abstractmethod
    def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Registers the given configurations as "pending".
        That is to say, it has been suggested by the optimizer, and an experiment trial has been started.
        This can be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        context : pd.DataFrame
            Not Yet Implemented.
        """
        pass    # pylint: disable=unnecessary-pass

    def get_observations(self):
        """Returns the observations as a dataframe.

        Returns
        -------
        observations : pd.DataFrame
            Dataframe of observations. The columns are parameter names and "score" for the score, each row is an observation.

        Raises
        ------
        ValueError
            If no observations have been registered yet.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        configs = pd.concat([config for config, _, _ in self._observations])
        scores = pd.concat([score for _, score, _ in self._observations])
        configs["score"] = scores
        # Only observations that actually carry a context contribute context
        # columns. Filtering explicitly (instead of catching ValueError from
        # pd.concat) keeps genuine concatenation errors visible.
        contexts = [context for _, _, context in self._observations if context is not None]
        if contexts:
            configs = pd.concat([configs, pd.concat(contexts)], axis=1)
        return configs

    def get_best_observation(self):
        """Returns the best observation so far as a dataframe.

        The best observation is the one with the smallest score.

        Returns
        -------
        best_observation : pd.DataFrame
            Dataframe with a single row containing the best observation. The columns are parameter names and "score" for the score.

        Raises
        ------
        ValueError
            If no observations have been registered yet.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        observations = self.get_observations()
        return observations.nsmallest(1, columns='score')
|
|
@ -0,0 +1,55 @@
|
|||
"""
|
||||
Contains the RandomOptimizer class.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from mlos_core.optimizers.optimizer import BaseOptimizer
|
||||
|
||||
class RandomOptimizer(BaseOptimizer):
    """Optimizer class that produces random suggestions.
    Useful for baseline comparison against Bayesian optimizers.

    Parameters
    ----------
    parameter_space : ConfigSpace.ConfigurationSpace
        The parameter space to optimize.
    """
    def register(self, configurations: pd.DataFrame, scores: pd.Series, context: pd.DataFrame = None):
        """Registers the given configurations and scores.

        Doesn't do anything on the RandomOptimizer except storing configurations for logging.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : None
            Not Yet Implemented.
        """
        self._observations.append((configurations, scores, context))
        # should we pop them from self.pending_observations?

    def suggest(self, context: pd.DataFrame = None):
        """Suggests a new configuration.

        Sampled at random using ConfigSpace.

        Parameters
        ----------
        context : None
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        """
        sampled = self.parameter_space.sample_configuration().get_dictionary()
        # Wrap the sampled {name: value} mapping into a single-row dataframe so
        # the return type matches the documented contract.
        return pd.DataFrame(sampled, index=[0])

    def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
        """Remembers the given configurations (and context) as pending.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        context : pd.DataFrame
            Not Yet Implemented.
        """
        self._pending_observations.append((configurations, context))
|
|
@ -0,0 +1,3 @@
|
|||
"""
|
||||
Tests for Bayesian Optimizers.
|
||||
"""
|
|
@ -0,0 +1,35 @@
|
|||
"""
|
||||
Tests for Bayesian Optimizers.
|
||||
"""
|
||||
|
||||
from typing import Type
|
||||
|
||||
import pytest
|
||||
|
||||
import ConfigSpace as CS
|
||||
|
||||
from mlos_core.optimizers import BaseOptimizer, EmukitOptimizer, SkoptOptimizer, RandomOptimizer
|
||||
|
||||
@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
    # FIXME: hangs on emukit import
    #(EmukitOptimizer, {}),
    (SkoptOptimizer, {'base_estimator': 'gp'}),
    (RandomOptimizer, {})
])
def test_create_optimizer_and_suggest(optimizer_class: Type[BaseOptimizer], kwargs):
    """
    Smoke test: each optimizer can be constructed on a tiny search space
    and produces a suggestion.
    """
    # Search space with one continuous dimension 'x' in [0, 1].
    x_dimension = CS.UniformFloatHyperparameter(name='x', lower=0, upper=1)
    search_space = CS.ConfigurationSpace(seed=1234)
    search_space.add_hyperparameter(x_dimension)

    opt = optimizer_class(search_space, **kwargs)
    assert opt is not None
    assert opt.parameter_space is not None

    first_suggestion = opt.suggest()
    assert first_suggestion is not None
|
|
@ -0,0 +1,3 @@
|
|||
"""
|
||||
Tests for random optimizer.
|
||||
"""
|
|
@ -0,0 +1,46 @@
|
|||
"""
|
||||
Contains classes related to experiment execution runners.
|
||||
These classes contain the policies for managing things like retries and failed
|
||||
configs when interacting with the optimizer(s).
|
||||
"""
|
||||
|
||||
# TODO: Implement retry/failure handling logic.
|
||||
|
||||
class ExperimentRunner:
    """Manages pending observations for parallel & asynchronous optimization.

    Parameters
    ----------
    optimizer
        The optimizer to delegate to. It must provide ``register``, ``suggest``
        and ``register_pending`` methods.
    """
    def __init__(self, optimizer):
        self.optimizer = optimizer

    def register(self, configurations, scores, context=None):
        """Registers the given configurations and scores with the optimizer associated with this ExperimentRunner.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.
        """
        self.optimizer.register(configurations, scores, context)

    def suggest(self, configurations=None, context=None):
        """Gets a new configuration suggestion from the optimizer associated
        with this ExperimentRunner and automatically registers it as "pending",
        under the assumption that it will be executed as an experiment trial.

        Parameters
        ----------
        configurations
            Ignored. Kept (now optional) only for backward compatibility with
            callers that pass it positionally.

        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        """
        suggestion = self.optimizer.suggest(context)
        self.optimizer.register_pending(suggestion, context)
        # Hand the suggestion back to the caller; without this the caller
        # could never learn which configuration to run.
        return suggestion
|
|
@ -0,0 +1,67 @@
|
|||
"""
|
||||
Contains some helper functions for converting ConfigSpace configuration spaces into optimizer-specific search spaces.
|
||||
"""
|
||||
|
||||
import ConfigSpace
|
||||
import numpy as np
|
||||
|
||||
|
||||
def configspace_to_skopt_space(config_space: ConfigSpace.ConfigurationSpace):
    """Translate a ConfigSpace.ConfigurationSpace into the equivalent list of skopt dimensions.

    Parameters
    ----------
    config_space : ConfigSpace.ConfigurationSpace
        Input configuration space.

    Returns
    -------
    list of skopt.space.Space
    """
    import skopt.space  # pylint: disable=import-outside-toplevel

    # Numeric hyperparameter types and the skopt dimension each one maps to.
    numeric_dimensions = (
        (ConfigSpace.UniformFloatHyperparameter, skopt.space.Real),
        (ConfigSpace.UniformIntegerHyperparameter, skopt.space.Integer),
    )

    def _one_parameter_convert(parameter):
        for configspace_type, skopt_dimension in numeric_dimensions:
            if isinstance(parameter, configspace_type):
                prior = 'log-uniform' if parameter.log else 'uniform'
                return skopt_dimension(low=parameter.lower, high=parameter.upper, prior=prior, name=parameter.name)
        if isinstance(parameter, ConfigSpace.CategoricalHyperparameter):
            return skopt.space.Categorical(categories=parameter.choices, prior=parameter.weights, name=parameter.name)
        raise ValueError(f"Type of parameter {parameter} ({type(parameter)}) not supported.")

    converted = []
    for hyperparameter in config_space.get_hyperparameters():
        converted.append(_one_parameter_convert(hyperparameter))
    return converted
|
||||
|
||||
|
||||
def configspace_to_emukit_space(config_space: ConfigSpace.ConfigurationSpace):
    """Converts a ConfigSpace.ConfigurationSpace to emukit.core.ParameterSpace.

    Parameters
    ----------
    config_space : ConfigSpace.ConfigurationSpace
        Input configuration space.

    Returns
    -------
    emukit.core.ParameterSpace

    Raises
    ------
    ValueError
        If a parameter uses a log scale or is of an unsupported type.
    """
    import emukit.core  # pylint: disable=import-outside-toplevel

    def _one_parameter_convert(parameter):
        # Not every hyperparameter type has a `log` attribute (categorical
        # ones do not), so it is looked up defensively; reading it directly
        # would raise AttributeError before the type dispatch below is reached.
        if getattr(parameter, 'log', False):
            raise ValueError("Emukit doesn't support log parameters.")
        if isinstance(parameter, ConfigSpace.UniformFloatHyperparameter):
            return emukit.core.ContinuousParameter(name=parameter.name, min_value=parameter.lower, max_value=parameter.upper)
        elif isinstance(parameter, ConfigSpace.UniformIntegerHyperparameter):
            # upper bound is inclusive, hence the +1
            return emukit.core.DiscreteParameter(name=parameter.name, domain=np.arange(parameter.lower, parameter.upper+1))
        elif isinstance(parameter, ConfigSpace.CategoricalHyperparameter):
            encoding = emukit.core.OneHotEncoding(parameter.choices)
            return emukit.core.CategoricalParameter(name=parameter.name, encoding=encoding)
        raise ValueError(f"Type of parameter {parameter} ({type(parameter)}) not supported.")

    return emukit.core.ParameterSpace([_one_parameter_convert(param) for param in config_space.get_hyperparameters()])
|
|
@ -0,0 +1,8 @@
|
|||
[pytest]
|
||||
|
||||
# Note: --boxed is required for use with the pytest-timeout plugin and thread method.
|
||||
addopts = -svxl
|
||||
# --boxed
|
||||
# Moved these to Makefile
|
||||
#-n auto
|
||||
#--cov=mlos_core --cov-report=xml
|
|
@ -0,0 +1,27 @@
|
|||
"""
|
||||
Setup instructions for the mlos_core package.
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
    name="mlos-core",
    version="0.0.1",
    packages=find_packages(),
    install_requires=[
        'scikit-learn>=0.22.1',
        'scipy>=1.3.2',
        'numpy>=1.18.1',
        'pandas>=1.0.3',
        # Imported unconditionally by the mlos_core optimizer and spaces
        # modules, so it is a hard runtime requirement of the package.
        'ConfigSpace',
    ],
    # Optional optimizer backends, installable via e.g. `mlos_core[emukit]`.
    extras_require={
        'emukit': ['emukit'],
        'skopt': ['scikit-optimize'],
    },
    author="Microsoft",
    author_email="amueller@microsoft.com",
    description=("MLOS Core Python interface for parameter optimization."),
    license="",
    keywords="",
    #python_requires='>=3.7',
)
|
Загрузка…
Ссылка в новой задаче