Merged PR 382: basic tests for optimizers

This covers the basic API for the optimizers.
Andreas Mueller 2022-03-16 01:27:21 +00:00
Parent 92f8497d89
Commit 57dea8f43f
6 changed files with 131 additions and 21 deletions
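
For reference, a minimal sketch of the basic API these tests exercise, mirroring the toy problem in the new tests below (shown with SkoptOptimizer and the 'gp' estimator; the parametrized tests run the same loop against the other optimizer classes):

import numpy as np
import ConfigSpace as CS

from mlos_core.optimizers import SkoptOptimizer

def objective(x):
    # one-dimensional toy objective used in the tests
    return (6 * x - 2)**2 * np.sin(12 * x - 4)

# define the search space: a single continuous input in [0, 1]
input_space = CS.ConfigurationSpace(seed=1234)
input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='x', lower=0, upper=1))

optimizer = SkoptOptimizer(input_space, base_estimator='gp')
for _ in range(20):
    suggestion = optimizer.suggest()       # one-row pd.DataFrame of parameter values
    score = objective(suggestion['x'])     # pd.Series with one entry
    optimizer.register(suggestion, score)

print(optimizer.get_best_observation())    # one-row pd.DataFrame, columns ['x', 'score']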

View file

@@ -2,13 +2,10 @@
 Basic initializer module for the mlos_core optimizers.
 """
 from enum import Enum
 import ConfigSpace
 from mlos_core.optimizers.optimizer import BaseOptimizer
 from mlos_core.optimizers.random_optimizer import RandomOptimizer
-from mlos_core.optimizers.bayesian_optimizers import EmukitOptimizer, SkoptOptimizer
+from mlos_core.optimizers.bayesian_optimizers import (
+    EmukitOptimizer, SkoptOptimizer)
 __all__ = [
     'BaseOptimizer',

View file

@@ -13,6 +13,7 @@ from mlos_core.spaces import configspace_to_skopt_space, configspace_to_emukit_s
 # TODO: provide a default optimizer.
 class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta):
     """Abstract base class defining the interface for Bayesian optimization."""
     @abstractmethod
@@ -27,7 +28,7 @@ class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta):
         context : pd.DataFrame
             Not Yet Implemented.
         """
-        pass  # pylint: disable=unnecessary-pass
+        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
     @abstractmethod
     def acquisition_function(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
@@ -41,7 +42,8 @@ class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta):
         context : pd.DataFrame
             Not Yet Implemented.
         """
-        pass  # pylint: disable=unnecessary-pass
+        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
 class EmukitOptimizer(BaseBayesianOptimizer):
     """Wrapper class for Emukit based Bayesian optimization.
@@ -120,9 +122,13 @@ class EmukitOptimizer(BaseBayesianOptimizer):
         raise NotImplementedError()
     def surrogate_predict(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
-        # TODO: return variance in some way
-        mean_predictions, variance_predictions = self.gpbo.model.predict(configurations)
-        return mean_predictions
+        if context is not None:
+            raise NotImplementedError
+        # TODO: return variance in some way
+        # TODO: check columns in configurations
+        mean_predictions, variance_predictions = self.gpbo.model.predict(np.array(configurations))
+        # flatten the 2-dim (n, 1) prediction array into a 1-dim vector
+        return mean_predictions.reshape(-1,)
     def acquisition_function(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
         raise NotImplementedError()
@@ -136,7 +142,7 @@ class SkoptOptimizer(BaseBayesianOptimizer):
     parameter_space : ConfigSpace.ConfigurationSpace
         The parameter space to optimize.
     """
-    def __init__(self, parameter_space: ConfigSpace.ConfigurationSpace, base_estimator = 'gp'):
+    def __init__(self, parameter_space: ConfigSpace.ConfigurationSpace, base_estimator='gp'):
         from skopt import Optimizer as Optimizer_Skopt  # pylint: disable=import-outside-toplevel
         self.base_optimizer = Optimizer_Skopt(configspace_to_skopt_space(parameter_space), base_estimator=base_estimator)
         super().__init__(parameter_space)
@@ -184,7 +190,8 @@ class SkoptOptimizer(BaseBayesianOptimizer):
     def surrogate_predict(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
         if context is not None:
             raise NotImplementedError
-        return self.base_optimizer.models[-1].predict(configurations)
+        # TODO: check configuration columns
+        return self.base_optimizer.models[-1].predict(np.array(configurations))
     def acquisition_function(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
         # Getting this out of skopt seems non-trivial, so maybe we shouldn't implement it.
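
An aside on the "check columns" TODOs in the two surrogate_predict implementations above: np.array(configurations) keeps whatever column order the incoming DataFrame happens to have, so a caller passing columns in a different order than the surrogate was fit on would silently get wrong predictions. A minimal illustration (hypothetical values):

import numpy as np
import pandas as pd

df = pd.DataFrame({'y': [1.0], 'x': [2.0]})
# 'y' comes first because that is the DataFrame's column order,
# regardless of the order the model expects
print(np.array(df))  # [[1. 2.]]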

View file

@@ -7,6 +7,7 @@ from abc import ABCMeta, abstractmethod
 import ConfigSpace
 import pandas as pd
 class BaseOptimizer(metaclass=ABCMeta):
     """Optimizer abstract base class defining the basic interface.
@@ -38,7 +39,7 @@ class BaseOptimizer(metaclass=ABCMeta):
         context : pd.DataFrame
             Not Yet Implemented.
         """
-        pass  # pylint: disable=unnecessary-pass
+        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
     @abstractmethod
     def suggest(self, context: pd.DataFrame = None):
@@ -54,7 +55,7 @@ class BaseOptimizer(metaclass=ABCMeta):
         configuration : pd.DataFrame
             Pandas dataframe with a single row. Column names are the parameter names.
         """
-        pass  # pylint: disable=unnecessary-pass
+        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
     @abstractmethod
     def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
@@ -70,7 +71,7 @@ class BaseOptimizer(metaclass=ABCMeta):
         context : pd.DataFrame
             Not Yet Implemented.
         """
-        pass  # pylint: disable=unnecessary-pass
+        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
     def get_observations(self):
         """Returns the observations as a dataframe.
@@ -80,6 +81,8 @@ class BaseOptimizer(metaclass=ABCMeta):
         observations : pd.DataFrame
             Dataframe of observations. The columns are the parameter names plus "score" for the score; each row is one observation.
         """
+        if len(self._observations) == 0:
+            raise ValueError("No observations registered yet.")
         configs = pd.concat([config for config, _, _ in self._observations])
         scores = pd.concat([score for _, score, _ in self._observations])
         try:
@@ -88,7 +91,9 @@ class BaseOptimizer(metaclass=ABCMeta):
             contexts = None
         configs["score"] = scores
         if contexts is not None:
-            configs = pd.concat([configs, contexts], axis=1)
+            # configs = pd.concat([configs, contexts], axis=1)
+            # not reachable for now, since contexts are not yet implemented
+            raise NotImplementedError  # pragma: no cover
         return configs
     def get_best_observation(self):

View file

@@ -49,7 +49,8 @@ class RandomOptimizer(BaseOptimizer):
         configuration : pd.DataFrame
             Pandas dataframe with a single row. Column names are the parameter names.
         """
-        return self.parameter_space.sample_configuration().get_dictionary()
+        return pd.DataFrame(self.parameter_space.sample_configuration().get_dictionary(), index=[0])
     def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None):
-        self._pending_observations.append((configurations, context))
+        raise NotImplementedError()
+        # self._pending_observations.append((configurations, context))
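
A note on the index=[0] in the new suggest() return value: sample_configuration().get_dictionary() yields a dict of plain scalars, and pandas refuses to build a DataFrame from all-scalar values without an explicit index. A quick illustration (hypothetical value):

import pandas as pd

# pd.DataFrame({'x': 0.42}) raises
# "ValueError: If using all scalar values, you must pass an index"
print(pd.DataFrame({'x': 0.42}, index=[0]))  # one-row frame with column 'x'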

View file

@@ -1,3 +1,39 @@
 """
 Tests for Bayesian Optimizers.
 """
+# pylint: disable=missing-function-docstring
+from typing import Type
+import pytest
+import pandas as pd
+import ConfigSpace as CS
+from mlos_core.optimizers import BaseOptimizer, EmukitOptimizer, SkoptOptimizer
+@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
+    (EmukitOptimizer, {}),
+    (SkoptOptimizer, {'base_estimator': 'gp'}),
+])
+def test_context_not_implemented_error(optimizer_class: Type[BaseOptimizer], kwargs):
+    input_space = CS.ConfigurationSpace(seed=1234)
+    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='x', lower=0, upper=1))
+    optimizer = optimizer_class(input_space, **kwargs)
+    suggestion = optimizer.suggest()
+    score = pd.DataFrame({'score': [1]})
+    # passing a context raises NotImplementedError everywhere
+    with pytest.raises(NotImplementedError):
+        optimizer.register(suggestion, score, context="something")
+    with pytest.raises(NotImplementedError):
+        optimizer.suggest(context="something")
+    with pytest.raises(NotImplementedError):
+        optimizer.surrogate_predict(suggestion, context="something")
+    # the acquisition function is not implemented yet
+    with pytest.raises(NotImplementedError):
+        optimizer.acquisition_function(suggestion)

View file

@@ -2,13 +2,19 @@
 Tests for Bayesian Optimizers.
 """
+# pylint: disable=missing-function-docstring
+from typing import Type
 import pytest
 import pandas as pd
+import numpy as np
 import ConfigSpace as CS
 from mlos_core.optimizers import BaseOptimizer, EmukitOptimizer, SkoptOptimizer, RandomOptimizer
+from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer
 @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
     (EmukitOptimizer, {}),
@@ -16,9 +22,6 @@ from mlos_core.optimizers import BaseOptimizer, EmukitOptimizer, SkoptOptimizer,
     (RandomOptimizer, {})
 ])
 def test_create_optimizer_and_suggest(optimizer_class: Type[BaseOptimizer], kwargs):
-    """
-    Helper method for testing optimizers.
-    """
     # Start defining a ConfigurationSpace for the Optimizer to search.
     input_space = CS.ConfigurationSpace(seed=1234)
@@ -32,3 +35,64 @@ def test_create_optimizer_and_suggest(optimizer_class: Type[BaseOptimizer], kwar
     suggestion = optimizer.suggest()
     assert suggestion is not None
     myrepr = repr(optimizer)
+    assert myrepr.startswith(optimizer_class.__name__)
+    # register_pending is not implemented
+    with pytest.raises(NotImplementedError):
+        optimizer.register_pending(suggestion)
+@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
+    (EmukitOptimizer, {}),
+    (SkoptOptimizer, {'base_estimator': 'gp'}),
+    (SkoptOptimizer, {'base_estimator': 'et'}),
+    (RandomOptimizer, {})
+])
+def test_basic_interface_toy_problem(optimizer_class: Type[BaseOptimizer], kwargs):
+    def objective(x):
+        return (6 * x - 2)**2 * np.sin(12 * x - 4)
+    # Start defining a ConfigurationSpace for the Optimizer to search.
+    input_space = CS.ConfigurationSpace(seed=1234)
+    # Add a single continuous input dimension between 0 and 1.
+    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='x', lower=0, upper=1))
+    optimizer = optimizer_class(input_space, **kwargs)
+    with pytest.raises(ValueError, match="No observations"):
+        optimizer.get_best_observation()
+    with pytest.raises(ValueError, match="No observations"):
+        optimizer.get_observations()
+    for _ in range(20):
+        suggestion = optimizer.suggest()
+        assert isinstance(suggestion, pd.DataFrame)
+        assert suggestion.columns == ['x']
+        # check that the suggestion lies within the parameter space;
+        # is_valid_configuration() raises an error if it does not
+        configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
+        configuration.is_valid_configuration()
+        observation = objective(suggestion['x'])
+        assert isinstance(observation, pd.Series)
+        optimizer.register(suggestion, observation)
+    best_observation = optimizer.get_best_observation()
+    assert isinstance(best_observation, pd.DataFrame)
+    assert (best_observation.columns == ['x', 'score']).all()
+    assert best_observation['score'].iloc[0] < -1
+    all_observations = optimizer.get_observations()
+    assert isinstance(all_observations, pd.DataFrame)
+    assert all_observations.shape == (20, 2)
+    assert (all_observations.columns == ['x', 'score']).all()
+    # This would fit better in the Bayesian optimizer tests, but then we would have to refit the model.
+    if isinstance(optimizer, BaseBayesianOptimizer):
+        pred_best = optimizer.surrogate_predict(best_observation[['x']])
+        assert pred_best.shape == (1,)
+        pred_all = optimizer.surrogate_predict(all_observations[['x']])
+        assert pred_all.shape == (20,)
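
On the best_observation['score'].iloc[0] < -1 assertion: the toy objective is the classic one-dimensional Forrester function, whose global minimum is roughly -6.02 near x ≈ 0.757, so 20 samples on [0, 1] should comfortably find a point below -1 even for the random optimizer. A quick grid check:

import numpy as np

# evaluate f(x) = (6x - 2)^2 * sin(12x - 4) on a fine grid over [0, 1]
x = np.linspace(0, 1, 10001)
f = (6 * x - 2)**2 * np.sin(12 * x - 4)
print(x[f.argmin()], f.min())  # approximately 0.7572 and -6.0207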