mypy type checking support for mlos_core (mlos_bench to follow later) (#301)

* mypy type checking support for mlos_core (mlos_bench to follow later)
This commit is contained in:
Brian Kroth 2023-04-12 11:11:58 -05:00 коммит произвёл GitHub
Родитель b7c557a80d
Коммит cac5351b98
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
30 изменённых файлов: 374 добавлений и 73 удалений

Просмотреть файл

@ -28,8 +28,10 @@
"llamatune",
"matplotlib",
"mlos",
"ndarray",
"nsmallest",
"numpy",
"obvs",
"perc",
"pinv",
"pylint",

Просмотреть файл

@ -45,8 +45,12 @@
"settings": {
// Adjust the python interpreter path to point to the conda environment
"python.defaultInterpreterPath": "/opt/conda/envs/mlos_core/bin/python",
"python.testing.pytestPath": "/opt/conda/envs/mlos_core/bin/pytest",
"python.linting.pylintPath": "/opt/conda/envs/mlos_core/bin/pylint",
"pylint.path": ["/opt/conda/envs/mlos_core/bin/pylint"]
"pylint.path": ["/opt/conda/envs/mlos_core/bin/pylint"],
"python.linting.mypyPath": "/opt/conda/envs/mlos_core/bin/mypy",
"mypy.dmypyExecutable": "/opt/conda/envs/mlos_core/bin/dmypy",
"mypy.runUsingActiveInterpreter": false
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
@ -55,6 +59,7 @@
"ms-python.vscode-pylance",
"ms-python.python",
"ms-python.pylint",
"matangover.mypy",
"donjayamanne.python-environment-manager",
"njpwerner.autodocstring",
"ms-toolsai.jupyter",

Просмотреть файл

@ -38,8 +38,9 @@ max-line-length=132
disable=
no-else-return,
consider-using-assignment-expr,
deprecated-typing-alias, # disable for now - only deprecated recently
deprecated-typing-alias, # disable for now - only deprecated recently
docstring-first-line-empty,
consider-alternative-union-syntax, # disable for now - still supporting python 3.8
missing-raises-doc
[STRING]

1
.vscode/extensions.json поставляемый
Просмотреть файл

@ -5,6 +5,7 @@
"ms-python.vscode-pylance",
"ms-python.python",
"ms-python.pylint",
"matangover.mypy",
"donjayamanne.python-environment-manager",
"njpwerner.autodocstring",
"lextudio.restructuredtext",

5
.vscode/settings.json поставляемый
Просмотреть файл

@ -1,11 +1,14 @@
// vim: set ft=jsonc:
{
"makefile.extensionOutputFolder": "./.vscode",
"python.defaultInterpreterPath": "${env:HOME}${env:USERPROFILE}/.conda/envs/mlos_core/bin/python",
"python.testing.pytestEnabled": true,
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.linting.pycodestyleEnabled": true,
"python.linting.pydocstyleEnabled": true,
"python.testing.pytestEnabled": true,
"python.linting.mypyEnabled": false, // use the dmypy daemon extension instead - it's faster
// "mypy.runUsingActiveInterpreter": true, // in the devcontainer we override this to use an explicit path
"autoDocstring.docstringFormat": "numpy",
"restructuredtext.linter.doc8.extraArgs": [
"--ignore D001"

Просмотреть файл

@ -41,7 +41,7 @@ clean-conda-env:
rm -f build/conda-env.${CONDA_ENV_NAME}.build-stamp
.PHONY: check
check: pycodestyle pydocstyle pylint # cspell licenseheaders markdown-link-check
check: pycodestyle pydocstyle pylint mypy # cspell licenseheaders markdown-link-check
.PHONY: pycodestyle
pycodestyle: conda-env build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp build/pycodestyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp
@ -112,6 +112,17 @@ build/pylint.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.
conda run -n ${CONDA_ENV_NAME} pylint -j0 $(filter-out .pylintrc,$+)
touch $@
# Type-check target: `mypy` is phony and is satisfied by the conda env plus the
# per-package mypy build stamp(s) listed below (only mlos_core for now, see TODO).
.PHONY: mypy
mypy: conda-env build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp # TODO: build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp
# Per-package prerequisites: each stamp is out of date whenever any file listed
# in the corresponding *_PYTHON_FILES variable is newer than the stamp.
build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES)
build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES)
# Shared pattern rule providing the recipe for the stamps above.
# The wrapper script, the conda env stamp and setup.cfg are prerequisites (so a
# change to any of them re-runs the check) but are filtered out of $+ so that
# only the remaining prerequisites are passed to the wrapper as arguments.
# The stamp file is touched only after the wrapper command succeeds.
build/mypy.%.${CONDA_ENV_NAME}.build-stamp: scripts/dmypy-wrapper.sh build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg
conda run -n ${CONDA_ENV_NAME} scripts/dmypy-wrapper.sh \
$(filter-out scripts/dmypy-wrapper.sh build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg,$+)
touch $@
.PHONY: test
test: pytest
@ -365,6 +376,8 @@ clean-check:
rm -f build/pylint.${CONDA_ENV_NAME}.build-stamp
rm -f build/pylint.mlos_core.${CONDA_ENV_NAME}.build-stamp
rm -f build/pylint.mlos_bench.${CONDA_ENV_NAME}.build-stamp
rm -f build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp
rm -f build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp
rm -f build/pycodestyle.build-stamp
rm -f build/pycodestyle.${CONDA_ENV_NAME}.build-stamp
rm -f build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp

Просмотреть файл

@ -9,6 +9,7 @@ dependencies:
- pycodestyle
- autopep8
- pydocstyle
- mypy
- pytest
- pytest-cov
- pytest-forked
@ -28,5 +29,9 @@ dependencies:
- bump2version
- licenseheaders
- pytest-local-badge
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- "--editable ../mlos_core[full]"
- "--editable ../mlos_bench[full]"

Просмотреть файл

@ -9,6 +9,7 @@ dependencies:
- pycodestyle
- autopep8
- pydocstyle
- mypy
- pytest
- pytest-cov
- pytest-forked
@ -28,5 +29,9 @@ dependencies:
- bump2version
- licenseheaders
- pytest-local-badge
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- "--editable ../mlos_core[full]"
- "--editable ../mlos_bench[full]"

Просмотреть файл

@ -9,6 +9,7 @@ dependencies:
- pycodestyle
- autopep8
- pydocstyle
- mypy
- pytest
- pytest-cov
- pytest-forked
@ -28,5 +29,9 @@ dependencies:
- bump2version
- licenseheaders
- pytest-local-badge
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- "--editable ../mlos_core[full]"
- "--editable ../mlos_bench[full]"

Просмотреть файл

@ -9,6 +9,7 @@ dependencies:
- pycodestyle
- autopep8
- pydocstyle
- mypy
- pytest
- pytest-cov
- pytest-forked
@ -28,5 +29,9 @@ dependencies:
- bump2version
- licenseheaders
- pytest-local-badge
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- "--editable ../mlos_core[full]"
- "--editable ../mlos_bench[full]"

Просмотреть файл

@ -9,6 +9,7 @@ dependencies:
- pycodestyle
- autopep8
- pydocstyle
- mypy
- pytest
- pytest-cov
- pytest-forked
@ -31,5 +32,9 @@ dependencies:
- bump2version
- licenseheaders
- pytest-local-badge
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- "--editable ../mlos_core[full]"
- "--editable ../mlos_bench[full]"

Просмотреть файл

@ -9,6 +9,7 @@ dependencies:
- pycodestyle
- autopep8
- pydocstyle
- mypy
- pytest
- pytest-cov
- pytest-forked
@ -31,5 +32,9 @@ dependencies:
- bump2version
- licenseheaders
- pytest-local-badge
- types-setuptools
- types-pygments
- types-colorama
- pandas-stubs
- "--editable ../mlos_core[full]"
- "--editable ../mlos_bench[full]"

Просмотреть файл

@ -39,7 +39,15 @@ class OptimizerType(Enum):
"""An instance of SkoptOptimizer class will be used"""
ConcreteOptimizer = TypeVar('ConcreteOptimizer', *[member.value for member in OptimizerType])
# To make mypy happy, we need to define a type variable for each optimizer type.
# https://github.com/python/mypy/issues/12952
# ConcreteOptimizer = TypeVar('ConcreteOptimizer', *[member.value for member in OptimizerType])
# To address this, we add a test for complete coverage of the enum.
ConcreteOptimizer = TypeVar('ConcreteOptimizer',
RandomOptimizer,
EmukitOptimizer,
SkoptOptimizer,
)
class OptimizerFactory:
@ -52,7 +60,7 @@ class OptimizerFactory:
parameter_space: ConfigSpace.ConfigurationSpace,
optimizer_type: OptimizerType = OptimizerType.SKOPT,
optimizer_kwargs: Optional[dict] = None,
space_adapter_type: Optional[SpaceAdapterType] = SpaceAdapterType.IDENTITY,
space_adapter_type: SpaceAdapterType = SpaceAdapterType.IDENTITY,
space_adapter_kwargs: Optional[dict] = None,
) -> ConcreteOptimizer:
"""Creates a new optimizer instance, given the parameter space, optimizer type and potential optimizer options.
@ -83,4 +91,5 @@ class OptimizerFactory:
if optimizer_kwargs is None:
optimizer_kwargs = {}
space_adapter = SpaceAdapterFactory.create(parameter_space, space_adapter_type, space_adapter_kwargs=space_adapter_kwargs)
return optimizer_type.value(parameter_space, space_adapter=space_adapter, **optimizer_kwargs)
optimizer: ConcreteOptimizer = optimizer_type.value(parameter_space, space_adapter=space_adapter, **optimizer_kwargs)
return optimizer

Просмотреть файл

@ -6,11 +6,12 @@
Contains the wrapper classes for different Bayesian optimizers.
"""
from typing import Optional
from typing import Callable, Optional
from abc import ABCMeta, abstractmethod
import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd
from mlos_core.optimizers.optimizer import BaseOptimizer
@ -18,6 +19,26 @@ from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.spaces import configspace_to_skopt_space, configspace_to_emukit_space
def _df_to_ndarray(config: pd.DataFrame) -> npt.NDArray:
"""
Converts a single config to an ndarray.
Done this way to let mypy validate the different types across _transform function options.
Parameters
----------
config : pd.DataFrame
Dataframe of configurations / parameters.
The columns are parameter names and the row is the configuration.
Returns
-------
config : np.array
Numpy array of the data.
"""
return config.to_numpy()
class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta):
"""Abstract base class defining the interface for Bayesian optimization."""
@ -65,7 +86,8 @@ class EmukitOptimizer(BaseBayesianOptimizer):
def __init__(self, parameter_space: ConfigSpace.ConfigurationSpace, space_adapter: Optional[BaseSpaceAdapter] = None):
super().__init__(parameter_space, space_adapter)
self.emukit_parameter_space = configspace_to_emukit_space(self.optimizer_parameter_space)
self.gpbo = None
from emukit.examples.gp_bayesian_optimization.single_objective_bayesian_optimization import GPBayesianOptimization # noqa pylint: disable=import-outside-toplevel
self.gpbo: GPBayesianOptimization
def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: pd.DataFrame = None):
"""Registers the given configurations and scores.
@ -85,13 +107,13 @@ class EmukitOptimizer(BaseBayesianOptimizer):
if context is not None:
# not sure how that works here?
raise NotImplementedError()
if self.gpbo is None:
if getattr(self, 'gpbo', None) is None:
# we're in the random initialization phase
# just remembering the observation above is enough
return
results = []
for (_, config), score in zip(configurations.iterrows(), scores):
one_hot = self._to_1hot(config)
one_hot = self._to_1hot(pd.DataFrame([config]))
results.append(UserFunctionResult(one_hot[0], np.array([score])))
self.gpbo.loop_state.update(results)
self.gpbo._update_models() # pylint: disable=protected-access
@ -115,7 +137,7 @@ class EmukitOptimizer(BaseBayesianOptimizer):
from emukit.core.initial_designs import RandomDesign # pylint: disable=import-outside-toplevel
config = RandomDesign(self.emukit_parameter_space).get_samples(1)
else:
if self.gpbo is None:
if getattr(self, 'gpbo', None) is None:
# this should happen exactly once, when calling the 11th time
self._initialize_optimizer()
# this should happen any time after the initial model is created
@ -183,14 +205,15 @@ class SkoptOptimizer(BaseBayesianOptimizer):
base_estimator=base_estimator,
random_state=seed,
)
self._transform: Callable[[pd.DataFrame], npt.NDArray]
if base_estimator == 'et':
self._transform = self._to_1hot
elif base_estimator == 'gp':
self._transform = self._to_numeric
else:
self._transform = np.array
self._transform = _df_to_ndarray
def _to_numeric(self, config: pd.DataFrame) -> np.array:
def _to_numeric(self, config: pd.DataFrame) -> npt.NDArray:
"""
Convert categorical values in the DataFrame to ordinal integers and return a numpy array.
This transformation is necessary for the Gaussian Process based optimizer.

Просмотреть файл

@ -8,10 +8,11 @@ Contains the BaseOptimizer abstract class.
import collections
from abc import ABCMeta, abstractmethod
from typing import Optional
from typing import List, Optional, Tuple
import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
@ -39,8 +40,8 @@ class BaseOptimizer(metaclass=ABCMeta):
raise ValueError("Given parameter space differs from the one given to space adapter")
self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter
self._observations = []
self._pending_observations = []
self._observations: List[Tuple[pd.DataFrame, pd.Series, Optional[pd.DataFrame]]] = []
self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = []
def __repr__(self):
return f"{self.__class__.__name__}(parameter_space={self.parameter_space})"
@ -173,7 +174,7 @@ class BaseOptimizer(metaclass=ABCMeta):
observations = self.get_observations()
return observations.nsmallest(1, columns='score')
def _from_1hot(self, config: np.array) -> pd.DataFrame:
def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
"""
Convert numpy array from one-hot encoding to a DataFrame
with categoricals and ints in proper columns.
@ -196,7 +197,7 @@ class BaseOptimizer(metaclass=ABCMeta):
j += 1
return pd.DataFrame(df_dict)
def _to_1hot(self, config: pd.DataFrame) -> np.array:
def _to_1hot(self, config: pd.DataFrame) -> npt.NDArray:
"""
Convert pandas DataFrame to one-hot-encoded numpy array.
"""

Просмотреть файл

@ -37,6 +37,8 @@ class RandomOptimizer(BaseOptimizer):
context : None
Not Yet Implemented.
"""
if context is not None:
raise NotImplementedError()
# should we pop them from self.pending_observations?
def _suggest(self, context: pd.DataFrame = None):
@ -54,6 +56,9 @@ class RandomOptimizer(BaseOptimizer):
configuration : pd.DataFrame
Pandas dataframe with a single row. Column names are the parameter names.
"""
if context is not None:
# not sure how that works here?
raise NotImplementedError()
return pd.DataFrame(self.optimizer_parameter_space.sample_configuration().get_dictionary(), index=[0])
def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None):

Просмотреть файл

Просмотреть файл

@ -11,9 +11,11 @@ from typing import Optional, TypeVar
import ConfigSpace
from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter
from mlos_core.spaces.adapters.llamatune import LlamaTuneAdapter
__all__ = [
'IdentityAdapter',
'LlamaTuneAdapter',
]
@ -21,14 +23,21 @@ __all__ = [
class SpaceAdapterType(Enum):
"""Enumerate supported MlosCore space adapters."""
IDENTITY = None
IDENTITY = IdentityAdapter
"""A no-op adapter will be used"""
LLAMATUNE = LlamaTuneAdapter
"""An instance of LlamaTuneAdapter class will be used"""
ConcreteSpaceAdapter = TypeVar('ConcreteSpaceAdapter', *[member.value for member in SpaceAdapterType])
# To make mypy happy, we need to define a type variable for each space adapter type.
# https://github.com/python/mypy/issues/12952
# ConcreteSpaceAdapter = TypeVar('ConcreteSpaceAdapter', *[member.value for member in SpaceAdapterType])
# To address this, we add a test for complete coverage of the enum.
ConcreteSpaceAdapter = TypeVar('ConcreteSpaceAdapter',
IdentityAdapter,
LlamaTuneAdapter,
)
class SpaceAdapterFactory:
@ -59,8 +68,9 @@ class SpaceAdapterFactory:
-------
Instance of concrete optimizer (e.g., None, LlamaTuneAdapter, etc.)
"""
if space_adapter_type is None or space_adapter_type is SpaceAdapterType.IDENTITY:
return None
if space_adapter_type is None:
space_adapter_type = SpaceAdapterType.IDENTITY
if space_adapter_kwargs is None:
space_adapter_kwargs = {}
return space_adapter_type.value(parameter_space, **space_adapter_kwargs)
space_adapter: ConcreteSpaceAdapter = space_adapter_type.value(parameter_space, **space_adapter_kwargs)
return space_adapter

Просмотреть файл

@ -0,0 +1,32 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Contains the Identity (no-op) Space Adapter class.
"""
import ConfigSpace
import pandas as pd
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
class IdentityAdapter(BaseSpaceAdapter):
    """Identity (no-op) SpaceAdapter class.

    Every operation hands back exactly what it was given: the target space is
    the original space and configurations pass through untouched.

    Parameters
    ----------
    orig_parameter_space : ConfigSpace.ConfigurationSpace
        The original parameter space to explore.
    """

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Target parameter space; for this adapter it is the original parameter space."""
        return self._orig_parameter_space

    def transform(self, configuration: pd.DataFrame) -> pd.DataFrame:
        """Return the given configuration DataFrame unchanged (same object)."""
        return configuration

    def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
        """Return the given configurations DataFrame unchanged (same object)."""
        return configurations

Просмотреть файл

@ -5,11 +5,12 @@
"""
Implementation of LlamaTune space adapter.
"""
from typing import Optional
from typing import Dict, Optional
from warnings import warn
import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
@ -81,8 +82,8 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance-
self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)
# Used to retrieve the low-dim point, given the high-dim one
self._suggested_configs = {}
self._pinv_matrix = None
self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
self._pinv_matrix: npt.NDArray
self._use_approximate_reverse_mapping = use_approximate_reverse_mapping
@property
@ -107,7 +108,7 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance-
"previously by the optimizer can be registered.")
# ...yet, we try to support that by implementing an approximate reverse mapping using pseudo-inverse matrix.
if self._pinv_matrix is None:
if getattr(self, '_pinv_matrix', None) is None:
self._try_generate_approx_inverse_mapping()
# Perform approximate reverse mapping
@ -252,29 +253,28 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance-
# Check if input value corresponds to some special value
perc_sum = 0.
ret: float
for special_value, biasing_perc in special_values_list:
perc_sum += biasing_perc
if input_value < perc_sum:
return param._inverse_transform(special_value) # pylint: disable=protected-access
ret = param._inverse_transform(special_value) # pylint: disable=protected-access
return ret
# Scale input value uniformly to non-special values
return param._inverse_transform( # pylint: disable=protected-access
ret = param._inverse_transform( # pylint: disable=protected-access
param._transform_scalar((input_value - perc_sum) / (1 - perc_sum))) # pylint: disable=protected-access
return ret
# pylint: disable=too-complex,too-many-branches
def _validate_special_param_values(self, special_param_values_dict: dict) -> dict:
def _validate_special_param_values(self, special_param_values_dict: dict):
"""Checks that the user-provided dict of special parameter values is valid.
And assigns it to the corresponding attribute.
Parameters
----------
special_param_values_dict: dict
User-provided dict of special parameter values.
Returns
-------
sanitized_dict: dict
Sanitized dictionary; keys are parameter names, values are lists of (special value, biasing %) tuple(s).
Raises
------
ValueError: if dictionary key, value, or structure is invalid.
@ -299,7 +299,7 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance-
tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
# User specifies both special value and biasing percentage
tuple_list = [value]
tuple_list = [value] # type: ignore
elif isinstance(value, list) and value:
if all(isinstance(t, int) for t in value):
# User specifies list of special values

Просмотреть файл

@ -0,0 +1,9 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Optimizer tests.
Note: this file is required so that mypy doesn't complain about overlapping conftest.py modules.
"""

Просмотреть файл

@ -13,12 +13,12 @@ import pytest
import pandas as pd
import ConfigSpace as CS
from mlos_core.optimizers import BaseOptimizer, EmukitOptimizer, SkoptOptimizer
from mlos_core.optimizers import BaseOptimizer, OptimizerType
from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer
@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
(EmukitOptimizer, {}),
(SkoptOptimizer, {'base_estimator': 'gp'}),
*[(member.value, {}) for member in OptimizerType],
])
def test_context_not_implemented_error(configuration_space: CS.ConfigurationSpace,
optimizer_class: Type[BaseOptimizer], kwargs):
@ -27,17 +27,18 @@ def test_context_not_implemented_error(configuration_space: CS.ConfigurationSpac
"""
optimizer = optimizer_class(configuration_space, **kwargs)
suggestion = optimizer.suggest()
score = pd.DataFrame({'score': [1]})
scores = pd.DataFrame({'score': [1]})
# test context not implemented errors
with pytest.raises(NotImplementedError):
optimizer.register(suggestion, score, context="something")
optimizer.register(suggestion, scores['score'], context=pd.DataFrame([["something"]]))
with pytest.raises(NotImplementedError):
optimizer.suggest(context="something")
optimizer.suggest(context=pd.DataFrame([["something"]]))
with pytest.raises(NotImplementedError):
optimizer.surrogate_predict(suggestion, context="something")
if isinstance(optimizer, BaseBayesianOptimizer):
with pytest.raises(NotImplementedError):
optimizer.surrogate_predict(suggestion, context=pd.DataFrame([["something"]]))
# acquisition function not implemented
with pytest.raises(NotImplementedError):
optimizer.acquisition_function(suggestion)
# acquisition function not implemented
with pytest.raises(NotImplementedError):
optimizer.acquisition_function(suggestion)

Просмотреть файл

@ -10,6 +10,7 @@ import pytest
import pandas as pd
import numpy as np
import numpy.typing as npt
import ConfigSpace as CS
from mlos_core.optimizers import EmukitOptimizer
@ -31,7 +32,7 @@ def data_frame() -> pd.DataFrame:
@pytest.fixture
def one_hot() -> np.array:
def one_hot() -> npt.NDArray:
"""
One-hot encoding of the `data_frame` above.
The columns follow the order of the hyperparameters in `configuration_space`.
@ -44,7 +45,7 @@ def one_hot() -> np.array:
def test_to_1hot(configuration_space: CS.ConfigurationSpace,
data_frame: pd.DataFrame, one_hot: np.array):
data_frame: pd.DataFrame, one_hot: npt.NDArray):
"""
Toy problem to test one-hot encoding.
"""
@ -53,7 +54,7 @@ def test_to_1hot(configuration_space: CS.ConfigurationSpace,
def test_from_1hot(configuration_space: CS.ConfigurationSpace,
data_frame: pd.DataFrame, one_hot: np.array):
data_frame: pd.DataFrame, one_hot: npt.NDArray):
"""
Toy problem to test one-hot decoding.
"""
@ -72,7 +73,7 @@ def test_round_trip(configuration_space: CS.ConfigurationSpace, data_frame: pd.D
assert (df_round_trip.z == data_frame.z).all()
def test_round_trip_reverse(configuration_space: CS.ConfigurationSpace, one_hot: np.array):
def test_round_trip_reverse(configuration_space: CS.ConfigurationSpace, one_hot: npt.NDArray):
"""
Round-trip test for one-hot-decoding and then encoding of a numpy array.
"""

Просмотреть файл

@ -6,7 +6,7 @@
Tests for Bayesian Optimizers.
"""
from typing import Type
from typing import Optional, Type
import pytest
@ -15,17 +15,16 @@ import numpy as np
import ConfigSpace as CS
from mlos_core.optimizers import (
OptimizerType, OptimizerFactory, BaseOptimizer,
EmukitOptimizer, SkoptOptimizer, RandomOptimizer)
OptimizerType, ConcreteOptimizer, OptimizerFactory, BaseOptimizer,
SkoptOptimizer)
from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer
from mlos_core.spaces.adapters import SpaceAdapterType
@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
(EmukitOptimizer, {}),
*[(member.value, {}) for member in OptimizerType],
(SkoptOptimizer, {'base_estimator': 'gp'}),
(RandomOptimizer, {})
])
def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace,
optimizer_class: Type[BaseOptimizer], kwargs):
@ -49,10 +48,9 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace
@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
(EmukitOptimizer, {}),
*[(member.value, {}) for member in OptimizerType],
(SkoptOptimizer, {'base_estimator': 'gp', 'seed': 42}),
(SkoptOptimizer, {'base_estimator': 'et', 'seed': 42}),
(RandomOptimizer, {})
])
def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace,
optimizer_class: Type[BaseOptimizer], kwargs):
@ -102,6 +100,18 @@ def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace,
assert pred_all.shape == (20,)
@pytest.mark.parametrize(('optimizer_type'), [
# Enumerate all supported Optimizers
# *[member for member in OptimizerType],
*list(OptimizerType),
])
def test_concrete_optimizer_type(optimizer_type: OptimizerType):
"""
Test that all optimizer types are listed in the ConcreteOptimizer constraints.
"""
assert optimizer_type.value in ConcreteOptimizer.__constraints__ # type: ignore # pylint: disable=no-member
@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
# Default optimizer
(None, {}),
@ -111,7 +121,7 @@ def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace,
(OptimizerType.SKOPT, {'base_estimator': 'gp'}),
])
def test_create_optimizer_with_factory_method(configuration_space: CS.ConfigurationSpace,
optimizer_type: OptimizerType, kwargs):
optimizer_type: Optional[OptimizerType], kwargs):
"""
Test that we can create an optimizer via a factory.
"""
@ -155,11 +165,11 @@ def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs):
assert optimizer is not None
# Initialize another optimizer that uses LlamaTune space adapter
space_adapter_kwargs = dict(
num_low_dims=1,
special_param_values=None,
max_unique_values_per_param=None,
)
space_adapter_kwargs = {
"num_low_dims": 1,
"special_param_values": None,
"max_unique_values_per_param": None,
}
llamatune_optimizer = OptimizerFactory.create(
input_space,
optimizer_type,
@ -201,6 +211,6 @@ def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs):
assert (all_obvs.columns == ['x', 'y', 'score']).all()
# .surrogate_predict method not currently implemented if space adapter is employed
if isinstance(optimizer, BaseBayesianOptimizer):
if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
with pytest.raises(NotImplementedError):
_ = llamatune_optimizer.surrogate_predict(llamatune_best_observation[['x']])

Просмотреть файл

@ -0,0 +1,41 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Tests for Identity space adapter.
"""
# pylint: disable=missing-function-docstring
import ConfigSpace as CS
import pandas as pd
from mlos_core.spaces.adapters import IdentityAdapter
def test_identity_adapter():
    """
    Checks that the identity adapter passes configurations through unchanged
    in both directions (inverse_transform and transform).
    """
    input_space = CS.ConfigurationSpace(seed=1234)
    hyperparameters = [
        CS.UniformIntegerHyperparameter(name='int_1', lower=0, upper=100),
        CS.UniformFloatHyperparameter(name='float_1', lower=0, upper=100),
        CS.CategoricalHyperparameter(name='str_1', choices=['on', 'off']),
    ]
    for hyperparameter in hyperparameters:
        input_space.add_hyperparameter(hyperparameter)

    adapter = IdentityAdapter(input_space)

    num_configs = 10
    for sampled_config in input_space.sample_configuration(size=num_configs):
        sampled_df = pd.DataFrame([sampled_config.values()], columns=sampled_config.keys())

        # original -> target direction must be a no-op
        target_df = adapter.inverse_transform(sampled_df)
        assert target_df.equals(sampled_df)
        target_values = target_df.iloc[0].to_dict()
        target_config = CS.Configuration(adapter.target_parameter_space, values=target_values)
        assert target_config == sampled_config

        # target -> original direction must be a no-op as well
        orig_df = adapter.transform(target_df)
        assert orig_df.equals(sampled_df)
        orig_values = orig_df.iloc[0].to_dict()
        orig_config = CS.Configuration(adapter.orig_parameter_space, values=orig_values)
        assert orig_config == sampled_config

Просмотреть файл

@ -8,6 +8,8 @@ Tests for LlamaTune space adapter.
# pylint: disable=missing-function-docstring
from typing import Dict, Set
import pytest
import ConfigSpace as CS
@ -372,7 +374,7 @@ def test_llamatune_pipeline(num_low_dims: int, special_param_values: dict, max_u
param: {special_value: 0 for special_value, _ in tuples_list}
for param, tuples_list in adapter._special_param_values_dict.items() # pylint: disable=protected-access
}
unique_values_dict = {param: set() for param in input_space.get_hyperparameter_names()}
unique_values_dict: Dict[str, Set] = {param: set() for param in input_space.get_hyperparameter_names()}
num_configs = 1000
for config in adapter.target_parameter_space.sample_configuration(size=num_configs):

Просмотреть файл

@ -8,12 +8,27 @@ Tests for space adapter factory.
# pylint: disable=missing-function-docstring
from typing import Optional
import pytest
import ConfigSpace as CS
from mlos_core.spaces.adapters import SpaceAdapterFactory, SpaceAdapterType
from mlos_core.spaces.adapters import SpaceAdapterFactory, SpaceAdapterType, ConcreteSpaceAdapter
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter
@pytest.mark.parametrize(('space_adapter_type'), [
# Enumerate all supported SpaceAdapters
# *[member for member in SpaceAdapterType],
*list(SpaceAdapterType),
])
def test_concrete_optimizer_type(space_adapter_type: SpaceAdapterType):
"""
Test that all space adapter types are listed in the ConcreteSpaceAdapter constraints.
"""
assert space_adapter_type.value in ConcreteSpaceAdapter.__constraints__ # type: ignore # pylint: disable=no-member
@pytest.mark.parametrize(('space_adapter_type', 'kwargs'), [
@ -22,7 +37,7 @@ from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
# Enumerate all supported Optimizers
*[(member, {}) for member in SpaceAdapterType],
])
def test_create_space_adapter_with_factory_method(space_adapter_type: SpaceAdapterType, kwargs):
def test_create_space_adapter_with_factory_method(space_adapter_type: Optional[SpaceAdapterType], kwargs):
# Start defining a ConfigurationSpace for the Optimizer to search.
input_space = CS.ConfigurationSpace(seed=1234)
@ -37,14 +52,14 @@ def test_create_space_adapter_with_factory_method(space_adapter_type: SpaceAdapt
kwargs = {}
kwargs.setdefault('num_low_dims', 1)
space_adapter: BaseSpaceAdapter = None
space_adapter: BaseSpaceAdapter
if space_adapter_type is None:
space_adapter = SpaceAdapterFactory.create(input_space)
else:
space_adapter = SpaceAdapterFactory.create(input_space, space_adapter_type, space_adapter_kwargs=kwargs)
if space_adapter_type is None or space_adapter_type is SpaceAdapterType.IDENTITY:
assert space_adapter is None
assert isinstance(space_adapter, IdentityAdapter)
else:
assert space_adapter is not None
assert space_adapter.orig_parameter_space is not None

Просмотреть файл

@ -6,8 +6,8 @@
Setup instructions for the mlos_core package.
"""
from logging import warning
from itertools import chain
from logging import warning
from setuptools import setup, find_packages
@ -31,13 +31,16 @@ extra_requires = {
# construct special 'full' extra that adds requirements for all built-in
# backend integrations and additional extra features.
extra_requires['full'] = list(set(chain(extra_requires.values())))
extra_requires['full'] = list(set(chain(extra_requires.values()))) # type: ignore
# pylint: disable=duplicate-code
setup(
name='mlos-core',
version=_VERSION,
packages=find_packages(),
package_data={
'mlos_core': ['py.typed'],
},
install_requires=[
'scikit-learn<1.2', # FIXME: temporarily work around some version mismatch issues (PR 850)
'joblib>=1.1.1', # CVE-2022-21797: scikit-learn dependency, addressed in 1.2.0dev0, which isn't currently released

43
scripts/dmypy-wrapper.sh Executable file
Просмотреть файл

@ -0,0 +1,43 @@
#!/bin/bash
##
## Copyright (c) Microsoft Corporation.
## Licensed under the MIT License.
##

# Wrapper around the mypy daemon (dmypy).
#
# Usage: dmypy-wrapper.sh [--daemon-start-option ...] [file ...]
#
# Leading arguments that begin with "--" are collected and passed to
# `dmypy start` / `dmypy restart`; when none are given a default set is used.
# All remaining arguments are passed to `dmypy check`.

set -eu
#set -x

# Start in the root dir.
scriptdir=$(dirname "$(readlink -f "$0")")
cd "$scriptdir/.."

DMYPY_STATUS_FILE='.dmypy.json'
DMYPY_STATUS_ARGS="--status-file $DMYPY_STATUS_FILE"
DMYPY_START_ARGS=''

# Consume leading "--option" arguments.
# The loop must run while there IS a first argument (-n, not -z): with -z the
# options were never parsed when arguments were given, and with no arguments
# the `opt="$1"` below aborted the script under `set -u`.
while [ -n "${1:-}" ]; do
    opt="$1"
    case $opt in
        --*)
            DMYPY_START_ARGS+=" $opt"
            shift
            ;;
        *)
            # First non-option argument: everything from here on is a file to check.
            break
            ;;
    esac
done
if [ -z "$DMYPY_START_ARGS" ]; then
    DMYPY_START_ARGS='--pretty --cache-fine-grained --install-types --non-interactive'
fi

# NOTE: $DMYPY_STATUS_ARGS and $DMYPY_START_ARGS are deliberately left unquoted
# below - each holds several space separated words that must be split.

# Start the daemon unless a status query shows it is already running.
dmypy $DMYPY_STATUS_ARGS status >/dev/null || dmypy $DMYPY_STATUS_ARGS start -- $DMYPY_START_ARGS

# Restart the daemon if the config file has changed.
# (Compares setup.cfg's mtime with that of the daemon's /proc/<pid> entry,
# where <pid> is read from the daemon's status file.)
if [ setup.cfg -nt "/proc/$(jq -e -r .pid "$DMYPY_STATUS_FILE")" ]; then
    dmypy $DMYPY_STATUS_ARGS restart -- $DMYPY_START_ARGS
fi

# Check the files passed as arguments.
# "$@" (rather than $*) keeps each file name intact even if it contains whitespace.
dmypy $DMYPY_STATUS_ARGS check "$@"

Просмотреть файл

@ -39,3 +39,44 @@ addopts =
# Moved these to Makefile
#--cov=mlos_core --cov-report=xml
testpaths = mlos_core mlos_bench
#
# mypy static type checker configs
#
[mypy]
#ignore_missing_imports = True
warn_unused_configs = True
warn_unused_ignores = True
warn_unreachable = True
warn_return_any = True
# regex of files to skip type checking
exclude = /_pytest/|/build/|doc/|_version.py|setup.py
# https://github.com/automl/ConfigSpace/issues/293
[mypy-ConfigSpace.*]
ignore_missing_imports = True
# https://github.com/EmuKit/emukit/issues/440
[mypy-emukit.*]
ignore_missing_imports = True
# https://github.com/pytest-dev/pytest/issues/10435
[mypy-pytest]
ignore_missing_imports = True
# https://github.com/scikit-learn/scikit-learn/issues/16705
[mypy-sklearn.*]
ignore_missing_imports = True
# https://github.com/scipy/scipy/issues/17158
[mypy-scipy.*]
ignore_missing_imports = True
# https://github.com/pypa/setuptools_scm/issues/501
[mypy-setuptools_scm.*]
ignore_missing_imports = True
# https://github.com/scikit-optimize/scikit-optimize/issues/1111
[mypy-skopt.*]
ignore_missing_imports = True