From cac5351b98694ec14ae41cd8bf0472b5dac243fb Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 12 Apr 2023 11:11:58 -0500 Subject: [PATCH] mypy type checking support for mlos_core (mlos_bench to follow later) (#301) * mypy type checking support for mlos_core (mlos_bench to follow later) --- .cspell.json | 2 + .devcontainer/devcontainer.json | 7 ++- .pylintrc | 3 +- .vscode/extensions.json | 1 + .vscode/settings.json | 5 ++- Makefile | 15 ++++++- conda-envs/mlos_core-3.10.yml | 5 +++ conda-envs/mlos_core-3.11.yml | 5 +++ conda-envs/mlos_core-3.8.yml | 5 +++ conda-envs/mlos_core-3.9.yml | 5 +++ conda-envs/mlos_core-windows.yml | 5 +++ conda-envs/mlos_core.yml | 5 +++ mlos_core/mlos_core/optimizers/__init__.py | 15 +++++-- .../optimizers/bayesian_optimizers.py | 37 +++++++++++++--- mlos_core/mlos_core/optimizers/optimizer.py | 11 ++--- .../mlos_core/optimizers/random_optimizer.py | 5 +++ mlos_core/mlos_core/py.typed | 0 .../mlos_core/spaces/adapters/__init__.py | 20 ++++++--- .../spaces/adapters/identity_adapter.py | 32 ++++++++++++++ .../mlos_core/spaces/adapters/llamatune.py | 26 +++++------ .../mlos_core/tests/optimizers/__init__.py | 9 ++++ .../optimizers/bayesian_optimizers_test.py | 23 +++++----- .../tests/optimizers/one_hot_test.py | 9 ++-- .../tests/optimizers/optimizer_test.py | 38 ++++++++++------ .../spaces/adapters/identity_adapter_test.py | 41 ++++++++++++++++++ .../tests/spaces/adapters/llamatune_test.py | 4 +- .../adapters/space_adapter_factory_test.py | 23 ++++++++-- mlos_core/setup.py | 7 ++- scripts/dmypy-wrapper.sh | 43 +++++++++++++++++++ setup.cfg | 41 ++++++++++++++++++ 30 files changed, 374 insertions(+), 73 deletions(-) create mode 100644 mlos_core/mlos_core/py.typed create mode 100644 mlos_core/mlos_core/spaces/adapters/identity_adapter.py create mode 100644 mlos_core/mlos_core/tests/optimizers/__init__.py create mode 100644 mlos_core/mlos_core/tests/spaces/adapters/identity_adapter_test.py create mode 100755 scripts/dmypy-wrapper.sh diff 
--git a/.cspell.json b/.cspell.json index f97432c168..d65c0c7d9f 100644 --- a/.cspell.json +++ b/.cspell.json @@ -28,8 +28,10 @@ "llamatune", "matplotlib", "mlos", + "ndarray", "nsmallest", "numpy", + "obvs", "perc", "pinv", "pylint", diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 5d0601172b..3e15ad03b4 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -45,8 +45,12 @@ "settings": { // Adjust the python interpreter path to point to the conda environment "python.defaultInterpreterPath": "/opt/conda/envs/mlos_core/bin/python", + "python.testing.pytestPath": "/opt/conda/envs/mlos_core/bin/pytest", "python.linting.pylintPath": "/opt/conda/envs/mlos_core/bin/pylint", - "pylint.path": ["/opt/conda/envs/mlos_core/bin/pylint"] + "pylint.path": ["/opt/conda/envs/mlos_core/bin/pylint"], + "python.linting.mypyPath": "/opt/conda/envs/mlos_core/bin/mypy", + "mypy.dmypyExecutable": "/opt/conda/envs/mlos_core/bin/dmypy", + "mypy.runUsingActiveInterpreter": false }, // Add the IDs of extensions you want installed when the container is created. 
"extensions": [ @@ -55,6 +59,7 @@ "ms-python.vscode-pylance", "ms-python.python", "ms-python.pylint", + "matangover.mypy", "donjayamanne.python-environment-manager", "njpwerner.autodocstring", "ms-toolsai.jupyter", diff --git a/.pylintrc b/.pylintrc index e619ac4ea3..d185960364 100644 --- a/.pylintrc +++ b/.pylintrc @@ -38,8 +38,9 @@ max-line-length=132 disable= no-else-return, consider-using-assignment-expr, - deprecated-typing-alias, # disable for now - only deprecated recently + deprecated-typing-alias, # disable for now - only deprecated recently docstring-first-line-empty, + consider-alternative-union-syntax, # disable for now - still supporting python 3.8 missing-raises-doc [STRING] diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 674bb18832..76055ecadd 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -5,6 +5,7 @@ "ms-python.vscode-pylance", "ms-python.python", "ms-python.pylint", + "matangover.mypy", "donjayamanne.python-environment-manager", "njpwerner.autodocstring", "lextudio.restructuredtext", diff --git a/.vscode/settings.json b/.vscode/settings.json index 3f2945de3f..c88e5ac0b4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,11 +1,14 @@ +// vim: set ft=jsonc: { "makefile.extensionOutputFolder": "./.vscode", "python.defaultInterpreterPath": "${env:HOME}${env:USERPROFILE}/.conda/envs/mlos_core/bin/python", + "python.testing.pytestEnabled": true, "python.linting.enabled": true, "python.linting.pylintEnabled": true, "python.linting.pycodestyleEnabled": true, "python.linting.pydocstyleEnabled": true, - "python.testing.pytestEnabled": true, + "python.linting.mypyEnabled": false, // use the dmypy daemon extension instead - it's faster + // "mypy.runUsingActiveInterpreter": true, // in the devcontainer we override this to use an explicit path "autoDocstring.docstringFormat": "numpy", "restructuredtext.linter.doc8.extraArgs": [ "--ignore D001" diff --git a/Makefile b/Makefile index 
89b71090a7..f26e23c1d8 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ clean-conda-env: rm -f build/conda-env.${CONDA_ENV_NAME}.build-stamp .PHONY: check -check: pycodestyle pydocstyle pylint # cspell licenseheaders markdown-link-check +check: pycodestyle pydocstyle pylint mypy # cspell licenseheaders markdown-link-check .PHONY: pycodestyle pycodestyle: conda-env build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp build/pycodestyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp @@ -112,6 +112,17 @@ build/pylint.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}. conda run -n ${CONDA_ENV_NAME} pylint -j0 $(filter-out .pylintrc,$+) touch $@ +.PHONY: mypy +mypy: conda-env build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp # TODO: build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp + +build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) + +build/mypy.%.${CONDA_ENV_NAME}.build-stamp: scripts/dmypy-wrapper.sh build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg + conda run -n ${CONDA_ENV_NAME} scripts/dmypy-wrapper.sh \ + $(filter-out scripts/dmypy-wrapper.sh build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg,$+) + touch $@ + .PHONY: test test: pytest @@ -365,6 +376,8 @@ clean-check: rm -f build/pylint.${CONDA_ENV_NAME}.build-stamp rm -f build/pylint.mlos_core.${CONDA_ENV_NAME}.build-stamp rm -f build/pylint.mlos_bench.${CONDA_ENV_NAME}.build-stamp + rm -f build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp + rm -f build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp rm -f build/pycodestyle.build-stamp rm -f build/pycodestyle.${CONDA_ENV_NAME}.build-stamp rm -f build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp diff --git a/conda-envs/mlos_core-3.10.yml b/conda-envs/mlos_core-3.10.yml index 927e0720a3..aade5798d1 100644 --- a/conda-envs/mlos_core-3.10.yml +++ b/conda-envs/mlos_core-3.10.yml @@ -9,6 +9,7 @@ dependencies: - 
pycodestyle - autopep8 - pydocstyle + - mypy - pytest - pytest-cov - pytest-forked @@ -28,5 +29,9 @@ dependencies: - bump2version - licenseheaders - pytest-local-badge + - types-setuptools + - types-pygments + - types-colorama + - pandas-stubs - "--editable ../mlos_core[full]" - "--editable ../mlos_bench[full]" diff --git a/conda-envs/mlos_core-3.11.yml b/conda-envs/mlos_core-3.11.yml index ab4659ff6a..9162fb21b6 100644 --- a/conda-envs/mlos_core-3.11.yml +++ b/conda-envs/mlos_core-3.11.yml @@ -9,6 +9,7 @@ dependencies: - pycodestyle - autopep8 - pydocstyle + - mypy - pytest - pytest-cov - pytest-forked @@ -28,5 +29,9 @@ dependencies: - bump2version - licenseheaders - pytest-local-badge + - types-setuptools + - types-pygments + - types-colorama + - pandas-stubs - "--editable ../mlos_core[full]" - "--editable ../mlos_bench[full]" diff --git a/conda-envs/mlos_core-3.8.yml b/conda-envs/mlos_core-3.8.yml index d3fce5d382..176110de82 100644 --- a/conda-envs/mlos_core-3.8.yml +++ b/conda-envs/mlos_core-3.8.yml @@ -9,6 +9,7 @@ dependencies: - pycodestyle - autopep8 - pydocstyle + - mypy - pytest - pytest-cov - pytest-forked @@ -28,5 +29,9 @@ dependencies: - bump2version - licenseheaders - pytest-local-badge + - types-setuptools + - types-pygments + - types-colorama + - pandas-stubs - "--editable ../mlos_core[full]" - "--editable ../mlos_bench[full]" diff --git a/conda-envs/mlos_core-3.9.yml b/conda-envs/mlos_core-3.9.yml index 8b64594f61..66236183eb 100644 --- a/conda-envs/mlos_core-3.9.yml +++ b/conda-envs/mlos_core-3.9.yml @@ -9,6 +9,7 @@ dependencies: - pycodestyle - autopep8 - pydocstyle + - mypy - pytest - pytest-cov - pytest-forked @@ -28,5 +29,9 @@ dependencies: - bump2version - licenseheaders - pytest-local-badge + - types-setuptools + - types-pygments + - types-colorama + - pandas-stubs - "--editable ../mlos_core[full]" - "--editable ../mlos_bench[full]" diff --git a/conda-envs/mlos_core-windows.yml b/conda-envs/mlos_core-windows.yml index 03c4e6ac92..16248e7990 
100644 --- a/conda-envs/mlos_core-windows.yml +++ b/conda-envs/mlos_core-windows.yml @@ -9,6 +9,7 @@ dependencies: - pycodestyle - autopep8 - pydocstyle + - mypy - pytest - pytest-cov - pytest-forked @@ -31,5 +32,9 @@ dependencies: - bump2version - licenseheaders - pytest-local-badge + - types-setuptools + - types-pygments + - types-colorama + - pandas-stubs - "--editable ../mlos_core[full]" - "--editable ../mlos_bench[full]" diff --git a/conda-envs/mlos_core.yml b/conda-envs/mlos_core.yml index c0c4b588b3..7417484ed0 100644 --- a/conda-envs/mlos_core.yml +++ b/conda-envs/mlos_core.yml @@ -9,6 +9,7 @@ dependencies: - pycodestyle - autopep8 - pydocstyle + - mypy - pytest - pytest-cov - pytest-forked @@ -31,5 +32,9 @@ dependencies: - bump2version - licenseheaders - pytest-local-badge + - types-setuptools + - types-pygments + - types-colorama + - pandas-stubs - "--editable ../mlos_core[full]" - "--editable ../mlos_bench[full]" diff --git a/mlos_core/mlos_core/optimizers/__init__.py b/mlos_core/mlos_core/optimizers/__init__.py index 5a3dc0dfd7..37580b8314 100644 --- a/mlos_core/mlos_core/optimizers/__init__.py +++ b/mlos_core/mlos_core/optimizers/__init__.py @@ -39,7 +39,15 @@ class OptimizerType(Enum): """An instance of SkoptOptimizer class will be used""" -ConcreteOptimizer = TypeVar('ConcreteOptimizer', *[member.value for member in OptimizerType]) +# To make mypy happy, we need to define a type variable for each optimizer type. +# https://github.com/python/mypy/issues/12952 +# ConcreteOptimizer = TypeVar('ConcreteOptimizer', *[member.value for member in OptimizerType]) +# To address this, we add a test for complete coverage of the enum. 
+ConcreteOptimizer = TypeVar('ConcreteOptimizer', + RandomOptimizer, + EmukitOptimizer, + SkoptOptimizer, + ) class OptimizerFactory: @@ -52,7 +60,7 @@ class OptimizerFactory: parameter_space: ConfigSpace.ConfigurationSpace, optimizer_type: OptimizerType = OptimizerType.SKOPT, optimizer_kwargs: Optional[dict] = None, - space_adapter_type: Optional[SpaceAdapterType] = SpaceAdapterType.IDENTITY, + space_adapter_type: SpaceAdapterType = SpaceAdapterType.IDENTITY, space_adapter_kwargs: Optional[dict] = None, ) -> ConcreteOptimizer: """Creates a new optimizer instance, given the parameter space, optimizer type and potential optimizer options. @@ -83,4 +91,5 @@ class OptimizerFactory: if optimizer_kwargs is None: optimizer_kwargs = {} space_adapter = SpaceAdapterFactory.create(parameter_space, space_adapter_type, space_adapter_kwargs=space_adapter_kwargs) - return optimizer_type.value(parameter_space, space_adapter=space_adapter, **optimizer_kwargs) + optimizer: ConcreteOptimizer = optimizer_type.value(parameter_space, space_adapter=space_adapter, **optimizer_kwargs) + return optimizer diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers.py index cb1e6320a0..4be59b6283 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers.py @@ -6,11 +6,12 @@ Contains the wrapper classes for different Bayesian optimizers. """ -from typing import Optional +from typing import Callable, Optional from abc import ABCMeta, abstractmethod import ConfigSpace import numpy as np +import numpy.typing as npt import pandas as pd from mlos_core.optimizers.optimizer import BaseOptimizer @@ -18,6 +19,26 @@ from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter from mlos_core.spaces import configspace_to_skopt_space, configspace_to_emukit_space +def _df_to_ndarray(config: pd.DataFrame) -> npt.NDArray: + """ + Converts a single config to an ndarray. 
+ + Done this way to let mypy validate the different types across _transform function options. + + Parameters + ---------- + config : pd.DataFrame + Dataframe of configurations / parameters. + The columns are parameter names and the row is the configuration. + + Returns + ------- + config : np.array + Numpy array of the data. + """ + return config.to_numpy() + + class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta): """Abstract base class defining the interface for Bayesian optimization.""" @@ -65,7 +86,8 @@ class EmukitOptimizer(BaseBayesianOptimizer): def __init__(self, parameter_space: ConfigSpace.ConfigurationSpace, space_adapter: Optional[BaseSpaceAdapter] = None): super().__init__(parameter_space, space_adapter) self.emukit_parameter_space = configspace_to_emukit_space(self.optimizer_parameter_space) - self.gpbo = None + from emukit.examples.gp_bayesian_optimization.single_objective_bayesian_optimization import GPBayesianOptimization # noqa pylint: disable=import-outside-toplevel + self.gpbo: GPBayesianOptimization def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: pd.DataFrame = None): """Registers the given configurations and scores. @@ -85,13 +107,13 @@ class EmukitOptimizer(BaseBayesianOptimizer): if context is not None: # not sure how that works here? 
raise NotImplementedError() - if self.gpbo is None: + if getattr(self, 'gpbo', None) is None: # we're in the random initialization phase # just remembering the observation above is enough return results = [] for (_, config), score in zip(configurations.iterrows(), scores): - one_hot = self._to_1hot(config) + one_hot = self._to_1hot(pd.DataFrame([config])) results.append(UserFunctionResult(one_hot[0], np.array([score]))) self.gpbo.loop_state.update(results) self.gpbo._update_models() # pylint: disable=protected-access @@ -115,7 +137,7 @@ class EmukitOptimizer(BaseBayesianOptimizer): from emukit.core.initial_designs import RandomDesign # pylint: disable=import-outside-toplevel config = RandomDesign(self.emukit_parameter_space).get_samples(1) else: - if self.gpbo is None: + if getattr(self, 'gpbo', None) is None: # this should happen exactly once, when calling the 11th time self._initialize_optimizer() # this should happen any time after the initial model is created @@ -183,14 +205,15 @@ class SkoptOptimizer(BaseBayesianOptimizer): base_estimator=base_estimator, random_state=seed, ) + self._transform: Callable[[pd.DataFrame], npt.NDArray] if base_estimator == 'et': self._transform = self._to_1hot elif base_estimator == 'gp': self._transform = self._to_numeric else: - self._transform = np.array + self._transform = _df_to_ndarray - def _to_numeric(self, config: pd.DataFrame) -> np.array: + def _to_numeric(self, config: pd.DataFrame) -> npt.NDArray: """ Convert categorical values in the DataFrame to ordinal integers and return a numpy array. This transformation is necessary for the Gaussian Process based optimizer. diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index ce486e6413..558099656f 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -8,10 +8,11 @@ Contains the BaseOptimizer abstract class. 
import collections from abc import ABCMeta, abstractmethod -from typing import Optional +from typing import List, Optional, Tuple import ConfigSpace import numpy as np +import numpy.typing as npt import pandas as pd from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter @@ -39,8 +40,8 @@ class BaseOptimizer(metaclass=ABCMeta): raise ValueError("Given parameter space differs from the one given to space adapter") self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter - self._observations = [] - self._pending_observations = [] + self._observations: List[Tuple[pd.DataFrame, pd.Series, Optional[pd.DataFrame]]] = [] + self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = [] def __repr__(self): return f"{self.__class__.__name__}(parameter_space={self.parameter_space})" @@ -173,7 +174,7 @@ class BaseOptimizer(metaclass=ABCMeta): observations = self.get_observations() return observations.nsmallest(1, columns='score') - def _from_1hot(self, config: np.array) -> pd.DataFrame: + def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: """ Convert numpy array from one-hot encoding to a DataFrame with categoricals and ints in proper columns. @@ -196,7 +197,7 @@ class BaseOptimizer(metaclass=ABCMeta): j += 1 return pd.DataFrame(df_dict) - def _to_1hot(self, config: pd.DataFrame) -> np.array: + def _to_1hot(self, config: pd.DataFrame) -> npt.NDArray: """ Convert pandas DataFrame to one-hot-encoded numpy array. """ diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index 1628d0ea64..6fd22ac409 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -37,6 +37,8 @@ class RandomOptimizer(BaseOptimizer): context : None Not Yet Implemented. """ + if context is not None: + raise NotImplementedError() # should we pop them from self.pending_observations? 
def _suggest(self, context: pd.DataFrame = None): @@ -54,6 +56,9 @@ class RandomOptimizer(BaseOptimizer): configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. """ + if context is not None: + # not sure how that works here? + raise NotImplementedError() return pd.DataFrame(self.optimizer_parameter_space.sample_configuration().get_dictionary(), index=[0]) def register_pending(self, configurations: pd.DataFrame, context: pd.DataFrame = None): diff --git a/mlos_core/mlos_core/py.typed b/mlos_core/mlos_core/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mlos_core/mlos_core/spaces/adapters/__init__.py b/mlos_core/mlos_core/spaces/adapters/__init__.py index d0ca8a9988..b0b8e31b47 100644 --- a/mlos_core/mlos_core/spaces/adapters/__init__.py +++ b/mlos_core/mlos_core/spaces/adapters/__init__.py @@ -11,9 +11,11 @@ from typing import Optional, TypeVar import ConfigSpace +from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter from mlos_core.spaces.adapters.llamatune import LlamaTuneAdapter __all__ = [ + 'IdentityAdapter', 'LlamaTuneAdapter', ] @@ -21,14 +23,21 @@ __all__ = [ class SpaceAdapterType(Enum): """Enumerate supported MlosCore space adapters.""" - IDENTITY = None + IDENTITY = IdentityAdapter """A no-op adapter will be used""" LLAMATUNE = LlamaTuneAdapter """An instance of LlamaTuneAdapter class will be used""" -ConcreteSpaceAdapter = TypeVar('ConcreteSpaceAdapter', *[member.value for member in SpaceAdapterType]) +# To make mypy happy, we need to define a type variable for each optimizer type. +# https://github.com/python/mypy/issues/12952 +# ConcreteSpaceAdapter = TypeVar('ConcreteSpaceAdapter', *[member.value for member in SpaceAdapterType]) +# To address this, we add a test for complete coverage of the enum. 
+ConcreteSpaceAdapter = TypeVar('ConcreteSpaceAdapter', + IdentityAdapter, + LlamaTuneAdapter, + ) class SpaceAdapterFactory: @@ -59,8 +68,9 @@ class SpaceAdapterFactory: ------- Instance of concrete optimizer (e.g., None, LlamaTuneAdapter, etc.) """ - if space_adapter_type is None or space_adapter_type is SpaceAdapterType.IDENTITY: - return None + if space_adapter_type is None: + space_adapter_type = SpaceAdapterType.IDENTITY if space_adapter_kwargs is None: space_adapter_kwargs = {} - return space_adapter_type.value(parameter_space, **space_adapter_kwargs) + space_adapter: ConcreteSpaceAdapter = space_adapter_type.value(parameter_space, **space_adapter_kwargs) + return space_adapter diff --git a/mlos_core/mlos_core/spaces/adapters/identity_adapter.py b/mlos_core/mlos_core/spaces/adapters/identity_adapter.py new file mode 100644 index 0000000000..ad79fa21c9 --- /dev/null +++ b/mlos_core/mlos_core/spaces/adapters/identity_adapter.py @@ -0,0 +1,32 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Contains the Identity (no-op) Space Adapter class. +""" + +import ConfigSpace +import pandas as pd + +from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter + + +class IdentityAdapter(BaseSpaceAdapter): + """Identity (no-op) SpaceAdapter class. + + Parameters + ---------- + orig_parameter_space : ConfigSpace.ConfigurationSpace + The original parameter space to explore. 
+ """ + + @property + def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace: + return self._orig_parameter_space + + def transform(self, configuration: pd.DataFrame) -> pd.DataFrame: + return configuration + + def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame: + return configurations diff --git a/mlos_core/mlos_core/spaces/adapters/llamatune.py b/mlos_core/mlos_core/spaces/adapters/llamatune.py index 2734e9352a..d6fcb6fdb1 100644 --- a/mlos_core/mlos_core/spaces/adapters/llamatune.py +++ b/mlos_core/mlos_core/spaces/adapters/llamatune.py @@ -5,11 +5,12 @@ """ Implementation of LlamaTune space adapter. """ -from typing import Optional +from typing import Dict, Optional from warnings import warn import ConfigSpace import numpy as np +import numpy.typing as npt import pandas as pd from sklearn.preprocessing import MinMaxScaler from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter @@ -81,8 +82,8 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance- self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims) # Used to retrieve the low-dim point, given the high-dim one - self._suggested_configs = {} - self._pinv_matrix = None + self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {} + self._pinv_matrix: npt.NDArray self._use_approximate_reverse_mapping = use_approximate_reverse_mapping @property @@ -107,7 +108,7 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance- "previously by the optimizer can be registered.") # ...yet, we try to support that by implementing an approximate reverse mapping using pseudo-inverse matrix. 
- if self._pinv_matrix is None: + if getattr(self, '_pinv_matrix', None) is None: self._try_generate_approx_inverse_mapping() # Perform approximate reverse mapping @@ -252,29 +253,28 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance- # Check if input value corresponds to some special value perc_sum = 0. + ret: float for special_value, biasing_perc in special_values_list: perc_sum += biasing_perc if input_value < perc_sum: - return param._inverse_transform(special_value) # pylint: disable=protected-access + ret = param._inverse_transform(special_value) # pylint: disable=protected-access + return ret # Scale input value uniformly to non-special values - return param._inverse_transform( # pylint: disable=protected-access + ret = param._inverse_transform( # pylint: disable=protected-access param._transform_scalar((input_value - perc_sum) / (1 - perc_sum))) # pylint: disable=protected-access + return ret # pylint: disable=too-complex,too-many-branches - def _validate_special_param_values(self, special_param_values_dict: dict) -> dict: + def _validate_special_param_values(self, special_param_values_dict: dict): """Checks that the user-provided dict of special parameter values is valid. + And assigns it to the corresponding attribute. Parameters ---------- special_param_values_dict: dict User-provided dict of special parameter values. - Returns - ------- - sanitized_dict: dict - Sanitized dictionary; keys are parameter names, values are lists of (special value, biasing %) tuple(s). - Raises ------ ValueError: if dictionary key, valid, or structure is invalid. 
@@ -299,7 +299,7 @@ class LlamaTuneAdapter(BaseSpaceAdapter): # pylint: disable=too-many-instance- tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)] elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]: # User specifies both special value and biasing percentage - tuple_list = [value] + tuple_list = [value] # type: ignore elif isinstance(value, list) and value: if all(isinstance(t, int) for t in value): # User specifies list of special values diff --git a/mlos_core/mlos_core/tests/optimizers/__init__.py b/mlos_core/mlos_core/tests/optimizers/__init__.py new file mode 100644 index 0000000000..83b1b7d391 --- /dev/null +++ b/mlos_core/mlos_core/tests/optimizers/__init__.py @@ -0,0 +1,9 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Optimizer tests. + +Note: this file is required so that mypy doesn't complain about overlapping conftest.py modules. +""" diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index 09deac8fb3..085952f4cd 100644 --- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -13,12 +13,12 @@ import pytest import pandas as pd import ConfigSpace as CS -from mlos_core.optimizers import BaseOptimizer, EmukitOptimizer, SkoptOptimizer +from mlos_core.optimizers import BaseOptimizer, OptimizerType +from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ - (EmukitOptimizer, {}), - (SkoptOptimizer, {'base_estimator': 'gp'}), + *[(member.value, {}) for member in OptimizerType], ]) def test_context_not_implemented_error(configuration_space: CS.ConfigurationSpace, optimizer_class: Type[BaseOptimizer], kwargs): @@ -27,17 +27,18 @@ def test_context_not_implemented_error(configuration_space: CS.ConfigurationSpac """ 
optimizer = optimizer_class(configuration_space, **kwargs) suggestion = optimizer.suggest() - score = pd.DataFrame({'score': [1]}) + scores = pd.DataFrame({'score': [1]}) # test context not implemented errors with pytest.raises(NotImplementedError): - optimizer.register(suggestion, score, context="something") + optimizer.register(suggestion, scores['score'], context=pd.DataFrame([["something"]])) with pytest.raises(NotImplementedError): - optimizer.suggest(context="something") + optimizer.suggest(context=pd.DataFrame([["something"]])) - with pytest.raises(NotImplementedError): - optimizer.surrogate_predict(suggestion, context="something") + if isinstance(optimizer, BaseBayesianOptimizer): + with pytest.raises(NotImplementedError): + optimizer.surrogate_predict(suggestion, context=pd.DataFrame([["something"]])) - # acquisition function not implemented - with pytest.raises(NotImplementedError): - optimizer.acquisition_function(suggestion) + # acquisition function not implemented + with pytest.raises(NotImplementedError): + optimizer.acquisition_function(suggestion) diff --git a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py index 0b83cc6cce..a36ed67b01 100644 --- a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py +++ b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py @@ -10,6 +10,7 @@ import pytest import pandas as pd import numpy as np +import numpy.typing as npt import ConfigSpace as CS from mlos_core.optimizers import EmukitOptimizer @@ -31,7 +32,7 @@ def data_frame() -> pd.DataFrame: @pytest.fixture -def one_hot() -> np.array: +def one_hot() -> npt.NDArray: """ One-hot encoding of the `data_frame` above. The columns follow the order of the hyperparameters in `configuration_space`. 
@@ -44,7 +45,7 @@ def one_hot() -> np.array: def test_to_1hot(configuration_space: CS.ConfigurationSpace, - data_frame: pd.DataFrame, one_hot: np.array): + data_frame: pd.DataFrame, one_hot: npt.NDArray): """ Toy problem to test one-hot encoding. """ @@ -53,7 +54,7 @@ def test_to_1hot(configuration_space: CS.ConfigurationSpace, def test_from_1hot(configuration_space: CS.ConfigurationSpace, - data_frame: pd.DataFrame, one_hot: np.array): + data_frame: pd.DataFrame, one_hot: npt.NDArray): """ Toy problem to test one-hot decoding. """ @@ -72,7 +73,7 @@ def test_round_trip(configuration_space: CS.ConfigurationSpace, data_frame: pd.D assert (df_round_trip.z == data_frame.z).all() -def test_round_trip_reverse(configuration_space: CS.ConfigurationSpace, one_hot: np.array): +def test_round_trip_reverse(configuration_space: CS.ConfigurationSpace, one_hot: npt.NDArray): """ Round-trip test for one-hot-decoding and then encoding of a numpy array. """ diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index ecc9132d6d..3880608be6 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -6,7 +6,7 @@ Tests for Bayesian Optimizers. 
""" -from typing import Type +from typing import Optional, Type import pytest @@ -15,17 +15,16 @@ import numpy as np import ConfigSpace as CS from mlos_core.optimizers import ( - OptimizerType, OptimizerFactory, BaseOptimizer, - EmukitOptimizer, SkoptOptimizer, RandomOptimizer) + OptimizerType, ConcreteOptimizer, OptimizerFactory, BaseOptimizer, + SkoptOptimizer) from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer from mlos_core.spaces.adapters import SpaceAdapterType @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ - (EmukitOptimizer, {}), + *[(member.value, {}) for member in OptimizerType], (SkoptOptimizer, {'base_estimator': 'gp'}), - (RandomOptimizer, {}) ]) def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace, optimizer_class: Type[BaseOptimizer], kwargs): @@ -49,10 +48,9 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ - (EmukitOptimizer, {}), + *[(member.value, {}) for member in OptimizerType], (SkoptOptimizer, {'base_estimator': 'gp', 'seed': 42}), (SkoptOptimizer, {'base_estimator': 'et', 'seed': 42}), - (RandomOptimizer, {}) ]) def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace, optimizer_class: Type[BaseOptimizer], kwargs): @@ -102,6 +100,18 @@ def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace, assert pred_all.shape == (20,) +@pytest.mark.parametrize(('optimizer_type'), [ + # Enumerate all supported Optimizers + # *[member for member in OptimizerType], + *list(OptimizerType), +]) +def test_concrete_optimizer_type(optimizer_type: OptimizerType): + """ + Test that all optimizer types are listed in the ConcreteOptimizer constraints. 
+ """ + assert optimizer_type.value in ConcreteOptimizer.__constraints__ # type: ignore # pylint: disable=no-member + + @pytest.mark.parametrize(('optimizer_type', 'kwargs'), [ # Default optimizer (None, {}), @@ -111,7 +121,7 @@ def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace, (OptimizerType.SKOPT, {'base_estimator': 'gp'}), ]) def test_create_optimizer_with_factory_method(configuration_space: CS.ConfigurationSpace, - optimizer_type: OptimizerType, kwargs): + optimizer_type: Optional[OptimizerType], kwargs): """ Test that we can create an optimizer via a factory. """ @@ -155,11 +165,11 @@ def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs): assert optimizer is not None # Initialize another optimizer that uses LlamaTune space adapter - space_adapter_kwargs = dict( - num_low_dims=1, - special_param_values=None, - max_unique_values_per_param=None, - ) + space_adapter_kwargs = { + "num_low_dims": 1, + "special_param_values": None, + "max_unique_values_per_param": None, + } llamatune_optimizer = OptimizerFactory.create( input_space, optimizer_type, @@ -201,6 +211,6 @@ def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs): assert (all_obvs.columns == ['x', 'y', 'score']).all() # .surrogate_predict method not currently implemented if space adapter is employed - if isinstance(optimizer, BaseBayesianOptimizer): + if isinstance(llamatune_optimizer, BaseBayesianOptimizer): with pytest.raises(NotImplementedError): _ = llamatune_optimizer.surrogate_predict(llamatune_best_observation[['x']]) diff --git a/mlos_core/mlos_core/tests/spaces/adapters/identity_adapter_test.py b/mlos_core/mlos_core/tests/spaces/adapters/identity_adapter_test.py new file mode 100644 index 0000000000..402f2ec395 --- /dev/null +++ b/mlos_core/mlos_core/tests/spaces/adapters/identity_adapter_test.py @@ -0,0 +1,41 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Tests for Identity space adapter. 
+""" + +# pylint: disable=missing-function-docstring + +import ConfigSpace as CS +import pandas as pd + +from mlos_core.spaces.adapters import IdentityAdapter + + +def test_identity_adapter(): + """ + Tests identity adapter + """ + input_space = CS.ConfigurationSpace(seed=1234) + input_space.add_hyperparameter( + CS.UniformIntegerHyperparameter(name='int_1', lower=0, upper=100)) + input_space.add_hyperparameter( + CS.UniformFloatHyperparameter(name='float_1', lower=0, upper=100)) + input_space.add_hyperparameter( + CS.CategoricalHyperparameter(name='str_1', choices=['on', 'off'])) + + adapter = IdentityAdapter(input_space) + + num_configs = 10 + for sampled_config in input_space.sample_configuration(size=num_configs): + sampled_config_df = pd.DataFrame([sampled_config.values()], columns=sampled_config.keys()) + target_config_df = adapter.inverse_transform(sampled_config_df) + assert target_config_df.equals(sampled_config_df) + target_config = CS.Configuration(adapter.target_parameter_space, values=target_config_df.iloc[0].to_dict()) + assert target_config == sampled_config + orig_config_df = adapter.transform(target_config_df) + assert orig_config_df.equals(sampled_config_df) + orig_config = CS.Configuration(adapter.orig_parameter_space, values=orig_config_df.iloc[0].to_dict()) + assert orig_config == sampled_config diff --git a/mlos_core/mlos_core/tests/spaces/adapters/llamatune_test.py b/mlos_core/mlos_core/tests/spaces/adapters/llamatune_test.py index 9d5646eaf4..a1b9021d08 100644 --- a/mlos_core/mlos_core/tests/spaces/adapters/llamatune_test.py +++ b/mlos_core/mlos_core/tests/spaces/adapters/llamatune_test.py @@ -8,6 +8,8 @@ Tests for LlamaTune space adapter. 
# pylint: disable=missing-function-docstring +from typing import Dict, Set + import pytest import ConfigSpace as CS @@ -372,7 +374,7 @@ def test_llamatune_pipeline(num_low_dims: int, special_param_values: dict, max_u param: {special_value: 0 for special_value, _ in tuples_list} for param, tuples_list in adapter._special_param_values_dict.items() # pylint: disable=protected-access } - unique_values_dict = {param: set() for param in input_space.get_hyperparameter_names()} + unique_values_dict: Dict[str, Set] = {param: set() for param in input_space.get_hyperparameter_names()} num_configs = 1000 for config in adapter.target_parameter_space.sample_configuration(size=num_configs): diff --git a/mlos_core/mlos_core/tests/spaces/adapters/space_adapter_factory_test.py b/mlos_core/mlos_core/tests/spaces/adapters/space_adapter_factory_test.py index 5d3193d4bd..1320c74368 100644 --- a/mlos_core/mlos_core/tests/spaces/adapters/space_adapter_factory_test.py +++ b/mlos_core/mlos_core/tests/spaces/adapters/space_adapter_factory_test.py @@ -8,12 +8,27 @@ Tests for space adapter factory. # pylint: disable=missing-function-docstring +from typing import Optional + import pytest import ConfigSpace as CS -from mlos_core.spaces.adapters import SpaceAdapterFactory, SpaceAdapterType +from mlos_core.spaces.adapters import SpaceAdapterFactory, SpaceAdapterType, ConcreteSpaceAdapter from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter +from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter + + +@pytest.mark.parametrize(('space_adapter_type'), [ + # Enumerate all supported SpaceAdapters + # *[member for member in SpaceAdapterType], + *list(SpaceAdapterType), +]) +def test_concrete_optimizer_type(space_adapter_type: SpaceAdapterType): + """ + Test that all space adapter types are listed in the ConcreteSpaceAdapter constraints. 
+ """ + assert space_adapter_type.value in ConcreteSpaceAdapter.__constraints__ # type: ignore # pylint: disable=no-member @pytest.mark.parametrize(('space_adapter_type', 'kwargs'), [ @@ -22,7 +37,7 @@ from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter # Enumerate all supported Optimizers *[(member, {}) for member in SpaceAdapterType], ]) -def test_create_space_adapter_with_factory_method(space_adapter_type: SpaceAdapterType, kwargs): +def test_create_space_adapter_with_factory_method(space_adapter_type: Optional[SpaceAdapterType], kwargs): # Start defining a ConfigurationSpace for the Optimizer to search. input_space = CS.ConfigurationSpace(seed=1234) @@ -37,14 +52,14 @@ def test_create_space_adapter_with_factory_method(space_adapter_type: SpaceAdapt kwargs = {} kwargs.setdefault('num_low_dims', 1) - space_adapter: BaseSpaceAdapter = None + space_adapter: BaseSpaceAdapter if space_adapter_type is None: space_adapter = SpaceAdapterFactory.create(input_space) else: space_adapter = SpaceAdapterFactory.create(input_space, space_adapter_type, space_adapter_kwargs=kwargs) if space_adapter_type is None or space_adapter_type is SpaceAdapterType.IDENTITY: - assert space_adapter is None + assert isinstance(space_adapter, IdentityAdapter) else: assert space_adapter is not None assert space_adapter.orig_parameter_space is not None diff --git a/mlos_core/setup.py b/mlos_core/setup.py index d29e89cddc..c5e3d2ec2b 100644 --- a/mlos_core/setup.py +++ b/mlos_core/setup.py @@ -6,8 +6,8 @@ Setup instructions for the mlos_core package. """ -from logging import warning from itertools import chain +from logging import warning from setuptools import setup, find_packages @@ -31,13 +31,16 @@ extra_requires = { # construct special 'full' extra that adds requirements for all built-in # backend integrations and additional extra features. 
-extra_requires['full'] = list(set(chain(extra_requires.values()))) +extra_requires['full'] = list(set(chain(extra_requires.values()))) # type: ignore # pylint: disable=duplicate-code setup( name='mlos-core', version=_VERSION, packages=find_packages(), + package_data={ + 'mlos_core': ['py.typed'], + }, install_requires=[ 'scikit-learn<1.2', # FIXME: temporarily work around some version mismatch issues (PR 850) 'joblib>=1.1.1', # CVE-2022-21797: scikit-learn dependency, addressed in 1.2.0dev0, which isn't currently released diff --git a/scripts/dmypy-wrapper.sh b/scripts/dmypy-wrapper.sh new file mode 100755 index 0000000000..5bccc4501c --- /dev/null +++ b/scripts/dmypy-wrapper.sh @@ -0,0 +1,43 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +set -eu + +#set -x + +# Start in the root dir. +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir/.." + +DMYPY_STATUS_FILE='.dmypy.json' +DMYPY_STATUS_ARGS="--status-file $DMYPY_STATUS_FILE" +DMYPY_START_ARGS='' + +while [ -n "${1:-}" ]; do + opt="$1" + case $opt in + --*) + DMYPY_START_ARGS+=" $opt" + shift + ;; + *) + break + ;; + esac +done +if [ -z "$DMYPY_START_ARGS" ]; then + DMYPY_START_ARGS='--pretty --cache-fine-grained --install-types --non-interactive' +fi + +dmypy $DMYPY_STATUS_ARGS status >/dev/null || dmypy $DMYPY_STATUS_ARGS start -- $DMYPY_START_ARGS + +# Restart the daemon if the config file has changed. +if [ setup.cfg -nt /proc/$(cat $DMYPY_STATUS_FILE | jq -e -r .pid) ]; then + dmypy $DMYPY_STATUS_ARGS restart -- $DMYPY_START_ARGS +fi + +# Check the files passed as arguments (quoted so paths containing spaces survive). 
+dmypy $DMYPY_STATUS_ARGS check "$@" diff --git a/setup.cfg b/setup.cfg index 34198a929a..575604c57f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,3 +39,44 @@ addopts = # Moved these to Makefile #--cov=mlos_core --cov-report=xml testpaths = mlos_core mlos_bench + +# +# mypy static type checker configs +# + +[mypy] +#ignore_missing_imports = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +warn_return_any = True +# regex of files to skip type checking +exclude = /_pytest/|/build/|doc/|_version.py|setup.py + +# https://github.com/automl/ConfigSpace/issues/293 +[mypy-ConfigSpace.*] +ignore_missing_imports = True + +# https://github.com/EmuKit/emukit/issues/440 +[mypy-emukit.*] +ignore_missing_imports = True + +# https://github.com/pytest-dev/pytest/issues/10435 +[mypy-pytest] +ignore_missing_imports = True + +# https://github.com/scikit-learn/scikit-learn/issues/16705 +[mypy-sklearn.*] +ignore_missing_imports = True + +# https://github.com/scipy/scipy/issues/17158 +[mypy-scipy.*] +ignore_missing_imports = True + +# https://github.com/pypa/setuptools_scm/issues/501 +[mypy-setuptools_scm.*] +ignore_missing_imports = True + +# https://github.com/scikit-optimize/scikit-optimize/issues/1111 +[mypy-skopt.*] +ignore_missing_imports = True