Mirror of https://github.com/microsoft/MLOS.git

Improve mlos-viz for multiple repeats of a config and add tests (#633)

- Mark `mlos_viz` as `typed` for `mypy`
- Bump version
- Mock calls to matplotlib/dabl for testing
- Add plotting of top-N configs
- Improve plots for handling repeat config trials via variance error bars

Co-authored-by: Sergiy Matusevych <sergiym@microsoft.com>

Parent: 3a367972c6
Commit: a45f97dc01
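For reference, a minimal usage sketch (not part of the commit) of the reworked `mlos_viz.plot()` API changed below; `exp_data` is assumed to be an `ExperimentData` instance already loaded from the mlos_bench storage layer for an experiment with repeated config trials:

```python
from mlos_viz import MlosVizMethod, plot

# exp_data: ExperimentData previously loaded from the mlos_bench storage layer (assumed).
plot(
    exp_data,                           # or pass results_df=/objectives= explicitly instead
    plotter_method=MlosVizMethod.AUTO,  # currently defaults to dabl
    filter_warnings=True,               # suppress known-noisy plotter warnings
    top_n_configs=5,                    # forwarded to limit_top_n_configs() via **kwargs
)
```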
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.0
current_version = 0.4.1
commit = True
tag = True
@@ -38,6 +38,7 @@
"jupyterlab",
"keepalive",
"kwargs",
"kword",
"libmamba",
"linalg",
"llamatune",
@@ -57,6 +58,7 @@
"pylint",
"pyplot",
"pytest",
"quantile",
"Quickstart",
"refcnt",
"rexec",
@@ -82,6 +84,8 @@
"workerinput",
"xdist",
"xlabel",
"xlabels",
"xticks",
"ylabel"
]
// vim: set ft=jsonc:
@@ -114,6 +114,8 @@ if ($LASTEXITCODE -ne 0) {
}

# Run a simple mlos_viz test.
# To do that, we need the fixtures from mlos_bench, so make those available too.
$env:PYTHONPATH = "mlos_bench"
conda run -n mlos-dist-test python -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
if ($LASTEXITCODE -ne 0) {
Write-Error "Failed to run mlos_viz tests."
Makefile
@@ -335,7 +335,8 @@ build/dist-test.$(PYTHON_VERSION).build-stamp: $(PYTHON_FILES) build/dist-test-e
# Run a simple test that uses the mlos_bench wheel (full tests can be checked with `make test`).
conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_bench/mlos_bench/tests/environments/mock_env_test.py
# Run a simple test that uses the mlos_viz wheel (full tests can be checked with `make test`).
conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
# To do that, we need the fixtures from mlos_bench, so make those available too.
PYTHONPATH=mlos_bench conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
touch $@

dist-test-clean: dist-test-env-clean
@@ -36,7 +36,7 @@ copyright = '2024, GSL'
author = 'GSL'

# The full version, including alpha/beta/rc tags
release = '0.4.0'
release = '0.4.1'

try:
from setuptools_scm import get_version
@@ -7,4 +7,4 @@ Version number for the mlos_bench package.
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.4.0'
_VERSION = '0.4.1'
@@ -8,7 +8,7 @@ Base interface for accessing the stored benchmark experiment data.

from abc import ABCMeta, abstractmethod
from distutils.util import strtobool  # pylint: disable=deprecated-module
from typing import Dict, Optional, Tuple, TYPE_CHECKING
from typing import Dict, Literal, Optional, Tuple, TYPE_CHECKING

import pandas

@@ -73,7 +73,7 @@ class ExperimentData(metaclass=ABCMeta):

@property
@abstractmethod
def objectives(self) -> Dict[str, str]:
def objectives(self) -> Dict[str, Literal["min", "max"]]:
"""
Retrieve the experiment's objectives data from the storage.
@@ -5,7 +5,7 @@
"""
An interface to access the experiment benchmark data stored in SQL DB.
"""
from typing import Dict, Optional
from typing import Dict, Literal, Optional

import logging

@@ -51,8 +51,8 @@ class ExperimentSqlData(ExperimentData):
self._schema = schema

@property
def objectives(self) -> Dict[str, str]:
objectives: Dict[str, str] = {}
def objectives(self) -> Dict[str, Literal["min", "max"]]:
objectives: Dict[str, Literal["min", "max"]] = {}
# First try to lookup the objectives from the experiment metadata in the storage layer.
if hasattr(self._schema, "objectives"):
with self._engine.connect() as conn:
@@ -60,6 +60,7 @@ class ExperimentSqlData(ExperimentData):
self._schema.objectives.select().where(
self._schema.objectives.c.exp_id == self._experiment_id,
).order_by(
# TODO: return weight as well
self._schema.objectives.c.weight.desc(),
self._schema.objectives.c.optimization_target.asc(),
)
@@ -98,6 +99,8 @@ class ExperimentSqlData(ExperimentData):
elif opt_direction != objectives[opt_target]:
_LOG.warning("Experiment %s has multiple trial optimization directions for optimization_target %s=%s",
self, opt_target, objectives[opt_target])
for opt_tgt, opt_dir in objectives.items():
assert opt_dir in {None, "min", "max"}, f"Unexpected opt_dir {opt_dir} for opt_tgt {opt_tgt}."
return objectives

# TODO: provide a way to get individual data to avoid repeated bulk fetches where only small amounts of data is accessed.
@@ -52,7 +52,8 @@ class TunableConfigTrialGroupSqlData(TunableConfigTrialGroupData):
with self._engine.connect() as conn:
tunable_config_trial_group = conn.execute(
self._schema.trial.select().with_only_columns(
func.min(self._schema.trial.c.trial_id).cast(Integer).label('tunable_config_trial_group_id'),
func.min(self._schema.trial.c.trial_id).cast(Integer).label(  # pylint: disable=not-callable
'tunable_config_trial_group_id'),
).where(
self._schema.trial.c.exp_id == self._experiment_id,
self._schema.trial.c.config_id == self._tunable_config_id,
@@ -3,5 +3,5 @@
# Licensed under the MIT License.
#
"""
Test for mlos_bench sql storage.
Tests for mlos_bench sql storage.
"""
@@ -63,10 +63,14 @@ def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Ex
"""
# Add some trials to that experiment.
# Note: we're just fabricating some made up function for the ML libraries to try and learn.
base_score = 5.0
base_score = 10.0
tunable_name = "kernel_sched_latency_ns"
tunable_default = exp_storage.tunables.get_tunable(tunable_name)[0].default
tunable = exp_storage.tunables.get_tunable(tunable_name)[0]
tunable_default = tunable.default
assert isinstance(tunable_default, int)
tunable_min = tunable.range[0]
tunable_max = tunable.range[1]
tunable_range = tunable_max - tunable_min
seed = 42
rand_seed(seed)
opt = MockOptimizer(tunables=exp_storage.tunables, config={
@@ -85,14 +89,15 @@ def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Ex
"trial_number": config_i * CONFIG_TRIAL_REPEAT_COUNT + repeat_j + 1,
})
assert trial.tunable_config_id == config_i + 1
trial.update_telemetry(status=Status.RUNNING, metrics=[
(datetime.utcnow(), "some-metric", base_score + random() / 10),
])
tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
trial.update_telemetry(status=Status.RUNNING, metrics=[
(datetime.utcnow(), "some-metric", tunable_value_norm + random() / 100),
])
trial.update(Status.SUCCEEDED, datetime.utcnow(), metrics={
# Give some variance on the score.
# And some influence from the tunable value.
"score": base_score + 10 * ((tunable_value / tunable_default) - 1) + random() / 10,
"score": tunable_value_norm + random() / 100
})
return exp_storage
@@ -26,7 +26,7 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None:
assert trial.status == Status.SUCCEEDED
assert trial.metadata_dict["trial_number"] == trial_id
assert list(trial.results_dict.keys()) == ["score"]
assert trial.results_dict["score"] == pytest.approx(5.0, rel=0.1)
assert trial.results_dict["score"] == pytest.approx(0.0, abs=0.1)
assert isinstance(trial.ts_start, datetime)
assert isinstance(trial.ts_end, datetime)
# Note: tests for telemetry are in test_update_telemetry()
@@ -7,4 +7,4 @@ Version number for the mlos_core package.
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.4.0'
_VERSION = '0.4.1'
@@ -8,13 +8,13 @@ from the mlos_bench framework for benchmarking and optimization automation.
"""

from enum import Enum
from typing import Any, Dict, Literal, Optional

import warnings

from matplotlib import pyplot as plt
import seaborn as sns
import pandas

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz import base
from mlos_viz.util import expand_results_data_args


class MlosVizMethod(Enum):
@@ -22,41 +22,8 @@ class MlosVizMethod(Enum):
What method to use for visualizing the experiment results.
"""

AUTO = "dabl"   # use dabl as the current default
DABL = "dabl"


def _plot_optimizer_trends(exp_data: ExperimentData) -> None:
"""
Plots the optimizer trends for the Experiment.

Intended to be used from a Jupyter notebook.

Parameters
----------
exp_data: ExperimentData
The experiment data to plot.
"""
for objective in exp_data.objectives:
objective_column = ExperimentData.RESULT_COLUMN_PREFIX + objective
results_df = exp_data.results_df
plt.rcParams["figure.figsize"] = (10, 4)

sns.scatterplot(
x=results_df.trial_id, y=results_df[objective_column],
alpha=0.7, label="Trial")  # Result of each trial
sns.lineplot(
x=results_df.trial_id, y=results_df[objective_column].cummin(),
label="Incumbent")  # the best result so far (cummin)

plt.yscale('log')

plt.xlabel("Trial number")
plt.ylabel(objective)

plt.title("Optimizer Trends for Experiment: " + exp_data.experiment_id)
plt.grid()
plt.show()  # type: ignore[no-untyped-call]
AUTO = DABL   # use dabl as the current default


def ignore_plotter_warnings(plotter_method: MlosVizMethod = MlosVizMethod.AUTO) -> None:
@@ -69,8 +36,7 @@ def ignore_plotter_warnings(plotter_method: MlosVizMethod = MlosVizMethod.AUTO)
plotter_method: MlosVizMethod
The method to use for visualizing the experiment results.
"""
warnings.filterwarnings("ignore", category=FutureWarning)

base.ignore_plotter_warnings()
if plotter_method == MlosVizMethod.DABL:
import mlos_viz.dabl    # pylint: disable=import-outside-toplevel
mlos_viz.dabl.ignore_plotter_warnings()
@@ -78,9 +44,12 @@ def ignore_plotter_warnings(plotter_method: MlosVizMethod = MlosVizMethod.AUTO)
raise NotImplementedError(f"Unhandled method: {plotter_method}")


def plot(exp_data: ExperimentData,
def plot(exp_data: Optional[ExperimentData] = None, *,
results_df: Optional[pandas.DataFrame] = None,
objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
plotter_method: MlosVizMethod = MlosVizMethod.AUTO,
filter_warnings: bool = True) -> None:
filter_warnings: bool = True,
**kwargs: Any) -> None:
"""
Plots the results of the experiment.

@@ -90,18 +59,28 @@ def plot(exp_data: ExperimentData,
----------
exp_data: ExperimentData
The experiment data to plot.
results_df : Optional["pandas.DataFrame"]
Optional results_df to plot.
If not provided, defaults to exp_data.results_df property.
objectives : Optional[Dict[str, Literal["min", "max"]]]
Optional objectives to plot.
If not provided, defaults to exp_data.objectives property.
plotter_method: MlosVizMethod
The method to use for visualizing the experiment results.
filter_warnings: bool
Whether or not to filter some warnings from the plotter.
kwargs : dict
Remaining keyword arguments are passed along to the underlying plotter(s).
"""
_plot_optimizer_trends(exp_data)

if filter_warnings:
ignore_plotter_warnings(plotter_method)
(results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)

base.plot_optimizer_trends(exp_data, results_df=results_df, objectives=objectives)
base.plot_top_n_configs(exp_data, results_df=results_df, objectives=objectives, **kwargs)

if MlosVizMethod.DABL:
import mlos_viz.dabl    # pylint: disable=import-outside-toplevel
mlos_viz.dabl.plot(exp_data)
mlos_viz.dabl.plot(exp_data, results_df=results_df, objectives=objectives)
else:
raise NotImplementedError(f"Unhandled method: {plotter_method}")
@@ -0,0 +1,439 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Base functions for visualizing, explain, and gain insights from results.
"""

from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Tuple, Union

import re
import warnings

from importlib.metadata import version

from matplotlib import pyplot as plt
import pandas
from pandas.api.types import is_numeric_dtype
from pandas.core.groupby.generic import SeriesGroupBy
import seaborn as sns

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz.util import expand_results_data_args


_SEABORN_VERS = version('seaborn')


def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> Dict[str, Any]:
"""
Assembles a smaller kwargs dict for the specified target function.

Note: this only works with non-positional kwargs (e.g., those after a * arg).
"""
target_kwargs = {}
for kword in target.__kwdefaults__:  # or {} # intentionally omitted for now
if kword in kwargs:
target_kwargs[kword] = kwargs[kword]
return target_kwargs


def ignore_plotter_warnings() -> None:
"""
Suppress some annoying warnings from third-party data visualization packages by
adding them to the warnings filter.
"""
warnings.filterwarnings("ignore", category=FutureWarning)
if _SEABORN_VERS <= '0.13.1':
warnings.filterwarnings("ignore", category=DeprecationWarning, module="seaborn",  # but actually comes from pandas
message="is_categorical_dtype is deprecated and will be removed in a future version.")


def _add_groupby_desc_column(results_df: pandas.DataFrame,
groupby_columns: Optional[List[str]] = None,
) -> Tuple[pandas.DataFrame, List[str], str]:
"""
Adds a group descriptor column to the results_df.

Parameters
----------
results_df: ExperimentData
The experiment data to add the descriptor column to.
groupby_columns: Optional[List[str]]
"""
# Compose a new groupby_column for display purposes that is the
# concatenation of the min trial_id (the first one) of each config trial
# group and the config_id.
# Note: It's need to be a string (e.g., categorical) for boxplot and lineplot to
# be on the same axis anyways.
if groupby_columns is None:
groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
groupby_column = ",".join(groupby_columns)
results_df[groupby_column] = results_df[groupby_columns].astype(str).apply(
lambda x: ",".join(x), axis=1)  # pylint: disable=unnecessary-lambda
groupby_columns.append(groupby_column)
return (results_df, groupby_columns, groupby_column)


def augment_results_df_with_config_trial_group_stats(exp_data: Optional[ExperimentData] = None,
*,
results_df: Optional[pandas.DataFrame] = None,
requested_result_cols: Optional[Iterable[str]] = None,
) -> pandas.DataFrame:
# pylint: disable=too-complex
"""
Add a number of useful statistical measure columns to the results dataframe.

In particular, for each numeric result, we add the following columns for each
requested result column:

- ".p50": the median of each config trial group results

- ".p75": the p75 of each config trial group results

- ".p90": the p90 of each config trial group results

- ".p95": the p95 of each config trial group results

- ".p99": the p95 of each config trial group results

- ".mean": the mean of each config trial group results

- ".stddev": the mean of each config trial group results

- ".var": the variance of each config trial group results

- ".var_zscore": the zscore of this group (i.e., variance relative to the stddev
of all group variances). This can be useful for filtering out outliers (e.g.,
configs with high variance relative to others by restricting to abs < 2 to
remove those two standard deviations from the mean variance across all config
trial groups).

Additionally, we add a "tunable_config_trial_group_size" column that indicates
the number of trials using a particular config.

Parameters
----------
exp_data : ExperimentData
The ExperimentData (e.g., obtained from the storage layer) to plot.
results_df : Optional[pandas.DataFrame]
The results dataframe to augment, by default None to use the results_df property.
requested_result_cols : Optional[Iterable[str]]
Which results columns to augment, by default None to use all results columns
that look numeric.

Returns
-------
pandas.DataFrame
The augmented results dataframe.
"""
if results_df is None:
if exp_data is None:
raise ValueError("Either exp_data or results_df must be provided.")
results_df = exp_data.results_df
results_groups = results_df.groupby("tunable_config_id")
if len(results_groups) <= 1:
raise ValueError(f"Not enough data: {len(results_groups)}")

if requested_result_cols is None:
result_cols = set(col for col in results_df.columns if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX))
else:
result_cols = set(col for col in requested_result_cols
if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX) and col in results_df.columns)
result_cols.update(set(ExperimentData.RESULT_COLUMN_PREFIX + col for col in requested_result_cols
if ExperimentData.RESULT_COLUMN_PREFIX in results_df.columns))

def compute_zscore_for_group_agg(
results_groups_perf: "SeriesGroupBy",
stats_df: pandas.DataFrame,
result_col: str,
agg: Union[Literal["mean"], Literal["var"], Literal["std"]]
) -> None:
results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
# Compute the zscore of the chosen aggregate performance of each group into each row in the dataframe.
stats_df[result_col + f".{agg}_mean"] = results_groups_perf_aggs.mean()
stats_df[result_col + f".{agg}_stddev"] = results_groups_perf_aggs.std()
stats_df[result_col + f".{agg}_zscore"] = \
(stats_df[result_col + f".{agg}"] - stats_df[result_col + f".{agg}_mean"]) \
/ stats_df[result_col + f".{agg}_stddev"]
stats_df.drop(columns=[result_col + ".var_" + agg for agg in ("mean", "stddev")], inplace=True)

augmented_results_df = results_df
augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform("count")
for result_col in result_cols:
if not result_col.startswith(ExperimentData.RESULT_COLUMN_PREFIX):
continue
if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
# Ignore computing variance on things like that look like timestamps.
continue
if not is_numeric_dtype(results_df[result_col]):
continue
if results_df[result_col].unique().size == 1:
continue
results_groups_perf = results_groups[result_col]
stats_df = pandas.DataFrame()
stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
stats_df[result_col + ".var"] = results_groups_perf.transform("var")
stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"].apply(lambda x: x**0.5)

compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
for quantile in quantiles:  # TODO: can we do this in one pass?
quantile_col = result_col + f".p{int(quantile*100)}"
stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
return augmented_results_df


def limit_top_n_configs(exp_data: Optional[ExperimentData] = None,
*,
results_df: Optional[pandas.DataFrame] = None,
objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
top_n_configs: int = 10,
method: Literal["mean", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> Tuple[pandas.DataFrame, List[int], Dict[str, bool]]:
# pylint: disable=too-many-locals
"""
Utility function to process the results and determine the best performing
configs including potential repeats to help assess variability.

Parameters
----------
exp_data : Optional[ExperimentData]
The ExperimentData (e.g., obtained from the storage layer) to operate on.
results_df : Optional[pandas.DataFrame]
The results dataframe to augment, by default None to use the results_df property.
objectives : Iterable[str], optional
Which result column(s) to use for sorting the configs, and in which direction ("min" or "max").
By default None to automatically select the experiment objectives.
top_n_configs : int, optional
How many configs to return, including the default, by default 20.
method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
Which statistical method to use when sorting the config groups before determining the cutoff, by default "mean".

Returns
-------
(top_n_config_results_df, top_n_config_ids, orderby_cols) : Tuple[pandas.DataFrame, List[int], Dict[str, bool]]
The filtered results dataframe, the config ids, and the columns used to order the configs.
"""
# Do some input checking first.
if method not in ["mean", "median", "p50", "p75", "p90", "p95", "p99"]:
raise ValueError(f"Invalid method: {method}")

# Prepare the orderby columns.
(results_df, objs_cols) = expand_results_data_args(exp_data, results_df=results_df, objectives=objectives)
assert isinstance(results_df, pandas.DataFrame)

# Augment the results dataframe with some useful stats.
results_df = augment_results_df_with_config_trial_group_stats(
exp_data=exp_data,
results_df=results_df,
requested_result_cols=objs_cols.keys(),
)
# Note: mypy seems to lose its mind for some reason and keeps forgetting that
# results_df is not None and is in fact a DataFrame, so we periodically assert
# it in this func for now.
assert results_df is not None
orderby_cols: Dict[str, bool] = {obj_col + f".{method}": ascending for (obj_col, ascending) in objs_cols.items()}

config_id_col = "tunable_config_id"
group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
trial_id_col = "trial_id"

default_config_id = results_df[trial_id_col].min() if exp_data is None else exp_data.default_tunable_config_id
assert default_config_id is not None, "Failed to determine default config id."

# Filter out configs whose variance is too large.
# But also make sure the default configs is still in the resulting dataframe
# (for comparison purposes).
for obj_col in objs_cols:
assert results_df is not None
if method == "mean":
singletons_mask = results_df["tunable_config_trial_group_size"] == 1
else:
singletons_mask = results_df["tunable_config_trial_group_size"] > 1
results_df = results_df.loc[(
(results_df[f"{obj_col}.var_zscore"].abs() < 2)
| (singletons_mask)
| (results_df[config_id_col] == default_config_id)
)]
assert results_df is not None

# Also, filter results that are worse than the default.
default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
for (orderby_col, ascending) in orderby_cols.items():
default_vals = default_config_results_df[orderby_col].unique()
assert len(default_vals) == 1
default_val = default_vals[0]
assert results_df is not None
if ascending:
results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
else:
results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

# Now regroup and filter to the top-N configs by their group performance dimensions.
assert results_df is not None
group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[orderby_cols.keys()]
top_n_config_ids: List[int] = group_results_df.sort_values(
by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())).head(top_n_configs).index.tolist()

# Remove the default config if it's included. We'll add it back later.
if default_config_id in top_n_config_ids:
top_n_config_ids.remove(default_config_id)
# Get just the top-n config results.
# Sort by the group ids.
top_n_config_results_df = results_df.loc[(
results_df[config_id_col].isin(top_n_config_ids)
)].sort_values([group_id_col, config_id_col, trial_id_col])
# Place the default config at the top of the list.
top_n_config_ids.insert(0, default_config_id)
top_n_config_results_df = pandas.concat([default_config_results_df, top_n_config_results_df], axis=0)
return (top_n_config_results_df, top_n_config_ids, orderby_cols)


def plot_optimizer_trends(
exp_data: Optional[ExperimentData] = None,
*,
results_df: Optional[pandas.DataFrame] = None,
objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> None:
"""
Plots the optimizer trends for the Experiment.

Parameters
----------
exp_data : ExperimentData
The ExperimentData (e.g., obtained from the storage layer) to plot.
results_df : Optional["pandas.DataFrame"]
Optional results_df to plot.
If not provided, defaults to exp_data.results_df property.
objectives : Optional[Dict[str, Literal["min", "max"]]]
Optional objectives to plot.
If not provided, defaults to exp_data.objectives property.
"""
(results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
(results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

for (objective_column, ascending) in obj_cols.items():
incumbent_column = objective_column + ".incumbent"

# Determine the mean of each config trial group to match the box plots.
group_results_df = results_df.groupby(groupby_columns)[objective_column].mean()\
.reset_index().sort_values(groupby_columns)
#
# Note: technically the optimizer (usually) uses the *first* result for a
# given config trial group before moving on to a new config (x-axis), so
# plotting the mean may be slightly misleading when trying to understand the
# actual path taken by the optimizer in case of high variance samples.
# Here's a way to do that, though it can also be misleading if the optimizer
# later gets a worse value for that config group as well.
#
# group_results_df = results_df.sort_values(groupby_columns + ["trial_id"]).groupby(
# groupby_columns).head(1)[groupby_columns + [objective_column]].reset_index()

# Calculate the incumbent (best seen so far)
if ascending:
group_results_df[incumbent_column] = group_results_df[objective_column].cummin()
else:
group_results_df[incumbent_column] = group_results_df[objective_column].cummax()

(_fig, axis) = plt.subplots(figsize=(15, 5))

# Result of each set of trials for a config
sns.boxplot(
data=results_df,
x=groupby_column,
y=objective_column,
ax=axis,
)

# Results of the best so far.
axis = sns.lineplot(
data=group_results_df,
x=groupby_column,
y=incumbent_column,
alpha=0.7,
label="Mean of Incumbent Config Trial Group",
ax=axis,
)

plt.yscale('log')
plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

plt.xlabel("Config Trial Group ID, Config ID")
plt.xticks(rotation=90, fontsize=8)

plt.title("Optimizer Trends for Experiment: " + exp_data.experiment_id if exp_data is not None else "")
plt.grid()
plt.show()  # type: ignore[no-untyped-call]


def plot_top_n_configs(exp_data: Optional[ExperimentData] = None,
*,
results_df: Optional[pandas.DataFrame] = None,
objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
with_scatter_plot: bool = False,
**kwargs: Any,
) -> None:
# pylint: disable=too-many-locals
"""
Plots the top-N configs along with the default config for the given ExperimentData.

Intended to be used from a Jupyter notebook.

Parameters
----------
exp_data: ExperimentData
The experiment data to plot.
results_df : Optional["pandas.DataFrame"]
Optional results_df to plot.
If not provided, defaults to exp_data.results_df property.
objectives : Optional[Dict[str, Literal["min", "max"]]]
Optional objectives to plot.
If not provided, defaults to exp_data.objectives property.
with_scatter_plot : bool
Whether to also add scatter plot to the output figure.
kwargs : dict
Remaining keyword arguments are passed along to the limit_top_n_configs function.
"""
(results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
if "results_df" not in top_n_config_args:
top_n_config_args["results_df"] = results_df
if "objectives" not in top_n_config_args:
top_n_config_args["objectives"] = objectives
(top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(exp_data=exp_data, **top_n_config_args)

(top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(top_n_config_results_df)
top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

for (orderby_col, ascending) in orderby_cols.items():
opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
(_fig, axis) = plt.subplots()
sns.violinplot(
data=top_n_config_results_df,
x=groupby_column,
y=orderby_col,
ax=axis,
)
if with_scatter_plot:
sns.scatterplot(
data=top_n_config_results_df,
x=groupby_column,
y=orderby_col,
legend=None,
ax=axis,
)
plt.grid()
(xticks, xlabels) = plt.xticks()
# default should be in the first position based on top_n_configs() return
xlabels[0] = "default"  # type: ignore[call-overload]
plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
plt.xlabel("Config Trial Group, Config ID")
plt.xticks(rotation=90)
plt.ylabel(opt_tgt)
plt.yscale('log')
extra_title = "(lower is better)" if ascending else "(lower is better)"
plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
plt.show()  # type: ignore[no-untyped-call]
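The helpers added above can also be used on their own; a minimal sketch (not part of the commit) of calling them directly, again assuming `exp_data` is an `ExperimentData` instance loaded from the mlos_bench storage layer:

```python
from mlos_viz.base import (
    augment_results_df_with_config_trial_group_stats,
    limit_top_n_configs,
)

# Append per config-trial-group stats columns (e.g., "result.score.p50",
# "result.score.var_zscore") alongside the original "result.*" columns.
augmented_df = augment_results_df_with_config_trial_group_stats(exp_data=exp_data)

# Keep only the 5 best config trial groups (plus the default config), ranked by their mean score.
(top_df, top_config_ids, orderby_cols) = limit_top_n_configs(
    exp_data=exp_data,
    top_n_configs=5,
    method="mean",
)
print(top_config_ids, orderby_cols)
```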
@@ -5,14 +5,22 @@
"""
Small wrapper functions for dabl plotting functions via mlos_bench data.
"""
from typing import Dict, Optional, Literal

import warnings

import dabl
import pandas

from mlos_bench.storage.base_experiment_data import ExperimentData

from mlos_viz.util import expand_results_data_args


def plot(exp_data: ExperimentData) -> None:
def plot(exp_data: Optional[ExperimentData] = None, *,
results_df: Optional[pandas.DataFrame] = None,
objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> None:
"""
Plots the Experiment results data using dabl.

@@ -20,20 +28,35 @@ def plot(exp_data: ExperimentData) -> None:
----------
exp_data : ExperimentData
The ExperimentData (e.g., obtained from the storage layer) to plot.
results_df : Optional["pandas.DataFrame"]
Optional results_df to plot.
If not provided, defaults to exp_data.results_df property.
objectives : Optional[Dict[str, Literal["min", "max"]]]
Optional objectives to plot.
If not provided, defaults to exp_data.objectives property.
"""
for objective in exp_data.objectives:
objective_column = ExperimentData.RESULT_COLUMN_PREFIX + objective
dabl.plot(exp_data.results_df, objective_column)
(results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
for obj_col in obj_cols:
dabl.plot(X=results_df, target_col=obj_col)


def ignore_plotter_warnings() -> None:
"""
Add some filters to ignore warnings from the plotter.
"""
# pylint: disable=import-outside-toplevel
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", module="dabl", category=UserWarning, message="Could not infer format")
warnings.filterwarnings("ignore", module="dabl", category=UserWarning, message="(Dropped|Discarding) .* outliers")
warnings.filterwarnings("ignore", module="dabl", category=UserWarning, message="Not plotting highly correlated")
warnings.filterwarnings("ignore", module="dabl", category=UserWarning,
message="Missing values in target_col have been removed for regression")
from sklearn.exceptions import UndefinedMetricWarning  # pylint: disable=import-outside-toplevel
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", module="sklearn", category=UndefinedMetricWarning, message="Recall is ill-defined")
warnings.filterwarnings("ignore", category=DeprecationWarning,
message="is_categorical_dtype is deprecated and will be removed in a future version.")
warnings.filterwarnings("ignore", category=DeprecationWarning, module="sklearn",
message="is_sparse is deprecated and will be removed in a future version.")
from matplotlib._api.deprecation import MatplotlibDeprecationWarning
warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning, module="dabl",
message="The legendHandles attribute was deprecated in Matplotlib 3.7 and will be removed")
@@ -0,0 +1,9 @@
# `mlos-viz` tests

For now we only check plotting via running the core APIs with `DISPLAY` disabled and potentially via basic mocking of the underlying libraries.

In the future we may want to consider adding more full fledge testing and check infra for the graphs produced.

## See Also

- [How can I write unit tests against code that uses matplotlib?](https://stackoverflow.com/questions/27948126/how-can-i-write-unit-tests-against-code-that-uses-matplotlib)
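A minimal sketch (an assumption, not the commit's own test setup) of how the `DISPLAY`-disabled half of that approach can be enforced, by selecting a non-interactive matplotlib backend before the plotting code imports pyplot:

```python
# Hypothetical test bootstrap: force a headless matplotlib backend so the
# plotting APIs can run under pytest/CI without an X display.
import matplotlib

matplotlib.use("Agg")       # must run before pyplot gets imported by the code under test

from mlos_viz import plot   # noqa: E402  (imported after backend selection on purpose)
```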
@@ -0,0 +1,19 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Unit tests for mlos_viz.
"""

import sys

import seaborn  # pylint: disable=unused-import  # (used by patch) # noqa: unused


BASE_MATPLOTLIB_SHOW_PATCH = "mlos_viz.base.plt.show"

if sys.version_info >= (3, 11):
SEABORN_BOXPLOT_PATCH = "dabl.plot.supervised.sns.boxplot"
else:
SEABORN_BOXPLOT_PATCH = "seaborn.boxplot"
@@ -0,0 +1,20 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Export test fixtures for mlos_viz.
"""

from mlos_bench.tests import tunable_groups_fixtures
from mlos_bench.tests.storage.sql import fixtures as sql_storage_fixtures

# Expose some of those as local names so they can be picked up as fixtures by pytest.

storage = sql_storage_fixtures.storage
exp_storage = sql_storage_fixtures.exp_storage
exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials
exp_data = sql_storage_fixtures.exp_data

tunable_groups_config = tunable_groups_fixtures.tunable_groups_config
tunable_groups = tunable_groups_fixtures.tunable_groups
@@ -0,0 +1,41 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Unit tests for mlos_viz.
"""

import warnings

from unittest.mock import patch, Mock

from mlos_bench.storage.base_experiment_data import ExperimentData

from mlos_viz.base import ignore_plotter_warnings, plot_optimizer_trends, plot_top_n_configs

from mlos_viz.tests import BASE_MATPLOTLIB_SHOW_PATCH


@patch(BASE_MATPLOTLIB_SHOW_PATCH)
def test_plot_optimizer_trends(mock_show: Mock, exp_data: ExperimentData) -> None:
"""Tests plotting optimizer trends."""
# For now, just ensure that no errors are thrown.
# TODO: Check that a plot was actually produced matching our specifications.
with warnings.catch_warnings():
warnings.simplefilter("error")
ignore_plotter_warnings()
plot_optimizer_trends(exp_data)
assert mock_show.call_count == 1


@patch(BASE_MATPLOTLIB_SHOW_PATCH)
def test_plot_top_n_configs(mock_show: Mock, exp_data: ExperimentData) -> None:
"""Tests plotting top N configs."""
# For now, just ensure that no errors are thrown.
# TODO: Check that a plot was actually produced matching our specifications.
with warnings.catch_warnings():
warnings.simplefilter("error")
ignore_plotter_warnings()
plot_top_n_configs(exp_data)
assert mock_show.call_count == 1
@@ -6,8 +6,24 @@
Unit tests for mlos_viz.dabl.plot.
"""

import warnings

def test_placeholder() -> None:
"""Placeholder test."""
# TODO: Remove this and implement real tests for mlos_viz.plot()
# See Also: https://stackoverflow.com/questions/27948126/how-can-i-write-unit-tests-against-code-that-uses-matplotlib
from unittest.mock import patch, Mock

from mlos_bench.storage.base_experiment_data import ExperimentData

from mlos_viz import dabl

from mlos_viz.tests import SEABORN_BOXPLOT_PATCH


@patch(SEABORN_BOXPLOT_PATCH, create=True)
def test_dabl_plot(mock_boxplot: Mock, exp_data: ExperimentData) -> None:
"""Tests plotting via dabl."""
# For now, just ensure that no errors are thrown.
# TODO: Check that a plot was actually produced matching our specifications.
with warnings.catch_warnings():
warnings.simplefilter("error")
dabl.ignore_plotter_warnings()
dabl.plot(exp_data)
assert mock_boxplot.call_count >= 1
@@ -0,0 +1,37 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Unit tests for mlos_viz.
"""

import random
import warnings

from unittest.mock import patch, Mock

from mlos_bench.storage.base_experiment_data import ExperimentData

from mlos_viz import MlosVizMethod, plot

from mlos_viz.tests import BASE_MATPLOTLIB_SHOW_PATCH, SEABORN_BOXPLOT_PATCH


def test_auto_method_type() -> None:
"""Ensure the AUTO method is what we expect."""
assert MlosVizMethod.AUTO.value == MlosVizMethod.DABL.value


@patch(BASE_MATPLOTLIB_SHOW_PATCH)
@patch(SEABORN_BOXPLOT_PATCH)
def test_plot(mock_show: Mock, mock_boxplot: Mock, exp_data: ExperimentData) -> None:
"""Tests core plot() API."""
# For now, just ensure that no errors are thrown.
# TODO: Check that a plot was actually produced matching our specifications.
with warnings.catch_warnings():
warnings.simplefilter("error")
random.seed(42)
plot(exp_data, filter_warnings=True)
assert mock_show.call_count >= 2  # from the two base plots and anything dabl did
assert mock_boxplot.call_count >= 1  # from anything dabl did
@@ -0,0 +1,67 @@
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Utility functions for manipulating experiment results data.
"""
from typing import Dict, Literal, Optional, Tuple

import pandas

from mlos_bench.storage.base_experiment_data import ExperimentData


def expand_results_data_args(
exp_data: Optional[ExperimentData] = None,
results_df: Optional[pandas.DataFrame] = None,
objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> Tuple[pandas.DataFrame, Dict[str, bool]]:
"""
Expands some common arguments for working with results data.

Used by mlos_viz as well.

Parameters
----------
exp_data : Optional[ExperimentData], optional
ExperimentData to operate on.
results_df : Optional[pandas.DataFrame], optional
Optional results_df argument.
Defaults to exp_data.results_df property.
objectives : Optional[Dict[str, Literal["min", "max"]]], optional
Optional objectives set to operate on.
Defaults to exp_data.objectives property.

Returns
-------
Tuple[pandas.DataFrame, Dict[str, bool]]
The results dataframe and the objectives columns in the dataframe, plus whether or not they are in ascending order.
"""
# Prepare the orderby columns.
if results_df is None:
if exp_data is None:
raise ValueError("Must provide either exp_data or both results_df and objectives.")
results_df = exp_data.results_df

if objectives is None:
if exp_data is None:
raise ValueError("Must provide either exp_data or both results_df and objectives.")
objectives = exp_data.objectives
objs_cols: Dict[str, bool] = {}
for (opt_tgt, opt_dir) in objectives.items():
if opt_dir not in ["min", "max"]:
raise ValueError(f"Unexpected optimization direction for target {opt_tgt}: {opt_dir}")
ascending = opt_dir == "min"
if opt_tgt.startswith(ExperimentData.RESULT_COLUMN_PREFIX) and opt_tgt in results_df.columns:
objs_cols[opt_tgt] = ascending
elif ExperimentData.RESULT_COLUMN_PREFIX + opt_tgt in results_df.columns:
objs_cols[ExperimentData.RESULT_COLUMN_PREFIX + opt_tgt] = ascending
else:
raise UserWarning(f"{opt_tgt} is not a result column for experiment {exp_data}")
# Note: these copies are important to avoid issues with downstream consumers.
# It is more efficient to copy the dataframe than to go back to the original data source.
# TODO: However, it should be possible to later fixup the downstream consumers
# (which are currently still internal to mlos-viz) to make their own data
# sources if necessary. That will of course need tests.
return (results_df.copy(), objs_cols.copy())