set black with 120 line length (#975)

* set black with 120 line length

* apply pre-commit

* apply black
Jirka Borovec 2023-04-10 21:50:40 +02:00 committed by GitHub
Parent ef5a17cd83
Commit a701cd82f8
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
97 changed files: 809 additions and 2696 deletions
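For reference, the two steps in the commit message correspond roughly to the following commands (a sketch, assuming black and pre-commit are installed locally, with black pinned to the rev used in the config below):

    pip install pre-commit black==23.1.0
    pre-commit run --all-files       # "apply pre-commit": run all configured hooks over the repo
    black --line-length 120 .        # "apply black": reformat sources to the 120-character limit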

View file

@ -11,6 +11,7 @@ repos:
rev: 23.1.0
hooks:
- id: black
args: ["--line-length=120"]
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
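Since the hunk shows only the changed line plus context, the complete black entry in .pre-commit-config.yaml would look roughly like this (the psf/black repo URL is an assumption; that line sits outside the hunk):

    repos:
      - repo: https://github.com/psf/black   # assumed; not visible in the hunk
        rev: 23.1.0
        hooks:
          - id: black
            args: ["--line-length=120"]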

View file

@ -27,9 +27,7 @@ def execute_code(code: str, max_exec_time: Optional[int] = 3):
return int(result.returncode == 0)
def generate_assertions(
definition: str, model: Optional[str] = "gpt-3.5-turbo"
) -> Tuple[str, float]:
def generate_assertions(definition: str, model: Optional[str] = "gpt-3.5-turbo") -> Tuple[str, float]:
"""Generate assertions for a function.
Args:
@ -112,9 +110,7 @@ def eval_function_completions(
for i in range(n):
response = responses[i] = _remove_check(responses[i])
code = (
f"{response}\n{assertions}"
if response.startswith("def")
else f"{definition}{response}\n{assertions}"
f"{response}\n{assertions}" if response.startswith("def") else f"{definition}{response}\n{assertions}"
)
succeed_assertions = execute_code(code)
if succeed_assertions:
@ -149,9 +145,7 @@ def eval_function_completions(
def implement(
definition: str,
configs: List[Dict],
assertions: Optional[
Union[str, Callable[[str], Tuple[str, float]]]
] = generate_assertions,
assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = generate_assertions,
) -> Tuple[str, float]:
"""Implement a function from a definition.
@ -172,9 +166,7 @@ def implement(
response = oai.Completion.create({"definition": definition}, **config)
cost += oai.Completion.cost(config["model"], response)
responses = oai.Completion.extract_text(response)
metrics = eval_function_completions(
responses, definition, assertions=assertions
)
metrics = eval_function_completions(responses, definition, assertions=assertions)
assertions = metrics["assertions"]
cost += metrics["gen_cost"]
if metrics["succeed_assertions"] or i == len(configs) - 1:

View file

@ -21,9 +21,7 @@ try:
ERROR = None
except ImportError:
ERROR = ImportError(
"please install flaml[openai] option to use the flaml.oai subpackage."
)
ERROR = ImportError("please install flaml[openai] option to use the flaml.oai subpackage.")
logger = logging.getLogger(__name__)
if not logger.handlers:
# Add the console handler.
@ -142,17 +140,11 @@ class Completion:
if response is not None and (response != -1 or not eval_only):
# print("using cached response")
return response
openai_completion = (
openai.ChatCompletion
if config["model"] in cls.chat_models
else openai.Completion
)
openai_completion = openai.ChatCompletion if config["model"] in cls.chat_models else openai.Completion
start_time = time.time()
while True:
try:
response = openai_completion.create(
request_timeout=cls.request_timeout, **config
)
response = openai_completion.create(request_timeout=cls.request_timeout, **config)
cls._cache.set(key, response)
return response
except (
@ -176,9 +168,7 @@ class Completion:
if "azure" == openai.api_type and "model" in config:
# azure api uses "engine" instead of "model"
config = config.copy()
config["engine"] = config.pop("model").replace(
"gpt-3.5-turbo", "gpt-35-turbo"
)
config["engine"] = config.pop("model").replace("gpt-3.5-turbo", "gpt-35-turbo")
else:
raise
logger.warning(
@ -193,11 +183,7 @@ class Completion:
# find the max value in max_valid_n_per_max_tokens
# whose key is equal or larger than max_tokens
return max(
(
value
for k, value in cls._max_valid_n_per_max_tokens.get(key, {}).items()
if k >= max_tokens
),
(value for k, value in cls._max_valid_n_per_max_tokens.get(key, {}).items() if k >= max_tokens),
default=1,
)
@ -206,11 +192,7 @@ class Completion:
# find the min value in min_invalid_n_per_max_tokens
# whose key is equal or smaller than max_tokens
return min(
(
value
for k, value in cls._min_invalid_n_per_max_tokens.get(key, {}).items()
if k <= max_tokens
),
(value for k, value in cls._min_invalid_n_per_max_tokens.get(key, {}).items() if k <= max_tokens),
default=None,
)
@ -227,12 +209,10 @@ class Completion:
def _update_invalid_n(cls, prune, region_key, max_tokens, num_completions):
if prune:
# update invalid n and prune this config
cls._min_invalid_n_per_max_tokens[
region_key
] = invalid_n = cls._min_invalid_n_per_max_tokens.get(region_key, {})
invalid_n[max_tokens] = min(
num_completions, invalid_n.get(max_tokens, np.inf)
cls._min_invalid_n_per_max_tokens[region_key] = invalid_n = cls._min_invalid_n_per_max_tokens.get(
region_key, {}
)
invalid_n[max_tokens] = min(num_completions, invalid_n.get(max_tokens, np.inf))
@classmethod
def _pop_subspace(cls, config):
@ -280,16 +260,12 @@ class Completion:
model = config["model"]
data_length = len(data)
price = cls.price1K.get(model)
price_input, price_output = (
price if isinstance(price, tuple) else (price, price)
)
price_input, price_output = price if isinstance(price, tuple) else (price, price)
inference_budget = getattr(cls, "inference_budget", None)
prune_hp = getattr(cls, "_prune_hp", "n")
metric = cls._metric
config_n = config.get(prune_hp, 1) # default value in OpenAI is 1
max_tokens = config.get(
"max_tokens", np.inf if model in cls.chat_models else 16
)
max_tokens = config.get("max_tokens", np.inf if model in cls.chat_models else 16)
prompt, messages = cls._get_prompt_messages_from_config(model, config)
stop = cls._stops and cls._stops[config["stop"]]
target_output_tokens = None
@ -300,9 +276,7 @@ class Completion:
region_key = cls._get_region_key(config)
max_valid_n = cls._get_max_valid_n(region_key, max_tokens)
if cls.avg_input_tokens:
target_output_tokens = (
inference_budget * 1000 - cls.avg_input_tokens * price_input
) / price_output
target_output_tokens = (inference_budget * 1000 - cls.avg_input_tokens * price_input) / price_output
# max_tokens bounds the maximum tokens
# so using it we can calculate a valid n according to the avg # input tokens
max_valid_n = max(
@ -340,16 +314,12 @@ class Completion:
while True: # data_limit <= data_length
# limit the number of data points to avoid rate limit
for i in range(prev_data_limit, data_limit):
logger.debug(
f"num_completions={num_completions}, data instance={i}"
)
logger.debug(f"num_completions={num_completions}, data instance={i}")
data_i = data[i]
params = cls._construct_params(data_i, params, prompt, messages)
response = cls._get_response(params, eval_only)
if response == -1: # rate limit error, treat as invalid
cls._update_invalid_n(
prune, region_key, max_tokens, num_completions
)
cls._update_invalid_n(prune, region_key, max_tokens, num_completions)
result[metric] = 0
result["cost"] = cost
return result
@ -361,16 +331,10 @@ class Completion:
if not cls.avg_input_tokens and not input_tokens[i]:
# store the # input tokens
input_tokens[i] = n_input_tokens
query_cost = (
price_input * n_input_tokens + price_output * n_output_tokens
) / 1000
query_cost = (price_input * n_input_tokens + price_output * n_output_tokens) / 1000
cls._total_cost += query_cost
cost += query_cost
if (
cls.optimization_budget
and cls._total_cost >= cls.optimization_budget
and not eval_only
):
if cls.optimization_budget and cls._total_cost >= cls.optimization_budget and not eval_only:
# limit the total tuning cost
return {
metric: 0,
@ -393,14 +357,8 @@ class Completion:
)
# Hoeffding-Serfling bound
ratio = 0.1 * np.sqrt(rho / data_limit)
if (
target_output_tokens
and avg_n_tokens > target_output_tokens * (1 + ratio)
and not eval_only
):
cls._update_invalid_n(
prune, region_key, max_tokens, num_completions
)
if target_output_tokens and avg_n_tokens > target_output_tokens * (1 + ratio) and not eval_only:
cls._update_invalid_n(prune, region_key, max_tokens, num_completions)
result[metric] = 0
result["total_cost"] = cls._total_cost
result["cost"] = cost
@ -409,19 +367,13 @@ class Completion:
prune
and target_output_tokens
and avg_n_tokens <= target_output_tokens * (1 - ratio)
and (
num_completions < config_n
or num_completions == config_n
and data_limit == data_length
)
and (num_completions < config_n or num_completions == config_n and data_limit == data_length)
):
# update valid n
cls._max_valid_n_per_max_tokens[
region_key
] = valid_n = cls._max_valid_n_per_max_tokens.get(region_key, {})
valid_n[max_tokens] = max(
num_completions, valid_n.get(max_tokens, 0)
cls._max_valid_n_per_max_tokens[region_key] = valid_n = cls._max_valid_n_per_max_tokens.get(
region_key, {}
)
valid_n[max_tokens] = max(num_completions, valid_n.get(max_tokens, 0))
if num_completions < config_n:
# valid already, skip the rest of the data
data_limit = data_length
@ -455,9 +407,7 @@ class Completion:
target_output_tokens = (
inference_budget * 1000 - cls.avg_input_tokens * price_input
) / price_output
result["inference_cost"] = (
avg_n_tokens * price_output + cls.avg_input_tokens * price_input
) / 1000
result["inference_cost"] = (avg_n_tokens * price_output + cls.avg_input_tokens * price_input) / 1000
break
else:
if data_early_stop:
@ -552,9 +502,7 @@ class Completion:
space.pop("temperature_or_top_p")
space["temperature"] = temperature
space["top_p"] = top_p
logger.warning(
"temperature and top_p are not recommended to vary together."
)
logger.warning("temperature and top_p are not recommended to vary together.")
cls._max_valid_n_per_max_tokens, cls._min_invalid_n_per_max_tokens = {}, {}
cls.optimization_budget = optimization_budget
cls.inference_budget = inference_budget
@ -569,12 +517,8 @@ class Completion:
cls._messages = [cls._messages]
space["messages"] = tune.choice(list(range(len(cls._messages))))
else:
assert (
space.get("messages") is None
), "messages and prompt cannot be provided at the same time."
assert isinstance(
cls._prompts, (str, list)
), "prompt must be a string or a list of strings."
assert space.get("messages") is None, "messages and prompt cannot be provided at the same time."
assert isinstance(cls._prompts, (str, list)), "prompt must be a string or a list of strings."
if isinstance(cls._prompts, str):
cls._prompts = [cls._prompts]
space["prompt"] = tune.choice(list(range(len(cls._prompts))))
@ -703,9 +647,7 @@ class Completion:
# or "messages" should be in config (for tuning chat models only)
if prompt is None and model in cls.chat_models:
if messages is None:
raise ValueError(
"Either prompt or messages should be in config for chat models."
)
raise ValueError("Either prompt or messages should be in config for chat models.")
if prompt is None:
params["messages"] = [
{
@ -725,18 +667,12 @@ class Completion:
params["messages"] = [
{
"role": "user",
"content": prompt_msg
if isinstance(prompt, str)
else prompt(data_instance),
"content": prompt_msg if isinstance(prompt, str) else prompt(data_instance),
},
]
params.pop("prompt", None)
else:
params["prompt"] = (
prompt.format(**data_instance)
if isinstance(prompt, str)
else prompt(data_instance)
)
params["prompt"] = prompt.format(**data_instance) if isinstance(prompt, str) else prompt(data_instance)
return params
@classmethod
@ -855,9 +791,7 @@ class Completion:
elif isinstance(agg_method, dict):
for key in metric_keys:
metric_agg_method = agg_method[key]
assert callable(
metric_agg_method
), "please provide a callable for each metric"
assert callable(metric_agg_method), "please provide a callable for each metric"
result_agg[key] = metric_agg_method([r[key] for r in result_list])
else:
raise ValueError(

View file

@ -387,15 +387,11 @@ class AutoML(BaseEstimator):
settings["free_mem_ratio"] = settings.get("free_mem_ratio", 0)
settings["metric_constraints"] = settings.get("metric_constraints", [])
settings["cv_score_agg_func"] = settings.get("cv_score_agg_func", None)
settings["fit_kwargs_by_estimator"] = settings.get(
"fit_kwargs_by_estimator", {}
)
settings["fit_kwargs_by_estimator"] = settings.get("fit_kwargs_by_estimator", {})
settings["custom_hp"] = settings.get("custom_hp", {})
settings["skip_transform"] = settings.get("skip_transform", False)
self._estimator_type = (
"classifier" if settings["task"] in CLASSIFICATION else "regressor"
)
self._estimator_type = "classifier" if settings["task"] in CLASSIFICATION else "regressor"
def get_params(self, deep: bool = False) -> dict:
return self._settings.copy()
@ -452,18 +448,14 @@ class AutoML(BaseEstimator):
def best_config_per_estimator(self):
"""A dictionary of all estimators' best configuration."""
return {
e: e_search_state.best_config
and AutoMLState.sanitize(e_search_state.best_config)
e: e_search_state.best_config and AutoMLState.sanitize(e_search_state.best_config)
for e, e_search_state in self._search_states.items()
}
@property
def best_loss_per_estimator(self):
"""A dictionary of all estimators' best loss."""
return {
e: e_search_state.best_loss
for e, e_search_state in self._search_states.items()
}
return {e: e_search_state.best_loss for e, e_search_state in self._search_states.items()}
@property
def best_loss(self):
@ -482,16 +474,12 @@ class AutoML(BaseEstimator):
associated with the best config. These two objects correspond to the returned
objects by the customized metric function for the config with the best loss."""
state = self._search_states.get(self._best_estimator)
return self._state.best_loss, state and getattr(state, "best_result", {}).get(
"metric_for_logging"
)
return self._state.best_loss, state and getattr(state, "best_result", {}).get("metric_for_logging")
@property
def best_config_train_time(self):
"""A float of the seconds taken by training the best config."""
return getattr(
self._search_states[self._best_estimator], "best_config_train_time", None
)
return getattr(self._search_states[self._best_estimator], "best_config_train_time", None)
def save_best_config(self, filename):
best = {
@ -544,9 +532,7 @@ class AutoML(BaseEstimator):
):
estimator = getattr(self, "_trained_estimator", None)
if estimator is None:
logger.warning(
"No estimator is trained. Please run fit with enough budget."
)
logger.warning("No estimator is trained. Please run fit with enough budget.")
return None
X = self._state.task.preprocess(X, self._transformer)
if self._label_transformer:
@ -588,23 +574,15 @@ class AutoML(BaseEstimator):
"""
estimator = getattr(self, "_trained_estimator", None)
if estimator is None:
logger.warning(
"No estimator is trained. Please run fit with enough budget."
)
logger.warning("No estimator is trained. Please run fit with enough budget.")
return None
X = self._state.task.preprocess(X, self._transformer)
y_pred = estimator.predict(X, **pred_kwargs)
if (
isinstance(y_pred, np.ndarray)
and y_pred.ndim > 1
and isinstance(y_pred, np.ndarray)
):
if isinstance(y_pred, np.ndarray) and y_pred.ndim > 1 and isinstance(y_pred, np.ndarray):
y_pred = y_pred.flatten()
if self._label_transformer:
return self._label_transformer.inverse_transform(
pd.Series(y_pred.astype(int))
)
return self._label_transformer.inverse_transform(pd.Series(y_pred.astype(int)))
else:
return y_pred
@ -623,9 +601,7 @@ class AutoML(BaseEstimator):
"""
estimator = getattr(self, "_trained_estimator", None)
if estimator is None:
logger.warning(
"No estimator is trained. Please run fit with enough budget."
)
logger.warning("No estimator is trained. Please run fit with enough budget.")
return None
X = self._state.task.preprocess(X, self._transformer)
proba = self._trained_estimator.predict_proba(X, **pred_kwargs)
@ -640,9 +616,7 @@ class AutoML(BaseEstimator):
"""
self._state.learner_classes[learner_name] = learner_class
def get_estimator_from_log(
self, log_file_name: str, record_id: int, task: Union[str, Task]
):
def get_estimator_from_log(self, log_file_name: str, record_id: int, task: Union[str, Task]):
"""Get the estimator from log file.
Args:
@ -820,30 +794,18 @@ class AutoML(BaseEstimator):
split_ratio = split_ratio or self._settings.get("split_ratio")
n_splits = n_splits or self._settings.get("n_splits")
split_type = split_type or self._settings.get("split_type")
auto_augment = (
self._settings.get("auto_augment") if auto_augment is None else auto_augment
)
auto_augment = self._settings.get("auto_augment") if auto_augment is None else auto_augment
self._state.task = task
self._estimator_type = "classifier" if task.is_classification() else "regressor"
self._state.fit_kwargs = fit_kwargs
self._state.custom_hp = custom_hp or self._settings.get("custom_hp")
self._skip_transform = (
self._settings.get("skip_transform")
if skip_transform is None
else skip_transform
)
self._state.fit_kwargs_by_estimator = (
fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator")
)
self._skip_transform = self._settings.get("skip_transform") if skip_transform is None else skip_transform
self._state.fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator")
self.preserve_checkpoint = (
self._settings.get("preserve_checkpoint")
if preserve_checkpoint is None
else preserve_checkpoint
)
task.validate_data(
self, self._state, X_train, y_train, dataframe, label, groups=groups
self._settings.get("preserve_checkpoint") if preserve_checkpoint is None else preserve_checkpoint
)
task.validate_data(self, self._state, X_train, y_train, dataframe, label, groups=groups)
logger.info("log file name {}".format(log_file_name))
@ -877,9 +839,7 @@ class AutoML(BaseEstimator):
best_val_loss = val_loss
sample_size = size
if not training_duration:
logger.warning(
f"No estimator found within time_budget={time_budget}"
)
logger.warning(f"No estimator found within time_budget={time_budget}")
from .model import BaseEstimator as Estimator
self._trained_estimator = Estimator()
@ -901,9 +861,7 @@ class AutoML(BaseEstimator):
self._state.fit_kwargs_by_estimator[best_estimator] = self._state.fit_kwargs
logger.info(
"estimator = {}, config = {}, #training instances = {}".format(
best_estimator, best_config, sample_size
)
"estimator = {}, config = {}, #training instances = {}".format(best_estimator, best_config, sample_size)
)
# Partially copied from fit() function
# Initialize some attributes required for retrain_from_log
@ -944,9 +902,7 @@ class AutoML(BaseEstimator):
"auto",
"cv",
], "eval_method must be 'auto' or 'cv' for custom data splitter."
assert (
self._state.X_val is None
), "custom splitter and custom validation data can't be used together."
assert self._state.X_val is None, "custom splitter and custom validation data can't be used together."
return "cv"
if self._state.X_val is not None:
assert eval_method in [
@ -1051,10 +1007,7 @@ class AutoML(BaseEstimator):
c = self._search_states[estimator].cat_hp_cost
configs.append(c)
configs.append(
[
self._state.learner_classes.get(estimator).cost_relative2lgbm()
for estimator in self.estimator_list
]
[self._state.learner_classes.get(estimator).cost_relative2lgbm() for estimator in self.estimator_list]
)
config = {"ml": configs}
return config
@ -1112,9 +1065,7 @@ class AutoML(BaseEstimator):
for estimator in self.estimator_list:
search_state = self._search_states[estimator]
if hasattr(search_state, "training_function"):
estimator_to_training_function[
estimator
] = search_state.training_function
estimator_to_training_function[estimator] = search_state.training_function
del search_state.training_function
with open(output_file_name, "wb") as f:
@ -1577,74 +1528,44 @@ class AutoML(BaseEstimator):
eval_method = eval_method or self._settings.get("eval_method")
split_ratio = split_ratio or self._settings.get("split_ratio")
n_splits = n_splits or self._settings.get("n_splits")
auto_augment = (
self._settings.get("auto_augment") if auto_augment is None else auto_augment
)
auto_augment = self._settings.get("auto_augment") if auto_augment is None else auto_augment
metric = metric or self._settings.get("metric")
estimator_list = estimator_list or self._settings.get("estimator_list")
log_file_name = (
self._settings.get("log_file_name")
if log_file_name is None
else log_file_name
)
log_file_name = self._settings.get("log_file_name") if log_file_name is None else log_file_name
max_iter = self._settings.get("max_iter") if max_iter is None else max_iter
sample_is_none = sample is None
if sample_is_none:
sample = self._settings.get("sample")
ensemble = self._settings.get("ensemble") if ensemble is None else ensemble
log_type = log_type or self._settings.get("log_type")
model_history = (
self._settings.get("model_history")
if model_history is None
else model_history
)
model_history = self._settings.get("model_history") if model_history is None else model_history
log_training_metric = (
self._settings.get("log_training_metric")
if log_training_metric is None
else log_training_metric
self._settings.get("log_training_metric") if log_training_metric is None else log_training_metric
)
mem_thres = mem_thres or self._settings.get("mem_thres")
pred_time_limit = pred_time_limit or self._settings.get("pred_time_limit")
train_time_limit = train_time_limit or self._settings.get("train_time_limit")
self._metric_constraints = metric_constraints or self._settings.get(
"metric_constraints"
)
self._metric_constraints = metric_constraints or self._settings.get("metric_constraints")
if np.isfinite(pred_time_limit):
self._metric_constraints.append(("pred_time", "<=", pred_time_limit))
verbose = self._settings.get("verbose") if verbose is None else verbose
retrain_full = (
self._settings.get("retrain_full") if retrain_full is None else retrain_full
)
retrain_full = self._settings.get("retrain_full") if retrain_full is None else retrain_full
split_type = split_type or self._settings.get("split_type")
hpo_method = hpo_method or self._settings.get("hpo_method")
learner_selector = learner_selector or self._settings.get("learner_selector")
no_starting_points = starting_points is None
if no_starting_points:
starting_points = self._settings.get("starting_points")
n_concurrent_trials = n_concurrent_trials or self._settings.get(
"n_concurrent_trials"
)
keep_search_state = (
self._settings.get("keep_search_state")
if keep_search_state is None
else keep_search_state
)
n_concurrent_trials = n_concurrent_trials or self._settings.get("n_concurrent_trials")
keep_search_state = self._settings.get("keep_search_state") if keep_search_state is None else keep_search_state
self.preserve_checkpoint = (
self._settings.get("preserve_checkpoint")
if preserve_checkpoint is None
else preserve_checkpoint
)
early_stop = (
self._settings.get("early_stop") if early_stop is None else early_stop
)
force_cancel = (
self._settings.get("force_cancel") if force_cancel is None else force_cancel
self._settings.get("preserve_checkpoint") if preserve_checkpoint is None else preserve_checkpoint
)
early_stop = self._settings.get("early_stop") if early_stop is None else early_stop
force_cancel = self._settings.get("force_cancel") if force_cancel is None else force_cancel
# no search budget is provided?
no_budget = time_budget < 0 and max_iter is None and not early_stop
append_log = (
self._settings.get("append_log") if append_log is None else append_log
)
append_log = self._settings.get("append_log") if append_log is None else append_log
min_sample_size = min_sample_size or self._settings.get("min_sample_size")
use_ray = self._settings.get("use_ray") if use_ray is None else use_ray
use_spark = self._settings.get("use_spark") if use_spark is None else use_spark
@ -1698,11 +1619,7 @@ class AutoML(BaseEstimator):
if self._use_ray is not False:
import ray
n_cpus = (
ray.is_initialized()
and ray.available_resources()["CPU"]
or os.cpu_count()
)
n_cpus = ray.is_initialized() and ray.available_resources()["CPU"] or os.cpu_count()
self._state.resources_per_trial = (
# when using gpu, default cpu is 1 per job; otherwise, default cpu is n_cpus / n_concurrent_trials
@ -1724,30 +1641,16 @@ class AutoML(BaseEstimator):
dataframe = ray.get(dataframe)
else:
# TODO: Integrate with Spark
self._state.resources_per_trial = (
{"cpu": n_jobs} if n_jobs > 0 else {"cpu": 1}
)
self._state.free_mem_ratio = (
self._settings.get("free_mem_ratio")
if free_mem_ratio is None
else free_mem_ratio
)
self._state.resources_per_trial = {"cpu": n_jobs} if n_jobs > 0 else {"cpu": 1}
self._state.free_mem_ratio = self._settings.get("free_mem_ratio") if free_mem_ratio is None else free_mem_ratio
self._state.task = task
self._state.log_training_metric = log_training_metric
self._state.fit_kwargs = fit_kwargs
custom_hp = custom_hp or self._settings.get("custom_hp")
self._skip_transform = (
self._settings.get("skip_transform")
if skip_transform is None
else skip_transform
)
fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get(
"fit_kwargs_by_estimator"
)
self._state.fit_kwargs_by_estimator = (
fit_kwargs_by_estimator.copy()
) # shallow copy of fit_kwargs_by_estimator
self._skip_transform = self._settings.get("skip_transform") if skip_transform is None else skip_transform
fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator")
self._state.fit_kwargs_by_estimator = fit_kwargs_by_estimator.copy() # shallow copy of fit_kwargs_by_estimator
self._state.weight_val = sample_weight_val
task.validate_data(
@ -1777,13 +1680,9 @@ class AutoML(BaseEstimator):
eval_method = self._decide_eval_method(eval_method, time_budget)
self._state.eval_method = eval_method
logger.info("Evaluation method: {}".format(eval_method))
self._state.cv_score_agg_func = cv_score_agg_func or self._settings.get(
"cv_score_agg_func"
)
self._state.cv_score_agg_func = cv_score_agg_func or self._settings.get("cv_score_agg_func")
self._retrain_in_budget = retrain_full == "budget" and (
eval_method == "holdout" and self._state.X_val is None
)
self._retrain_in_budget = retrain_full == "budget" and (eval_method == "holdout" and self._state.X_val is None)
self._auto_augment = auto_augment
_sample_size_from_starting_points = {}
@ -1805,9 +1704,7 @@ class AutoML(BaseEstimator):
]
)
if _sample_size_set:
_sample_size_from_starting_points[_estimator] = min(
_sample_size_set
)
_sample_size_from_starting_points[_estimator] = min(_sample_size_set)
if len(_sample_size_set) > 1:
logger.warning(
"Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format(
@ -1831,10 +1728,7 @@ class AutoML(BaseEstimator):
sample
and not task.is_rank()
and eval_method != "cv"
and (
self._min_sample_size[k] * SAMPLE_MULTIPLY_FACTOR
< self._state.data_size[0]
),
and (self._min_sample_size[k] * SAMPLE_MULTIPLY_FACTOR < self._state.data_size[0]),
)
for k in self._min_sample_size.keys()
}
@ -1843,10 +1737,7 @@ class AutoML(BaseEstimator):
sample
and not task.is_rank()
and eval_method != "cv"
and (
self._min_sample_size * SAMPLE_MULTIPLY_FACTOR
< self._state.data_size[0]
)
and (self._min_sample_size * SAMPLE_MULTIPLY_FACTOR < self._state.data_size[0])
)
metric = task.default_metric(metric)
@ -1874,10 +1765,7 @@ class AutoML(BaseEstimator):
if task.is_nlp():
from flaml.automl.ml import huggingface_metric_to_mode
if (
metric in huggingface_metric_to_mode
and huggingface_metric_to_mode[metric] == "max"
):
if metric in huggingface_metric_to_mode and huggingface_metric_to_mode[metric] == "max":
return True, f"-{metric}"
return False, None
@ -1891,9 +1779,7 @@ class AutoML(BaseEstimator):
error_metric = "customized metric"
logger.info(f"Minimizing error metric: {error_metric}")
is_spark_dataframe = isinstance(X_train, psDataFrame) or isinstance(
dataframe, psDataFrame
)
is_spark_dataframe = isinstance(X_train, psDataFrame) or isinstance(dataframe, psDataFrame)
estimator_list = task.default_estimator_list(estimator_list, is_spark_dataframe)
if is_spark_dataframe and self._use_spark:
@ -1954,9 +1840,7 @@ class AutoML(BaseEstimator):
location,
k=1,
)
starting_points[estimator_name] = [
x["hyperparameters"] for x in configs
]
starting_points[estimator_name] = [x["hyperparameters"] for x in configs]
except FileNotFoundError:
pass
try:
@ -1978,9 +1862,7 @@ class AutoML(BaseEstimator):
for estimator_name in estimator_list:
estimator_class = self._state.learner_classes[estimator_name]
estimator_class.init()
this_estimator_kwargs = self._state.fit_kwargs_by_estimator.get(
estimator_name
)
this_estimator_kwargs = self._state.fit_kwargs_by_estimator.get(estimator_name)
if this_estimator_kwargs:
# make another shallow copy of the value (a dict obj), so user's fit_kwargs_by_estimator won't be updated
this_estimator_kwargs = this_estimator_kwargs.copy()
@ -1991,9 +1873,7 @@ class AutoML(BaseEstimator):
estimator_name
] = this_estimator_kwargs # set self._state.fit_kwargs_by_estimator[estimator_name] to the update, so only self._state.fit_kwargs_by_estimator will be updated
else:
self._state.fit_kwargs_by_estimator[
estimator_name
] = self._state.fit_kwargs
self._state.fit_kwargs_by_estimator[estimator_name] = self._state.fit_kwargs
self._search_states[estimator_name] = SearchState(
learner_class=estimator_class,
@ -2004,9 +1884,7 @@ class AutoML(BaseEstimator):
"period"
), # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
custom_hp=custom_hp and custom_hp.get(estimator_name),
max_iter=max_iter / len(estimator_list)
if self._learner_selector == "roundrobin"
else max_iter,
max_iter=max_iter / len(estimator_list) if self._learner_selector == "roundrobin" else max_iter,
budget=self._state.time_budget,
)
logger.info("List of ML learners in AutoML Run: {}".format(estimator_list))
@ -2040,9 +1918,7 @@ class AutoML(BaseEstimator):
self._search()
if self._best_estimator:
logger.info("fit succeeded")
logger.info(
f"Time taken to find the best model: {self._time_taken_best_iter}"
)
logger.info(f"Time taken to find the best model: {self._time_taken_best_iter}")
if (
self._hpo_method in ("cfo", "bs")
and self._state.time_budget > 0
@ -2085,10 +1961,7 @@ class AutoML(BaseEstimator):
from ray.tune.search import ConcurrencyLimiter
import ray
except (ImportError, AssertionError):
raise ImportError(
"use_ray=True requires installation of ray. "
"Please run pip install flaml[ray]"
)
raise ImportError("use_ray=True requires installation of ray. " "Please run pip install flaml[ray]")
else:
from flaml.tune.searcher.suggestion import ConcurrencyLimiter
@ -2116,16 +1989,11 @@ class AutoML(BaseEstimator):
from flaml.tune.searcher.suggestion import OptunaSearch as SearchAlgo
else:
raise NotImplementedError(
f"hpo_method={self._hpo_method} is not recognized. "
"'auto', 'cfo' and 'bs' are supported."
f"hpo_method={self._hpo_method} is not recognized. " "'auto', 'cfo' and 'bs' are supported."
)
space = self.search_space
self._state.time_from_start = time.time() - self._start_time_flag
time_budget_s = (
self._state.time_budget - self._state.time_from_start
if self._state.time_budget >= 0
else None
)
time_budget_s = self._state.time_budget - self._state.time_from_start if self._state.time_budget >= 0 else None
if self._hpo_method != "optuna":
min_resource = self.min_resource
if isinstance(min_resource, dict):
@ -2146,9 +2014,7 @@ class AutoML(BaseEstimator):
resource_attr=self.resource_attr,
min_resource=min_resource_all_estimator,
max_resource=self.max_resource,
config_constraints=[
(partial(size, self._state.learner_classes), "<=", self._mem_thres)
],
config_constraints=[(partial(size, self._state.learner_classes), "<=", self._mem_thres)],
metric_constraints=self.metric_constraints,
seed=self._seed,
time_budget_s=time_budget_s,
@ -2171,9 +2037,7 @@ class AutoML(BaseEstimator):
search_alg = SearchAlgo(
metric="val_loss",
mode="min",
points_to_evaluate=[
p for p in new_points_to_evaluate if len(p) == len(converted_space)
],
points_to_evaluate=[p for p in new_points_to_evaluate if len(p) == len(converted_space)],
)
search_alg = ConcurrencyLimiter(search_alg, self._n_concurrent_trials)
resources_per_trial = self._state.resources_per_trial
@ -2218,8 +2082,7 @@ class AutoML(BaseEstimator):
(
trial
for trial in analysis.trials
if trial.last_result
and trial.last_result.get("wall_clock_time") is not None
if trial.last_result and trial.last_result.get("wall_clock_time") is not None
),
key=lambda x: x.last_result["wall_clock_time"],
)
@ -2272,14 +2135,10 @@ class AutoML(BaseEstimator):
if (search_state.metric_for_logging is not None) and (
"intermediate_results" in search_state.metric_for_logging
):
for each_entry in search_state.metric_for_logging[
"intermediate_results"
]:
for each_entry in search_state.metric_for_logging["intermediate_results"]:
with mlflow.start_run(nested=True):
mlflow.log_metrics(each_entry)
mlflow.log_metric(
"iter_counter", self._iter_per_learner[estimator]
)
mlflow.log_metric("iter_counter", self._iter_per_learner[estimator])
del search_state.metric_for_logging["intermediate_results"]
if search_state.metric_for_logging:
mlflow.log_metrics(search_state.metric_for_logging)
@ -2325,8 +2184,7 @@ class AutoML(BaseEstimator):
from flaml.tune.searcher.cfo_cat import CFOCat as SearchAlgo
else:
raise NotImplementedError(
f"hpo_method={self._hpo_method} is not recognized. "
"'cfo' and 'bs' are supported."
f"hpo_method={self._hpo_method} is not recognized. " "'cfo' and 'bs' are supported."
)
est_retrain_time = next_trial_time = 0
@ -2348,8 +2206,7 @@ class AutoML(BaseEstimator):
if not self._retrain_in_budget
or better
or (not self.best_estimator)
or self._search_states[self.best_estimator].sample_size
< self._state.data_size[0]
or self._search_states[self.best_estimator].sample_size < self._state.data_size[0]
else time_left - est_retrain_time
)
if not search_state.search_alg:
@ -2363,8 +2220,7 @@ class AutoML(BaseEstimator):
resource_attr = "FLAML_sample_size"
min_resource = (
self._min_sample_size[estimator]
if isinstance(self._min_sample_size, dict)
and estimator in self._min_sample_size
if isinstance(self._min_sample_size, dict) and estimator in self._min_sample_size
else self._min_sample_size_input
)
max_resource = self._state.data_size[0]
@ -2391,9 +2247,7 @@ class AutoML(BaseEstimator):
low_cost_partial_config = search_state.low_cost_partial_config
time_budget_s = (
min(budget_left, self._state.train_time_limit or np.inf)
if self._state.time_budget >= 0
else None
min(budget_left, self._state.train_time_limit or np.inf) if self._state.time_budget >= 0 else None
)
if self._hpo_method in ("bs", "cfo", "grid", "cfocat", "random"):
algo = SearchAlgo(
@ -2406,9 +2260,7 @@ class AutoML(BaseEstimator):
resource_attr=resource_attr,
min_resource=min_resource,
max_resource=max_resource,
config_constraints=[
(learner_class.size, "<=", self._mem_thres)
],
config_constraints=[(learner_class.size, "<=", self._mem_thres)],
metric_constraints=self.metric_constraints,
seed=self._seed,
allow_empty_config=True,
@ -2419,9 +2271,7 @@ class AutoML(BaseEstimator):
# if self._hpo_method is optuna, sometimes the search space and the initial config dimension do not match
# need to remove the extra keys from the search space to be consistent with the initial config
converted_space = SearchAlgo.convert_search_space(search_space)
removed_keys = set(search_space.keys()).difference(
converted_space.keys()
)
removed_keys = set(search_space.keys()).difference(converted_space.keys())
new_points_to_evaluate = []
for idx in range(len(points_to_evaluate)):
r = points_to_evaluate[idx].copy()
@ -2434,9 +2284,7 @@ class AutoML(BaseEstimator):
metric="val_loss",
mode="min",
space=search_space,
points_to_evaluate=[
p for p in points_to_evaluate if len(p) == len(search_space)
],
points_to_evaluate=[p for p in points_to_evaluate if len(p) == len(search_space)],
)
search_state.search_alg = ConcurrencyLimiter(algo, max_concurrent=1)
# search_state.search_alg = algo
@ -2467,9 +2315,7 @@ class AutoML(BaseEstimator):
eci_base = search_state.init_eci
self._eci.append(search_state.estimated_cost4improvement)
for e in self.estimator_list[1:]:
self._eci.append(
self._search_states[e].init_eci / eci_base * self._eci[0]
)
self._eci.append(self._search_states[e].init_eci / eci_base * self._eci[0])
self._estimator_index = 0
min_budget = max(10 * self._eci[0], sum(self._eci))
max_budget = 10000 * self._eci[0]
@ -2536,14 +2382,10 @@ class AutoML(BaseEstimator):
if (
self._hpo_method in ("cfo", "bs")
and all(
state.search_alg
and state.search_alg.searcher.is_ls_ever_converged
state.search_alg and state.search_alg.searcher.is_ls_ever_converged
for state in self._search_states.values()
)
and (
self._state.time_from_start
> self._warn_threshold * self._time_taken_best_iter
)
and (self._state.time_from_start > self._warn_threshold * self._time_taken_best_iter)
):
logger.warning(
"All estimator hyperparameters local search has "
@ -2566,8 +2408,7 @@ class AutoML(BaseEstimator):
and best_config_sig
and est_retrain_time
and not better
and self._search_states[self._best_estimator].sample_size
== self._state.data_size[0]
and self._search_states[self._best_estimator].sample_size == self._state.data_size[0]
and (
est_retrain_time
<= self._state.time_budget - self._state.time_from_start
@ -2580,18 +2421,11 @@ class AutoML(BaseEstimator):
state.best_config,
self.data_size_full,
)
logger.info(
"retrain {} for {:.1f}s".format(self._best_estimator, retrain_time)
)
self._retrained_config[
best_config_sig
] = state.best_config_train_time = retrain_time
logger.info("retrain {} for {:.1f}s".format(self._best_estimator, retrain_time))
self._retrained_config[best_config_sig] = state.best_config_train_time = retrain_time
est_retrain_time = 0
self._state.time_from_start = time.time() - self._start_time_flag
if (
self._state.time_from_start >= self._state.time_budget >= 0
or not self._active_estimators
):
if self._state.time_from_start >= self._state.time_budget >= 0 or not self._active_estimators:
break
if self._ensemble and self._best_estimator:
time_left = self._state.time_budget - self._state.time_from_start
@ -2636,9 +2470,7 @@ class AutoML(BaseEstimator):
self._state.time_from_start = time.time() - self._start_time_flag
if self._best_estimator:
self._selected = self._search_states[self._best_estimator]
self.modelcount = sum(
search_state.total_iter for search_state in self._search_states.values()
)
self.modelcount = sum(search_state.total_iter for search_state in self._search_states.values())
if self._trained_estimator:
logger.info(f"selected model: {self._trained_estimator.model}")
estimators = []
@ -2647,9 +2479,7 @@ class AutoML(BaseEstimator):
"multiclass",
"regression",
):
search_states = list(
x for x in self._search_states.items() if x[1].best_config
)
search_states = list(x for x in self._search_states.items() if x[1].best_config)
search_states.sort(key=lambda x: x[1].best_loss)
estimators = [
(
@ -2674,9 +2504,7 @@ class AutoML(BaseEstimator):
for x in search_states[2:]
if x[1].best_loss < 4 * self._selected.best_loss
]
logger.info(
[(estimator[0], estimator[1].params) for estimator in estimators]
)
logger.info([(estimator[0], estimator[1].params) for estimator in estimators])
if len(estimators) > 1:
if self._state.task.is_classification():
from sklearn.ensemble import StackingClassifier as Stacker
@ -2685,11 +2513,7 @@ class AutoML(BaseEstimator):
if self._use_ray is not False:
import ray
n_cpus = (
ray.is_initialized()
and ray.available_resources()["CPU"]
or os.cpu_count()
)
n_cpus = ray.is_initialized() and ray.available_resources()["CPU"] or os.cpu_count()
elif self._use_spark:
from flaml.tune.spark.utils import get_n_cpus
@ -2698,15 +2522,12 @@ class AutoML(BaseEstimator):
n_cpus = os.cpu_count()
ensemble_n_jobs = (
-self._state.n_jobs # maximize total parallelization degree
if abs(self._state.n_jobs)
== 1 # 1 and -1 correspond to min/max parallelization
if abs(self._state.n_jobs) == 1 # 1 and -1 correspond to min/max parallelization
else max(1, int(n_cpus / 2 / self._state.n_jobs))
# the total degree of parallelization = parallelization degree per estimator * parallelization degree of ensemble
)
if isinstance(self._ensemble, dict):
final_estimator = self._ensemble.get(
"final_estimator", self._trained_estimator
)
final_estimator = self._ensemble.get("final_estimator", self._trained_estimator)
passthrough = self._ensemble.get("passthrough", True)
ensemble_n_jobs = self._ensemble.get("n_jobs", ensemble_n_jobs)
else:
@ -2719,9 +2540,7 @@ class AutoML(BaseEstimator):
passthrough=passthrough,
)
sample_weight_dict = (
(self._sample_weight_full is not None)
and {"sample_weight": self._sample_weight_full}
or {}
(self._sample_weight_full is not None) and {"sample_weight": self._sample_weight_full} or {}
)
for e in estimators:
e[1].__class__.init()
@ -2776,8 +2595,7 @@ class AutoML(BaseEstimator):
or self._state.time_budget - self._state.time_from_start
> self._selected.est_retrain_time(self.data_size_full)
)
and self._selected.best_config_sample_size
== self._state.data_size[0]
and self._selected.best_config_sample_size == self._state.data_size[0]
):
state = self._search_states[self._best_estimator]
(
@ -2788,11 +2606,7 @@ class AutoML(BaseEstimator):
state.best_config,
self.data_size_full,
)
logger.info(
"retrain {} for {:.1f}s".format(
self._best_estimator, retrain_time
)
)
logger.info("retrain {} for {:.1f}s".format(self._best_estimator, retrain_time))
state.best_config_train_time = retrain_time
if self._trained_estimator:
logger.info(f"retrained model: {self._trained_estimator.model}")
@ -2827,16 +2641,12 @@ class AutoML(BaseEstimator):
self._state.time_budget >= 0
and self._search_states[estimator].time2eval_best
> self._state.time_budget - self._state.time_from_start
or self._iter_per_learner_fullsize[estimator]
>= self._max_iter_per_learner
or self._iter_per_learner_fullsize[estimator] >= self._max_iter_per_learner
):
inv.append(0)
continue
estimated_cost = search_state.estimated_cost4improvement
if (
search_state.sample_size < self._state.data_size[0]
and self._state.time_budget >= 0
):
if search_state.sample_size < self._state.data_size[0] and self._state.time_budget >= 0:
estimated_cost = min(
estimated_cost,
search_state.time2eval_best
@ -2847,12 +2657,8 @@ class AutoML(BaseEstimator):
)
gap = search_state.best_loss - self._state.best_loss
if gap > 0 and not self._ensemble:
delta_loss = (
search_state.best_loss_old - search_state.best_loss
) or search_state.best_loss
delta_time = (
search_state.total_time_used - search_state.time_best_found_old
) or 1e-10
delta_loss = (search_state.best_loss_old - search_state.best_loss) or search_state.best_loss
delta_time = (search_state.total_time_used - search_state.time_best_found_old) or 1e-10
speed = delta_loss / delta_time
if speed:
estimated_cost = max(2 * gap / speed, estimated_cost)

View file

@ -35,9 +35,7 @@ TS_TIMESTAMP_COL = "ds"
TS_VALUE_COL = "y"
def load_openml_dataset(
dataset_id, data_dir=None, random_state=0, dataset_format="dataframe"
):
def load_openml_dataset(dataset_id, data_dir=None, random_state=0, dataset_format="dataframe"):
"""Load dataset from open ML.
If the file is not cached locally, download it from open ML.
@ -77,9 +75,7 @@ def load_openml_dataset(
pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
print("Dataset name:", dataset.name)
try:
X, y, *__ = dataset.get_data(
target=dataset.default_target_attribute, dataset_format=dataset_format
)
X, y, *__ = dataset.get_data(target=dataset.default_target_attribute, dataset_format=dataset_format)
except ValueError:
from sklearn.datasets import fetch_openml
@ -267,9 +263,7 @@ def add_time_idx_col(X):
class DataTransformer:
"""Transform input training data."""
def fit_transform(
self, X: Union[DataFrame, np.ndarray], y, task: Union[str, "Task"]
):
def fit_transform(self, X: Union[DataFrame, np.ndarray], y, task: Union[str, "Task"]):
"""Fit transformer and process the input training data according to the task type.
Args:
@ -312,21 +306,13 @@ class DataTransformer:
for column in X.columns:
# sklearn\utils\validation.py needs int/float values
if X[column].dtype.name in ("object", "category"):
if (
X[column].nunique() == 1
or X[column].nunique(dropna=True)
== n - X[column].isnull().sum()
):
if X[column].nunique() == 1 or X[column].nunique(dropna=True) == n - X[column].isnull().sum():
X.drop(columns=column, inplace=True)
drop = True
elif X[column].dtype.name == "category":
current_categories = X[column].cat.categories
if "__NAN__" not in current_categories:
X[column] = (
X[column]
.cat.add_categories("__NAN__")
.fillna("__NAN__")
)
X[column] = X[column].cat.add_categories("__NAN__").fillna("__NAN__")
cat_columns.append(column)
else:
X[column] = X[column].fillna("__NAN__")
@ -349,10 +335,7 @@ class DataTransformer:
f"quarter_{column}": tmp_dt.quarter,
}
for key, value in new_columns_dict.items():
if (
key not in X.columns
and value.nunique(dropna=False) >= 2
):
if key not in X.columns and value.nunique(dropna=False) >= 2:
X[key] = value
num_columns.append(key)
X[column] = X[column].map(datetime.toordinal)
@ -368,9 +351,7 @@ class DataTransformer:
if num_columns:
X_num = X[num_columns]
if np.issubdtype(X_num.columns.dtype, np.integer) and (
drop
or min(X_num.columns) != 0
or max(X_num.columns) != X_num.shape[1] - 1
drop or min(X_num.columns) != 0 or max(X_num.columns) != X_num.shape[1] - 1
):
X_num.columns = range(X_num.shape[1])
drop = True
@ -395,11 +376,7 @@ class DataTransformer:
datetime_columns,
)
self._drop = drop
if (
task.is_classification()
or not pd.api.types.is_numeric_dtype(y)
and not task.is_nlg()
):
if task.is_classification() or not pd.api.types.is_numeric_dtype(y) and not task.is_nlg():
if not task.is_token_classification():
from sklearn.preprocessing import LabelEncoder
@ -466,9 +443,7 @@ class DataTransformer:
elif X[column].dtype.name == "category":
current_categories = X[column].cat.categories
if "__NAN__" not in current_categories:
X[column] = (
X[column].cat.add_categories("__NAN__").fillna("__NAN__")
)
X[column] = X[column].cat.add_categories("__NAN__").fillna("__NAN__")
if cat_columns:
X[cat_columns] = X[cat_columns].astype("category")
if num_columns:

View file

@ -132,9 +132,7 @@ def get_estimator_class(task: str, estimator_name: str) -> EstimatorSubclass:
if "xgboost" == estimator_name:
estimator_class = XGBoost_TS if task in TS_FORECAST else XGBoostSklearnEstimator
elif "xgb_limitdepth" == estimator_name:
estimator_class = (
XGBoostLimitDepth_TS if task in TS_FORECAST else XGBoostLimitDepthEstimator
)
estimator_class = XGBoostLimitDepth_TS if task in TS_FORECAST else XGBoostLimitDepthEstimator
elif "rf" == estimator_name:
estimator_class = RF_TS if task in TS_FORECAST else RandomForestEstimator
elif "lgbm" == estimator_name:
@ -203,40 +201,27 @@ def metric_loss_score(
try:
import datasets
datasets_metric_name = huggingface_submetric_to_metric.get(
metric_name, metric_name.split(":")[0]
)
datasets_metric_name = huggingface_submetric_to_metric.get(metric_name, metric_name.split(":")[0])
metric = datasets.load_metric(datasets_metric_name)
metric_mode = huggingface_metric_to_mode[datasets_metric_name]
if metric_name.startswith("seqeval"):
y_processed_true = [
[labels[tr] for tr in each_list] for each_list in y_processed_true
]
y_processed_true = [[labels[tr] for tr in each_list] for each_list in y_processed_true]
elif metric in ("pearsonr", "spearmanr"):
y_processed_true = (
y_processed_true.to_list()
if isinstance(y_processed_true, pd.Series)
else list(y_processed_true)
y_processed_true.to_list() if isinstance(y_processed_true, pd.Series) else list(y_processed_true)
)
score_dict = metric.compute(
predictions=y_processed_predict, references=y_processed_true
)
score_dict = metric.compute(predictions=y_processed_predict, references=y_processed_true)
if "rouge" in metric_name:
score = score_dict[metric_name].mid.fmeasure
elif metric_name.startswith("seqeval"):
metric_submetric_names = metric_name.split(":")
score = score_dict[
metric_submetric_names[1]
if len(metric_submetric_names) > 1
else "overall_accuracy"
]
score = score_dict[metric_submetric_names[1] if len(metric_submetric_names) > 1 else "overall_accuracy"]
else:
score = score_dict[metric_name]
except ImportError:
raise ValueError(
metric_name
+ " is not an built-in sklearn metric and [hf] is not installed. "
metric_name + " is not an built-in sklearn metric and [hf] is not installed. "
"Currently built-in sklearn metrics are: "
"r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,"
"log_loss, mape, f1, micro_f1, macro_f1, ap. "
@ -303,9 +288,7 @@ def sklearn_metric_loss_score(
if "r2" == metric_name:
score = 1.0 - r2_score(y_true, y_predict, sample_weight=sample_weight)
elif metric_name == "rmse":
score = np.sqrt(
mean_squared_error(y_true, y_predict, sample_weight=sample_weight)
)
score = np.sqrt(mean_squared_error(y_true, y_predict, sample_weight=sample_weight))
elif metric_name == "mae":
score = mean_absolute_error(y_true, y_predict, sample_weight=sample_weight)
elif metric_name == "mse":
@ -315,17 +298,11 @@ def sklearn_metric_loss_score(
elif metric_name == "roc_auc":
score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight)
elif metric_name == "roc_auc_ovr":
score = 1.0 - roc_auc_score(
y_true, y_predict, sample_weight=sample_weight, multi_class="ovr"
)
score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight, multi_class="ovr")
elif metric_name == "roc_auc_ovo":
score = 1.0 - roc_auc_score(
y_true, y_predict, sample_weight=sample_weight, multi_class="ovo"
)
score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight, multi_class="ovo")
elif metric_name == "roc_auc_weighted":
score = 1.0 - roc_auc_score(
y_true, y_predict, sample_weight=sample_weight, average="weighted"
)
score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight, average="weighted")
elif metric_name == "roc_auc_ovo_weighted":
score = 1.0 - roc_auc_score(
y_true,
@ -350,19 +327,13 @@ def sklearn_metric_loss_score(
except ValueError:
return np.inf
elif "micro_f1" == metric_name:
score = 1 - f1_score(
y_true, y_predict, sample_weight=sample_weight, average="micro"
)
score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average="micro")
elif "macro_f1" == metric_name:
score = 1 - f1_score(
y_true, y_predict, sample_weight=sample_weight, average="macro"
)
score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average="macro")
elif "f1" == metric_name:
score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
elif "ap" == metric_name:
score = 1 - average_precision_score(
y_true, y_predict, sample_weight=sample_weight
)
score = 1 - average_precision_score(y_true, y_predict, sample_weight=sample_weight)
elif "ndcg" in metric_name:
if "@" in metric_name:
k = int(metric_name.split("@", 1)[-1])
@ -524,9 +495,7 @@ def default_cv_score_agg_func(val_loss_folds, log_metrics_folds):
if metrics_to_log:
n = len(val_loss_folds)
metrics_to_log = (
{k: v / n for k, v in metrics_to_log.items()}
if isinstance(metrics_to_log, dict)
else metrics_to_log / n
{k: v / n for k, v in metrics_to_log.items()} if isinstance(metrics_to_log, dict) else metrics_to_log / n
)
return metric_to_minimize, metrics_to_log
@ -546,9 +515,7 @@ def compute_estimator(
eval_method: str,
eval_metric: Union[str, Callable],
best_val_loss=np.Inf,
n_jobs: Optional[
int
] = 1, # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
n_jobs: Optional[int] = 1, # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
estimator_class: Optional[EstimatorSubclass] = None,
cv_score_agg_func: Optional[callable] = None,
log_training_metric: Optional[bool] = False,
@ -583,9 +550,7 @@ def compute_estimator(
groups_val,
eval_metric,
task,
labels=fit_kwargs.get(
"label_list"
), # pass the label list on to compute the evaluation metric
labels=fit_kwargs.get("label_list"), # pass the label list on to compute the evaluation metric
budget=budget,
log_training_metric=log_training_metric,
fit_kwargs=fit_kwargs,
@ -619,9 +584,7 @@ def train_estimator(
y_train,
task: str,
estimator_name: str,
n_jobs: Optional[
int
] = 1, # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
n_jobs: Optional[int] = 1, # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
estimator_class: Optional[EstimatorSubclass] = None,
budget=None,
fit_kwargs: Optional[dict] = None,
@ -642,18 +605,14 @@ def train_estimator(
fit_kwargs["metric"] = eval_metric
if X_train is not None:
train_time = estimator.fit(
X_train, y_train, budget, free_mem_ratio, **fit_kwargs
)
train_time = estimator.fit(X_train, y_train, budget, free_mem_ratio, **fit_kwargs)
else:
estimator = estimator.estimator_class(**estimator.params)
train_time = time.time() - start_time
return estimator, train_time
def norm_confusion_matrix(
y_true: Union[np.array, pd.Series], y_pred: Union[np.array, pd.Series]
):
def norm_confusion_matrix(y_true: Union[np.array, pd.Series], y_pred: Union[np.array, pd.Series]):
"""normalized confusion matrix.
Args:

View file

@ -139,9 +139,7 @@ class BaseEstimator:
if "_estimator_type" in config:
self._estimator_type = self.params.pop("_estimator_type")
else:
self._estimator_type = (
"classifier" if task in CLASSIFICATION else "regressor"
)
self._estimator_type = "classifier" if task in CLASSIFICATION else "regressor"
def get_params(self, deep=False):
params = self.params.copy()
@ -255,8 +253,7 @@ class BaseEstimator:
mem = psutil.virtual_memory() if psutil is not None else None
try:
with limit_resource(
mem.available * (1 - free_mem_ratio)
+ psutil.Process(os.getpid()).memory_info().rss
mem.available * (1 - free_mem_ratio) + psutil.Process(os.getpid()).memory_info().rss
if mem is not None
else -1,
budget,
@ -290,9 +287,7 @@ class BaseEstimator:
X = self._preprocess(X)
return self._model.predict(X, **kwargs)
else:
logger.warning(
"Estimator is not fit yet. Please run fit() before predict()."
)
logger.warning("Estimator is not fit yet. Please run fit() before predict().")
return np.ones(X.shape[0])
def predict_proba(self, X, **kwargs):
@ -341,9 +336,7 @@ class BaseEstimator:
if self._model is not None:
if self._task == "rank":
raise NotImplementedError(
"AutoML.score() is not implemented for ranking"
)
raise NotImplementedError("AutoML.score() is not implemented for ranking")
else:
X_val = self._preprocess(X_val)
metric = kwargs.pop("metric", None)
@ -356,9 +349,7 @@ class BaseEstimator:
else:
return self._model.score(X_val, y_val, **kwargs)
else:
logger.warning(
"Estimator is not fit yet. Please run fit() before predict()."
)
logger.warning("Estimator is not fit yet. Please run fit() before predict().")
return 0.0
def cleanup(self):
@ -425,9 +416,7 @@ class SparkEstimator(BaseEstimator):
def __init__(self, task="binary", **config):
if not _have_spark:
raise ImportError(
"pyspark is not installed. Try `pip install flaml[spark]`."
)
raise ImportError("pyspark is not installed. Try `pip install flaml[spark]`.")
super().__init__(task, **config)
self.df_train = None
@ -473,9 +462,7 @@ class SparkEstimator(BaseEstimator):
current_time = time.time()
pipeline_model = self.estimator_class(**self.params, **kwargs)
if logger.level == logging.DEBUG:
logger.debug(
f"flaml.model - {pipeline_model} fit started with params {self.params}"
)
logger.debug(f"flaml.model - {pipeline_model} fit started with params {self.params}")
pipeline_model.fit(df_train)
if logger.level == logging.DEBUG:
logger.debug(f"flaml.model - {pipeline_model} fit finished")
@ -494,9 +481,7 @@ class SparkEstimator(BaseEstimator):
"""
if self._model is not None:
X = self._preprocess(X, index_col=index_col)
predictions = to_pandas_on_spark(
self._model.transform(X), index_col=index_col
)
predictions = to_pandas_on_spark(self._model.transform(X), index_col=index_col)
predictions.index.name = None
pred_y = predictions["prediction"]
if return_all:
@ -504,9 +489,7 @@ class SparkEstimator(BaseEstimator):
else:
return pred_y
else:
logger.warning(
"Estimator is not fit yet. Please run fit() before predict()."
)
logger.warning("Estimator is not fit yet. Please run fit() before predict().")
return np.ones(X.shape[0])
def predict_proba(self, X, index_col="tmp_index_col", return_all=False, **kwargs):
@ -524,9 +507,7 @@ class SparkEstimator(BaseEstimator):
assert self._task in CLASSIFICATION, "predict_proba() only for classification."
if self._model is not None:
X = self._preprocess(X, index_col=index_col)
predictions = to_pandas_on_spark(
self._model.transform(X), index_col=index_col
)
predictions = to_pandas_on_spark(self._model.transform(X), index_col=index_col)
predictions.index.name = None
pred_y = predictions["probability"]
@ -535,9 +516,7 @@ class SparkEstimator(BaseEstimator):
else:
return pred_y
else:
logger.warning(
"Estimator is not fit yet. Please run fit() before predict()."
)
logger.warning("Estimator is not fit yet. Please run fit() before predict().")
return np.ones(X.shape[0])
@ -600,9 +579,7 @@ class SparkLGBMEstimator(SparkEstimator):
@classmethod
def size(cls, config):
num_leaves = int(
round(config.get("numLeaves") or 1 << config.get("maxDepth", 16))
)
num_leaves = int(round(config.get("numLeaves") or 1 << config.get("maxDepth", 16)))
n_estimators = int(round(config["numIterations"]))
return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
@ -653,23 +630,17 @@ class SparkLGBMEstimator(SparkEstimator):
):
start_time = time.time()
if self.model_n_classes_ is None and self._task not in ["regression", "rank"]:
self.model_n_classes_, self.model_classes_ = len_labels(
y_train, return_labels=True
)
self.model_n_classes_, self.model_classes_ = len_labels(y_train, return_labels=True)
df_train = self._preprocess(X_train, y_train, index_col=index_col)
# n_iter = self.params.get(self.ITER_HP, self.DEFAULT_ITER)
# trained = False
# mem0 = psutil.virtual_memory().available if psutil is not None else 1
_kwargs = kwargs.copy()
if self._task not in ["regression", "rank"] and "objective" not in _kwargs:
_kwargs["objective"] = (
"binary" if self.model_n_classes_ == 2 else "multiclass"
)
_kwargs["objective"] = "binary" if self.model_n_classes_ == 2 else "multiclass"
for k in list(_kwargs.keys()):
if k not in self.estimator_params:
logger.warning(
f"[SparkLGBMEstimator] [Warning] Ignored unknown parameter: {k}"
)
logger.warning(f"[SparkLGBMEstimator] [Warning] Ignored unknown parameter: {k}")
_kwargs.pop(k)
# TODO: find a better estimation of early stopping
# if (
@ -803,9 +774,7 @@ class TransformersEstimator(BaseEstimator):
"If you need to fix the value of {} to {}, the only way is to add a single-value domain in the search "
"space by adding:\n '{}': {{ 'domain': {} }} to 'custom_hp'. For example:"
'automl_settings["custom_hp"] = {{ "transformer": {{ "model_path": {{ "domain" : '
'"google/electra-small-discriminator" }} }} }}'.format(
key, key, val, key, val
)
'"google/electra-small-discriminator" }} }} }}'.format(key, key, val, key, val)
)
"""
@ -823,25 +792,18 @@ class TransformersEstimator(BaseEstimator):
"""
Update the attributes in TrainingArguments that depend on the values of self.params
"""
local_dir = os.path.join(
self._training_args.output_dir, "train_{}".format(date_str())
)
local_dir = os.path.join(self._training_args.output_dir, "train_{}".format(date_str()))
if self._use_ray is True:
import ray
self._training_args.output_dir = ray.tune.get_trial_dir()
else:
self._training_args.output_dir = Counter.get_trial_fold_name(
local_dir, self.params, self.trial_id
)
self._training_args.output_dir = Counter.get_trial_fold_name(local_dir, self.params, self.trial_id)
self._training_args.fp16 = self.fp16
self._training_args.no_cuda = self.no_cuda
if (
self._task == TOKENCLASSIFICATION
and self._training_args.max_seq_length is not None
):
if self._task == TOKENCLASSIFICATION and self._training_args.max_seq_length is not None:
logger.warning(
"For token classification task, FLAML currently does not support customizing the max_seq_length, max_seq_length will be reset to None."
)
@ -938,10 +900,7 @@ class TransformersEstimator(BaseEstimator):
}
for key in list(kwargs.keys()):
if (
key not in data_collator_class.__dict__.keys()
and key != "tokenizer"
):
if key not in data_collator_class.__dict__.keys() and key != "tokenizer":
del kwargs[key]
return data_collator_class(**kwargs)
else:
@ -984,9 +943,7 @@ class TransformersEstimator(BaseEstimator):
) # If using roberta model, must set add_prefix_space to True to avoid the assertion error at
# https://github.com/huggingface/transformers/blob/main/src/transformers/models/roberta/tokenization_roberta_fast.py#L249
train_dataset, self._X_train, self._y_train = self._preprocess_data(
X_train, y_train
)
train_dataset, self._X_train, self._y_train = self._preprocess_data(X_train, y_train)
if X_val is not None:
eval_dataset, self._X_val, self._y_val = self._preprocess_data(X_val, y_val)
else:
@ -1007,10 +964,7 @@ class TransformersEstimator(BaseEstimator):
self.time_per_iter = time.time() - self.step_begin_time
if (
budget
and (
time.time() + self.time_per_iter
> self.train_begin_time + budget
)
and (time.time() + self.time_per_iter > self.train_begin_time + budget)
or state.global_step >= this_params[TransformersEstimator.ITER_HP]
):
control.should_training_stop = True
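# A hedged worked example of the stopping test above, with made-up timings:
# each step takes 2s and 59s of a 60s budget have elapsed, so the projected
# end of the next step (59 + 2 = 61) exceeds the budget and training stops.
time_per_iter, elapsed, budget = 2.0, 59.0, 60.0
print(elapsed + time_per_iter > budget)  # True -> control.should_training_stop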
@ -1019,10 +973,7 @@ class TransformersEstimator(BaseEstimator):
return control
def on_epoch_end(self, args, state, control, **callback_kwargs):
if (
control.should_training_stop
or state.epoch + 1 >= args.num_train_epochs
):
if control.should_training_stop or state.epoch + 1 >= args.num_train_epochs:
control.should_save = True
control.should_evaluate = True
@ -1051,9 +1002,7 @@ class TransformersEstimator(BaseEstimator):
# if gpu_per_trial == 0:
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
if tmp_cuda_visible_devices.count(",") != math.ceil(gpu_per_trial) - 1:
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
[str(x) for x in range(math.ceil(gpu_per_trial))]
)
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(x) for x in range(math.ceil(gpu_per_trial))])
import time
@ -1070,10 +1019,7 @@ class TransformersEstimator(BaseEstimator):
if hasattr(self._trainer, "intermediate_results"):
self.intermediate_results = [
x[1]
for x in sorted(
self._trainer.intermediate_results.items(), key=lambda x: x[0]
)
x[1] for x in sorted(self._trainer.intermediate_results.items(), key=lambda x: x[0])
]
self._trainer = None
@ -1094,9 +1040,7 @@ class TransformersEstimator(BaseEstimator):
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
if trainer.ckpt_to_metric:
best_ckpt, _ = min(
trainer.ckpt_to_metric.items(), key=lambda x: x[1]["eval_automl_metric"]
)
best_ckpt, _ = min(trainer.ckpt_to_metric.items(), key=lambda x: x[1]["eval_automl_metric"])
best_ckpt_global_step = trainer.ckpt_to_global_step[best_ckpt]
for each_ckpt in list(trainer.ckpt_to_metric):
if each_ckpt != best_ckpt:
@ -1158,9 +1102,7 @@ class TransformersEstimator(BaseEstimator):
Need to reinit training_args because of a bug in deepspeed: if not reinitialized, the deepspeed config will be inconsistent
with the HF config https://github.com/huggingface/transformers/blob/main/src/transformers/training_args.py#L947
"""
training_args = self._TrainingArguments(
local_rank=-1, model_path=self._checkpoint_path, fp16=self.fp16
)
training_args = self._TrainingArguments(local_rank=-1, model_path=self._checkpoint_path, fp16=self.fp16)
for key, val in self._training_args.__dict__.items():
if key not in ("local_rank", "model_path", "fp16"):
setattr(training_args, key, val)
@ -1183,9 +1125,7 @@ class TransformersEstimator(BaseEstimator):
for key, val in pred_kwargs.items():
setattr(self._training_args, key, val)
assert (
self._task in CLASSIFICATION
), "predict_proba() only for classification tasks."
assert self._task in CLASSIFICATION, "predict_proba() only for classification tasks."
X_test, _ = self._tokenize_text(X, **self._kwargs)
test_dataset = Dataset.from_pandas(X_test)
@ -1245,9 +1185,7 @@ class TransformersEstimator(BaseEstimator):
def config2params(self, config: dict) -> dict:
params = super().config2params(config)
params[TransformersEstimator.ITER_HP] = params.get(
TransformersEstimator.ITER_HP, sys.maxsize
)
params[TransformersEstimator.ITER_HP] = params.get(TransformersEstimator.ITER_HP, sys.maxsize)
return params
@ -1257,9 +1195,7 @@ class TransformersEstimatorModelSelection(TransformersEstimator):
@classmethod
def search_space(cls, data_size, task, **params):
search_space_dict = TransformersEstimator.search_space(
data_size, task, **params
)
search_space_dict = TransformersEstimator.search_space(data_size, task, **params)
"""
For model selection, use the same search space regardless of memory constraint
@ -1368,11 +1304,7 @@ class LGBMEstimator(BaseEstimator):
@classmethod
def size(cls, config):
num_leaves = int(
round(
config.get("num_leaves")
or config.get("max_leaves")
or 1 << config.get("max_depth", 16)
)
round(config.get("num_leaves") or config.get("max_leaves") or 1 << config.get("max_depth", 16))
)
n_estimators = int(round(config["n_estimators"]))
return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
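# Sanity check on the size() estimate above (config values are illustrative):
# num_leaves=32, n_estimators=100 gives (32*3 + 31*4 + 1) * 100 * 8 bytes.
def lgbm_size(config: dict) -> float:
    num_leaves = int(round(config.get("num_leaves") or config.get("max_leaves") or 1 << config.get("max_depth", 16)))
    n_estimators = int(round(config["n_estimators"]))
    return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8

print(lgbm_size({"num_leaves": 32, "n_estimators": 100}))  # 176800.0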
@ -1399,11 +1331,7 @@ class LGBMEstimator(BaseEstimator):
self.HAS_CALLBACK = self.HAS_CALLBACK and self._callbacks(0, 0, 0) is not None
def _preprocess(self, X):
if (
not isinstance(X, DataFrame)
and issparse(X)
and np.issubdtype(X.dtype, np.integer)
):
if not isinstance(X, DataFrame) and issparse(X) and np.issubdtype(X.dtype, np.integer):
X = X.astype(float)
elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
# numpy array is not of numeric dtype
@ -1422,10 +1350,7 @@ class LGBMEstimator(BaseEstimator):
if not self.HAS_CALLBACK:
mem0 = psutil.virtual_memory().available if psutil is not None else 1
if (
(
not self._time_per_iter
or abs(self._train_size - X_train.shape[0]) > 4
)
(not self._time_per_iter or abs(self._train_size - X_train.shape[0]) > 4)
and budget is not None
or self._mem_per_iter < 0
and psutil is not None
@ -1445,9 +1370,7 @@ class LGBMEstimator(BaseEstimator):
# elif self._mem2 <= 0:
# self._mem_per_iter = self._mem1
# else:
self._mem_per_iter = min(
self._mem1, self._mem2 / self.params[self.ITER_HP]
)
self._mem_per_iter = min(self._mem1, self._mem2 / self.params[self.ITER_HP])
# if self._mem_per_iter <= 1 and psutil is not None:
# n_iter = self.params[self.ITER_HP]
self._time_per_iter = (
@ -1458,11 +1381,7 @@ class LGBMEstimator(BaseEstimator):
else 0.001
)
self._train_size = X_train.shape[0]
if (
budget is not None
and self._t1 + self._t2 >= budget
or n_iter == self.params[self.ITER_HP]
):
if budget is not None and self._t1 + self._t2 >= budget or n_iter == self.params[self.ITER_HP]:
# self.params[self.ITER_HP] = n_iter
return time.time() - start_time
trained = True
@ -1471,11 +1390,7 @@ class LGBMEstimator(BaseEstimator):
if n_iter > 1:
max_iter = min(
n_iter,
int(
(budget - time.time() + start_time - self._t1)
/ self._time_per_iter
+ 1
)
int((budget - time.time() + start_time - self._t1) / self._time_per_iter + 1)
if budget is not None
else n_iter,
int((1 - free_mem_ratio) * mem0 / self._mem_per_iter)
@ -1489,9 +1404,7 @@ class LGBMEstimator(BaseEstimator):
if self.HAS_CALLBACK:
kwargs_callbacks = kwargs.get("callbacks")
if kwargs_callbacks:
callbacks = kwargs_callbacks + self._callbacks(
start_time, deadline, free_mem_ratio
)
callbacks = kwargs_callbacks + self._callbacks(start_time, deadline, free_mem_ratio)
kwargs.pop("callbacks")
else:
callbacks = self._callbacks(start_time, deadline, free_mem_ratio)
@ -1816,9 +1729,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
def config2params(self, config: dict) -> dict:
params = super().config2params(config)
if "max_leaves" in params:
params["max_leaf_nodes"] = params.get(
"max_leaf_nodes", params.pop("max_leaves")
)
params["max_leaf_nodes"] = params.get("max_leaf_nodes", params.pop("max_leaves"))
if self._task not in CLASSIFICATION and "criterion" in config:
params.pop("criterion")
if "random_state" not in params:
@ -1952,12 +1863,7 @@ class CatBoostEstimator(BaseEstimator):
if not cat_columns.empty:
X = X.copy()
X[cat_columns] = X[cat_columns].apply(
lambda x: x.cat.rename_categories(
[
str(c) if isinstance(c, float) else c
for c in x.cat.categories
]
)
lambda x: x.cat.rename_categories([str(c) if isinstance(c, float) else c for c in x.cat.categories])
)
elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
# numpy array is not of numeric dtype
@ -2005,19 +1911,11 @@ class CatBoostEstimator(BaseEstimator):
else:
cat_features = []
use_best_model = kwargs.get("use_best_model", True)
n = (
max(int(len(y_train) * 0.9), len(y_train) - 1000)
if use_best_model
else len(y_train)
)
n = max(int(len(y_train) * 0.9), len(y_train) - 1000) if use_best_model else len(y_train)
X_tr, y_tr = X_train[:n], y_train[:n]
from catboost import Pool, __version__
eval_set = (
Pool(data=X_train[n:], label=y_train[n:], cat_features=cat_features)
if use_best_model
else None
)
eval_set = Pool(data=X_train[n:], label=y_train[n:], cat_features=cat_features) if use_best_model else None
if "sample_weight" in kwargs:
weight = kwargs["sample_weight"]
if weight is not None:
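# Hedged worked example of the use_best_model holdout above (row count made
# up): with 20,000 training rows, n = max(int(20000 * 0.9), 20000 - 1000)
# = max(18000, 19000) = 19000, so the last 1,000 rows form the eval_set.
n_rows = 20_000
n = max(int(n_rows * 0.9), n_rows - 1000)
print(n, n_rows - n)  # 19000 1000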
@ -2190,9 +2088,7 @@ class Prophet(SKLearnEstimator):
forecast = self._model.predict(X, **kwargs)
return forecast["yhat"]
else:
logger.warning(
"Estimator is not fit yet. Please run fit() before predict()."
)
logger.warning("Estimator is not fit yet. Please run fit() before predict().")
return np.ones(X.shape[0])
def score(self, X_val: DataFrame, y_val: Series, **kwargs):
@ -2279,9 +2175,7 @@ class ARIMA(Prophet):
if len(X.columns) > 1:
X = self._preprocess(X.drop(columns=TS_TIMESTAMP_COL))
regressors = list(X)
forecast = self._model.predict(
start=start, end=end, exog=X[regressors], **kwargs
)
forecast = self._model.predict(start=start, end=end, exog=X[regressors], **kwargs)
else:
forecast = self._model.predict(start=start, end=end, **kwargs)
else:
@ -2398,9 +2292,7 @@ class HoltWinters(ARIMA):
},
"use_boxcox": {"domain": tune.choice([False, True]), "init_value": False},
"seasonal_periods": { # statsmodels casts this to None if "seasonal" is None
"domain": tune.choice(
[7, 12, 4, 52, 6]
), # weekly, yearly, quarterly, weekly w yearly data
"domain": tune.choice([7, 12, 4, 52, 6]), # weekly, yearly, quarterly, weekly w yearly data
"init_value": 7,
},
}
@ -2486,9 +2378,7 @@ class TS_SKLearn(SKLearnEstimator):
"low_cost_init_value": False,
},
"lags": {
"domain": tune.randint(
lower=1, upper=max(2, int(np.sqrt(data_size[0])))
),
"domain": tune.randint(lower=1, upper=max(2, int(np.sqrt(data_size[0])))),
"init_value": 3,
},
}
@ -2498,9 +2388,7 @@ class TS_SKLearn(SKLearnEstimator):
def __init__(self, task="ts_forecast", **params):
super().__init__(task, **params)
self.hcrystaball_model = None
self.ts_task = (
"regression" if task in TS_FORECASTREGRESSION else "classification"
)
self.ts_task = "regression" if task in TS_FORECASTREGRESSION else "classification"
def transform_X(self, X):
cols = list(X)
@ -2532,9 +2420,7 @@ class TS_SKLearn(SKLearnEstimator):
(
X_fit,
y_fit,
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
X_train, y_train, i
)
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, i)
self.hcrystaball_model.model.set_params(**estimator.params)
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
model_list.append(model)
@ -2543,9 +2429,7 @@ class TS_SKLearn(SKLearnEstimator):
(
X_fit,
y_fit,
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
X_train, y_train, kwargs["period"]
)
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, kwargs["period"])
self.hcrystaball_model.model.set_params(**estimator.params)
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
self._model = model
@ -2569,9 +2453,7 @@ class TS_SKLearn(SKLearnEstimator):
(
X_pred,
_,
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
X.iloc[:i, :]
)
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X.iloc[:i, :])
preds.append(self._model[i - 1].predict(X_pred, **kwargs)[-1])
forecast = Series(preds)
else:
@ -2582,9 +2464,7 @@ class TS_SKLearn(SKLearnEstimator):
forecast = self._model.predict(X_pred, **kwargs)
return forecast
else:
logger.warning(
"Estimator is not fit yet. Please run fit() before predict()."
)
logger.warning("Estimator is not fit yet. Please run fit() before predict().")
return np.ones(X.shape[0])
@ -2681,13 +2561,9 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
max_prediction_length=max_prediction_length,
static_categoricals=kwargs.get("static_categoricals", []),
static_reals=kwargs.get("static_reals", []),
time_varying_known_categoricals=kwargs.get(
"time_varying_known_categoricals", []
),
time_varying_known_categoricals=kwargs.get("time_varying_known_categoricals", []),
time_varying_known_reals=kwargs.get("time_varying_known_reals", []),
time_varying_unknown_categoricals=kwargs.get(
"time_varying_unknown_categoricals", []
),
time_varying_unknown_categoricals=kwargs.get("time_varying_unknown_categoricals", []),
time_varying_unknown_reals=kwargs.get("time_varying_unknown_reals", []),
variable_groups=kwargs.get(
"variable_groups", {}
@ -2703,18 +2579,12 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
# create validation set (predict=True) which means to predict the last max_prediction_length points in time
# for each series
validation = TimeSeriesDataSet.from_dataset(
training, self.data, predict=True, stop_randomization=True
)
validation = TimeSeriesDataSet.from_dataset(training, self.data, predict=True, stop_randomization=True)
# create dataloaders for model
batch_size = kwargs.get("batch_size", 64)
train_dataloader = training.to_dataloader(
train=True, batch_size=batch_size, num_workers=0
)
val_dataloader = validation.to_dataloader(
train=False, batch_size=batch_size * 10, num_workers=0
)
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
return training, train_dataloader, val_dataloader
@ -2728,27 +2598,19 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
warnings.filterwarnings("ignore")
current_time = time.time()
training, train_dataloader, val_dataloader = self.transform_ds(
X_train, y_train, **kwargs
)
training, train_dataloader, val_dataloader = self.transform_ds(X_train, y_train, **kwargs)
params = self.params.copy()
gradient_clip_val = params.pop("gradient_clip_val")
params.pop("n_jobs")
max_epochs = kwargs.get("max_epochs", 20)
early_stop_callback = EarlyStopping(
monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min"
)
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
def _fit(log):
default_trainer_kwargs = dict(
gpus=kwargs.get("gpu_per_trial", [0])
if torch.cuda.is_available()
else None,
gpus=kwargs.get("gpu_per_trial", [0]) if torch.cuda.is_available() else None,
max_epochs=max_epochs,
gradient_clip_val=gradient_clip_val,
callbacks=[LearningRateMonitor(), early_stop_callback]
if log
else [early_stop_callback],
callbacks=[LearningRateMonitor(), early_stop_callback] if log else [early_stop_callback],
logger=log,
)
trainer = pl.Trainer(
@ -2794,9 +2656,7 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
ids = self.group_ids.copy()
ids.append(TS_TIMESTAMP_COL)
encoder_data = self.data[
lambda x: x.time_idx > x.time_idx.max() - self.max_encoder_length
]
encoder_data = self.data[lambda x: x.time_idx > x.time_idx.max() - self.max_encoder_length]
# following pytorchforecasting example, make all target values equal to the last data
last_data_cols = self.group_ids.copy()
last_data_cols.append(TS_VALUE_COL)
@ -2804,9 +2664,7 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
decoder_data = X
if "time_idx" not in decoder_data:
decoder_data = add_time_idx_col(decoder_data)
decoder_data["time_idx"] += (
encoder_data["time_idx"].max() + 1 - decoder_data["time_idx"].min()
)
decoder_data["time_idx"] += encoder_data["time_idx"].max() + 1 - decoder_data["time_idx"].min()
# decoder_data[TS_VALUE_COL] = 0
decoder_data = decoder_data.merge(last_data, how="inner", on=self.group_ids)
decoder_data = decoder_data.sort_values(ids)


@ -22,22 +22,15 @@ class DataCollatorForMultipleChoiceClassification(DataCollatorWithPadding):
import torch
label_name = "label" if "label" in features[0].keys() else "labels"
labels = (
[feature.pop(label_name) for feature in features]
if label_name in features[0]
else None
)
labels = [feature.pop(label_name) for feature in features] if label_name in features[0] else None
batch_size = len(features)
num_choices = len(features[0]["input_ids"])
flattened_features = [
[{k: v[i] for k, v in feature.items()} for i in range(num_choices)]
for feature in features
[{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
]
flattened_features = list(chain(*flattened_features))
batch = super(DataCollatorForMultipleChoiceClassification, self).__call__(
flattened_features
)
batch = super(DataCollatorForMultipleChoiceClassification, self).__call__(flattened_features)
# Un-flatten
batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
# Add back labels

@ -24,9 +24,7 @@ class TrainerForAuto(Seq2SeqTrainer):
num_beams=num_beams,
)
else:
return super(Seq2SeqTrainer, self).predict(
test_dataset, ignore_keys, metric_key_prefix
)
return super(Seq2SeqTrainer, self).predict(test_dataset, ignore_keys, metric_key_prefix)
def prediction_step(
self,
@ -36,13 +34,9 @@ class TrainerForAuto(Seq2SeqTrainer):
ignore_keys,
):
if getattr(self, "_is_seq2seq", None):
return super().prediction_step(
model, inputs, prediction_loss_only, ignore_keys
)
return super().prediction_step(model, inputs, prediction_loss_only, ignore_keys)
else:
return super(Seq2SeqTrainer, self).prediction_step(
model, inputs, prediction_loss_only, ignore_keys
)
return super(Seq2SeqTrainer, self).prediction_step(model, inputs, prediction_loss_only, ignore_keys)
def log(self, logs) -> None:
if getattr(self, "_is_seq2seq", None):
@ -66,9 +60,7 @@ class TrainerForAuto(Seq2SeqTrainer):
"""Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path."""
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
ckpt_dir = os.path.join(
self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
)
ckpt_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
# TODO: if your task is seq2seq (i.e., SUMMARIZATION), uncomment the code below (add indentation before metrics = eval_dataset...

@ -69,21 +69,13 @@ class TrainingArgumentsForAuto(TrainingArguments):
metadata={"help": "per gpu evaluation batch size"},
)
label_list: Optional[List[str]] = field(
default=None, metadata={"help": "The string list of the label names. "}
)
label_list: Optional[List[str]] = field(default=None, metadata={"help": "The string list of the label names. "})
eval_steps: int = field(
default=500, metadata={"help": "Run an evaluation every X steps."}
)
eval_steps: int = field(default=500, metadata={"help": "Run an evaluation every X steps."})
save_steps: int = field(
default=500, metadata={"help": "Save checkpoint every X updates steps."}
)
save_steps: int = field(default=500, metadata={"help": "Save checkpoint every X updates steps."})
logging_steps: int = field(
default=500, metadata={"help": "Log every X updates steps."}
)
logging_steps: int = field(default=500, metadata={"help": "Log every X updates steps."})
@staticmethod
def load_args_from_console():
@ -96,12 +88,8 @@ class TrainingArgumentsForAuto(TrainingArguments):
"--" + each_field.name,
type=each_field.type,
help=each_field.metadata["help"],
required=each_field.metadata["required"]
if "required" in each_field.metadata
else False,
choices=each_field.metadata["choices"]
if "choices" in each_field.metadata
else None,
required=each_field.metadata["required"] if "required" in each_field.metadata else False,
choices=each_field.metadata["choices"] if "choices" in each_field.metadata else None,
default=each_field.default,
)
console_args, unknown = arg_parser.parse_known_args()
@ -112,19 +100,13 @@ class TrainingArgumentsForAuto(TrainingArguments):
class Seq2SeqTrainingArgumentsForAuto(TrainingArgumentsForAuto):
model_path: str = field(
default="t5-small",
metadata={
"help": "model path for HPO natural language generation tasks, default is set to t5-small"
},
metadata={"help": "model path for HPO natural language generation tasks, default is set to t5-small"},
)
sortish_sampler: bool = field(
default=False, metadata={"help": "Whether to use SortishSampler or not."}
)
sortish_sampler: bool = field(default=False, metadata={"help": "Whether to use SortishSampler or not."})
predict_with_generate: bool = field(
default=True,
metadata={
"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."
},
metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."},
)
generation_max_length: Optional[int] = field(
default=None,

@ -38,16 +38,12 @@ def tokenize_text(X, Y=None, task=None, hf_args=None, tokenizer=None):
Y_tokenized = Y
label_col_name = ["label"]
elif task == TOKENCLASSIFICATION:
X_tokenized, Y_tokenized = tokenize_text_tokclassification(
X, Y, tokenizer=tokenizer, hf_args=hf_args
)
X_tokenized, Y_tokenized = tokenize_text_tokclassification(X, Y, tokenizer=tokenizer, hf_args=hf_args)
label_col_name = ["labels"]
elif task in NLG_TASKS:
return tokenize_seq2seq(X, Y, tokenizer=tokenizer, task=task, hf_args=hf_args)
elif task == MULTICHOICECLASSIFICATION:
X_tokenized = tokenize_text_multiplechoice(
X, tokenizer=tokenizer, hf_args=hf_args
)
X_tokenized = tokenize_text_multiplechoice(X, tokenizer=tokenizer, hf_args=hf_args)
label_col_name = ["label"]
Y_tokenized = Y
Y_tokenized = todf(X_tokenized, Y_tokenized, label_col_name)
@ -75,9 +71,7 @@ def tokenize_seq2seq(X, Y, tokenizer, task=None, hf_args=None):
[(each_l if each_l != tokenizer.pad_token_id else -100) for each_l in label]
for label in model_outputs["input_ids"]
]
model_outputs = model_outputs.drop(
columns=["attention_mask", "input_ids", "decoder_input_ids"]
)
model_outputs = model_outputs.drop(columns=["attention_mask", "input_ids", "decoder_input_ids"])
return model_inputs, model_outputs
@ -116,9 +110,7 @@ def tokenize_and_align_labels(
# Use the label_all_tokens to control whether to copy the label to all subtokens or to pad the additional tokens as -100
if hf_args.label_all_tokens:
# If the B- word is converted into multiple subtokens, map the additional subtokens to I-
label_ids.append(
b_to_i_label[label_to_id[examples[Y_sent_key][word_idx]]]
)
label_ids.append(b_to_i_label[label_to_id[examples[Y_sent_key][word_idx]]])
else:
label_ids.append(-100)
previous_word_idx = word_idx
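# A hedged sketch of the B- to I- propagation described in the comment above:
# when a B-* word splits into several subtokens, label_all_tokens gives the
# extra subtokens the matching I-* label (the label list is illustrative).
label_list = ["O", "B-PER", "I-PER"]
b_to_i_label = [
    label_list.index(name.replace("B-", "I-")) if name.startswith("B-") else i
    for i, name in enumerate(label_list)
]
print(b_to_i_label)  # [0, 2, 2]: a B-PER continuation is labeled I-PER (id 2)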
@ -173,9 +165,7 @@ def tokenize_text_tokclassification(X, Y, tokenizer, hf_args=None):
result_type="expand",
)
label_idx = tokenized_column_names.index("labels")
other_indices = sorted(
set(range(len(tokenized_column_names))).difference({label_idx})
)
other_indices = sorted(set(range(len(tokenized_column_names))).difference({label_idx}))
other_column_names = [tokenized_column_names[x] for x in other_indices]
d = X_and_Y_tokenized.iloc[:, other_indices]
y_tokenized = X_and_Y_tokenized.iloc[:, label_idx]
@ -298,10 +288,7 @@ def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):
# get each 1st sentence and repeat it 4 times
question_headers = this_row["sent2"]
# sent2 is the noun part of the 2nd line
second_sentences = [
question_headers + " " + this_row[key]
for key in ["ending0", "ending1", "ending2", "ending3"]
]
second_sentences = [question_headers + " " + this_row[key] for key in ["ending0", "ending1", "ending2", "ending3"]]
# now the 2nd-sentences are formed by combining the noun part and 4 ending parts
# Flatten out
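# A hedged toy example of the expansion above (row content is made up): sent2
# is combined with each of the four endings to form the candidate sentences.
this_row = {"sent2": "He", "ending0": "runs.", "ending1": "eats.", "ending2": "sleeps.", "ending3": "reads."}
second_sentences = [this_row["sent2"] + " " + this_row[key] for key in ["ending0", "ending1", "ending2", "ending3"]]
print(second_sentences)  # ['He runs.', 'He eats.', 'He sleeps.', 'He reads.']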
@ -322,18 +309,14 @@ def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):
return [tokenized_example[x] for x in tmp_column_names]
def postprocess_prediction_and_true(
task, y_pred, tokenizer, hf_args, y_true=None, X=None
):
def postprocess_prediction_and_true(task, y_pred, tokenizer, hf_args, y_true=None, X=None):
# postprocess the matrix prediction y_pred and ground truth y_true into a user-readable format, e.g., for summarization, decode into text
if task == SEQCLASSIFICATION:
return np.argmax(y_pred, axis=1), y_true
elif task == SEQREGRESSION:
return np.squeeze(y_pred), y_true # predictions.reshape((len(predictions),))
elif task == TOKENCLASSIFICATION:
assert (y_true is not None) or (
X is not None
), "One of y_true and X must not be None"
assert (y_true is not None) or (X is not None), "One of y_true and X must not be None"
## If y_true is not None, we use y_true to remove the -100 in the prediction (postprocessing), and return the postprocessed y_true and prediction
# If y_true is None, we use X to compute y_is_pad (i.e., whether y_true is -100 in that position), and use y_is_pad to remove the -100 in the prediction, and return the postprocessed prediction (not the y_true)
y_predict = pd.Series(np.argmax(y_pred, axis=2).tolist())
@ -354,17 +337,12 @@ def postprocess_prediction_and_true(
for (each_pred, each_is_pad) in zip(y_predict, y_is_pad)
]
y_pred_label = [
[
hf_args.label_list[p] if 0 <= p < label_len else -1
for (p, ispd) in each_list
]
[hf_args.label_list[p] if 0 <= p < label_len else -1 for (p, ispd) in each_list]
for each_list in zip_pred_ispad
] # To compute precision and recall, y_pred and y_true must be converted to string labels
# (B-PER, I-PER, etc.), so that the category-based precision/recall (i.e., PER, LOC, etc.) scores can be computed
if y_true is not None:
y_true_label = [
[tr for (p, tr) in each_list] for each_list in zip_pred_ispad
]
y_true_label = [[tr for (p, tr) in each_list] for each_list in zip_pred_ispad]
else:
y_true_label = None
return y_pred_label, y_true_label
@ -381,13 +359,9 @@ def postprocess_prediction_and_true(
if y_true is not None:
y_true_labels = np.where(y_true != -100, y_true, tokenizer.pad_token_id)
decoded_y_true_labels = tokenizer.batch_decode(
y_true_labels, skip_special_tokens=True
)
decoded_y_true_labels = tokenizer.batch_decode(y_true_labels, skip_special_tokens=True)
decoded_y_true_labels = [label.strip() for label in decoded_y_true_labels]
decoded_y_true_labels = [
"\n".join(nltk.sent_tokenize(label)) for label in decoded_y_true_labels
]
decoded_y_true_labels = ["\n".join(nltk.sent_tokenize(label)) for label in decoded_y_true_labels]
else:
decoded_y_true_labels = None
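# A hedged illustration of the -100 masking step above (toy ids): padded label
# positions are swapped back to the tokenizer's pad id before decoding.
import numpy as np
pad_token_id = 0  # illustrative; the real value comes from the tokenizer
y_true = np.array([[17, 23, -100, -100]])
print(np.where(y_true != -100, y_true, pad_token_id))  # [[17 23  0  0]]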
@ -419,17 +393,11 @@ def load_model(checkpoint_path, task, num_labels=None):
checkpoint_path, config=model_config, ignore_mismatched_sizes=True
)
elif task == TOKENCLASSIFICATION:
return AutoModelForTokenClassification.from_pretrained(
checkpoint_path, config=model_config
)
return AutoModelForTokenClassification.from_pretrained(checkpoint_path, config=model_config)
elif task in NLG_TASKS:
return AutoModelForSeq2SeqLM.from_pretrained(
checkpoint_path, config=model_config
)
return AutoModelForSeq2SeqLM.from_pretrained(checkpoint_path, config=model_config)
elif task == MULTICHOICECLASSIFICATION:
return AutoModelForMultipleChoice.from_pretrained(
checkpoint_path, config=model_config
)
return AutoModelForMultipleChoice.from_pretrained(checkpoint_path, config=model_config)
def _set_model_config(checkpoint_path):
if task in (SEQCLASSIFICATION, SEQREGRESSION, TOKENCLASSIFICATION):

@ -85,12 +85,8 @@ class Counter:
@staticmethod
def get_trial_fold_name(local_dir, trial_config, trial_id):
Counter.counter += 1
experiment_tag = "{0}_{1}".format(
str(Counter.counter), format_vars(trial_config)
)
logdir = get_logdir_name(
_generate_dirname(experiment_tag, trial_id=trial_id), local_dir
)
experiment_tag = "{0}_{1}".format(str(Counter.counter), format_vars(trial_config))
logdir = get_logdir_name(_generate_dirname(experiment_tag, trial_id=trial_id), local_dir)
return logdir
@ -99,15 +95,11 @@ class LabelEncoderforTokenClassification:
# if the labels are tokens, convert them to ids
if any(isinstance(id, str) for id in y[0]):
self.label_list = sorted(list(set().union(*y)))
self._tokenlabel_to_id = {
self.label_list[id]: id for id in range(len(self.label_list))
}
self._tokenlabel_to_id = {self.label_list[id]: id for id in range(len(self.label_list))}
y = y.apply(lambda sent: [self._tokenlabel_to_id[token] for token in sent])
# if the labels are not tokens, they must be ids
else:
assert all(
isinstance(id, (int, np.integer)) for id in y[0]
), "The labels must either be tokens or ids"
assert all(isinstance(id, (int, np.integer)) for id in y[0]), "The labels must either be tokens or ids"
return y
def transform(self, y):
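# A hedged sketch of the token-label encoding in fit() above (labels made up):
# string labels are sorted and mapped to ids, then each sentence is converted.
label_list = sorted({"B-PER", "I-PER", "O"})  # -> ['B-PER', 'I-PER', 'O']
tokenlabel_to_id = {label_list[i]: i for i in range(len(label_list))}
print([tokenlabel_to_id[tok] for tok in ["O", "B-PER", "I-PER"]])  # [2, 0, 1]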

@ -40,9 +40,7 @@ def _process_df(df, label_col, prediction_col):
def _compute_label_from_probability(df, probability_col, prediction_col):
# array_max finds the maximum value in the 'probability' array
# array_position finds the index of the maximum value in the 'probability' array
max_index_expr = F.expr(
f"array_position({probability_col}, array_max({probability_col}))-1"
)
max_index_expr = F.expr(f"array_position({probability_col}, array_max({probability_col}))-1")
# Create a new column 'prediction' based on the maximum probability value
df = df.withColumn(prediction_col, max_index_expr.cast("double"))
return df
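# A standalone check of the array_position/array_max expression above; assumes
# pyspark is installed and a local SparkSession can be started (illustrative).
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([([0.1, 0.7, 0.2],)], ["probability"])
df = df.withColumn("prediction", F.expr("array_position(probability, array_max(probability)) - 1").cast("double"))
df.show()  # prediction == 1.0, the 0-based index of the largest probability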
@ -143,9 +141,7 @@ def spark_metric_loss_score(
)
elif metric_name == "log_loss":
# For log_loss, prediction_col should be probability, and we need to convert it to label
df = _compute_label_from_probability(
df, prediction_col, prediction_col + "_label"
)
df = _compute_label_from_probability(df, prediction_col, prediction_col + "_label")
evaluator = MulticlassClassificationEvaluator(
metricName="logLoss",
labelCol=label_col,
@ -214,17 +210,11 @@ def spark_metric_loss_score(
score /= len(counts)
score += 1
else:
evaluator = RankingEvaluator(
metricName="ndcgAtK", labelCol=label_col, predictionCol=prediction_col
)
evaluator = RankingEvaluator(metricName="ndcgAtK", labelCol=label_col, predictionCol=prediction_col)
df = _process_df(df, label_col, prediction_col)
score = 1 - evaluator.evaluate(df)
return score
else:
raise ValueError(f"Unknown metric name: {metric_name} for spark models.")
return (
evaluator.evaluate(df)
if metric_name in min_mode_metrics
else 1 - evaluator.evaluate(df)
)
return evaluator.evaluate(df) if metric_name in min_mode_metrics else 1 - evaluator.evaluate(df)
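# A hedged illustration of the convention above: FLAML minimizes a loss, so
# higher-is-better metrics are flipped to 1 - score (names/values made up).
min_mode_metrics = ("log_loss", "rmse", "mae")
def as_loss(metric_name: str, score: float) -> float:
    return score if metric_name in min_mode_metrics else 1 - score
print(as_loss("accuracy", 0.93))  # ~0.07 (lower is better)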

@ -73,9 +73,7 @@ def to_pandas_on_spark(
elif isinstance(df, (ps.DataFrame, ps.Series)):
return df
else:
raise TypeError(
f"{type(df)} is not one of pandas.DataFrame, pandas.Series and pyspark.sql.DataFrame"
)
raise TypeError(f"{type(df)} is not one of pandas.DataFrame, pandas.Series and pyspark.sql.DataFrame")
def train_test_split_pyspark(
@ -106,10 +104,7 @@ def train_test_split_pyspark(
if stratify_column:
# Test data
test_fraction_dict = (
df.select(stratify_column)
.distinct()
.withColumn("fraction", F.lit(test_fraction))
.rdd.collectAsMap()
df.select(stratify_column).distinct().withColumn("fraction", F.lit(test_fraction)).rdd.collectAsMap()
)
df_test = df.stat.sampleBy(stratify_column, test_fraction_dict, seed)
# Train data
@ -128,9 +123,7 @@ def train_test_split_pyspark(
return [df_train, df_test]
def unique_pandas_on_spark(
psds: Union[ps.Series, ps.DataFrame]
) -> Tuple[np.ndarray, np.ndarray]:
def unique_pandas_on_spark(psds: Union[ps.Series, ps.DataFrame]) -> Tuple[np.ndarray, np.ndarray]:
"""Get the unique values and counts of a pandas_on_spark series."""
if isinstance(psds, ps.DataFrame):
psds = psds.iloc[:, 0]
@ -140,9 +133,7 @@ def unique_pandas_on_spark(
return label_set, counts
def len_labels(
y: Union[ps.Series, np.ndarray], return_labels=False
) -> Union[int, Optional[np.ndarray]]:
def len_labels(y: Union[ps.Series, np.ndarray], return_labels=False) -> Union[int, Optional[np.ndarray]]:
"""Get the number of unique labels in y."""
if not isinstance(y, (ps.DataFrame, ps.Series)):
labels = np.unique(y)
@ -153,9 +144,7 @@ def len_labels(
return len(labels)
def unique_value_first_index(
y: Union[pd.Series, ps.Series, np.ndarray]
) -> Tuple[np.ndarray, np.ndarray]:
def unique_value_first_index(y: Union[pd.Series, ps.Series, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
"""Get the unique values and indices of a pandas series,
pandas_on_spark series or numpy array."""
if isinstance(y, ps.Series):
@ -196,9 +185,7 @@ def iloc_pandas_on_spark(
psdfiloc = psdfiloc.drop(columns=[index_col])
return psdfiloc
else:
raise TypeError(
f"{type(index)} is not one of int, slice and list for pandas_on_spark iloc"
)
raise TypeError(f"{type(index)} is not one of int, slice and list for pandas_on_spark iloc")
def spark_kFold(
@ -241,9 +228,7 @@ def spark_kFold(
condition = (df[randCol] >= validateLB) & (df[randCol] < validateUB)
validation = to_pandas_on_spark(df.filter(condition), index_col=index_col)
train = to_pandas_on_spark(df.filter(~condition), index_col=index_col)
datasets.append(
(train.drop(columns=[randCol]), validation.drop(columns=[randCol]))
)
datasets.append((train.drop(columns=[randCol]), validation.drop(columns=[randCol])))
else:
# Use user-specified fold column
def get_fold_num(foldNum: int) -> int:

@ -63,9 +63,7 @@ class SearchState:
Notice that (2) includes the case where the starting point is not in the user-specified search space custom_hp
"""
if isinstance(domain_one_dim, sample.Domain):
renamed_type = list(
inspect.signature(domain_one_dim.is_valid).parameters.values()
)[0].annotation
renamed_type = list(inspect.signature(domain_one_dim.is_valid).parameters.values())[0].annotation
type_match = (
renamed_type == Any
or isinstance(value_one_dim, renamed_type)
@ -106,9 +104,7 @@ class SearchState:
self.learner_class = learner_class
self._budget = budget
if task in TS_FORECAST:
search_space = learner_class.search_space(
data_size=data_size, task=task, pred_horizon=period
)
search_space = learner_class.search_space(data_size=data_size, task=task, pred_horizon=period)
else:
search_space = learner_class.search_space(data_size=data_size, task=task)
@ -117,14 +113,10 @@ class SearchState:
if isinstance(starting_point, dict):
starting_point = AutoMLState.sanitize(starting_point)
if max_iter > 1 and not self.valid_starting_point(
starting_point, search_space
):
if max_iter > 1 and not self.valid_starting_point(starting_point, search_space):
# If the number of iterations is larger than 1, remove invalid point
logger.warning(
"Starting point {} removed because it is outside of the search space".format(
starting_point
)
"Starting point {} removed because it is outside of the search space".format(starting_point)
)
starting_point = None
elif isinstance(starting_point, list):
@ -132,11 +124,7 @@ class SearchState:
if max_iter > len(starting_point):
# If the number of starting points is no smaller than max iter, avoid the checking
starting_point_len = len(starting_point)
starting_point = [
x
for x in starting_point
if self.valid_starting_point(x, search_space)
]
starting_point = [x for x in starting_point if self.valid_starting_point(x, search_space)]
if starting_point_len > len(starting_point):
logger.warning(
"Starting points outside of the search space are removed. "
@ -145,9 +133,7 @@ class SearchState:
starting_point = starting_point or None
for name, space in search_space.items():
assert (
"domain" in space
), f"{name}'s domain is missing in the search space spec {space}"
assert "domain" in space, f"{name}'s domain is missing in the search space spec {space}"
if space["domain"] is None:
# don't search this hp
continue
@ -159,19 +145,14 @@ class SearchState:
self.cat_hp_cost[name] = space["cat_hp_cost"]
# if a starting point is provided, set the init config to be
# the starting point provided
if (
isinstance(starting_point, dict)
and starting_point.get(name) is not None
):
if isinstance(starting_point, dict) and starting_point.get(name) is not None:
if self.init_config is None:
self.init_config = {}
self.init_config[name] = starting_point[name]
elif (
not isinstance(starting_point, list)
and "init_value" in space
and self.valid_starting_point_one_dim(
space["init_value"], space["domain"]
)
and self.valid_starting_point_one_dim(space["init_value"], space["domain"])
):
if self.init_config is None:
self.init_config = {}
@ -241,11 +222,7 @@ class SearchState:
if time2eval:
self.time2eval_best_old = self.time2eval_best
self.time2eval_best = time2eval
if (
self.trained_estimator
and trained_estimator
and self.trained_estimator != trained_estimator
):
if self.trained_estimator and trained_estimator and self.trained_estimator != trained_estimator:
self.trained_estimator.cleanup()
if trained_estimator:
self.trained_estimator = trained_estimator
@ -260,9 +237,7 @@ class SearchState:
return config_sig
def est_retrain_time(self, retrain_sample_size):
assert (
self.best_config_sample_size is not None
), "need to first get best_config_sample_size"
assert self.best_config_sample_size is not None, "need to first get best_config_sample_size"
return self.time2eval_best * retrain_sample_size / self.best_config_sample_size
@ -283,9 +258,7 @@ class AutoMLState:
) # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
if weight is not None:
sampled_weight = (
weight.iloc[:sample_size]
if isinstance(weight, (pd.Series, psSeries))
else weight[:sample_size]
weight.iloc[:sample_size] if isinstance(weight, (pd.Series, psSeries)) else weight[:sample_size]
)
if self.groups is not None:
groups = (
@ -338,10 +311,7 @@ class AutoMLState:
if state.time_budget < 0
else state.time_budget - state.time_from_start
if sample_size == state.data_size[0]
else (state.time_budget - state.time_from_start)
/ 2
* sample_size
/ state.data_size[0]
else (state.time_budget - state.time_from_start) / 2 * sample_size / state.data_size[0]
)
(
@ -357,9 +327,7 @@ class AutoMLState:
state.y_val,
state.weight_val,
state.groups_val,
state.train_time_limit
if budget is None
else min(budget, state.train_time_limit or np.inf),
state.train_time_limit if budget is None else min(budget, state.train_time_limit or np.inf),
state.kf,
config,
state.task,
@ -406,9 +374,7 @@ class AutoMLState:
sample_size: Optional[int] = None,
):
if not sample_size:
sample_size = config_w_resource.get(
"FLAML_sample_size", len(self.y_train_all)
)
sample_size = config_w_resource.get("FLAML_sample_size", len(self.y_train_all))
config = AutoMLState.sanitize(config_w_resource)
this_estimator_kwargs = self.fit_kwargs_by_estimator.get(
@ -432,9 +398,7 @@ class AutoMLState:
"groups"
] = groups # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
budget = (
None if self.time_budget < 0 else self.time_budget - self.time_from_start
)
budget = None if self.time_budget < 0 else self.time_budget - self.time_from_start
estimator, train_time = train_estimator(
X_train=sampled_X_train,

@ -108,44 +108,28 @@ class GenericTask(Task):
groups=None,
):
if X_train_all is not None and y_train_all is not None:
assert isinstance(
X_train_all, (np.ndarray, pd.DataFrame, psDataFrame)
) or issparse(X_train_all), (
assert isinstance(X_train_all, (np.ndarray, pd.DataFrame, psDataFrame)) or issparse(X_train_all), (
"X_train_all must be a numpy array, a pandas dataframe, "
"a Scipy sparse matrix or a pyspark.pandas dataframe."
)
assert isinstance(
y_train_all, (np.ndarray, pd.Series, psSeries)
), "y_train_all must be a numpy array, a pandas series or a pyspark.pandas series."
assert (
X_train_all.size != 0 and y_train_all.size != 0
), "Input data must not be empty."
assert X_train_all.size != 0 and y_train_all.size != 0, "Input data must not be empty."
if isinstance(X_train_all, np.ndarray) and len(X_train_all.shape) == 1:
X_train_all = np.reshape(X_train_all, (X_train_all.size, 1))
if isinstance(y_train_all, np.ndarray):
y_train_all = y_train_all.flatten()
assert (
X_train_all.shape[0] == y_train_all.shape[0]
), "# rows in X_train must match length of y_train."
assert X_train_all.shape[0] == y_train_all.shape[0], "# rows in X_train must match length of y_train."
if isinstance(X_train_all, psDataFrame):
X_train_all = (
X_train_all.spark.cache()
) # cache data to improve compute speed
X_train_all = X_train_all.spark.cache() # cache data to improve compute speed
y_train_all = y_train_all.to_frame().spark.cache()[y_train_all.name]
logger.debug(
f"X_train_all and y_train_all cached, shape of X_train_all: {X_train_all.shape}"
)
logger.debug(f"X_train_all and y_train_all cached, shape of X_train_all: {X_train_all.shape}")
automl._df = isinstance(X_train_all, (pd.DataFrame, psDataFrame))
automl._nrow, automl._ndim = X_train_all.shape
if self.is_ts_forecast():
X_train_all = (
pd.DataFrame(X_train_all)
if isinstance(X_train_all, np.ndarray)
else X_train_all
)
X_train_all, y_train_all = self._validate_ts_data(
X_train_all, y_train_all
)
X_train_all = pd.DataFrame(X_train_all) if isinstance(X_train_all, np.ndarray) else X_train_all
X_train_all, y_train_all = self._validate_ts_data(X_train_all, y_train_all)
X, y = X_train_all, y_train_all
elif dataframe is not None and label is not None:
assert isinstance(
@ -155,9 +139,7 @@ class GenericTask(Task):
label in dataframe.columns
), f"The provided label column name `{label}` doesn't exist in the provided dataframe."
if isinstance(dataframe, psDataFrame):
dataframe = (
dataframe.spark.cache()
) # cache data to improve compute speed
dataframe = dataframe.spark.cache() # cache data to improve compute speed
logger.debug(f"dataframe cached, shape of dataframe: {dataframe.shape}")
automl._df = True
if self.is_ts_forecast():
@ -183,9 +165,7 @@ class GenericTask(Task):
for _, each_cell in X[column].items():
if each_cell is not None:
is_str = isinstance(each_cell, str)
is_list_of_int = isinstance(each_cell, list) and all(
isinstance(x, int) for x in each_cell
)
is_list_of_int = isinstance(each_cell, list) and all(isinstance(x, int) for x in each_cell)
is_list_of_str = is_a_list_of_str(each_cell)
if self.is_token_classification():
assert is_list_of_str, (
@ -222,9 +202,7 @@ class GenericTask(Task):
automl._label_transformer = automl._transformer.label_transformer
if self.is_token_classification():
if hasattr(automl._label_transformer, "label_list"):
state.fit_kwargs.update(
{"label_list": automl._label_transformer.label_list}
)
state.fit_kwargs.update({"label_list": automl._label_transformer.label_list})
elif "label_list" not in state.fit_kwargs:
for each_fit_kwargs in state.fit_kwargs_by_estimator.values():
assert (
@ -232,34 +210,26 @@ class GenericTask(Task):
), "For the token-classification task, you must either (1) pass token labels; or (2) pass id labels and the label list. "
"Please refer to the documentation for more details: https://microsoft.github.io/FLAML/docs/Examples/AutoML-NLP#a-simple-token-classification-example"
automl._feature_names_in_ = (
automl._X_train_all.columns.to_list()
if hasattr(automl._X_train_all, "columns")
else None
automl._X_train_all.columns.to_list() if hasattr(automl._X_train_all, "columns") else None
)
automl._sample_weight_full = state.fit_kwargs.get(
"sample_weight"
) # NOTE: _validate_data is before kwargs is updated to fit_kwargs_by_estimator
if X_val is not None and y_val is not None:
assert isinstance(
X_val, (np.ndarray, pd.DataFrame, psDataFrame)
) or issparse(X_train_all), (
assert isinstance(X_val, (np.ndarray, pd.DataFrame, psDataFrame)) or issparse(X_train_all), (
"X_val must be None, a numpy array, a pandas dataframe, "
"a Scipy sparse matrix or a pyspark.pandas dataframe."
)
assert isinstance(y_val, (np.ndarray, pd.Series, psSeries)), (
"y_val must be None, a numpy array, a pandas series "
"or a pyspark.pandas series."
"y_val must be None, a numpy array, a pandas series " "or a pyspark.pandas series."
)
assert X_val.size != 0 and y_val.size != 0, (
"Validation data are expected to be nonempty. "
"Use None for X_val and y_val if no validation data."
"Validation data are expected to be nonempty. " "Use None for X_val and y_val if no validation data."
)
if isinstance(y_val, np.ndarray):
y_val = y_val.flatten()
assert (
X_val.shape[0] == y_val.shape[0]
), "# rows in X_val must match length of y_val."
assert X_val.shape[0] == y_val.shape[0], "# rows in X_val must match length of y_val."
if automl._transformer:
state.X_val = automl._transformer.transform(X_val)
else:
@ -276,13 +246,9 @@ class GenericTask(Task):
if groups is not None and len(groups) != automl._nrow:
# groups is given as group counts
state.groups = np.concatenate([[i] * c for i, c in enumerate(groups)])
assert (
len(state.groups) == automl._nrow
), "the sum of group counts must match the number of examples"
assert len(state.groups) == automl._nrow, "the sum of group counts must match the number of examples"
state.groups_val = (
np.concatenate([[i] * c for i, c in enumerate(groups_val)])
if groups_val is not None
else None
np.concatenate([[i] * c for i, c in enumerate(groups_val)]) if groups_val is not None else None
)
else:
state.groups_val = groups_val
@ -345,11 +311,7 @@ class GenericTask(Task):
if not isinstance(y_train_all, (psDataFrame, psSeries)):
raise ValueError("y_train_all must be a pyspark.pandas dataframe or series")
df_all_in_one = X_train_all.join(y_train_all)
stratify_column = (
y_train_all.name
if isinstance(y_train_all, psSeries)
else y_train_all.columns[0]
)
stratify_column = y_train_all.name if isinstance(y_train_all, psSeries) else y_train_all.columns[0]
ret_sample_weight = False
if (
"sample_weight" in state.fit_kwargs
@ -367,9 +329,7 @@ class GenericTask(Task):
test_fraction=split_ratio,
seed=RANDOM_SEED,
)
columns_to_drop = [
c for c in df_all_train.columns if c in [stratify_column, "sample_weight"]
]
columns_to_drop = [c for c in df_all_train.columns if c in [stratify_column, "sample_weight"]]
X_train = df_all_train.drop(columns_to_drop)
X_val = df_all_val.drop(columns_to_drop)
y_train = df_all_train[stratify_column]
@ -387,17 +347,13 @@ class GenericTask(Task):
return X_train, X_val, y_train, y_val
@staticmethod
def _train_test_split(
state, X, y, first=None, rest=None, split_ratio=0.2, stratify=None
):
def _train_test_split(state, X, y, first=None, rest=None, split_ratio=0.2, stratify=None):
condition_type = isinstance(X, (psDataFrame, psSeries))
# NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
condition_param = "sample_weight" in state.fit_kwargs
if not condition_type and condition_param:
sample_weight = (
state.fit_kwargs["sample_weight"]
if rest is None
else state.fit_kwargs["sample_weight"][rest]
state.fit_kwargs["sample_weight"] if rest is None else state.fit_kwargs["sample_weight"][rest]
)
(
X_train,
@ -448,9 +404,7 @@ class GenericTask(Task):
state.weight_val = weight_val
state.fit_kwargs["sample_weight"] = weight_train
else:
X_train, X_val, y_train, y_val = GenericTask._split_pyspark(
state, X, y, split_ratio, stratify
)
X_train, X_val, y_train, y_val = GenericTask._split_pyspark(state, X, y, split_ratio, stratify)
return X_train, X_val, y_train, y_val
def prepare_data(
@ -498,21 +452,13 @@ class GenericTask(Task):
n = len(y_train_all)
while count < rare_threshld:
if data_is_df:
X_train_all = concat(
X_train_all, X_train_all.iloc[:n].loc[rare_index]
)
X_train_all = concat(X_train_all, X_train_all.iloc[:n].loc[rare_index])
else:
X_train_all = concat(
X_train_all, X_train_all[:n][rare_index, :]
)
X_train_all = concat(X_train_all, X_train_all[:n][rare_index, :])
if isinstance(y_train_all, (pd.Series, psSeries)):
y_train_all = concat(
y_train_all, y_train_all.iloc[:n].loc[rare_index]
)
y_train_all = concat(y_train_all, y_train_all.iloc[:n].loc[rare_index])
else:
y_train_all = np.concatenate(
[y_train_all, y_train_all[:n][rare_index]]
)
y_train_all = np.concatenate([y_train_all, y_train_all[:n][rare_index]])
count += rare_count
logger.info(f"class {label} augmented from {rare_count} to {count}")
SHUFFLE_SPLIT_TYPES = ["uniform", "stratified"]
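# A hedged toy run of the augmentation loop above: rows of the rare label are
# re-appended until its count reaches the threshold (data and threshold made up).
import numpy as np
y = np.array([0, 0, 0, 0, 1])  # label 1 is rare
rare_index = y == 1
rare_count = int(rare_index.sum())
count, n, rare_threshld = rare_count, len(y), 3
while count < rare_threshld:
    y = np.concatenate([y, y[:n][rare_index]])
    count += rare_count
print(y)  # [0 0 0 0 1 1 1]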
@ -535,9 +481,7 @@ class GenericTask(Task):
if isinstance(state.sample_weight_all, pd.Series):
state.sample_weight_all.reset_index(drop=True, inplace=True)
else:
X_train_all, y_train_all = shuffle(
X_train_all, y_train_all, random_state=RANDOM_SEED
)
X_train_all, y_train_all = shuffle(X_train_all, y_train_all, random_state=RANDOM_SEED)
if data_is_df:
X_train_all.reset_index(drop=True, inplace=True)
if isinstance(y_train_all, pd.Series):
@ -569,21 +513,13 @@ class GenericTask(Task):
X_train_all = X_train_all.sort_values(ids)
y_train_all = y_train_all.sort_values(ids)
training_cutoff = X_train_all["time_idx"].max() - period
X_train = X_train_all[
X_train_all["time_idx"] <= training_cutoff
]
y_train = y_train_all[
y_train_all["time_idx"] <= training_cutoff
].drop(columns=ids)
X_train = X_train_all[X_train_all["time_idx"] <= training_cutoff]
y_train = y_train_all[y_train_all["time_idx"] <= training_cutoff].drop(columns=ids)
X_val = X_train_all[X_train_all["time_idx"] > training_cutoff]
y_val = y_train_all[
y_train_all["time_idx"] > training_cutoff
].drop(columns=ids)
y_val = y_train_all[y_train_all["time_idx"] > training_cutoff].drop(columns=ids)
else:
num_samples = X_train_all.shape[0]
assert (
period < num_samples
), f"period={period}>#examples={num_samples}"
assert period < num_samples, f"period={period}>#examples={num_samples}"
split_idx = num_samples - period
X_train = X_train_all[:split_idx]
y_train = y_train_all[:split_idx]
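# A hedged worked example of the holdout split above (sizes made up): with
# 100 samples and period=12, split_idx = 100 - 12 = 88, so the last 12 points
# form the validation horizon.
num_samples, period = 100, 12
split_idx = num_samples - period
print(split_idx, num_samples - split_idx)  # 88 12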
@ -627,20 +563,14 @@ class GenericTask(Task):
"sample_weight"
], # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
state.weight_val,
) = self._split_pyspark(
state, X_train_all, y_train_all, split_ratio
)
) = self._split_pyspark(state, X_train_all, y_train_all, split_ratio)
else:
X_train, X_val, y_train, y_val = self._split_pyspark(
state, X_train_all, y_train_all, split_ratio
)
elif split_type == "group":
gss = GroupShuffleSplit(
n_splits=1, test_size=split_ratio, random_state=RANDOM_SEED
)
for train_idx, val_idx in gss.split(
X_train_all, y_train_all, state.groups_all
):
gss = GroupShuffleSplit(n_splits=1, test_size=split_ratio, random_state=RANDOM_SEED)
for train_idx, val_idx in gss.split(X_train_all, y_train_all, state.groups_all):
if data_is_df:
X_train = X_train_all.iloc[train_idx]
X_val = X_train_all.iloc[val_idx]
@ -674,17 +604,9 @@ class GenericTask(Task):
state, X_rest, y_rest, first, rest, split_ratio, stratify
)
X_train = concat(X_first, X_train)
y_train = (
concat(label_set, y_train)
if data_is_df
else np.concatenate([label_set, y_train])
)
y_train = concat(label_set, y_train) if data_is_df else np.concatenate([label_set, y_train])
X_val = concat(X_first, X_val)
y_val = (
concat(label_set, y_val)
if data_is_df
else np.concatenate([label_set, y_val])
)
y_val = concat(label_set, y_val) if data_is_df else np.concatenate([label_set, y_val])
elif self.is_regression():
X_train, X_val, y_train, y_val = self._train_test_split(
state, X_train_all, y_train_all, split_ratio=split_ratio
@ -700,9 +622,7 @@ class GenericTask(Task):
return
if split_type == "group":
# logger.info("Using GroupKFold")
assert (
len(state.groups_all) == y_train_all_size
), "the length of groups must match the number of examples"
assert len(state.groups_all) == y_train_all_size, "the length of groups must match the number of examples"
assert (
len_labels(state.groups_all) >= n_splits
), "the number of groups must be equal or larger than n_splits"
@ -710,16 +630,13 @@ class GenericTask(Task):
elif split_type == "stratified":
# logger.info("Using StratifiedKFold")
assert y_train_all_size >= n_splits, (
f"{n_splits}-fold cross validation"
f" requires input data with at least {n_splits} examples."
f"{n_splits}-fold cross validation" f" requires input data with at least {n_splits} examples."
)
assert y_train_all_size >= 2 * n_splits, (
f"{n_splits}-fold cross validation with metric=r2 "
f"requires input data with at least {n_splits*2} examples."
)
state.kf = RepeatedStratifiedKFold(
n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED
)
state.kf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED)
elif split_type == "time":
# logger.info("Using TimeSeriesSplit")
if self.is_ts_forecast() and not self.is_ts_forecastpanel():
@ -735,20 +652,14 @@ class GenericTask(Task):
logger.info(f"Using nsplits={n_splits} due to data size limit.")
state.kf = TimeSeriesSplit(n_splits=n_splits, test_size=period)
elif self.is_ts_forecastpanel():
n_groups = len(
X_train.groupby(state.fit_kwargs.get("group_ids")).size()
)
n_groups = len(X_train.groupby(state.fit_kwargs.get("group_ids")).size())
period = state.fit_kwargs.get("period")
state.kf = TimeSeriesSplit(
n_splits=n_splits, test_size=period * n_groups
)
state.kf = TimeSeriesSplit(n_splits=n_splits, test_size=period * n_groups)
else:
state.kf = TimeSeriesSplit(n_splits=n_splits)
elif isinstance(split_type, str):
# logger.info("Using RepeatedKFold")
state.kf = RepeatedKFold(
n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED
)
state.kf = RepeatedKFold(n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED)
else:
# logger.info("Using splitter object")
state.kf = split_type
@ -790,11 +701,7 @@ class GenericTask(Task):
elif self.is_classification():
assert split_type in ["auto", "stratified", "uniform", "time", "group"]
return (
split_type
if split_type != "auto"
else groups is None and "stratified" or "group"
)
return split_type if split_type != "auto" else groups is None and "stratified" or "group"
elif self.is_regression():
assert split_type in ["auto", "uniform", "time", "group"]
@ -825,9 +732,7 @@ class GenericTask(Task):
)
)
except IndexError:
raise IndexError(
"Test data contains more columns than training data, exiting"
)
raise IndexError("Test data contains more columns than training data, exiting")
elif isinstance(X, int):
return X
elif isinstance(X, psDataFrame):
@ -872,9 +777,7 @@ class GenericTask(Task):
if self.is_classification():
labels = _, labels = len_labels(y_train_all, return_labels=True)
else:
labels = fit_kwargs.get(
"label_list"
) # pass the label list on to compute the evaluation metric
labels = fit_kwargs.get("label_list") # pass the label list on to compute the evaluation metric
if "sample_weight" in fit_kwargs:
weight = fit_kwargs["sample_weight"]
weight_val = None
@ -889,9 +792,7 @@ class GenericTask(Task):
if isinstance(kf, (GroupKFold, StratifiedGroupKFold)):
groups = kf.groups
dataframe = dataframe.join(groups)
kf = spark_kFold(
dataframe, nFolds=n, foldCol=groups.name if groups is not None else ""
)
kf = spark_kFold(dataframe, nFolds=n, foldCol=groups.name if groups is not None else "")
shuffle = False
else:
X_train_split, y_train_split = X_train_all, y_train_all
@ -934,15 +835,9 @@ class GenericTask(Task):
)
if groups is not None:
fit_kwargs["groups"] = (
groups[train_index]
if isinstance(groups, np.ndarray)
else groups.iloc[train_index]
)
groups_val = (
groups[val_index]
if isinstance(groups, np.ndarray)
else groups.iloc[val_index]
groups[train_index] if isinstance(groups, np.ndarray) else groups.iloc[train_index]
)
groups_val = groups[val_index] if isinstance(groups, np.ndarray) else groups.iloc[val_index]
else:
groups_val = None
@ -983,16 +878,12 @@ class GenericTask(Task):
pred_time /= n
return val_loss, metric, train_time, pred_time
def default_estimator_list(
self, estimator_list: List[str], is_spark_dataframe: bool = False
) -> List[str]:
def default_estimator_list(self, estimator_list: List[str], is_spark_dataframe: bool = False) -> List[str]:
if "auto" != estimator_list:
n_estimators = len(estimator_list)
if is_spark_dataframe:
# For spark dataframe, only estimators ending with '_spark' are supported
estimator_list = [
est for est in estimator_list if est.endswith("_spark")
]
estimator_list = [est for est in estimator_list if est.endswith("_spark")]
if len(estimator_list) == 0:
raise ValueError(
"Spark dataframes only support estimator names ending with `_spark`. Non-supported "
@ -1005,9 +896,7 @@ class GenericTask(Task):
)
else:
# For non-spark dataframe, only estimators not ending with '_spark' are supported
estimator_list = [
est for est in estimator_list if not est.endswith("_spark")
]
estimator_list = [est for est in estimator_list if not est.endswith("_spark")]
if len(estimator_list) == 0:
raise ValueError(
"Non-spark dataframes only support estimator names not ending with `_spark`. Non-supported "
@ -1069,11 +958,7 @@ class GenericTask(Task):
estimator_list = [
est
for est in estimator_list
if (
est.endswith("_spark")
if is_spark_dataframe
else not est.endswith("_spark")
)
if (est.endswith("_spark") if is_spark_dataframe else not est.endswith("_spark"))
]
return estimator_list
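
For reference, the Spark/non-Spark filtering reformatted in the hunks above reduces to a suffix test on estimator names. A minimal standalone sketch, with a toy estimator list that is not part of this commit:

def filter_estimators(estimator_list, is_spark_dataframe):
    # Spark dataframes support only estimators named "*_spark";
    # all other dataframes support only the remaining estimators.
    if is_spark_dataframe:
        kept = [est for est in estimator_list if est.endswith("_spark")]
    else:
        kept = [est for est in estimator_list if not est.endswith("_spark")]
    if not kept:
        raise ValueError("No estimator in the list supports this dataframe type.")
    return kept

print(filter_estimators(["lgbm", "xgboost", "lgbm_spark"], True))  # ['lgbm_spark']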

View file

@ -333,9 +333,7 @@ class Task(ABC):
return self.name == other
@classmethod
def estimator_class_from_str(
cls, estimator_name: str
) -> "flaml.automl.ml.BaseEstimator":
def estimator_class_from_str(cls, estimator_name: str) -> "flaml.automl.ml.BaseEstimator":
"""Determine the estimator class corresponding to the provided name.
Args:

View file

@ -111,9 +111,7 @@ class TrainingLogWriter(object):
if self.file is None:
raise IOError("Call open() to open the output file first.")
if self.current_best_loss_record_id is None:
logger.warning(
"flaml.training_log: checkpoint() called before any record is written, skipped."
)
logger.warning("flaml.training_log: checkpoint() called before any record is written, skipped.")
return
record = TrainingLogCheckPoint(self.current_best_loss_record_id)
record.dump(self.file)

View file

@ -75,10 +75,7 @@ def flamlize_estimator(super_class, name: str, task: str, alternatives=None):
break
estimator_name = (
"choose_xgb"
if (
estimator_name == "xgb_limitdepth"
and "max_depth" not in self._params
)
if (estimator_name == "xgb_limitdepth" and "max_depth" not in self._params)
else estimator_name
)
(
@ -88,18 +85,14 @@ def flamlize_estimator(super_class, name: str, task: str, alternatives=None):
y_transformed,
self._feature_transformer,
self._label_transformer,
) = preprocess_and_suggest_hyperparams(
task, X, y, estimator_name, self._default_location
)
) = preprocess_and_suggest_hyperparams(task, X, y, estimator_name, self._default_location)
assert estimator_class == super_class
hyperparams.update(self._params)
return hyperparams, estimator_name, X_transformed, y_transformed
@wraps(super_class.fit)
def fit(self, X, y, *args, **params):
hyperparams, estimator_name, X, y_transformed = self.suggest_hyperparams(
X, y
)
hyperparams, estimator_name, X, y_transformed = self.suggest_hyperparams(X, y)
self.set_params(**hyperparams)
if self._label_transformer and estimator_name in [
"rf",
@ -150,26 +143,16 @@ def flamlize_estimator(super_class, name: str, task: str, alternatives=None):
return EstimatorClass
RandomForestRegressor = flamlize_estimator(
ensemble.RandomForestRegressor, "rf", "regression"
)
RandomForestClassifier = flamlize_estimator(
ensemble.RandomForestClassifier, "rf", "classification"
)
ExtraTreesRegressor = flamlize_estimator(
ensemble.ExtraTreesRegressor, "extra_tree", "regression"
)
ExtraTreesClassifier = flamlize_estimator(
ensemble.ExtraTreesClassifier, "extra_tree", "classification"
)
RandomForestRegressor = flamlize_estimator(ensemble.RandomForestRegressor, "rf", "regression")
RandomForestClassifier = flamlize_estimator(ensemble.RandomForestClassifier, "rf", "classification")
ExtraTreesRegressor = flamlize_estimator(ensemble.ExtraTreesRegressor, "extra_tree", "regression")
ExtraTreesClassifier = flamlize_estimator(ensemble.ExtraTreesClassifier, "extra_tree", "classification")
try:
import lightgbm
LGBMRegressor = flamlize_estimator(lightgbm.LGBMRegressor, "lgbm", "regression")
LGBMClassifier = flamlize_estimator(
lightgbm.LGBMClassifier, "lgbm", "classification"
)
LGBMClassifier = flamlize_estimator(lightgbm.LGBMClassifier, "lgbm", "classification")
except ImportError:
pass
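
A usage sketch for the flamlized estimators defined above, based on the suggest_hyperparams/fit pair shown in this file; it assumes flaml and scikit-learn are installed, and the dataset is illustrative:

from sklearn.datasets import make_regression
from flaml.default import RandomForestRegressor

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
model = RandomForestRegressor()
# returns the zero-shot config without training the model
hyperparams, name, X_t, y_t = model.suggest_hyperparams(X, y)
print(name, hyperparams)
model.fit(X, y)  # applies the suggested hyperparameters, then trains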

View file

@ -71,17 +71,12 @@ def construct_portfolio(regret_matrix, meta_features, regret_bound):
sorted_losses = np.sort(losses)
if sorted_losses[1] - sorted_losses[0] < eps:
minloss = np.nanmin(losses)
print(
f"tie detected at loss = {sorted_losses[0]}, using alternative metric."
)
print(f"tie detected at loss = {sorted_losses[0]}, using alternative metric.")
tied = np.flatnonzero(losses - minloss < eps)
losses = [(avg_regret[i], i) for i in tied]
minloss, ind = min(losses)
if minloss > prev - eps:
print(
f"May be overfitting at k = {i + 1}, current = {minloss:.5f}, "
f"prev = {prev:.5f}. Stopping."
)
print(f"May be overfitting at k = {i + 1}, current = {minloss:.5f}, " f"prev = {prev:.5f}. Stopping.")
break
configs = candidates[ind]
prev = minloss
@ -89,9 +84,7 @@ def construct_portfolio(regret_matrix, meta_features, regret_bound):
configs = candidates[np.nanargmin(losses)]
i += 1
if sorted_losses[0] <= eps:
print(
f"Reached target regret bound of {regret_bound}! k = {i}. Declining to pick further!"
)
print(f"Reached target regret bound of {regret_bound}! k = {i}. Declining to pick further!")
break
return configs
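
A toy illustration of the tie-breaking in construct_portfolio above: when several candidates fall within eps of the minimum loss, average regret decides (all values here are assumed):

import numpy as np

losses = np.array([0.30, 0.10, 0.10 + 1e-9, 0.25])
avg_regret = np.array([0.40, 0.20, 0.10, 0.30])
eps = 1e-6

minloss = np.nanmin(losses)
tied = np.flatnonzero(losses - minloss < eps)  # candidates within eps of the best
minloss, ind = min((avg_regret[i], i) for i in tied)
print(ind)  # 2: among the tied candidates, the one with smaller average regret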

View file

@ -109,9 +109,7 @@ def serialize(configs, regret, meta_features, output_file, config_path):
except FileNotFoundError:
pass
meta_features_norm, preferences, proc = config_predictor_tuple(
regret.columns, configs, meta_features, regret
)
meta_features_norm, preferences, proc = config_predictor_tuple(regret.columns, configs, meta_features, regret)
portfolio = [load_json(config_path.joinpath(m + ".json")) for m in configs]
regret = regret.loc[configs]
@ -122,9 +120,7 @@ def serialize(configs, regret, meta_features, output_file, config_path):
"preprocessing": proc,
"neighbors": [
{"features": tuple(x), "choice": _filter(preferences[y], regret[y])}
for x, y in zip(
meta_features_norm.to_records(index=False), preferences.columns
)
for x, y in zip(meta_features_norm.to_records(index=False), preferences.columns)
],
"configsource": list(configs),
}
@ -164,9 +160,7 @@ def serialize(configs, regret, meta_features, output_file, config_path):
def main():
parser = argparse.ArgumentParser(description="Build a portfolio.")
parser.add_argument(
"--strategy", help="One of {greedy, greedy-feedback}", default="greedy"
)
parser.add_argument("--strategy", help="One of {greedy, greedy-feedback}", default="greedy")
parser.add_argument("--input", help="Input path")
parser.add_argument("--metafeatures", help="CSV of task metafeatures")
parser.add_argument("--exclude", help="One task name to exclude (for LOO purposes)")
@ -188,9 +182,7 @@ def main():
all_results = None
for estimator in args.estimator:
# produce regret
all, baseline = load_result(
f"{args.input}/{estimator}/results.csv", args.task, "result"
)
all, baseline = load_result(f"{args.input}/{estimator}/results.csv", args.task, "result")
regret = build_regret(all, baseline)
regret = regret.replace(np.inf, np.nan).dropna(axis=1, how="all")
@ -198,9 +190,7 @@ def main():
regret = regret.loc[[i for i in regret.index if args.exclude not in i]]
regret = regret[[c for c in regret.columns if args.exclude not in c]]
print(
f"Regret matrix complete: {100 * regret.count().sum() / regret.shape[0] / regret.shape[1]}%"
)
print(f"Regret matrix complete: {100 * regret.count().sum() / regret.shape[0] / regret.shape[1]}%")
print(f"Num models considered: {regret.shape[0]}")
configs = build_portfolio(meta_features, regret, args.strategy)
@ -214,11 +204,7 @@ def main():
configsource = meta_predictor["configsource"]
all = all.loc[configsource]
all.rename({x: f"{estimator}/{x}" for x in regret.index.values}, inplace=True)
baseline_best = (
baseline
if baseline_best is None
else pd.DataFrame({0: baseline_best, 1: baseline}).max(1)
)
baseline_best = baseline if baseline_best is None else pd.DataFrame({0: baseline_best, 1: baseline}).max(1)
all_results = all if all_results is None else pd.concat([all_results, all])
# analyze(regret, meta_predictor)
regrets = build_regret(all_results, baseline_best)

View file

@ -18,14 +18,8 @@ def load_result(filename, task_type, metric):
(df[metric].notnull()) & (df.type == task_type),
["task", "fold", "params", metric],
]
df["params"] = df["params"].apply(
lambda x: path.splitext(path.basename(eval(x)["_modeljson"]))[0]
)
baseline = (
df.loc[df["task"] == df["params"], ["task", metric]]
.groupby("task")
.mean()[metric]
)
df["params"] = df["params"].apply(lambda x: path.splitext(path.basename(eval(x)["_modeljson"]))[0])
baseline = df.loc[df["task"] == df["params"], ["task", metric]].groupby("task").mean()[metric]
df = df.pivot_table(index="params", columns="task", values=metric)
return df, baseline
@ -34,9 +28,7 @@ def main():
parser = argparse.ArgumentParser(description="Build a regret matrix.")
parser.add_argument("--result_csv", help="File of experiment results")
parser.add_argument("--task_type", help="Type of task")
parser.add_argument(
"--metric", help="Metric for calculating regret", default="result"
)
parser.add_argument("--metric", help="Metric for calculating regret", default="result")
parser.add_argument("--output", help="Location to write regret CSV to")
args = parser.parse_args()
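
For context, the baseline/pivot step condensed in load_result above works like this on a toy frame (column values assumed):

import pandas as pd

df = pd.DataFrame(
    {
        "task": ["t1", "t1", "t2"],
        "params": ["t1", "cfg_a", "cfg_a"],
        "result": [0.9, 0.8, 0.7],
    }
)
# baseline: mean metric of the rows whose config was tuned on that very task
baseline = df.loc[df["task"] == df["params"], ["task", "result"]].groupby("task").mean()["result"]
matrix = df.pivot_table(index="params", columns="task", values="result")
print(baseline, matrix, sep="\n")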

View file

@ -53,10 +53,7 @@ def meta_feature(task, X_train, y_train, meta_feature_names):
try:
# this feature is only supported for dataframe
this_feature.append(
X_train.select_dtypes(
include=[np.number, "float", "int", "long"]
).shape[1]
/ n_feat
X_train.select_dtypes(include=[np.number, "float", "int", "long"]).shape[1] / n_feat
)
except AttributeError:
# 'numpy.ndarray' object has no attribute 'select_dtypes'
@ -79,9 +76,7 @@ def load_config_predictor(estimator_name, task, location=None):
with open(f"{location}/{estimator_name}/{task}.json", "r") as f:
CONFIG_PREDICTORS[key] = predictor = json.load(f)
except FileNotFoundError:
raise FileNotFoundError(
f"Portfolio has not been built for {estimator_name} on {task} task."
)
raise FileNotFoundError(f"Portfolio has not been built for {estimator_name} on {task} task.")
return predictor
@ -99,11 +94,7 @@ def suggest_config(
The returned configs can be used as starting points for AutoML.fit().
`FLAML_sample_size` is removed from the configs.
"""
task = (
get_classification_objective(len_labels(y))
if task == "classification" and y is not None
else task
)
task = get_classification_objective(len_labels(y)) if task == "classification" and y is not None else task
predictor = (
load_config_predictor(estimator_or_predictor, task, location)
if isinstance(estimator_or_predictor, str)
@ -112,15 +103,9 @@ def suggest_config(
older_version = "1.0.2"
# TODO: update older_version when the newer code can no longer handle the older version json file
assert (
version_parse(__version__)
>= version_parse(predictor["version"])
>= version_parse(older_version)
)
assert version_parse(__version__) >= version_parse(predictor["version"]) >= version_parse(older_version)
prep = predictor["preprocessing"]
feature = meta_feature_fn(
task, X_train=X, y_train=y, meta_feature_names=predictor["meta_feature_names"]
)
feature = meta_feature_fn(task, X_train=X, y_train=y, meta_feature_names=predictor["meta_feature_names"])
feature = (np.array(feature) - np.array(prep["center"])) / np.array(prep["scale"])
neighbors = predictor["neighbors"]
nn = NearestNeighbors(n_neighbors=1)
@ -138,9 +123,7 @@ def suggest_config(
return configs
def suggest_learner(
task, X, y, estimator_or_predictor="all", estimator_list=None, location=None
):
def suggest_learner(task, X, y, estimator_or_predictor="all", estimator_list=None, location=None):
"""Suggest best learner within estimator_list."""
configs = suggest_config(task, X, y, estimator_or_predictor, location)
if not estimator_list:
@ -193,9 +176,7 @@ def suggest_hyperparams(task, X, y, estimator_or_predictor, location=None):
hyperparams: A dict of the hyperparameter configurations.
estimator_class: A class of the underlying estimator, e.g., lightgbm.LGBMClassifier.
"""
config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[
0
]
config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[0]
estimator = config["class"]
model_class = get_estimator_class(task, estimator)
hyperparams = config["hyperparameters"]
@ -279,9 +260,7 @@ def preprocess_and_suggest_hyperparams(
estimator_list=["xgb_limitdepth", "xgboost"],
location=location,
)
config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[
0
]
config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[0]
estimator = config["class"]
model_class = get_estimator_class(task, estimator)
hyperparams = config["hyperparameters"]

View file

@ -113,12 +113,8 @@ class AutoVW:
search_space = self._search_space.copy()
for k, v in self._search_space.items():
if k == self.VW_INTERACTION_ARG_NAME and v == self.AUTOMATIC:
raw_namespaces = self.get_ns_feature_dim_from_vw_example(
vw_example
).keys()
search_space[k] = polynomial_expansion_set(
init_monomials=set(raw_namespaces)
)
raw_namespaces = self.get_ns_feature_dim_from_vw_example(vw_example).keys()
search_space[k] = polynomial_expansion_set(init_monomials=set(raw_namespaces))
# setup the init config based on the input _init_config and search space
init_config = self._init_config.copy()
for k, v in search_space.items():
@ -158,10 +154,7 @@ class AutoVW:
self._best_trial = self._select_best_trial()
self._y_predict = self._best_trial.predict(data_sample)
# code for debugging purposes
if (
self._prediction_trial_id is None
or self._prediction_trial_id != self._best_trial.trial_id
):
if self._prediction_trial_id is None or self._prediction_trial_id != self._best_trial.trial_id:
self._prediction_trial_id = self._best_trial.trial_id
logger.info(
"prediction trial id changed to %s at iter %s, resource used: %s",
@ -183,14 +176,11 @@ class AutoVW:
def _select_best_trial(self):
"""Select a best trial from the running trials according to the _model_select_policy."""
best_score = (
float("+inf") if self._model_selection_mode == "min" else float("-inf")
)
best_score = float("+inf") if self._model_selection_mode == "min" else float("-inf")
new_best_trial = None
for trial in self._trial_runner.running_trials:
if trial.result is not None and (
"threshold" not in self._model_select_policy
or trial.result.resource_used >= self.WARMSTART_NUM
"threshold" not in self._model_select_policy or trial.result.resource_used >= self.WARMSTART_NUM
):
score = trial.result.get_score(self._model_select_policy)
if ("min" == self._model_selection_mode and score < best_score) or (
@ -199,18 +189,13 @@ class AutoVW:
best_score = score
new_best_trial = trial
if new_best_trial is not None:
logger.debug(
"best_trial resource used: %s", new_best_trial.result.resource_used
)
logger.debug("best_trial resource used: %s", new_best_trial.result.resource_used)
return new_best_trial
else:
# This branch will be triggered when the resource consumption of all trials is smaller
# than the WARMSTART_NUM threshold. In this case, we will select the _best_trial
# selected in the previous iteration.
if (
self._best_trial is not None
and self._best_trial.status == Trial.RUNNING
):
if self._best_trial is not None and self._best_trial.status == Trial.RUNNING:
logger.debug("old best trial %s", self._best_trial.trial_id)
return self._best_trial
else:

View file

@ -106,9 +106,7 @@ class OnlineResult:
self._loss_cb = self._update_loss_cb(bound_of_range, data_dimension)
self._loss_queue.append(new_loss)
def _update_loss_cb(
self, bound_of_range, data_dim, bound_name="sample_complexity_bound"
):
def _update_loss_cb(self, bound_of_range, data_dim, bound_name="sample_complexity_bound"):
"""Calculate the coefficient of the confidence bound."""
if bound_name == "sample_complexity_bound":
# set the coefficient in the loss bound
@ -119,9 +117,7 @@ class OnlineResult:
comp_F = math.sqrt(data_dim)
n = self.observation_count
return (
coef * comp_F * math.sqrt((np.log10(n / OnlineResult.prob_delta)) / n)
)
return coef * comp_F * math.sqrt((np.log10(n / OnlineResult.prob_delta)) / n)
else:
raise NotImplementedError
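
A standalone form of the sample-complexity confidence bound returned above; coef and prob_delta are illustrative here, since the real coefficient is set in lines elided from this hunk:

import math
import numpy as np

def loss_cb(n, data_dim, coef=0.1, prob_delta=0.1):
    # cb = coef * sqrt(d) * sqrt(log10(n / delta) / n); shrinks as n grows
    comp_F = math.sqrt(data_dim)
    return coef * comp_F * math.sqrt(np.log10(n / prob_delta) / n)

print(loss_cb(n=100, data_dim=26), loss_cb(n=10000, data_dim=26))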
@ -147,11 +143,7 @@ class OnlineResult:
@property
def loss_avg_recent(self):
return (
sum(self._loss_queue) / len(self._loss_queue)
if len(self._loss_queue) != 0
else self._init_loss
)
return sum(self._loss_queue) / len(self._loss_queue) if len(self._loss_queue) != 0 else self._init_loss
def get_score(self, score_name, cb_ratio=1):
if "lcb" in score_name:
@ -282,9 +274,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
try:
from vowpalwabbit import pyvw
except ImportError:
raise ImportError(
"To use AutoVW, please run pip install flaml[vw] to install vowpalwabbit"
)
raise ImportError("To use AutoVW, please run pip install flaml[vw] to install vowpalwabbit")
# attributes
self.trial_id = self._config_to_id(config) if trial_id is None else trial_id
logger.info("Create trial with trial_id: %s", self.trial_id)
@ -327,14 +317,10 @@ class VowpalWabbitTrial(BaseOnlineTrial):
def _initialize_vw_model(self, vw_example):
"""Initialize a vw model using the trainable_class"""
self._vw_config = self.config.copy()
ns_interactions = self.config.get(
VowpalWabbitTrial.interactions_config_key, None
)
ns_interactions = self.config.get(VowpalWabbitTrial.interactions_config_key, None)
# ensure the feature interaction config is a list (required by VW)
if ns_interactions is not None:
self._vw_config[VowpalWabbitTrial.interactions_config_key] = list(
ns_interactions
)
self._vw_config[VowpalWabbitTrial.interactions_config_key] = list(ns_interactions)
# get the dimensionality of the feature according to the namespace configuration
namespace_feature_dim = get_ns_feature_dim_from_vw_example(vw_example)
self._dim = self._get_dim_from_ns(namespace_feature_dim, ns_interactions)
@ -361,9 +347,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
# do one step of learning
self.model.learn(data_sample)
# update training related results accordingly
new_loss = self._get_loss(
y, y_pred, self._metric, self._y_min_observed, self._y_max_observed
)
new_loss = self._get_loss(y, y_pred, self._metric, self._y_min_observed, self._y_max_observed)
# update sample size, sum of loss, and cost
data_sample_size = 1
bound_of_range = self._y_max_observed - self._y_min_observed
@ -391,11 +375,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
loss_func = mean_squared_error
elif "mae" in loss_func_name or "absolute" in loss_func_name:
loss_func = mean_absolute_error
if (
y_min_observed is not None
and y_max_observed is not None
and "clip" in loss_func_name
):
if y_min_observed is not None and y_max_observed is not None and "clip" in loss_func_name:
# clip y_pred in the observed range of y
y_pred = min(y_max_observed, max(y_pred, y_min_observed))
else:
@ -410,9 +390,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
self._y_max_observed = y
@staticmethod
def _get_dim_from_ns(
namespace_feature_dim: dict, namespace_interactions: Union[set, list]
):
def _get_dim_from_ns(namespace_feature_dim: dict, namespace_interactions: Union[set, list]):
"""Get the dimensionality of the corresponding feature of input namespace set."""
total_dim = sum(namespace_feature_dim.values())
if namespace_interactions:
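
The loop body of _get_dim_from_ns is truncated above; a plausible sketch of the full computation follows, in which the interaction handling is an assumption rather than flaml's verbatim code:

import numpy as np

def get_dim_from_ns(namespace_feature_dim, namespace_interactions):
    total_dim = sum(namespace_feature_dim.values())
    for inter in namespace_interactions or []:
        # an interaction such as "ab" crosses namespaces a and b,
        # contributing dim(a) * dim(b) extra features (assumed)
        total_dim += int(np.prod([namespace_feature_dim[ns] for ns in inter]))
    return total_dim

print(get_dim_from_ns({"a": 3, "b": 4}, ["ab"]))  # 3 + 4 + 3*4 = 19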

View file

@ -33,12 +33,7 @@ class OnlineTrialRunner:
WARMSTART_NUM = 100
def __init__(
self,
max_live_model_num: int,
searcher=None,
scheduler=None,
champion_test_policy="loss_ucb",
**kwargs
self, max_live_model_num: int, searcher=None, scheduler=None, champion_test_policy="loss_ucb", **kwargs
):
"""Constructor.
@ -192,9 +187,7 @@ class OnlineTrialRunner:
def get_top_running_trials(self, top_ratio=None, top_metric="ucb") -> list:
"""Get a list of trial ids, whose performance is among the top running trials."""
running_valid_trials = [
trial for trial in self._running_trials if trial.result is not None
]
running_valid_trials = [trial for trial in self._running_trials if trial.result is not None]
if not running_valid_trials:
return
if top_ratio is None:
@ -215,20 +208,14 @@ class OnlineTrialRunner:
else:
raise NotImplementedError
top_running_valid_trials = []
logger.info(
"Running trial ids %s", [trial.trial_id for trial in running_valid_trials]
)
logger.info("Running trial ids %s", [trial.trial_id for trial in running_valid_trials])
self._random_state.shuffle(running_valid_trials)
results = [
trial.result.get_score(test_attribute) for trial in running_valid_trials
]
results = [trial.result.get_score(test_attribute) for trial in running_valid_trials]
# sorted result (small to large) index
sorted_index = np.argsort(np.array(results))
for i in range(min(top_number, len(running_valid_trials))):
top_running_valid_trials.append(running_valid_trials[sorted_index[i]])
logger.info(
"Top running ids %s", [trial.trial_id for trial in top_running_valid_trials]
)
logger.info("Top running ids %s", [trial.trial_id for trial in top_running_valid_trials])
return top_running_valid_trials
def _add_trial_from_searcher(self):
@ -240,16 +227,9 @@ class OnlineTrialRunner:
"""
# (optionally) upper bound the number of trials in the OnlineTrialRunner
if self._bound_trial_num and self._first_challenger_pool_size is not None:
active_trial_size = len(
[t for t in self._trials if t.status != Trial.TERMINATED]
)
active_trial_size = len([t for t in self._trials if t.status != Trial.TERMINATED])
trial_num_upper_bound = (
int(
round(
(np.log10(self._total_steps) + 1)
* self._first_challenger_pool_size
)
)
int(round((np.log10(self._total_steps) + 1) * self._first_challenger_pool_size))
if self._first_challenger_pool_size
else np.inf
)
@ -286,9 +266,7 @@ class OnlineTrialRunner:
if self._best_challenger_trial is not None:
assert self._best_challenger_trial.trial_id != self._champion_trial.trial_id
# test whether a new champion is found and set the trial properties accordingly
is_new_champion_found = self._better_than_champion_test(
self._best_challenger_trial
)
is_new_champion_found = self._better_than_champion_test(self._best_challenger_trial)
if is_new_champion_found:
self._set_champion(new_champion_trial=self._best_challenger_trial)
@ -303,10 +281,7 @@ class OnlineTrialRunner:
if worse_than_champion:
to_stop.append(trial_to_test)
# we want to ensure there are at least #max_live_model_num of challengers remaining
max_to_stop_num = (
len([t for t in self._trials if t.status != Trial.TERMINATED])
- self._max_live_model_num
)
max_to_stop_num = len([t for t in self._trials if t.status != Trial.TERMINATED]) - self._max_live_model_num
for i in range(min(max_to_stop_num, len(to_stop))):
self.stop_trial(to_stop[i])
@ -331,9 +306,7 @@ class OnlineTrialRunner:
]
if active_trials:
self._random_state.shuffle(active_trials)
results = [
trial.result.get_score(test_attribute) for trial in active_trials
]
results = [trial.result.get_score(test_attribute) for trial in active_trials]
best_index = np.argmin(results)
self._best_challenger_trial = active_trials[best_index]
@ -358,9 +331,7 @@ class OnlineTrialRunner:
# calling set_search_properties of searcher will trigger
# new challenger generation. we do not do this for init champion
# as this step is already done when first constructing the searcher
self._searcher.set_search_properties(
setting={self._searcher.CHAMPION_TRIAL_NAME: self._champion_trial}
)
self._searcher.set_search_properties(setting={self._searcher.CHAMPION_TRIAL_NAME: self._champion_trial})
else:
self._champion_update_times = 0
@ -450,13 +421,9 @@ class OnlineTrialRunner:
"""
if trial_to_test.result is not None and self._champion_trial.result is not None:
if "ucb" in self._champion_test_policy:
return self._test_lcb_ucb(
self._champion_trial, trial_to_test, self.WARMSTART_NUM
)
return self._test_lcb_ucb(self._champion_trial, trial_to_test, self.WARMSTART_NUM)
elif "avg" in self._champion_test_policy:
return self._test_avg_loss(
self._champion_trial, trial_to_test, self.WARMSTART_NUM
)
return self._test_avg_loss(self._champion_trial, trial_to_test, self.WARMSTART_NUM)
elif "martingale" in self._champion_test_policy:
return self._test_martingale(self._champion_trial, trial_to_test)
else:
@ -474,9 +441,7 @@ class OnlineTrialRunner:
trial.trial_id,
champion_trial.trial_id,
)
logger.info(
"trial %s %s %s", trial.config, trial.result, trial.resource_lease
)
logger.info("trial %s %s %s", trial.config, trial.result, trial.resource_lease)
logger.info(
"trial loss_avg:%s, trial loss_cb %s",
trial.result.loss_avg,
@ -508,13 +473,8 @@ class OnlineTrialRunner:
"""
assert trial.trial_id != champion_trial.trial_id
if trial.result.resource_used >= warmstart_num:
if (
trial.result.loss_ucb
< champion_trial.result.loss_lcb - champion_trial.result.loss_cb
):
logger.info(
"======new champion condition satisfied: using lcb vs ucb====="
)
if trial.result.loss_ucb < champion_trial.result.loss_lcb - champion_trial.result.loss_cb:
logger.info("======new champion condition satisfied: using lcb vs ucb=====")
logger.info(
"new champion trial %s %s %s",
trial.trial_id,
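
Toy numbers for the lcb/ucb champion test above, assuming loss_lcb and loss_ucb are loss_avg -/+ loss_cb: the challenger wins only when its upper bound sits below the champion's lower bound shifted down by one more confidence width:

champion = {"loss_avg": 0.30, "loss_cb": 0.05}
challenger = {"loss_avg": 0.15, "loss_cb": 0.04}

champion_lcb = champion["loss_avg"] - champion["loss_cb"]        # 0.25
challenger_ucb = challenger["loss_avg"] + challenger["loss_cb"]  # 0.19
print(challenger_ucb < champion_lcb - champion["loss_cb"])       # True -> new champion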

View file

@ -126,9 +126,7 @@ class ExperimentAnalysis:
'metric {} for scope {} not in ["all", "last", "avg", '
'"last-5-avg", "last-10-avg"]. '
"If you didn't pass a `metric` parameter to `tune.run()`, "
"you have to pass one when fetching the best trial.".format(
metric, scope
)
"you have to pass one when fetching the best trial.".format(metric, scope)
)
best_trial = None
best_metric_score = None
@ -155,10 +153,7 @@ class ExperimentAnalysis:
best_metric_score = metric_score
best_trial = trial
if not best_trial:
logger.warning(
"Could not find best trial. Did you pass the correct `metric` "
"parameter?"
)
logger.warning("Could not find best trial. Did you pass the correct `metric` " "parameter?")
return best_trial
def get_best_config(
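
The scope options listed in the error message above, applied to one trial's metric history with toy values: "last" takes the final report, "avg" the mean, and "last-5-avg" the mean of the five most recent reports:

import numpy as np

history = [0.50, 0.45, 0.40, 0.38, 0.37, 0.36, 0.35]
scores = {
    "last": history[-1],
    "avg": float(np.mean(history)),
    "last-5-avg": float(np.mean(history[-5:])),
}
print(scores)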

View file

@ -124,9 +124,7 @@ STDERR_FILE = "__stderr_file__"
# Where Tune writes result files by default
DEFAULT_RESULTS_DIR = (
os.environ.get("TEST_TMPDIR")
or os.environ.get("TUNE_RESULT_DIR")
or os.path.expanduser("~/ray_results")
os.environ.get("TEST_TMPDIR") or os.environ.get("TUNE_RESULT_DIR") or os.path.expanduser("~/ray_results")
)
# Meta file about status under each experiment directory, can be

View file

@ -53,13 +53,9 @@ except ImportError:
def __init__(
self,
generator_or_seed: Optional[
Union["np_random_generator", np.random.RandomState, int]
] = None,
generator_or_seed: Optional[Union["np_random_generator", np.random.RandomState, int]] = None,
):
if generator_or_seed is None or isinstance(
generator_or_seed, (np.random.RandomState, np_random_generator)
):
if generator_or_seed is None or isinstance(generator_or_seed, (np.random.RandomState, np_random_generator)):
self._rng = generator_or_seed
elif LEGACY_RNG:
self._rng = np.random.RandomState(generator_or_seed)
@ -85,9 +81,7 @@ except ImportError:
return getattr(self.rng, name)
RandomState = Union[
None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int
]
RandomState = Union[None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int]
class Domain:
@ -112,9 +106,7 @@ class Domain:
raise ValueError(
"You can only choose one sampler for parameter "
"domains. Existing sampler for parameter {}: "
"{}. Tried to add {}".format(
self.__class__.__name__, self.sampler, sampler
)
"{}. Tried to add {}".format(self.__class__.__name__, self.sampler, sampler)
)
self.sampler = sampler
@ -231,9 +223,7 @@ class Float(Domain):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
assert (
0 < domain.upper < float("inf")
), "LogUniform needs a upper bound greater than 0"
assert 0 < domain.upper < float("inf"), "LogUniform needs a upper bound greater than 0"
logmin = np.log(domain.lower) / np.log(self.base)
logmax = np.log(domain.upper) / np.log(self.base)
@ -271,15 +261,9 @@ class Float(Domain):
def uniform(self):
if not self.lower > float("-inf"):
raise ValueError(
"Uniform requires a lower bound. Make sure to set the "
"`lower` parameter of `Float()`."
)
raise ValueError("Uniform requires a lower bound. Make sure to set the " "`lower` parameter of `Float()`.")
if not self.upper < float("inf"):
raise ValueError(
"Uniform requires a upper bound. Make sure to set the "
"`upper` parameter of `Float()`."
)
raise ValueError("Uniform requires a upper bound. Make sure to set the " "`upper` parameter of `Float()`.")
new = copy(self)
new.set_sampler(self._Uniform())
return new
@ -309,20 +293,10 @@ class Float(Domain):
return new
def quantized(self, q: float):
if self.lower > float("-inf") and not isclose(
self.lower / q, round(self.lower / q)
):
raise ValueError(
f"Your lower variable bound {self.lower} is not divisible by "
f"quantization factor {q}."
)
if self.upper < float("inf") and not isclose(
self.upper / q, round(self.upper / q)
):
raise ValueError(
f"Your upper variable bound {self.upper} is not divisible by "
f"quantization factor {q}."
)
if self.lower > float("-inf") and not isclose(self.lower / q, round(self.lower / q)):
raise ValueError(f"Your lower variable bound {self.lower} is not divisible by " f"quantization factor {q}.")
if self.upper < float("inf") and not isclose(self.upper / q, round(self.upper / q)):
raise ValueError(f"Your upper variable bound {self.upper} is not divisible by " f"quantization factor {q}.")
new = copy(self)
new.set_sampler(Quantized(new.get_sampler(), q), allow_override=True)
@ -361,9 +335,7 @@ class Integer(Domain):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
assert (
0 < domain.upper < float("inf")
), "LogUniform needs a upper bound greater than 0"
assert 0 < domain.upper < float("inf"), "LogUniform needs a upper bound greater than 0"
logmin = np.log(domain.lower) / np.log(self.base)
logmax = np.log(domain.upper) / np.log(self.base)
@ -430,9 +402,7 @@ class Categorical(Domain):
random_state = _BackwardsCompatibleNumpyRng(random_state)
# do not use .choice() directly on domain.categories
# as that will coerce them to a single dtype
indices = random_state.choice(
np.arange(0, len(domain.categories)), size=size
)
indices = random_state.choice(np.arange(0, len(domain.categories)), size=size)
items = [domain.categories[index] for index in indices]
return items if len(items) > 1 else domain.cast(items[0])
@ -491,9 +461,7 @@ class Quantized(Sampler):
quantized_domain = copy(domain)
quantized_domain.lower = np.ceil(domain.lower / self.q) * self.q
quantized_domain.upper = np.floor(domain.upper / self.q) * self.q
values = self.sampler.sample(
quantized_domain, spec, size, random_state=random_state
)
values = self.sampler.sample(quantized_domain, spec, size, random_state=random_state)
quantized = np.round(np.divide(values, self.q)) * self.q
if not isinstance(quantized, np.ndarray):
@ -509,11 +477,7 @@ class PolynomialExpansionSet:
allow_self_inter: bool = False,
):
self._init_monomials = init_monomials
self._highest_poly_order = (
highest_poly_order
if highest_poly_order is not None
else len(self._init_monomials)
)
self._highest_poly_order = highest_poly_order if highest_poly_order is not None else len(self._init_monomials)
self._allow_self_inter = allow_self_inter
@property
@ -644,7 +608,5 @@ def qrandn(mean: float, sd: float, q: float):
return Float(None, None).normal(mean, sd).quantized(q)
def polynomial_expansion_set(
init_monomials: set, highest_poly_order: int = None, allow_self_inter: bool = False
):
def polynomial_expansion_set(init_monomials: set, highest_poly_order: int = None, allow_self_inter: bool = False):
return PolynomialExpansionSet(init_monomials, highest_poly_order, allow_self_inter)
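
The quantization logic reformatted above, in isolation: uniform samples are drawn inside ceil/floor-adjusted bounds and snapped to multiples of q (toy bounds assumed):

import numpy as np

q, lower, upper = 0.25, 0.1, 1.9
rng = np.random.default_rng(0)
values = rng.uniform(np.ceil(lower / q) * q, np.floor(upper / q) * q, size=5)
quantized = np.round(np.divide(values, q)) * q
print(quantized)  # every entry is a multiple of 0.25 within [0.25, 1.75]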

View file

@ -27,10 +27,7 @@ class OnlineScheduler(TrialScheduler):
min_paused_resource_trial = None
for trial in trial_runner.get_trials():
# if there is a tie, prefer the earlier added ones
if (
trial.status == Trial.PAUSED
and trial.resource_lease < min_paused_resource
):
if trial.status == Trial.PAUSED and trial.resource_lease < min_paused_resource:
min_paused_resource = trial.resource_lease
min_paused_resource_trial = trial
if min_paused_resource_trial is not None:
@ -122,8 +119,6 @@ class ChaChaScheduler(OnlineSuccessiveDoublingScheduler):
)
logger.debug("top_learners: %s", top_trials)
if trial in top_trials:
logger.debug(
"top runner %s: set from PAUSE to CONTINUE", trial.trial_id
)
logger.debug("top runner %s: set from PAUSE to CONTINUE", trial.trial_id)
return TrialScheduler.CONTINUE
return decision
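
A sketch of the trial-selection loop in OnlineScheduler's first hunk above: resume the PAUSED trial holding the smallest resource lease; the strict '<' keeps the earliest-added trial on ties (toy trials assumed):

trials = [
    {"id": "a", "status": "PAUSED", "resource_lease": 40},
    {"id": "b", "status": "PAUSED", "resource_lease": 10},
    {"id": "c", "status": "RUNNING", "resource_lease": 5},
]
min_lease, chosen = float("inf"), None
for trial in trials:
    if trial["status"] == "PAUSED" and trial["resource_lease"] < min_lease:
        min_lease, chosen = trial["resource_lease"], trial
print(chosen["id"])  # b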

View file

@ -56,9 +56,7 @@ class BlendSearch(Searcher):
max_resource: Optional[float] = None,
reduction_factor: Optional[float] = None,
global_search_alg: Optional[Searcher] = None,
config_constraints: Optional[
List[Tuple[Callable[[dict], float], str, float]]
] = None,
config_constraints: Optional[List[Tuple[Callable[[dict], float], str, float]]] = None,
metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
seed: Optional[int] = 20,
cost_attr: Optional[str] = "auto",
@ -196,9 +194,7 @@ class BlendSearch(Searcher):
self._config_constraints = config_constraints
self._metric_constraints = metric_constraints
if metric_constraints:
assert all(
x[1] in ["<=", ">="] for x in metric_constraints
), "sign of metric constraints must be <= or >=."
assert all(x[1] in ["<=", ">="] for x in metric_constraints), "sign of metric constraints must be <= or >=."
# metric modified by lagrange
metric += self.lagrange
self._cat_hp_cost = cat_hp_cost or {}
@ -232,9 +228,7 @@ class BlendSearch(Searcher):
if experimental:
import optuna as ot
sampler = ot.samplers.TPESampler(
seed=gs_seed, multivariate=True, group=True
)
sampler = ot.samplers.TPESampler(seed=gs_seed, multivariate=True, group=True)
else:
sampler = None
try:
@ -260,11 +254,7 @@ class BlendSearch(Searcher):
else:
self._gs = None
self._experimental = experimental
if (
getattr(self, "__name__", None) == "CFO"
and points_to_evaluate
and len(self._points_to_evaluate) > 1
):
if getattr(self, "__name__", None) == "CFO" and points_to_evaluate and len(self._points_to_evaluate) > 1:
# use the best config in points_to_evaluate as the start point
self._candidate_start_points = {}
self._started_from_low_cost = not low_cost_partial_config
@ -383,9 +373,7 @@ class BlendSearch(Searcher):
if self._metric_constraints:
self._metric_constraint_satisfied = False
self._metric_constraint_penalty = [
self.penalty for _ in self._metric_constraints
]
self._metric_constraint_penalty = [self.penalty for _ in self._metric_constraints]
else:
self._metric_constraint_satisfied = True
self._metric_constraint_penalty = None
@ -424,9 +412,7 @@ class BlendSearch(Searcher):
def is_ls_ever_converged(self):
return self._is_ls_ever_converged
def on_trial_complete(
self, trial_id: str, result: Optional[Dict] = None, error: bool = False
):
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
"""search thread updater and cleaner."""
metric_constraint_satisfied = True
if result and not error and self._metric_constraints:
@ -440,11 +426,7 @@ class BlendSearch(Searcher):
violation = (value - threshold) * sign_op
if violation > 0:
# add penalty term to the metric
objective += (
self._metric_constraint_penalty[i]
* violation
* self._ls.metric_op
)
objective += self._metric_constraint_penalty[i] * violation * self._ls.metric_op
metric_constraint_satisfied = False
if self._metric_constraint_penalty[i] < self.penalty:
self._metric_constraint_penalty[i] += violation
@ -455,9 +437,7 @@ class BlendSearch(Searcher):
self._metric_constraint_satisfied |= metric_constraint_satisfied
thread_id = self._trial_proposed_by.get(trial_id)
if thread_id in self._search_thread_pool:
self._search_thread_pool[thread_id].on_trial_complete(
trial_id, result, error
)
self._search_thread_pool[thread_id].on_trial_complete(trial_id, result, error)
del self._trial_proposed_by[trial_id]
if result:
config = result.get("config", {})
@ -467,9 +447,7 @@ class BlendSearch(Searcher):
config[key[7:]] = value
if self._allow_empty_config and not config:
return
signature = self._ls.config_signature(
config, self._subspace.get(trial_id, {})
)
signature = self._ls.config_signature(config, self._subspace.get(trial_id, {}))
if error: # remove from result cache
del self._result[signature]
else: # add to result cache
@ -489,11 +467,7 @@ class BlendSearch(Searcher):
self._ls_bound_max,
self._subspace.get(trial_id, self._ls.space),
)
if (
self._gs is not None
and self._experimental
and (not self._ls.hierarchical)
):
if self._gs is not None and self._experimental and (not self._ls.hierarchical):
self._gs.add_evaluated_point(flatten_dict(config), objective)
# TODO: recover when supported
# converted = convert_key(config, self._gs.space)
@ -502,17 +476,12 @@ class BlendSearch(Searcher):
elif metric_constraint_satisfied and self._create_condition(result):
# thread creator
thread_id = self._thread_count
self._started_from_given = (
self._candidate_start_points
and trial_id in self._candidate_start_points
)
self._started_from_given = self._candidate_start_points and trial_id in self._candidate_start_points
if self._started_from_given:
del self._candidate_start_points[trial_id]
else:
self._started_from_low_cost = True
self._create_thread(
config, result, self._subspace.get(trial_id, self._ls.space)
)
self._create_thread(config, result, self._subspace.get(trial_id, self._ls.space))
# reset admissible region to ls bounding box
self._gs_admissible_min.update(self._ls_bound_min)
self._gs_admissible_max.update(self._ls_bound_max)
@ -595,9 +564,7 @@ class BlendSearch(Searcher):
"""create thread condition"""
if len(self._search_thread_pool) < 2:
return True
obj_median = np.median(
[thread.obj_best1 for id, thread in self._search_thread_pool.items() if id]
)
obj_median = np.median([thread.obj_best1 for id, thread in self._search_thread_pool.items() if id])
return result[self._ls.metric] * self._ls.metric_op < obj_median
def _clean(self, thread_id: int):
@ -648,10 +615,7 @@ class BlendSearch(Searcher):
best_trial_id = None
obj_best = None
for trial_id, r in self._candidate_start_points.items():
if r and (
best_trial_id is None
or r[self._ls.metric] * self._ls.metric_op < obj_best
):
if r and (best_trial_id is None or r[self._ls.metric] * self._ls.metric_op < obj_best):
best_trial_id = trial_id
obj_best = r[self._ls.metric] * self._ls.metric_op
if best_trial_id:
@ -663,9 +627,7 @@ class BlendSearch(Searcher):
config[key[7:]] = value
self._started_from_given = True
del self._candidate_start_points[best_trial_id]
self._create_thread(
config, result, self._subspace.get(best_trial_id, self._ls.space)
)
self._create_thread(config, result, self._subspace.get(best_trial_id, self._ls.space))
def _expand_admissible_region(self, lower, upper, space):
"""expand the admissible region for the subspace `space`"""
@ -674,9 +636,7 @@ class BlendSearch(Searcher):
if isinstance(ub, list):
choice = space[key].get("_choice_")
if choice:
self._expand_admissible_region(
lower[key][choice], upper[key][choice], space[key]
)
self._expand_admissible_region(lower[key][choice], upper[key][choice], space[key])
elif isinstance(ub, dict):
self._expand_admissible_region(lower[key], ub, space[key])
else:
@ -752,9 +712,7 @@ class BlendSearch(Searcher):
if choice == backup:
# use CFO's init point
init_config = self._ls.init_config
config, space = self._ls.complete_config(
init_config, self._ls_bound_min, self._ls_bound_max
)
config, space = self._ls.complete_config(init_config, self._ls_bound_min, self._ls_bound_max)
self._trial_proposed_by[trial_id] = choice
self._search_thread_pool[choice].running += 1
else:
@ -801,9 +759,7 @@ class BlendSearch(Searcher):
if self._allow_empty_config and not init_config:
assert reward is None, "Empty config can't have reward."
return init_config
config, space = self._ls.complete_config(
init_config, self._ls_bound_min, self._ls_bound_max
)
config, space = self._ls.complete_config(init_config, self._ls_bound_min, self._ls_bound_max)
config_signature = self._ls.config_signature(config, space)
if reward is None:
result = self._result.get(config_signature)
@ -827,9 +783,7 @@ class BlendSearch(Searcher):
return
if self._use_incumbent_result_in_evaluation:
if self._trial_proposed_by[trial_id] > 0:
choice_thread = self._search_thread_pool[
self._trial_proposed_by[trial_id]
]
choice_thread = self._search_thread_pool[self._trial_proposed_by[trial_id]]
config[INCUMBENT_RESULT] = choice_thread.best_result
return config
@ -874,9 +828,7 @@ class BlendSearch(Searcher):
if choice >= 0: # not fallback to rs
result = self._result.get(config_signature)
if result: # finished
self._search_thread_pool[choice].on_trial_complete(
trial_id, result, error=False
)
self._search_thread_pool[choice].on_trial_complete(trial_id, result, error=False)
if choice:
# local search thread
self._clean(choice)
@ -938,9 +890,7 @@ class BlendSearch(Searcher):
backup_thread_id = thread_id
return top_thread_id, backup_thread_id
def _valid(
self, config: Dict, space: Dict, subspace: Dict, lower: Dict, upper: Dict
) -> bool:
def _valid(self, config: Dict, space: Dict, subspace: Dict, lower: Dict, upper: Dict) -> bool:
"""config validator"""
normalized_config = normalize(config, subspace, config, {})
for key, lb in lower.items():
@ -962,10 +912,7 @@ class BlendSearch(Searcher):
valid = self._valid(value, domain, nestedspace, lb, ub)
if not valid:
return False
elif (
value + self._ls.STEPSIZE < lower[key]
or value > upper[key] + self._ls.STEPSIZE
):
elif value + self._ls.STEPSIZE < lower[key] or value > upper[key] + self._ls.STEPSIZE:
return False
return True
@ -1033,9 +980,7 @@ class BlendSearchTuner(BlendSearch, NNITuner):
result = {
"config": parameters,
self._metric: extract_scalar_reward(value),
self.cost_attr: 1
if isinstance(value, float)
else value.get(self.cost_attr, value.get("sequence", 1))
self.cost_attr: 1 if isinstance(value, float) else value.get(self.cost_attr, value.get("sequence", 1))
# if nni does not report training cost,
# using sequence as an approximation.
# if no sequence, using a constant 1
@ -1145,11 +1090,7 @@ class CFO(BlendSearchTuner):
if self._candidate_start_points and self._thread_count == 1:
# result needs to match or exceed the best candidate start point
obj_best = min(
(
self._ls.metric_op * r[self._ls.metric]
for r in self._candidate_start_points.values()
if r
),
(self._ls.metric_op * r[self._ls.metric] for r in self._candidate_start_points.values() if r),
default=-np.inf,
)
@ -1157,9 +1098,7 @@ class CFO(BlendSearchTuner):
else:
return True
def on_trial_complete(
self, trial_id: str, result: Optional[Dict] = None, error: bool = False
):
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
super().on_trial_complete(trial_id, result, error)
if self._candidate_start_points and trial_id in self._candidate_start_points:
# the trial is a candidate start point
@ -1177,9 +1116,7 @@ class RandomSearch(CFO):
config, _ = self._ls.complete_config({})
return config
def on_trial_complete(
self, trial_id: str, result: Optional[Dict] = None, error: bool = False
):
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
return
def on_trial_result(self, trial_id: str, result: Dict):
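
The thread-creation condition reformatted in _create_condition above, with toy objectives; minimization is assumed, and thread id 0 (the global-search thread) is excluded by the `if id` filter:

import numpy as np

thread_best = {0: 0.20, 1: 0.30, 2: 0.50, 3: 0.40}  # thread id -> best objective
new_result, metric_op = 0.35, 1.0

obj_median = np.median([obj for tid, obj in thread_best.items() if tid])
print(new_result * metric_op < obj_median)  # True: 0.35 < 0.40, so spawn a thread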

View file

@ -124,21 +124,13 @@ class FLOW2(Searcher):
self.lexico_objectives = lexico_objectives
if self.lexico_objectives is not None:
if "modes" not in self.lexico_objectives.keys():
self.lexico_objectives["modes"] = ["min"] * len(
self.lexico_objectives["metrics"]
)
for t_metric, t_mode in zip(
self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
):
self.lexico_objectives["modes"] = ["min"] * len(self.lexico_objectives["metrics"])
for t_metric, t_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]):
if t_metric not in self.lexico_objectives["tolerances"].keys():
self.lexico_objectives["tolerances"][t_metric] = 0
if t_metric not in self.lexico_objectives["targets"].keys():
self.lexico_objectives["targets"][t_metric] = (
-float("inf") if t_mode == "min" else float("inf")
)
self.resource_multiple_factor = (
resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
)
self.lexico_objectives["targets"][t_metric] = -float("inf") if t_mode == "min" else float("inf")
self.resource_multiple_factor = resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
self.cost_attr = cost_attr
self.max_resource = max_resource
self._resource = None
@ -166,13 +158,9 @@ class FLOW2(Searcher):
q = sampler.q
sampler = sampler.get_sampler()
if str(sampler) == "Uniform":
self._step_lb = min(
self._step_lb, q / (domain.upper - domain.lower + 1)
)
self._step_lb = min(self._step_lb, q / (domain.upper - domain.lower + 1))
elif isinstance(domain, sample.Integer) and str(sampler) == "Uniform":
self._step_lb = min(
self._step_lb, 1.0 / (domain.upper - domain.lower)
)
self._step_lb = min(self._step_lb, 1.0 / (domain.upper - domain.lower))
if isinstance(domain, sample.Categorical):
if not domain.ordered:
self._unordered_cat_hp[key] = len(domain.categories)
@ -186,11 +174,7 @@ class FLOW2(Searcher):
if not hier:
self._space_keys = sorted(self._tunable_keys)
self.hierarchical = hier
if (
self.resource_attr
and self.resource_attr not in self._space
and self.max_resource
):
if self.resource_attr and self.resource_attr not in self._space and self.max_resource:
self.min_resource = self.min_resource or self._min_resource()
self._resource = self._round(self.min_resource)
if not hier:
@ -244,14 +228,12 @@ class FLOW2(Searcher):
if str(sampler_inner) == "LogUniform":
step_lb = min(
step_lb,
np.log(1.0 + q / self.best_config[key])
/ np.log(domain.upper / domain.lower),
np.log(1.0 + q / self.best_config[key]) / np.log(domain.upper / domain.lower),
)
elif isinstance(domain, sample.Integer) and str(sampler) == "LogUniform":
step_lb = min(
step_lb,
np.log(1.0 + 1.0 / self.best_config[key])
/ np.log((domain.upper - 1) / domain.lower),
np.log(1.0 + 1.0 / self.best_config[key]) / np.log((domain.upper - 1) / domain.lower),
)
if np.isinf(step_lb):
step_lb = self.STEP_LOWER_BOUND
@ -288,18 +270,14 @@ class FLOW2(Searcher):
"""
disturb = self._reset_times and partial_config == self.init_config
# if not the first time to complete init_config, use random gaussian
config, space = complete_config(
partial_config, self.space, self, disturb, lower, upper
)
config, space = complete_config(partial_config, self.space, self, disturb, lower, upper)
if partial_config == self.init_config:
self._reset_times += 1
if self._resource:
config[self.resource_attr] = self.min_resource
return config, space
def create(
self, init_config: Dict, obj: float, cost: float, space: Dict
) -> Searcher:
def create(self, init_config: Dict, obj: float, cost: float, space: Dict) -> Searcher:
# space is the subspace where the init_config is located
flow2 = self.__class__(
init_config,
@ -318,12 +296,7 @@ class FLOW2(Searcher):
flow2.best_obj = {}
for k, v in obj.items():
flow2.best_obj[k] = (
-v
if self.lexico_objectives["modes"][
self.lexico_objectives["metrics"].index(k)
]
== "max"
else v
-v if self.lexico_objectives["modes"][self.lexico_objectives["metrics"].index(k)] == "max" else v
)
else:
flow2.best_obj = obj * self.metric_op # minimize internally
@ -333,15 +306,11 @@ class FLOW2(Searcher):
def normalize(self, config, recursive=False) -> Dict:
"""normalize each dimension in config to [0,1]."""
return normalize(
config, self._space, self.best_config, self.incumbent, recursive
)
return normalize(config, self._space, self.best_config, self.incumbent, recursive)
def denormalize(self, config):
"""denormalize each dimension in config from [0,1]."""
return denormalize(
config, self._space, self.best_config, self.incumbent, self._random
)
return denormalize(config, self._space, self.best_config, self.incumbent, self._random)
def set_search_properties(
self,
@ -374,20 +343,13 @@ class FLOW2(Searcher):
feasible_value = k_values.take(feasible_index)
self._f_best[k_metric] = np.min(feasible_value)
if not isinstance(self.lexico_objectives["tolerances"][k_metric], str):
tolerance_bound = (
self._f_best[k_metric]
+ self.lexico_objectives["tolerances"][k_metric]
)
tolerance_bound = self._f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
else:
assert (
self.lexico_objectives["tolerances"][k_metric][-1] == "%"
), "String tolerance of {} should use %% as the suffix".format(k_metric)
tolerance_bound = self._f_best[k_metric] * (
1
+ 0.01
* float(
self.lexico_objectives["tolerances"][k_metric].replace("%", "")
)
1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))
)
feasible_index_filter = np.where(
feasible_value
@ -409,33 +371,20 @@ class FLOW2(Searcher):
for k in self.lexico_objectives["metrics"]:
self._histories[k].append(result[k])
self.update_fbest()
for k_metric, k_mode in zip(
self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
):
for k_metric, k_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]):
k_target = (
self.lexico_objectives["targets"][k_metric]
if k_mode == "min"
else -self.lexico_objectives["targets"][k_metric]
)
if not isinstance(self.lexico_objectives["tolerances"][k_metric], str):
tolerance_bound = (
self._f_best[k_metric]
+ self.lexico_objectives["tolerances"][k_metric]
)
tolerance_bound = self._f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
else:
assert (
self.lexico_objectives["tolerances"][k_metric][-1] == "%"
), "String tolerance of {} should use %% as the suffix".format(
k_metric
)
), "String tolerance of {} should use %% as the suffix".format(k_metric)
tolerance_bound = self._f_best[k_metric] * (
1
+ 0.01
* float(
self.lexico_objectives["tolerances"][k_metric].replace(
"%", ""
)
)
1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))
)
if (result[k_metric] < max(tolerance_bound, k_target)) and (
self.best_obj[k_metric]
@ -457,9 +406,7 @@ class FLOW2(Searcher):
else:
return False
def on_trial_complete(
self, trial_id: str, result: Optional[Dict] = None, error: bool = False
):
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
"""
Compare with incumbent.
If better, move, reset num_complete and num_proposed.
@ -512,21 +459,12 @@ class FLOW2(Searcher):
proposed_by = self._proposed_by.get(trial_id)
if proposed_by == self.incumbent:
self._num_complete4incumbent += 1
cost = (
result.get(self.cost_attr, 1)
if result
else self._trial_cost.get(trial_id)
)
cost = result.get(self.cost_attr, 1) if result else self._trial_cost.get(trial_id)
if cost:
self._cost_complete4incumbent += cost
if (
self._num_complete4incumbent >= 2 * self.dim
and self._num_allowed4incumbent == 0
):
if self._num_complete4incumbent >= 2 * self.dim and self._num_allowed4incumbent == 0:
self._num_allowed4incumbent = 2
if self._num_complete4incumbent == self.dir and (
not self._resource or self._resource == self.max_resource
):
if self._num_complete4incumbent == self.dir and (not self._resource or self._resource == self.max_resource):
self._num_complete4incumbent -= 2
self._num_allowed4incumbent = max(self._num_allowed4incumbent, 2)
@ -593,10 +531,7 @@ class FLOW2(Searcher):
and self.cost_incumbent
and self._resource
and self._resource < self.max_resource
and (
self._cost_complete4incumbent
>= self.cost_incumbent * self.resource_multiple_factor
)
and (self._cost_complete4incumbent >= self.cost_incumbent * self.resource_multiple_factor)
):
return self._increase_resource(trial_id)
self._num_allowed4incumbent -= 1
@ -608,9 +543,7 @@ class FLOW2(Searcher):
self._direction_tried = None
else:
# propose a new direction
self._direction_tried = (
self.rand_vector_unit_sphere(self.dim, self._trunc) * self.step
)
self._direction_tried = self.rand_vector_unit_sphere(self.dim, self._trunc) * self.step
for i, key in enumerate(self._tunable_keys):
move[key] += self._direction_tried[i]
self._project(move)
@ -622,25 +555,17 @@ class FLOW2(Searcher):
if self._init_phase:
if self._direction_tried is None:
if self._same:
same = not any(
key not in best_config or value != best_config[key]
for key, value in config.items()
)
same = not any(key not in best_config or value != best_config[key] for key, value in config.items())
if same:
# increase step size
self.step += self.STEPSIZE
self.step = min(self.step, self.step_ub)
else:
same = not any(
key not in best_config or value != best_config[key]
for key, value in config.items()
)
same = not any(key not in best_config or value != best_config[key] for key, value in config.items())
self._same = same
if self._num_proposedby_incumbent == self.dir and (
not self._resource or self._resource == self.max_resource
):
if self._num_proposedby_incumbent == self.dir and (not self._resource or self._resource == self.max_resource):
# check stuck condition if using max resource
self._num_proposedby_incumbent -= 2
self._init_phase = False
@ -714,9 +639,7 @@ class FLOW2(Searcher):
# key must be in space
domain = space[key]
if self.hierarchical and not (
domain is None
or type(domain) in (str, int, float)
or isinstance(domain, sample.Domain)
domain is None or type(domain) in (str, int, float) or isinstance(domain, sample.Domain)
):
# not domain or hashable
# get rid of list type for hierarchical search space.
@ -746,10 +669,5 @@ class FLOW2(Searcher):
# unordered cat choice is hard to reach by chance
if config1[key] != config2.get(key):
return False
delta = np.array(
[
incumbent1[key] - incumbent2.get(key, np.inf)
for key in self._tunable_keys
]
)
delta = np.array([incumbent1[key] - incumbent2.get(key, np.inf) for key in self._tunable_keys])
return np.linalg.norm(delta) <= self.step
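
FLOW2's move proposal above, in isolation: draw a random direction on the unit sphere (reimplemented here by normalizing a Gaussian draw, a standard construction), scale it by the step size, and add it to the incumbent normalized config:

import numpy as np

rng = np.random.default_rng(0)
dim, step = 3, 0.1
direction = rng.normal(size=dim)
direction /= np.linalg.norm(direction)  # a point on the unit sphere
incumbent = np.array([0.5, 0.5, 0.5])   # incumbent config, normalized to [0, 1]
proposal = incumbent + direction * step
print(proposal)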

View file

@ -128,9 +128,7 @@ class ChampionFrontierSearcher(BaseSearcher):
self._challenger_list = []
# initialize the search in set_search_properties
self.set_search_properties(
setting={self.CHAMPION_TRIAL_NAME: None}, init_call=True
)
self.set_search_properties(setting={self.CHAMPION_TRIAL_NAME: None}, init_call=True)
logger.debug("using random seed %s in config oracle", self._seed)
def set_search_properties(
@ -202,16 +200,12 @@ class ChampionFrontierSearcher(BaseSearcher):
config_domain = self._space[k]
if isinstance(config_domain, PolynomialExpansionSet):
# get candidate configs for hyperparameters of the PolynomialExpansionSet type
partial_new_configs = self._generate_independent_hp_configs(
k, v, config_domain
)
partial_new_configs = self._generate_independent_hp_configs(k, v, config_domain)
if partial_new_configs:
hyperparameter_config_groups.append(partial_new_configs)
# does not have searcher_trial_ids
searcher_trial_ids_groups.append([])
elif isinstance(config_domain, Float) or isinstance(
config_domain, Categorical
):
elif isinstance(config_domain, Float) or isinstance(config_domain, Categorical):
# otherwise we need to deal with them in group
nonpoly_config[k] = v
if k not in self._space_of_nonpoly_hp:
@ -229,29 +223,17 @@ class ChampionFrontierSearcher(BaseSearcher):
metric=self.CFO_SEARCHER_METRIC_NAME,
)
# initialize the search in set_search_properties
self._searcher_for_nonpoly_hp[
seed_config_trial_id
].set_search_properties(
self._searcher_for_nonpoly_hp[seed_config_trial_id].set_search_properties(
setting={"metric_target": self.CFO_SEARCHER_LARGE_LOSS}
)
# We need to call this for once, such that the seed config in points_to_evaluate will be called
# to be tried
self._searcher_for_nonpoly_hp[seed_config_trial_id].suggest(
seed_config_searcher_trial_id
)
self._searcher_for_nonpoly_hp[seed_config_trial_id].suggest(seed_config_searcher_trial_id)
# assuming minimization
if (
self._searcher_for_nonpoly_hp[seed_config_trial_id].metric_target
is None
):
if self._searcher_for_nonpoly_hp[seed_config_trial_id].metric_target is None:
pseudo_loss = self.CFO_SEARCHER_LARGE_LOSS
else:
pseudo_loss = (
self._searcher_for_nonpoly_hp[
seed_config_trial_id
].metric_target
* 0.95
)
pseudo_loss = self._searcher_for_nonpoly_hp[seed_config_trial_id].metric_target * 0.95
pseudo_result_to_report = {}
for k, v in nonpoly_config.items():
pseudo_result_to_report["config/" + str(k)] = v
@ -264,14 +246,10 @@ class ChampionFrontierSearcher(BaseSearcher):
# suggest multiple times
new_searcher_trial_id = Trial.generate_id()
new_searcher_trial_ids.append(new_searcher_trial_id)
suggestion = self._searcher_for_nonpoly_hp[
seed_config_trial_id
].suggest(new_searcher_trial_id)
suggestion = self._searcher_for_nonpoly_hp[seed_config_trial_id].suggest(new_searcher_trial_id)
if suggestion is not None:
partial_new_nonpoly_configs.append(suggestion)
logger.info(
"partial_new_nonpoly_configs %s", partial_new_nonpoly_configs
)
logger.info("partial_new_nonpoly_configs %s", partial_new_nonpoly_configs)
else:
raise NotImplementedError
if partial_new_nonpoly_configs:
@ -298,20 +276,14 @@ class ChampionFrontierSearcher(BaseSearcher):
new_searcher_trial_id = searcher_trial_ids_groups[i][j]
else:
new_searcher_trial_id = None
new_trial = self._create_trial_from_config(
new_seed_config, new_searcher_trial_id
)
new_trial = self._create_trial_from_config(new_seed_config, new_searcher_trial_id)
new_trials.append(new_trial)
logger.info("new_configs %s", [t.trial_id for t in new_trials])
return new_trials
def _generate_independent_hp_configs(
self, hp_name, current_config_value, config_domain
) -> List:
def _generate_independent_hp_configs(self, hp_name, current_config_value, config_domain) -> List:
if isinstance(config_domain, PolynomialExpansionSet):
seed_interactions = list(current_config_value) + list(
config_domain.init_monomials
)
seed_interactions = list(current_config_value) + list(config_domain.init_monomials)
logger.info(
"**Important** Seed namespaces (singletons and interactions): %s",
seed_interactions,
@ -340,13 +312,7 @@ class ChampionFrontierSearcher(BaseSearcher):
champion_all_combinations = self._generate_all_comb(
seed_interactions, order, allow_self_inter, highest_poly_order
)
space = sorted(
list(
itertools.combinations(
champion_all_combinations, interaction_num_to_add
)
)
)
space = sorted(list(itertools.combinations(champion_all_combinations, interaction_num_to_add)))
self._random_state.shuffle(space)
candidate_configs = [set(seed_interactions) | set(item) for item in space]
final_candidate_configs = []
@ -413,15 +379,10 @@ class ChampionFrontierSearcher(BaseSearcher):
all_interactions_no_self_inter = []
for s in all_interactions:
s_no_inter = strip_self_inter(s)
if (
len(s_no_inter) > 1
and s_no_inter not in all_interactions_no_self_inter
):
if len(s_no_inter) > 1 and s_no_inter not in all_interactions_no_self_inter:
all_interactions_no_self_inter.append(s_no_inter)
all_interactions = all_interactions_no_self_inter
if highest_poly_order is not None:
all_interactions = [
c for c in all_interactions if len(c) <= highest_poly_order
]
all_interactions = [c for c in all_interactions if len(c) <= highest_poly_order]
logger.info("all_combinations %s", all_interactions)
return all_interactions
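A toy illustration of the combination expansion reformatted above, using made-up monomial names rather than FLAML's real search space:

import itertools

seed_interactions = ["a", "b", "c"]
champion_all_combinations = ["".join(c) for c in itertools.combinations(seed_interactions, 2)]
# ['ab', 'ac', 'bc']
space = sorted(itertools.combinations(champion_all_combinations, 1))  # interaction_num_to_add = 1
candidate_configs = [set(seed_interactions) | set(item) for item in space]
print(candidate_configs)  # each candidate adds one pairwise interaction to the seed set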

View file

@ -38,14 +38,10 @@ class SearchThread:
self._is_ls = isinstance(search_alg, FLOW2)
self._mode = mode
self._metric_op = 1 if mode == "min" else -1
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = getattr(
search_alg, "cost_incumbent", 0
)
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = getattr(search_alg, "cost_incumbent", 0)
self._eps = eps
self.cost_best2 = 0
self.obj_best1 = self.obj_best2 = getattr(
search_alg, "best_obj", np.inf
) # inherently minimize
self.obj_best1 = self.obj_best2 = getattr(search_alg, "best_obj", np.inf) # inherently minimize
self.best_result = None
# eci: estimated cost for improvement
self.eci = self.cost_best
@ -55,11 +51,7 @@ class SearchThread:
self.cost_attr = cost_attr
if search_alg:
self.space = self._space = search_alg.space # unflattened space
if (
self.space
and not isinstance(search_alg, FLOW2)
and isinstance(search_alg._space, dict)
):
if self.space and not isinstance(search_alg, FLOW2) and isinstance(search_alg._space, dict):
# remember const config
self._const = add_cost_to_space(self.space, {}, {})
@ -76,10 +68,7 @@ class SearchThread:
# define by run
config, self.space = unflatten_hierarchical(config, self._space)
except FloatingPointError:
logger.warning(
"The global search method raises FloatingPointError. "
"Ignoring for this iteration."
)
logger.warning("The global search method raises FloatingPointError. " "Ignoring for this iteration.")
config = None
if config is not None:
self.running += 1
@ -94,9 +83,7 @@ class SearchThread:
best_obj = metric_target * self._metric_op
if not self.speed:
self.speed = max_speed
self.eci = max(
self.cost_total - self.cost_best1, self.cost_best1 - self.cost_best2
)
self.eci = max(self.cost_total - self.cost_best1, self.cost_best1 - self.cost_best2)
if self.obj_best1 > best_obj and self.speed > 0:
self.eci = max(self.eci, 2 * (self.obj_best1 - best_obj) / self.speed)
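A worked check of the eci (estimated cost for improvement) update above, with illustrative numbers:

cost_total, cost_best1, cost_best2 = 10.0, 7.0, 4.0
obj_best1, best_obj, speed = 0.5, 0.25, 0.1
eci = max(cost_total - cost_best1, cost_best1 - cost_best2)  # max(3.0, 3.0) = 3.0
if obj_best1 > best_obj and speed > 0:
    eci = max(eci, 2 * (obj_best1 - best_obj) / speed)  # max(3.0, 5.0) = 5.0
print(eci)  # 5.0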
@ -105,31 +92,23 @@ class SearchThread:
if self.obj_best2 > self.obj_best1:
# discount the speed if there are unfinished trials
self.speed = (
(self.obj_best2 - self.obj_best1)
/ self.running
/ (max(self.cost_total - self.cost_best2, self._eps))
(self.obj_best2 - self.obj_best1) / self.running / (max(self.cost_total - self.cost_best2, self._eps))
)
else:
self.speed = 0
def on_trial_complete(
self, trial_id: str, result: Optional[Dict] = None, error: bool = False
):
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
"""Update the statistics of the thread."""
if not self._search_alg:
return
if not hasattr(self._search_alg, "_ot_trials") or (
not error and trial_id in self._search_alg._ot_trials
):
if not hasattr(self._search_alg, "_ot_trials") or (not error and trial_id in self._search_alg._ot_trials):
# optuna doesn't handle error
if self._is_ls or not self._init_config:
try:
self._search_alg.on_trial_complete(trial_id, result, error)
except RuntimeError as e:
# rs is used in place of optuna sometimes
if not str(e).endswith(
"has already finished and can not be updated."
):
if not str(e).endswith("has already finished and can not be updated."):
raise e
else:
# init config is not proposed by self._search_alg
@ -138,9 +117,7 @@ class SearchThread:
if result:
self.cost_last = result.get(self.cost_attr, 1)
self.cost_total += self.cost_last
if self._search_alg.metric in result and (
getattr(self._search_alg, "lexico_objectives", None) is None
):
if self._search_alg.metric in result and (getattr(self._search_alg, "lexico_objectives", None) is None):
# TODO: Improve this behavior. When lexico_objectives is provided to CFO,
# related variables are not callable.
obj = result[self._search_alg.metric] * self._metric_op
@ -162,9 +139,7 @@ class SearchThread:
# TODO update the statistics of the thread with partial result?
if not self._search_alg:
return
if not hasattr(self._search_alg, "_ot_trials") or (
trial_id in self._search_alg._ot_trials
):
if not hasattr(self._search_alg, "_ot_trials") or (trial_id in self._search_alg._ot_trials):
try:
self._search_alg.on_trial_result(trial_id, result)
except RuntimeError as e:

View file

@ -112,22 +112,16 @@ class Searcher:
# Early return to avoid assertions
return
assert isinstance(
metric, type(mode)
), "metric and mode must be of the same type"
assert isinstance(metric, type(mode)), "metric and mode must be of the same type"
if isinstance(mode, str):
assert mode in ["min", "max"], "if `mode` is a str must be 'min' or 'max'!"
elif isinstance(mode, list):
assert len(mode) == len(metric), "Metric and mode must be the same length"
assert all(
mod in ["min", "max", "obs"] for mod in mode
), "All of mode must be 'min' or 'max' or 'obs'!"
assert all(mod in ["min", "max", "obs"] for mod in mode), "All of mode must be 'min' or 'max' or 'obs'!"
else:
raise ValueError("Mode must either be a list or string")
def set_search_properties(
self, metric: Optional[str], mode: Optional[str], config: Dict
) -> bool:
def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool:
"""Pass search properties to searcher.
This method acts as an alternative to instantiating search algorithms
with their own specific search spaces. Instead they can accept a
@ -193,18 +187,13 @@ class ConcurrencyLimiter(Searcher):
self.batch = batch
self.live_trials = set()
self.cached_results = {}
super(ConcurrencyLimiter, self).__init__(
metric=self.searcher.metric, mode=self.searcher.mode
)
super(ConcurrencyLimiter, self).__init__(metric=self.searcher.metric, mode=self.searcher.mode)
def suggest(self, trial_id: str) -> Optional[Dict]:
assert (
trial_id not in self.live_trials
), f"Trial ID {trial_id} must be unique: already found in set."
assert trial_id not in self.live_trials, f"Trial ID {trial_id} must be unique: already found in set."
if len(self.live_trials) >= self.max_concurrent:
logger.debug(
f"Not providing a suggestion for {trial_id} due to "
"concurrency limit: %s/%s.",
f"Not providing a suggestion for {trial_id} due to " "concurrency limit: %s/%s.",
len(self.live_trials),
self.max_concurrent,
)
@ -215,9 +204,7 @@ class ConcurrencyLimiter(Searcher):
self.live_trials.add(trial_id)
return suggestion
def on_trial_complete(
self, trial_id: str, result: Optional[Dict] = None, error: bool = False
):
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
if trial_id not in self.live_trials:
return
elif self.batch:
@ -226,9 +213,7 @@ class ConcurrencyLimiter(Searcher):
# Update the underlying searcher once the
# full batch is completed.
for trial_id, (result, error) in self.cached_results.items():
self.searcher.on_trial_complete(
trial_id, result=result, error=error
)
self.searcher.on_trial_complete(trial_id, result=result, error=error)
self.live_trials.remove(trial_id)
self.cached_results = {}
else:
@ -257,9 +242,7 @@ class ConcurrencyLimiter(Searcher):
def on_unpause(self, trial_id: str):
self.searcher.on_unpause(trial_id)
def set_search_properties(
self, metric: Optional[str], mode: Optional[str], config: Dict
) -> bool:
def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool:
return self.searcher.set_search_properties(metric, mode, config)
@ -301,17 +284,10 @@ def validate_warmstart(
"""
if points_to_evaluate:
if not isinstance(points_to_evaluate, list):
raise TypeError(
"points_to_evaluate expected to be a list, got {}.".format(
type(points_to_evaluate)
)
)
raise TypeError("points_to_evaluate expected to be a list, got {}.".format(type(points_to_evaluate)))
for point in points_to_evaluate:
if not isinstance(point, (dict, list)):
raise TypeError(
f"points_to_evaluate expected to include list or dict, "
f"got {point}."
)
raise TypeError(f"points_to_evaluate expected to include list or dict, " f"got {point}.")
if validate_point_name_lengths and (not len(point) == len(parameter_names)):
raise ValueError(
@ -322,11 +298,7 @@ def validate_warmstart(
if points_to_evaluate and evaluated_rewards:
if not isinstance(evaluated_rewards, list):
raise TypeError(
"evaluated_rewards expected to be a list, got {}.".format(
type(evaluated_rewards)
)
)
raise TypeError("evaluated_rewards expected to be a list, got {}.".format(type(evaluated_rewards)))
if not len(evaluated_rewards) == len(points_to_evaluate):
raise ValueError(
"Dim of evaluated_rewards {}".format(evaluated_rewards)
@ -461,16 +433,12 @@ class OptunaSearch(Searcher):
evaluated_rewards: Optional[List] = None,
):
assert ot is not None, "Optuna must be installed! Run `pip install optuna`."
super(OptunaSearch, self).__init__(
metric=metric, mode=mode, max_concurrent=None, use_early_stopped_trials=None
)
super(OptunaSearch, self).__init__(metric=metric, mode=mode, max_concurrent=None, use_early_stopped_trials=None)
if isinstance(space, dict) and space:
resolved_vars, domain_vars, grid_vars = parse_spec_vars(space)
if domain_vars or grid_vars:
logger.warning(
UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self).__name__)
)
logger.warning(UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self).__name__))
space = self.convert_search_space(space)
else:
# Flatten to support nested dicts
@ -493,8 +461,7 @@ class OptunaSearch(Searcher):
self._sampler = sampler or ot.samplers.TPESampler(seed=seed)
assert isinstance(self._sampler, BaseSampler), (
"You can only pass an instance of `optuna.samplers.BaseSampler` "
"as a sampler to `OptunaSearcher`."
"You can only pass an instance of `optuna.samplers.BaseSampler` " "as a sampler to `OptunaSearcher`."
)
self._ot_trials = {}
@ -527,17 +494,13 @@ class OptunaSearch(Searcher):
validate_point_name_lengths=not callable(self._space),
)
if self._evaluated_rewards:
for point, reward in zip(
self._points_to_evaluate, self._evaluated_rewards
):
for point, reward in zip(self._points_to_evaluate, self._evaluated_rewards):
self.add_evaluated_point(point, reward)
else:
for point in self._points_to_evaluate:
self._ot_study.enqueue_trial(point)
def set_search_properties(
self, metric: Optional[str], mode: Optional[str], config: Dict
) -> bool:
def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool:
if self._space:
return False
space = self.convert_search_space(config)
@ -585,16 +548,10 @@ class OptunaSearch(Searcher):
def suggest(self, trial_id: str) -> Optional[Dict]:
if not self._space:
raise RuntimeError(
UNDEFINED_SEARCH_SPACE.format(
cls=self.__class__.__name__, space="space"
)
)
raise RuntimeError(UNDEFINED_SEARCH_SPACE.format(cls=self.__class__.__name__, space="space"))
if not self._metric or not self._mode:
raise RuntimeError(
UNDEFINED_METRIC_MODE.format(
cls=self.__class__.__name__, metric=self._metric, mode=self._mode
)
UNDEFINED_METRIC_MODE.format(cls=self.__class__.__name__, metric=self._metric, mode=self._mode)
)
if isinstance(self._space, list):
@ -607,9 +564,7 @@ class OptunaSearch(Searcher):
# getattr will fetch the trial.suggest_ function on Optuna trials
params = {
args[0]
if len(args) > 0
else kwargs["name"]: getattr(ot_trial, fn)(*args, **kwargs)
args[0] if len(args) > 0 else kwargs["name"]: getattr(ot_trial, fn)(*args, **kwargs)
for (fn, args, kwargs) in self._space
}
elif callable(self._space):
@ -622,9 +577,7 @@ class OptunaSearch(Searcher):
else:
# Use Optuna ask interface (since version 2.6.0)
if trial_id not in self._ot_trials:
self._ot_trials[trial_id] = self._ot_study.ask(
fixed_distributions=self._space
)
self._ot_trials[trial_id] = self._ot_study.ask(fixed_distributions=self._space)
ot_trial = self._ot_trials[trial_id]
params = ot_trial.params
@ -636,9 +589,7 @@ class OptunaSearch(Searcher):
ot_trial = self._ot_trials[trial_id]
ot_trial.report(metric, step)
def on_trial_complete(
self, trial_id: str, result: Optional[Dict] = None, error: bool = False
):
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
ot_trial = self._ot_trials[trial_id]
val = result.get(self.metric, None) if result else None
@ -662,16 +613,10 @@ class OptunaSearch(Searcher):
intermediate_values: Optional[List[float]] = None,
):
if not self._space:
raise RuntimeError(
UNDEFINED_SEARCH_SPACE.format(
cls=self.__class__.__name__, space="space"
)
)
raise RuntimeError(UNDEFINED_SEARCH_SPACE.format(cls=self.__class__.__name__, space="space"))
if not self._metric or not self._mode:
raise RuntimeError(
UNDEFINED_METRIC_MODE.format(
cls=self.__class__.__name__, metric=self._metric, mode=self._mode
)
UNDEFINED_METRIC_MODE.format(cls=self.__class__.__name__, metric=self._metric, mode=self._mode)
)
ot_trial_state = OptunaTrialState.COMPLETE
@ -681,9 +626,7 @@ class OptunaSearch(Searcher):
ot_trial_state = OptunaTrialState.PRUNED
if intermediate_values:
intermediate_values_dict = {
i: value for i, value in enumerate(intermediate_values)
}
intermediate_values_dict = {i: value for i, value in enumerate(intermediate_values)}
else:
intermediate_values_dict = None
@ -736,10 +679,7 @@ class OptunaSearch(Searcher):
return {}
if grid_vars:
raise ValueError(
"Grid search parameters cannot be automatically converted "
"to an Optuna search space."
)
raise ValueError("Grid search parameters cannot be automatically converted " "to an Optuna search space.")
# Flatten and resolve again after checking for grid search.
spec = flatten_dict(spec, prevent_delimiter=True)
@ -766,18 +706,12 @@ class OptunaSearch(Searcher):
"Optuna does not support both quantization and "
"sampling from LogUniform. Dropped quantization."
)
return ot.distributions.LogUniformDistribution(
domain.lower, domain.upper
)
return ot.distributions.LogUniformDistribution(domain.lower, domain.upper)
elif isinstance(sampler, Uniform):
if quantize:
return ot.distributions.DiscreteUniformDistribution(
domain.lower, domain.upper, quantize
)
return ot.distributions.UniformDistribution(
domain.lower, domain.upper
)
return ot.distributions.DiscreteUniformDistribution(domain.lower, domain.upper, quantize)
return ot.distributions.UniformDistribution(domain.lower, domain.upper)
elif isinstance(domain, Integer):
if isinstance(sampler, LogUniform):
@ -798,9 +732,7 @@ class OptunaSearch(Searcher):
raise ValueError(
"Optuna search does not support parameters of type "
"`{}` with samplers of type `{}`".format(
type(domain).__name__, type(domain.sampler).__name__
)
"`{}` with samplers of type `{}`".format(type(domain).__name__, type(domain.sampler).__name__)
)
# Parameter name is e.g. "a/b/c" for nested dicts
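A hedged sketch of the (fn, args, kwargs) list-space format consumed by the dict comprehension above; the parameter names and bounds are made up, and this drives plain Optuna rather than the searcher itself:

import optuna

space = [
    ("suggest_float", ("lr",), {"low": 1e-4, "high": 1e-1, "log": True}),
    ("suggest_int", (), {"name": "depth", "low": 2, "high": 8}),
]

def objective(trial):
    params = {
        args[0] if len(args) > 0 else kwargs["name"]: getattr(trial, fn)(*args, **kwargs)
        for (fn, args, kwargs) in space
    }
    return params["lr"] * params["depth"]  # dummy loss

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=3)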

View file

@ -143,9 +143,7 @@ def _generate_variants(
for resolved_spec in grid_search:
if not constant_grid_search or not all_resolved:
# In this path, we sample the remaining random variables
_, resolved_vars = _resolve_domain_vars(
resolved_spec, to_resolve, random_state=random_state
)
_, resolved_vars = _resolve_domain_vars(resolved_spec, to_resolve, random_state=random_state)
for resolved, spec in _generate_variants(
resolved_spec,
@ -155,11 +153,7 @@ def _generate_variants(
for path, value in grid_vars:
resolved_vars[path] = _get_value(spec, path)
for k, v in resolved.items():
if (
k in resolved_vars
and v != resolved_vars[k]
and _is_resolved(resolved_vars[k])
):
if k in resolved_vars and v != resolved_vars[k] and _is_resolved(resolved_vars[k]):
raise ValueError(
"The variable `{}` could not be unambiguously "
"resolved to a single value. Consider simplifying "
@ -197,9 +191,7 @@ def _resolve_domain_vars(
if path in resolved:
continue
try:
value = domain.sample(
_UnresolvedAccessGuard(spec), random_state=random_state
)
value = domain.sample(_UnresolvedAccessGuard(spec), random_state=random_state)
except RecursiveDependencyError as e:
error = e
# except Exception:
@ -217,9 +209,7 @@ def _resolve_domain_vars(
return True, resolved
def _grid_search_generator(
unresolved_spec: Dict, grid_vars: List
) -> Generator[Dict, None, None]:
def _grid_search_generator(unresolved_spec: Dict, grid_vars: List) -> Generator[Dict, None, None]:
value_indices = [0] * len(grid_vars)
def increment(i):
@ -260,9 +250,7 @@ def _try_resolve(v) -> Tuple[bool, Any]:
# Grid search values
grid_values = v["grid_search"]
if not isinstance(grid_values, list):
raise TuneError(
"Grid search expected list of values, got: {}".format(grid_values)
)
raise TuneError("Grid search expected list of values, got: {}".format(grid_values))
return False, Categorical(grid_values).grid()
return True, v
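A small analogue of grid expansion using itertools.product, with made-up variables; the generator above instead increments per-variable value indices:

import itertools

grid_vars = {"lr": [0.01, 0.1], "depth": [4, 8]}
variants = [dict(zip(grid_vars, values)) for values in itertools.product(*grid_vars.values())]
print(variants)  # 4 variants: the cross product of the two grid_search value lists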
@ -318,9 +306,7 @@ class _UnresolvedAccessGuard(dict):
def __getattribute__(self, item):
value = dict.__getattribute__(self, item)
if not _is_resolved(value):
raise RecursiveDependencyError(
"`{}` recursively depends on {}".format(item, value)
)
raise RecursiveDependencyError("`{}` recursively depends on {}".format(item, value))
elif isinstance(value, dict):
return _UnresolvedAccessGuard(value)
else:

View file

@ -70,15 +70,11 @@ def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str,
else:
raise ValueError(
"Optuna search does not support parameters of type "
"`{}` with samplers of type `{}`".format(
type(domain).__name__, type(domain.sampler).__name__
)
"`{}` with samplers of type `{}`".format(type(domain).__name__, type(domain.sampler).__name__)
)
elif isinstance(domain, sample.Integer):
if isinstance(sampler, sample.LogUniform):
trial.suggest_int(
key, domain.lower, domain.upper - int(bool(not quantize)), log=True
)
trial.suggest_int(key, domain.lower, domain.upper - int(bool(not quantize)), log=True)
elif isinstance(sampler, sample.Uniform):
# Upper bound should be inclusive for quantization and
# exclusive otherwise
@ -103,9 +99,7 @@ def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str,
else:
raise ValueError(
"Optuna search does not support parameters of type "
"`{}` with samplers of type `{}`".format(
type(domain).__name__, type(domain.sampler).__name__
)
"`{}` with samplers of type `{}`".format(type(domain).__name__, type(domain.sampler).__name__)
)
# Return all constants in a dictionary.
return config
@ -148,9 +142,7 @@ def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]:
pos = key.rfind(":")
true_key = key[:pos]
choice = int(key[pos + 1 :])
hier[true_key], subspace[true_key] = unflatten_hierarchical(
value, space[true_key][choice]
)
hier[true_key], subspace[true_key] = unflatten_hierarchical(value, space[true_key][choice])
else:
if key.endswith("_choice_"):
key = key[:-8]
@ -212,9 +204,7 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
choice_cost_dict = choice_cost_list[i]
else:
choice_cost_dict = {}
domain.const.append(
add_cost_to_space(cat, low_cost_dict, choice_cost_dict)
)
domain.const.append(add_cost_to_space(cat, low_cost_dict, choice_cost_dict))
else:
domain.const.append(None)
if choice_cost_list:
@ -233,18 +223,14 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
ordered = getattr(domain, "ordered", None)
if ordered is None:
# automatically decide whether to order the choices based on the value type
domain.ordered = ordered = all(
isinstance(x, (int, float)) for x in domain.categories
)
domain.ordered = ordered = all(isinstance(x, (int, float)) for x in domain.categories)
if ordered:
# sort the choices by value
ind = np.argsort(domain.categories)
domain.categories = [domain.categories[i] for i in ind]
if low_cost and low_cost not in domain.categories:
assert isinstance(
low_cost, list
), f"low cost {low_cost} not in domain {domain.categories}"
assert isinstance(low_cost, list), f"low cost {low_cost} not in domain {domain.categories}"
if domain.ordered:
sorted_points = [low_cost[i] for i in ind]
for i, point in enumerate(sorted_points):
@ -292,11 +278,7 @@ def normalize(
# low_cost_point list
norm = []
for i, cat in enumerate(domain.categories):
norm.append(
normalize(value[i], cat, reference_config[key][i], {})
if recursive
else value[i]
)
norm.append(normalize(value[i], cat, reference_config[key][i], {}) if recursive else value[i])
if len(value) > len(domain.categories):
# the low cost index was appended to low_cost_point list
index = value[-1]
@ -335,16 +317,10 @@ def normalize(
else:
quantize = None
if str(sampler) == "LogUniform":
upper = domain.upper - (
isinstance(domain, sample.Integer) & (quantize is None)
)
config_norm[key] = np.log(value / domain.lower) / np.log(
upper / domain.lower
)
upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
config_norm[key] = np.log(value / domain.lower) / np.log(upper / domain.lower)
elif str(sampler) == "Uniform":
upper = domain.upper - (
isinstance(domain, sample.Integer) & (quantize is None)
)
upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
config_norm[key] = (value - domain.lower) / (upper - domain.lower)
elif str(sampler) == "Normal":
# N(mean, sd) -> N(0,1)
@ -366,9 +342,7 @@ def denormalize(
if key in space:
# domain: sample.Categorical/Integer/Float/Function
domain = space[key]
if isinstance(value, dict) or not callable(
getattr(domain, "get_sampler", None)
):
if isinstance(value, dict) or not callable(getattr(domain, "get_sampler", None)):
config_denorm[key] = value
else:
if isinstance(domain, sample.Categorical):
@ -376,16 +350,12 @@ def denormalize(
n = len(domain.categories)
if isinstance(value, list):
# denormalize list
choice = min(
n - 1, int(np.floor(value[-1] * n))
) # max choice is n-1
choice = min(n - 1, int(np.floor(value[-1] * n))) # max choice is n-1
config_denorm[key] = point = value[choice]
point["_choice_"] = choice
continue
if domain.ordered:
config_denorm[key] = domain.categories[
min(n - 1, int(np.floor(value * n)))
]
config_denorm[key] = domain.categories[min(n - 1, int(np.floor(value * n)))]
else:
assert key in normalized_reference_config
if min(n - 1, np.floor(value * n)) == min(
@ -394,11 +364,7 @@ def denormalize(
config_denorm[key] = reference_config[key]
else: # ****random value each time!****
config_denorm[key] = random_state.choice(
[
x
for x in domain.categories
if x != reference_config[key]
]
[x for x in domain.categories if x != reference_config[key]]
)
continue
# Uniform/LogUniform/Normal/Base
@ -411,14 +377,10 @@ def denormalize(
quantize = None
# Handle Log/Uniform
if str(sampler) == "LogUniform":
upper = domain.upper - (
isinstance(domain, sample.Integer) & (quantize is None)
)
upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
config_denorm[key] = (upper / domain.lower) ** value * domain.lower
elif str(sampler) == "Uniform":
upper = domain.upper - (
isinstance(domain, sample.Integer) & (quantize is None)
)
upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
config_denorm[key] = value * (upper - domain.lower) + domain.lower
elif str(sampler) == "Normal":
# denormalization for 'Normal'
@ -427,9 +389,7 @@ def denormalize(
# config_denorm[key] = value
# Handle quantized
if quantize is not None:
config_denorm[key] = (
np.round(np.divide(config_denorm[key], quantize)) * quantize
)
config_denorm[key] = np.round(np.divide(config_denorm[key], quantize)) * quantize
# Handle int (4.6 -> 5)
if isinstance(domain, sample.Integer):
config_denorm[key] = int(round(config_denorm[key]))
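A worked round trip for the LogUniform normalize/denormalize pair above, assuming a float domain [1e-4, 1e-1] so no integer upper-bound adjustment applies:

import numpy as np

lower, upper = 1e-4, 1e-1
value = 1e-2
norm = np.log(value / lower) / np.log(upper / lower)  # log(100)/log(1000) ≈ 0.667
denorm = (upper / lower) ** norm * lower  # maps the normalized value back to 1e-2
assert np.isclose(denorm, value)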
@ -525,9 +485,7 @@ def complete_config(
for key, value in space.items():
if key not in config:
config[key] = value
for _, generated in generate_variants_compatible(
{"config": config}, random_state=flow2.rs_random
):
for _, generated in generate_variants_compatible({"config": config}, random_state=flow2.rs_random):
config = generated["config"]
break
subspace = {}
@ -550,9 +508,7 @@ def complete_config(
lower and lower.get(key) and lower[key][index],
upper and upper.get(key) and upper[key][index],
)
assert (
"_choice_" not in subspace[key]
), "_choice_ is a reserved key for hierarchical search space"
assert "_choice_" not in subspace[key], "_choice_ is a reserved key for hierarchical search space"
subspace[key]["_choice_"] = index
else:
config[key], subspace[key] = complete_config(

View file

@ -69,11 +69,7 @@ def get_n_cpus(node="driver"):
"""
assert node in ["driver", "executor"]
try:
n_cpus = int(
SparkSession.builder.getOrCreate()
.sparkContext.getConf()
.get(f"spark.{node}.cores")
)
n_cpus = int(SparkSession.builder.getOrCreate().sparkContext.getConf().get(f"spark.{node}.cores"))
except (TypeError, RuntimeError):
n_cpus = os.cpu_count()
return n_cpus
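A hedged sketch of the same conf lookup with its fallback, runnable only where pyspark is installed; the conf key comes from the code above:

import os

try:
    from pyspark.sql import SparkSession

    n_cpus = int(SparkSession.builder.getOrCreate().sparkContext.getConf().get("spark.driver.cores"))
except (ImportError, TypeError, RuntimeError):  # conf unset or Spark unavailable
    n_cpus = os.cpu_count()
print(n_cpus)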
@ -113,9 +109,7 @@ def with_parameters(trainable, **kwargs):
if not callable(trainable):
raise ValueError(
f"`with_parameters() only works with function trainables`. "
f"Got type: "
f"{type(trainable)}."
f"`with_parameters() only works with function trainables`. " f"Got type: " f"{type(trainable)}."
)
spark_available, spark_error_msg = check_spark()

View file

@ -116,25 +116,19 @@ class Trial:
self.metric_n_steps[metric][str(n)] = deque([value], maxlen=n)
else:
step = result["training_iteration"] or 1
self.metric_analysis[metric]["max"] = max(
value, self.metric_analysis[metric]["max"]
)
self.metric_analysis[metric]["min"] = min(
value, self.metric_analysis[metric]["min"]
)
self.metric_analysis[metric]["max"] = max(value, self.metric_analysis[metric]["max"])
self.metric_analysis[metric]["min"] = min(value, self.metric_analysis[metric]["min"])
self.metric_analysis[metric]["avg"] = (
1
/ step
* (value + (step - 1) * self.metric_analysis[metric]["avg"])
1 / step * (value + (step - 1) * self.metric_analysis[metric]["avg"])
)
self.metric_analysis[metric]["last"] = value
for n in self.n_steps:
key = "last-{:d}-avg".format(n)
self.metric_n_steps[metric][str(n)].append(value)
self.metric_analysis[metric][key] = sum(
self.metric_analysis[metric][key] = sum(self.metric_n_steps[metric][str(n)]) / len(
self.metric_n_steps[metric][str(n)]
) / len(self.metric_n_steps[metric][str(n)])
)
def set_status(self, status):
"""Sets the status of the trial."""

View file

@ -96,17 +96,13 @@ class BaseTrialRunner:
"""Stops trial."""
if trial.status not in [Trial.ERROR, Trial.TERMINATED]:
if self._scheduler_alg:
self._scheduler_alg.on_trial_complete(
self, trial.trial_id, trial.last_result
)
self._scheduler_alg.on_trial_complete(self, trial.trial_id, trial.last_result)
self._search_alg.on_trial_complete(trial.trial_id, trial.last_result)
trial.set_status(Trial.TERMINATED)
elif self._scheduler_alg:
self._scheduler_alg.on_trial_remove(self, trial)
if trial.status == Trial.ERROR:
self._search_alg.on_trial_complete(
trial.trial_id, trial.last_result, error=True
)
self._search_alg.on_trial_complete(trial.trial_id, trial.last_result, error=True)
class SequentialTrialRunner(BaseTrialRunner):

View file

@ -65,11 +65,7 @@ class ExperimentAnalysis(EA):
return self.get_best_config(self.default_metric, self.default_mode)
def lexico_best(self, trials):
results = {
index: trial.last_result
for index, trial in enumerate(trials)
if trial.last_result
}
results = {index: trial.last_result for index, trial in enumerate(trials) if trial.last_result}
metrics = self.lexico_objectives["metrics"]
modes = self.lexico_objectives["modes"]
f_best = {}
@ -79,15 +75,11 @@ class ExperimentAnalysis(EA):
for time_index in range(length):
for objective, mode in zip(metrics, modes):
histories[objective].append(
results[keys[time_index]][objective]
if mode == "min"
else -results[keys[time_index]][objective]
results[keys[time_index]][objective] if mode == "min" else -results[keys[time_index]][objective]
)
obj_initial = self.lexico_objectives["metrics"][0]
feasible_index = np.array([*range(len(histories[obj_initial]))])
for k_metric, k_mode in zip(
self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
):
for k_metric, k_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]):
k_values = np.array(histories[k_metric])
k_target = (
-self.lexico_objectives["targets"][k_metric]
@ -101,19 +93,9 @@ class ExperimentAnalysis(EA):
feasible_value
<= max(
f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
if not isinstance(
self.lexico_objectives["tolerances"][k_metric], str
)
if not isinstance(self.lexico_objectives["tolerances"][k_metric], str)
else f_best[k_metric]
* (
1
+ 0.01
* float(
self.lexico_objectives["tolerances"][k_metric].replace(
"%", ""
)
)
),
* (1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))),
k_target,
)
)[0]
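A worked sketch of the percentage tolerance above, with illustrative numbers: a "1%" string widens the feasible bound on f_best multiplicatively, while a plain float would be added directly.

f_best, tolerance = 100.0, "1%"
if isinstance(tolerance, str):
    bound = f_best * (1 + 0.01 * float(tolerance.replace("%", "")))
else:
    bound = f_best + tolerance
print(bound)  # 101.0: values at most 1% worse than f_best remain feasible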
@ -237,9 +219,7 @@ def run(
local_dir: Optional[str] = None,
num_samples: Optional[int] = 1,
resources_per_trial: Optional[dict] = None,
config_constraints: Optional[
List[Tuple[Callable[[dict], float], str, float]]
] = None,
config_constraints: Optional[List[Tuple[Callable[[dict], float], str, float]]] = None,
metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
max_failure: Optional[int] = 100,
use_ray: Optional[bool] = False,
@ -463,9 +443,7 @@ def run(
os.makedirs(dir_name, exist_ok=True)
elif local_dir and verbose > 0:
os.makedirs(local_dir, exist_ok=True)
log_file_name = os.path.join(
local_dir, "tune_" + str(datetime.datetime.now()).replace(":", "-") + ".log"
)
log_file_name = os.path.join(local_dir, "tune_" + str(datetime.datetime.now()).replace(":", "-") + ".log")
if use_ray and use_spark:
raise ValueError("use_ray and use_spark cannot be both True.")
if not use_ray:
@ -506,9 +484,7 @@ def run(
from .searcher.blendsearch import BlendSearch, CFO
if lexico_objectives is not None:
logger.warning(
"If lexico_objectives is not None, search_alg is forced to be CFO"
)
logger.warning("If lexico_objectives is not None, search_alg is forced to be CFO")
search_alg = None
if search_alg is None:
flaml_scheduler_resource_attr = (
@ -529,14 +505,10 @@ def run(
import optuna as _
SearchAlgorithm = BlendSearch
logger.info(
"Using search algorithm {}.".format(SearchAlgorithm.__name__)
)
logger.info("Using search algorithm {}.".format(SearchAlgorithm.__name__))
except ImportError:
SearchAlgorithm = CFO
logger.warning(
"Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]"
)
logger.warning("Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]")
metric = metric or DEFAULT_METRIC
else:
SearchAlgorithm = CFO
@ -581,14 +553,8 @@ def run(
]
and use_incumbent_result_in_evaluation is not None
):
search_alg.use_incumbent_result_in_evaluation = (
use_incumbent_result_in_evaluation
)
searcher = (
search_alg.searcher
if isinstance(search_alg, ConcurrencyLimiter)
else search_alg
)
search_alg.use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation
searcher = search_alg.searcher if isinstance(search_alg, ConcurrencyLimiter) else search_alg
if isinstance(searcher, BlendSearch):
setting = {}
if time_budget_s:
@ -617,10 +583,7 @@ def run(
try:
from ray import tune
except ImportError:
raise ImportError(
"Failed to import ray tune. "
"Please install ray[tune] or set use_ray=False"
)
raise ImportError("Failed to import ray tune. " "Please install ray[tune] or set use_ray=False")
_use_ray = True
try:
analysis = tune.run(
@ -659,19 +622,14 @@ def run(
from joblib import Parallel, delayed, parallel_backend
from joblibspark import register_spark
except ImportError as e:
raise ImportError(
f"{e}. Try pip install flaml[spark] or set use_spark=False."
)
raise ImportError(f"{e}. Try pip install flaml[spark] or set use_spark=False.")
from flaml.tune.searcher.suggestion import ConcurrencyLimiter
from .trial_runner import SparkTrialRunner
register_spark()
spark = SparkSession.builder.getOrCreate()
sc = spark._jsc.sc()
num_executors = (
len([executor.host() for executor in sc.statusTracker().getExecutorInfos()])
- 1
)
num_executors = len([executor.host() for executor in sc.statusTracker().getExecutorInfos()]) - 1
"""
By default, the number of executors is the number of VMs in the cluster. And we can
launch one trial per executor. However, sometimes we can launch more trials than
@ -708,9 +666,7 @@ def run(
max_concurrent,
)
with parallel_backend("spark"):
with Parallel(
n_jobs=n_concurrent_trials, verbose=max(0, (verbose - 1) * 50)
) as parallel:
with Parallel(n_jobs=n_concurrent_trials, verbose=max(0, (verbose - 1) * 50)) as parallel:
try:
_runner = SparkTrialRunner(
search_alg=search_alg,
@ -722,9 +678,7 @@ def run(
if time_budget_s is None:
time_budget_s = np.inf
num_failures = 0
upperbound_num_failures = (
len(evaluated_rewards) if evaluated_rewards else 0
) + max_failure
upperbound_num_failures = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure
while (
time.time() - time_start < time_budget_s
and (num_samples < 0 or num_trials < num_samples)
@ -742,9 +696,7 @@ def run(
break
trials_to_run = _runner.running_trials
if not trials_to_run:
logger.warning(
f"fail to sample a trial for {max_failure} times in a row, stopping."
)
logger.warning(f"fail to sample a trial for {max_failure} times in a row, stopping.")
break
logger.info(
f"Number of trials: {num_trials}/{num_samples}, {len(_runner.running_trials)} RUNNING,"
@ -754,12 +706,9 @@ def run(
f"Configs of Trials to run: {[trial_to_run.config for trial_to_run in trials_to_run]}"
)
results = None
with PySparkOvertimeMonitor(
time_start, time_budget_s, force_cancel, parallel=parallel
):
with PySparkOvertimeMonitor(time_start, time_budget_s, force_cancel, parallel=parallel):
results = parallel(
delayed(evaluation_function)(trial_to_run.config)
for trial_to_run in trials_to_run
delayed(evaluation_function)(trial_to_run.config) for trial_to_run in trials_to_run
)
# results = [evaluation_function(trial_to_run.config) for trial_to_run in trials_to_run]
while results:
@ -775,9 +724,7 @@ def run(
# When the result returned is an empty dict, set the trial status to error
trial_to_run.set_status(Trial.ERROR)
else:
logger.info(
"Brief result: {}".format({metric: result})
)
logger.info("Brief result: {}".format({metric: result}))
report(_metric=result)
_runner.stop_trial(trial_to_run)
num_failures = 0
@ -817,9 +764,7 @@ def run(
if time_budget_s is None:
time_budget_s = np.inf
num_failures = 0
upperbound_num_failures = (
len(evaluated_rewards) if evaluated_rewards else 0
) + max_failure
upperbound_num_failures = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure
while (
time.time() - time_start < time_budget_s
and (num_samples < 0 or num_trials < num_samples)
@ -852,9 +797,7 @@ def run(
# break with upperbound_num_failures consecutive failures
num_failures += 1
if num_failures == upperbound_num_failures:
logger.warning(
f"fail to sample a trial for {max_failure} times in a row, stopping."
)
logger.warning(f"fail to sample a trial for {max_failure} times in a row, stopping.")
analysis = ExperimentAnalysis(
_runner.get_trials(),
metric=metric,

View file

@ -23,9 +23,5 @@ def choice(categories: Sequence, order=None):
Numerical categories have an order, while string categories do not.
"""
domain = sample.Categorical(categories).uniform()
domain.ordered = (
order
if order is not None
else all(isinstance(x, (int, float)) for x in categories)
)
domain.ordered = order if order is not None else all(isinstance(x, (int, float)) for x in categories)
return domain
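A quick illustration of the ordering rule above: numerical categories are treated as ordered by default, string categories as unordered, unless order= is passed explicitly.

from flaml import tune

print(tune.choice([1, 2, 3]).ordered)        # True
print(tune.choice(["a", "b", "c"]).ordered)  # False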

View file

@ -265,9 +265,7 @@ class TestClassification(unittest.TestCase):
import xgboost as xgb
callback = xgb.callback.TrainingCallback()
automl.fit(
X_train=X_train, y_train=y_train, callbacks=[callback], **automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, callbacks=[callback], **automl_settings)
print(automl.predict(X_train))
print(automl.model)
print(automl.config_history)
@ -279,16 +277,12 @@ class TestClassification(unittest.TestCase):
import subprocess
import sys
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "xgboost==1.3.3", "--user"]
)
subprocess.check_call([sys.executable, "-m", "pip", "install", "xgboost==1.3.3", "--user"])
automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl.feature_names_in_)
print(automl.feature_importances_)
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "-U", "xgboost", "--user"]
)
subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "xgboost", "--user"])
def test_ray_classification(self):
X, y = load_breast_cancer(return_X_y=True)
@ -337,9 +331,7 @@ class TestClassification(unittest.TestCase):
import ray
X_train_ref = ray.put(X_train)
automl_experiment.fit(
X_train=X_train_ref, y_train=y_train, **automl_settings
)
automl_experiment.fit(X_train=X_train_ref, y_train=y_train, **automl_settings)
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
@ -355,9 +347,7 @@ class TestClassification(unittest.TestCase):
def test_random_skip_oom(self):
automl_experiment = AutoML()
automl_experiment.add_learner(
learner_name="large_lgbm", learner_class=MyLargeLGBM
)
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
automl_settings = {
"time_budget": 2,
"task": "classification",
@ -396,9 +386,7 @@ class TestClassification(unittest.TestCase):
}
X_train = scipy.sparse.random(3000, 3000, density=0.1)
y_train = np.random.randint(2, size=3000)
automl_experiment.fit(
X_train=X_train, y_train=y_train, train_time_limit=1, **automl_settings
)
automl_experiment.fit(X_train=X_train, y_train=y_train, train_time_limit=1, **automl_settings)
automl_settings["time_budget"] = 5
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl_experiment.predict(X_train))

View file

@ -31,9 +31,7 @@ def test_metric_constraints():
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl.estimator_list)
print(automl.search_space)
@ -55,9 +53,7 @@ def test_metric_constraints():
min_resource=automl.min_resource,
max_resource=automl.max_resource,
time_budget_s=automl._state.time_budget,
config_constraints=[
(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)
],
config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
metric_constraints=automl.metric_constraints,
num_samples=5,
)
@ -121,18 +117,12 @@ def test_metric_constraints_custom():
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl.estimator_list)
print(automl.search_space)
print(automl.points_to_evaluate)
print(
"Best minimization objective on validation data: {0:.4g}".format(
automl.best_loss
)
)
print("Best minimization objective on validation data: {0:.4g}".format(automl.best_loss))
print(
"pred_time of the best config on validation data: {0:.4g}".format(
automl.metrics_for_best_config[1]["pred_time"]
@ -161,9 +151,7 @@ def test_metric_constraints_custom():
min_resource=automl.min_resource,
max_resource=automl.max_resource,
time_budget_s=automl._state.time_budget,
config_constraints=[
(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)
],
config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
metric_constraints=automl.metric_constraints,
num_samples=5,
)

View file

@ -2,20 +2,12 @@ import numpy as np
from flaml import AutoML
def test_forecast_automl(
budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]
):
def test_forecast_automl(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
# using dataframe
import statsmodels.api as sm
data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
data = (
data.bfill()
.ffill()
.to_frame()
.reset_index()
.rename(columns={"index": "ds", "co2": "y"})
)
data = data.bfill().ffill().to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
num_samples = data.shape[0]
time_horizon = 12
split_idx = num_samples - time_horizon
@ -163,9 +155,7 @@ def load_multi_dataset():
return df
def test_multivariate_forecast_num(
budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]
):
def test_multivariate_forecast_num(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
df = load_multi_dataset()
# split data into train and test
time_horizon = 180
@ -282,9 +272,7 @@ def load_multi_dataset_cat(time_horizon):
return 0
df["season"] = df["timeStamp"].apply(season)
df["above_monthly_avg"] = df.apply(
lambda x: above_monthly_avg(x["timeStamp"], x["temp"]), axis=1
)
df["above_monthly_avg"] = df.apply(lambda x: above_monthly_avg(x["timeStamp"], x["temp"]), axis=1)
# split data into train and test
num_samples = df.shape[0]
@ -297,9 +285,7 @@ def load_multi_dataset_cat(time_horizon):
return train_df, test_df
def test_multivariate_forecast_cat(
budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]
):
def test_multivariate_forecast_cat(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
time_horizon = 180
train_df, test_df = load_multi_dataset_cat(time_horizon)
X_test = test_df[
@ -456,16 +442,10 @@ def get_stalliion_data():
data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
data["time_idx"] -= data["time_idx"].min()
# add additional features
data["month"] = data.date.dt.month.astype(str).astype(
"category"
    )  # categories have to be strings
    data["month"] = data.date.dt.month.astype(str).astype("category")  # categories have to be strings
data["log_volume"] = np.log(data.volume + 1e-8)
data["avg_volume_by_sku"] = data.groupby(
["time_idx", "sku"], observed=True
).volume.transform("mean")
data["avg_volume_by_agency"] = data.groupby(
["time_idx", "agency"], observed=True
).volume.transform("mean")
data["avg_volume_by_sku"] = data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean")
data["avg_volume_by_agency"] = data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean")
# we want to encode special days as one variable and thus need to first reverse one-hot encoding
special_days = [
"easter_day",
@ -479,11 +459,7 @@ def get_stalliion_data():
"beer_capital",
"music_fest",
]
data[special_days] = (
data[special_days]
.apply(lambda x: x.map({0: "-", 1: x.name}))
.astype("category")
)
data[special_days] = data[special_days].apply(lambda x: x.map({0: "-", 1: x.name})).astype("category")
return data, special_days
@ -571,8 +547,7 @@ def test_forecast_panel(budget=5):
y_test, y_pred = np.array(y_test), np.array(y_pred)
return round(
np.mean(np.abs(y_pred - y_test) / ((np.abs(y_pred) + np.abs(y_test)) / 2))
* 100,
np.mean(np.abs(y_pred - y_test) / ((np.abs(y_pred) + np.abs(y_test)) / 2)) * 100,
2,
)
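A worked check of the symmetric MAPE above, with toy arrays rather than the test's forecasts:

import numpy as np

y_test, y_pred = np.array([100.0, 200.0]), np.array([110.0, 180.0])
smape = round(np.mean(np.abs(y_pred - y_test) / ((np.abs(y_pred) + np.abs(y_test)) / 2)) * 100, 2)
print(smape)  # (10/105 + 20/190) / 2 * 100 ≈ 10.03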

View file

@ -156,9 +156,7 @@ class TestMultiClass(unittest.TestCase):
del settings["time_budget"]
settings["max_iter"] = 5
# test the "_choice_" issue when using ray
automl.fit(
X_train=X_train, y_train=y_train, n_concurrent_trials=2, **settings
)
automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=2, **settings)
except ImportError:
return
@ -212,9 +210,7 @@ class TestMultiClass(unittest.TestCase):
print(automl.best_iteration)
print(automl.best_estimator)
automl = AutoML()
estimator = automl.get_estimator_from_log(
settings["log_file_name"], record_id=0, task="multiclass"
)
estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=0, task="multiclass")
print(estimator)
(
time_history,
@ -233,9 +229,7 @@ class TestMultiClass(unittest.TestCase):
del settings["time_budget"]
settings["max_iter"] = 2
automl.fit(**settings)
estimator = automl.get_estimator_from_log(
settings["log_file_name"], record_id=1, task="multiclass"
)
estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=1, task="multiclass")
except ImportError:
pass
@ -290,12 +284,8 @@ class TestMultiClass(unittest.TestCase):
"model_history": True,
}
X_train, y_train = load_iris(return_X_y=True)
automl_experiment_micro.fit(
X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings
)
automl_experiment_macro.fit(
X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings
)
automl_experiment_micro.fit(X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings)
automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings)
estimator = automl_experiment_macro.model
y_pred = estimator.predict(X_train)
y_pred_proba = estimator.predict_proba(X_train)
@ -389,9 +379,7 @@ class TestMultiClass(unittest.TestCase):
def _test_memory_limit(self):
automl_experiment = AutoML()
automl_experiment.add_learner(
learner_name="large_lgbm", learner_class=MyLargeLGBM
)
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
automl_settings = {
"time_budget": -1,
"task": "classification",
@ -403,19 +391,13 @@ class TestMultiClass(unittest.TestCase):
}
X_train, y_train = load_iris(return_X_y=True, as_frame=True)
automl_experiment.fit(
X_train=X_train, y_train=y_train, max_iter=1, **automl_settings
)
automl_experiment.fit(X_train=X_train, y_train=y_train, max_iter=1, **automl_settings)
print(automl_experiment.model)
def test_time_limit(self):
automl_experiment = AutoML()
automl_experiment.add_learner(
learner_name="large_lgbm", learner_class=MyLargeLGBM
)
automl_experiment.add_learner(
learner_name="large_xgb", learner_class=MyLargeXGB
)
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
automl_experiment.add_learner(learner_name="large_xgb", learner_class=MyLargeXGB)
automl_settings = {
"time_budget": 0.5,
"task": "classification",
@ -450,21 +432,12 @@ class TestMultiClass(unittest.TestCase):
# test drop column
X_train.columns = range(X_train.shape[1])
X_train[X_train.shape[1]] = np.zeros(len(y_train))
automl.fit(
X_train=X_train,
y_train=y_train,
n_concurrent_trials=n_concurrent_trials,
**settings
)
automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
automl_val_accuracy = 1.0 - automl.best_loss
print("Best ML leaner:", automl.best_estimator)
print("Best hyperparmeter config:", automl.best_config)
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
print(
"Training duration of best run: {0:.4g} s".format(
automl.best_config_train_time
)
)
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
starting_points = automl.best_config_per_estimator
print("starting_points", starting_points)
@ -486,14 +459,8 @@ class TestMultiClass(unittest.TestCase):
new_automl_val_accuracy = 1.0 - new_automl.best_loss
print("Best ML leaner:", new_automl.best_estimator)
print("Best hyperparmeter config:", new_automl.best_config)
print(
"Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
)
print(
"Training duration of best run: {0:.4g} s".format(
new_automl.best_config_train_time
)
)
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
def test_fit_w_starting_point_2(self, as_frame=True):
try:
@ -520,21 +487,12 @@ class TestMultiClass(unittest.TestCase):
# test drop column
X_train.columns = range(X_train.shape[1])
X_train[X_train.shape[1]] = np.zeros(len(y_train))
automl.fit(
X_train=X_train,
y_train=y_train,
n_concurrent_trials=n_concurrent_trials,
**settings
)
automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
automl_val_accuracy = 1.0 - automl.best_loss
print("Best ML leaner:", automl.best_estimator)
print("Best hyperparmeter config:", automl.best_config)
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
print(
"Training duration of best run: {0:.4g} s".format(
automl.best_config_train_time
)
)
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
starting_points = {}
log_file_name = settings["log_file_name"]
@ -568,9 +526,7 @@ class TestMultiClass(unittest.TestCase):
new_automl_val_accuracy = 1.0 - new_automl.best_loss
        # print('Best ML learner:', new_automl.best_estimator)
        # print('Best hyperparameter config:', new_automl.best_config)
print(
"Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
)
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
# print('Training duration of best run: {0:.4g} s'.format(new_automl_experiment.best_config_train_time))

View file

@ -60,9 +60,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
print("Best ML leaner:", automl.best_estimator)
print("Best hyperparmeter config:", automl.best_config)
print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
print(
"Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
)
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
print(automl.model.estimator)
print(automl.best_config_per_estimator)
print("time taken to find best model:", automl.time_to_find_best_model)
@ -81,9 +79,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
print("accuracy", "=", accuracy)
print(
"roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
)
print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
if budget is None:
assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
@ -122,9 +118,7 @@ def test_mlflow():
from flaml.automl.data import load_openml_task
try:
X_train, X_test, y_train, y_test = load_openml_task(
task_id=7592, data_dir="test/"
)
X_train, X_test, y_train, y_test = load_openml_task(task_id=7592, data_dir="test/")
except (OpenMLServerException, ChunkedEncodingError, SSLError) as e:
print(e)
return

View file

@ -40,11 +40,7 @@ class TestLogging(unittest.TestCase):
n = len(y_train) >> 1
print(automl.model, automl.classes_, automl.predict(X_train))
automl.fit(
X_train=X_train[:n],
y_train=y_train[:n],
X_val=X_train[n:],
y_val=y_train[n:],
**automl_settings
X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings
)
logger.info(automl.search_space)
logger.info(automl.low_cost_partial_config)
@ -58,9 +54,7 @@ class TestLogging(unittest.TestCase):
sample = define_by_run_func(study.ask(), automl.search_space)
logger.info(sample)
logger.info(unflatten_hierarchical(sample, automl.search_space))
add_cost_to_space(
automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost
)
add_cost_to_space(automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost)
logger.info(automl.search_space["ml"].categories)
if automl.best_config:
config = automl.best_config.copy()

Просмотреть файл

@ -45,13 +45,7 @@ class TestRegression(unittest.TestCase):
}
X_train, y_train = fetch_california_housing(return_X_y=True)
n = int(len(y_train) * 9 // 10)
automl.fit(
X_train=X_train[:n],
y_train=y_train[:n],
X_val=X_train[n:],
y_val=y_train[n:],
**automl_settings
)
automl.fit(X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings)
assert automl._state.eval_method == "holdout"
y_pred = automl.predict(X_train)
print(y_pred)
@ -88,10 +82,7 @@ class TestRegression(unittest.TestCase):
print(automl.model.estimator)
y_pred2 = automl.predict(X_train)
# In some rare case, the last config is early stopped and it's the best config. But the logged config's n_estimator is not reduced.
assert (
n_iter != automl.model.estimator.get_params("n_estimator")
or (y_pred == y_pred2).all()
)
assert n_iter != automl.model.estimator.get_params("n_estimator") or (y_pred == y_pred2).all()
def test_sparse_matrix_regression(self):
X_train = scipy.sparse.random(300, 900, density=0.0001)
@ -110,9 +101,7 @@ class TestRegression(unittest.TestCase):
"verbose": 0,
"early_stop": True,
}
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
assert automl._state.X_val.shape == X_val.shape
print(automl.predict(X_train))
print(automl.model)
@ -135,9 +124,7 @@ class TestRegression(unittest.TestCase):
"custom_hp": {"catboost": {"n_estimators": {"domain": 100}}},
}
)
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
def test_parallel(self, hpo_method=None):
automl_experiment = AutoML()
@ -203,13 +190,7 @@ class TestRegression(unittest.TestCase):
"keep_search_state": True,
"early_stop": True,
}
automl_experiment.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl_experiment.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
assert automl_experiment._state.X_val.shape == X_val.shape
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
@ -231,9 +212,7 @@ def test_multioutput():
X, y = make_regression(n_targets=3)
# split into train and test data
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.30, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
# train the model
model = MultiOutputRegressor(AutoML(task="regression", time_budget=1))


@ -11,12 +11,7 @@ class TestScore:
import statsmodels.api as sm
data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
data = (
data.fillna(data.bfill())
.to_frame()
.reset_index()
.rename(columns={"index": "ds", "co2": "y"})
)
data = data.fillna(data.bfill()).to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
num_samples = data.shape[0]
time_horizon = 12
split_idx = num_samples - time_horizon
@ -48,9 +43,7 @@ class TestScore:
with open("automl.pkl", "rb") as f:
pickle.load(f) # v1.1 of prophet raises RecursionError
except (ImportError, RecursionError):
print(
"not using prophet due to ImportError or RecursionError (when unpickling in v1.1)"
)
print("not using prophet due to ImportError or RecursionError (when unpickling in v1.1)")
automl.fit(
dataframe=df,
**settings,


@ -29,13 +29,9 @@ def _test(split_type):
X, y = load_wine(return_X_y=True)
if split_type != "time":
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
else:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, shuffle=False
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
pred = automl.predict(X_test)
@ -83,9 +79,7 @@ def test_groups():
automl_settings["split_type"] = GroupKFold(n_splits=3)
try:
automl.fit(X, y, **automl_settings)
raise RuntimeError(
"GroupKFold object as split_type should fail when eval_method is holdout"
)
raise RuntimeError("GroupKFold object as split_type should fail when eval_method is holdout")
except AssertionError:
# eval_method must be 'auto' or 'cv' for custom data splitter.
pass
@ -140,9 +134,7 @@ def test_rank():
"log_file_name": "test/{}.log".format(dataset),
"model_history": True,
"eval_method": "cv",
"groups": np.array( # group labels
[0] * 200 + [1] * 200 + [2] * 200 + [3] * 200 + [4] * 100 + [5] * 100
),
"groups": np.array([0] * 200 + [1] * 200 + [2] * 200 + [3] * 200 + [4] * 100 + [5] * 100), # group labels
"learner_selector": "roundrobin",
}
automl.fit(X, y, **automl_settings)
@ -197,9 +189,7 @@ def test_object():
"split_type": TestKFold(5),
}
automl.fit(X, y, **automl_settings)
assert (
automl._state.eval_method == "cv"
), "eval_method must be 'cv' for custom data splitter"
assert automl._state.eval_method == "cv", "eval_method must be 'cv' for custom data splitter"
kf = TestKFold(5)
kf.shuffle = True


@ -9,9 +9,7 @@ from flaml.automl.training_log import training_log_reader
class TestTrainingLog(unittest.TestCase):
def test_training_log(
self, path="test_training_log.log", estimator_list="auto", use_ray=False
):
def test_training_log(self, path="test_training_log.log", estimator_list="auto", use_ray=False):
with TemporaryDirectory() as d:
filename = os.path.join(d, path)
@ -64,11 +62,9 @@ class TestTrainingLog(unittest.TestCase):
assert (
str(model.estimator) == str(automl.model.estimator)
or estimator == "xgboost"
and str(model.estimator.get_dump())
== str(automl.model.estimator.get_dump())
and str(model.estimator.get_dump()) == str(automl.model.estimator.get_dump())
or estimator == "catboost"
and str(model.estimator.get_all_params())
== str(automl.model.estimator.get_all_params())
and str(model.estimator.get_all_params()) == str(automl.model.estimator.get_all_params())
)
automl.fit(
X_train=X_train,


@ -29,11 +29,7 @@ class TestWarmStart(unittest.TestCase):
print("Best ML learner:", automl.best_estimator)
print("Best hyperparameter config:", automl.best_config)
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
print(
"Training duration of best run: {0:.4g} s".format(
automl.best_config_train_time
)
)
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
# 1. Get starting points from previous experiments.
starting_points = automl.best_config_per_estimator
print("starting_points", starting_points)
@ -61,19 +57,13 @@ class TestWarmStart(unittest.TestCase):
revised_hps_to_search = {
"n_estimators": {
"domain": tune.lograndint(lower=10, upper=32768),
"init_value": starting_point.get("n_estimators")
or space["n_estimators"].get("init_value", 10),
"low_cost_init_value": space["n_estimators"].get(
"low_cost_init_value", 10
),
"init_value": starting_point.get("n_estimators") or space["n_estimators"].get("init_value", 10),
"low_cost_init_value": space["n_estimators"].get("low_cost_init_value", 10),
},
"num_leaves": {
"domain": tune.lograndint(lower=10, upper=3276),
"init_value": starting_point.get("num_leaves")
or space["num_leaves"].get("init_value", 10),
"low_cost_init_value": space["num_leaves"].get(
"low_cost_init_value", 10
),
"init_value": starting_point.get("num_leaves") or space["num_leaves"].get("init_value", 10),
"low_cost_init_value": space["num_leaves"].get("low_cost_init_value", 10),
},
# (3.2) Add a new hp which is not in the original search space
"subsample": {
@ -86,9 +76,7 @@ class TestWarmStart(unittest.TestCase):
new_estimator_name = "large_lgbm"
new_automl = AutoML()
new_automl.add_learner(
learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM
)
new_automl.add_learner(learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM)
automl_settings_resume = {
"time_budget": 3,
@ -108,14 +96,8 @@ class TestWarmStart(unittest.TestCase):
new_automl_val_accuracy = 1.0 - new_automl.best_loss
print("Best ML learner:", new_automl.best_estimator)
print("Best hyperparameter config:", new_automl.best_config)
print(
"Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
)
print(
"Training duration of best run: {0:.4g} s".format(
new_automl.best_config_train_time
)
)
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
def test_nobudget(self):
automl = AutoML()
@ -127,9 +109,7 @@ class TestWarmStart(unittest.TestCase):
from flaml.automl.data import load_openml_dataset
from flaml import AutoML
X_train, X_test, y_train, y_test = load_openml_dataset(
dataset_id=1169, data_dir="./"
)
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
automl_settings = {
"time_budget": 3,

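The TestWarmStart hunks above exercise FLAML's warm-start path: a finished run's best_config_per_estimator is fed back in as the starting_points of a new run. A minimal self-contained sketch of that pattern (dataset and budgets here are illustrative, not taken from the test):

from flaml import AutoML
from sklearn.datasets import load_iris

X_train, y_train = load_iris(return_X_y=True)

automl = AutoML()
automl.fit(X_train, y_train, task="classification", time_budget=5)

# carry the per-estimator best configs into a second, warm-started run
starting_points = automl.best_config_per_estimator
new_automl = AutoML()
new_automl.fit(X_train, y_train, task="classification", time_budget=5, starting_points=starting_points)
print("warm-started best loss:", new_automl.best_loss)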

@ -49,9 +49,7 @@ def test_simple(method=None):
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl.estimator_list)
print(automl.search_space)
@ -77,9 +75,7 @@ def test_simple(method=None):
min_resource=automl.min_resource,
max_resource=automl.max_resource,
time_budget_s=automl._state.time_budget,
config_constraints=[
(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)
],
config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
metric_constraints=automl.metric_constraints,
num_samples=5,
)


@ -31,9 +31,7 @@ def _test_simple(method=None, size_ratio=1.0):
automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)
X, y = fetch_openml(name=dataset, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
final_size = int(len(y_train) * size_ratio)
X_train = X_train[:final_size]


@ -69,23 +69,15 @@ def test_regret():
def test_suggest_classification():
location = "test/default"
X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
suggested = suggest_hyperparams(
"classification", X_train, y_train, "lgbm", location=location
)
suggested = suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
print(suggested)
suggested = preprocess_and_suggest_hyperparams(
"classification", X_train, y_train, "xgboost", location=location
)
suggested = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
print(suggested)
suggested = suggest_hyperparams(
"classification", X_train, y_train, "xgb_limitdepth", location=location
)
suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
print(suggested)
X, y = load_iris(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
(
hyperparams,
estimator_class,
@ -93,29 +85,21 @@ def test_suggest_classification():
y,
feature_transformer,
label_transformer,
) = preprocess_and_suggest_hyperparams(
"classification", X_train, y_train, "lgbm", location=location
)
) = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
with open("test/default/feature_transformer", "wb") as f:
pickle.dump(feature_transformer, f, pickle.HIGHEST_PROTOCOL)
model = estimator_class(**hyperparams) # estimator_class is LGBMClassifier
model.fit(X, y)
X_test = feature_transformer.transform(X_test)
y_pred = label_transformer.inverse_transform(
pd.Series(model.predict(X_test).astype(int))
)
y_pred = label_transformer.inverse_transform(pd.Series(model.predict(X_test).astype(int)))
print(y_pred)
suggested = suggest_hyperparams(
"classification", X_train, y_train, "xgboost", location=location
)
suggested = suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
print(suggested)
suggested = preprocess_and_suggest_hyperparams(
"classification", X_train, y_train, "xgb_limitdepth", location=location
)
print(suggested)
suggested = suggest_hyperparams(
"classification", X_train, y_train, "xgb_limitdepth", location=location
)
suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
suggested = suggest_learner(
"classification",
X_train,
@ -129,17 +113,11 @@ def test_suggest_classification():
def test_suggest_regression():
location = "test/default"
X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True)
suggested = suggest_hyperparams(
"regression", X_train, y_train, "lgbm", location=location
)
suggested = suggest_hyperparams("regression", X_train, y_train, "lgbm", location=location)
print(suggested)
suggested = preprocess_and_suggest_hyperparams(
"regression", X_train, y_train, "xgboost", location=location
)
suggested = preprocess_and_suggest_hyperparams("regression", X_train, y_train, "xgboost", location=location)
print(suggested)
suggested = suggest_hyperparams(
"regression", X_train, y_train, "xgb_limitdepth", location=location
)
suggested = suggest_hyperparams("regression", X_train, y_train, "xgb_limitdepth", location=location)
print(suggested)
suggested = suggest_learner("regression", X_train, y_train, location=location)
print(suggested)

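Pieced together, the zero-shot API that the hunks above keep reformatting makes a full round trip: suggest hyperparameters from a portfolio, train with the returned estimator class, and undo the label encoding at predict time. A rough sketch, with location omitted so the packaged portfolio is used (an assumption; the tests pass a custom location):

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from flaml.default import preprocess_and_suggest_hyperparams

X, y = load_iris(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

(
    hyperparams,
    estimator_class,
    X_transformed,
    y_transformed,
    feature_transformer,
    label_transformer,
) = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "lgbm")

model = estimator_class(**hyperparams)  # LGBMClassifier for the "lgbm" predictor
model.fit(X_transformed, y_transformed)
X_test_t = feature_transformer.transform(X_test)
y_pred = label_transformer.inverse_transform(pd.Series(model.predict(X_test_t).astype(int)))
print(y_pred)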

@ -5,9 +5,7 @@ from flaml.automl.ml import sklearn_metric_loss_score
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./")
lgbm = LGBMRegressor()
hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams(
X_train, y_train
)
hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams(X_train, y_train)
print(hyperparams)
lgbm.fit(X_train, y_train)


@ -21,13 +21,7 @@ def test_hf_data():
automl_settings["preserve_checkpoint"] = False
try:
automl.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
automl.score(X_val, y_val, **{"metric": "accuracy"})
automl.pickle("automl.pkl")
except requests.exceptions.HTTPError:
@ -54,13 +48,7 @@ def test_hf_data():
automl_settings.pop("use_ray", None)
automl_settings.pop("estimator_list", None)
automl.retrain_from_log(
X_train=X_train,
y_train=y_train,
train_full=True,
record_id=0,
**automl_settings
)
automl.retrain_from_log(X_train=X_train, y_train=y_train, train_full=True, record_id=0, **automl_settings)
automl.predict(X_test, **{"per_device_eval_batch_size": 2})
automl.predict(["", ""])
automl.predict_proba(["", ""])


@ -23,65 +23,47 @@ model_path_list = [
def test_switch_1_1():
data_idx, model_path_idx = 0, 0
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_1_2():
data_idx, model_path_idx = 0, 1
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_1_3():
data_idx, model_path_idx = 0, 2
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_2_1():
data_idx, model_path_idx = 1, 0
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_2_2():
data_idx, model_path_idx = 1, 1
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_2_3():
data_idx, model_path_idx = 1, 2
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_3_1():
data_idx, model_path_idx = 2, 0
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_3_2():
data_idx, model_path_idx = 2, 1
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def test_switch_3_3():
data_idx, model_path_idx = 2, 2
_test_switch_classificationhead(
data_list[data_idx], model_path_list[model_path_idx]
)
_test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
def _test_switch_classificationhead(each_data, each_model_path):
@ -102,13 +84,7 @@ def _test_switch_classificationhead(each_data, each_model_path):
automl_settings["metric"] = "accuracy"
try:
automl.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
except requests.exceptions.HTTPError:
return


@ -61,22 +61,14 @@ def test_custom_metric():
automl_settings["use_ray"] = {"local_dir": "data/output/"}
try:
automl.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
except requests.exceptions.HTTPError:
return
# testing calling custom metric in TransformersEstimator._compute_metrics_by_dataset_name
automl_settings["max_iter"] = 3
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
automl.score(X_val, y_val, **{"metric": custom_metric})
automl.pickle("automl.pkl")


@ -5,9 +5,7 @@ import os
import shutil
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows"
)
@pytest.mark.skipif(sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows")
def test_cv():
from flaml import AutoML
import requests


@ -2,6 +2,4 @@ def test_load_args():
import subprocess
import sys
subprocess.call(
[sys.executable, "load_args.py", "--output_dir", "data/"], shell=True
)
subprocess.call([sys.executable, "load_args.py", "--output_dir", "data/"], shell=True)


@ -5,9 +5,7 @@ import os
import shutil
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows"
)
@pytest.mark.skipif(sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows")
def test_mcc():
from flaml import AutoML
import requests
@ -27,13 +25,7 @@ def test_mcc():
automl_settings["metric"] = "accuracy"
try:
automl.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
except requests.exceptions.HTTPError:
return


@ -29,9 +29,7 @@ def test_regression():
ray.shutdown()
ray.init()
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
automl.predict(X_val)
if os.path.exists("test/data/output/"):


@ -22,18 +22,10 @@ def test_summarization():
automl_settings["task"] = "summarization"
automl_settings["metric"] = "rouge1"
automl_settings["time_budget"] = 2 * automl_settings["time_budget"]
automl_settings["fit_kwargs_by_estimator"]["transformer"][
"model_path"
] = "patrickvonplaten/t5-tiny-random"
automl_settings["fit_kwargs_by_estimator"]["transformer"]["model_path"] = "patrickvonplaten/t5-tiny-random"
try:
automl.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
except requests.exceptions.HTTPError:
return
@ -41,13 +33,7 @@ def test_summarization():
automl_settings.pop("use_ray", None)
automl_settings.pop("estimator_list", None)
automl.retrain_from_log(
X_train=X_train,
y_train=y_train,
train_full=True,
record_id=0,
**automl_settings
)
automl.retrain_from_log(X_train=X_train, y_train=y_train, train_full=True, record_id=0, **automl_settings)
automl.predict(X_test)
if os.path.exists("test/data/output/"):


@ -22,9 +22,7 @@ def test_tokenclassification_idlabel():
automl_settings = get_automl_settings()
automl_settings["task"] = "token-classification"
automl_settings[
"metric"
] = "seqeval:overall_f1" # evaluating based on the overall_f1 of seqeval
automl_settings["metric"] = "seqeval:overall_f1" # evaluating based on the overall_f1 of seqeval
automl_settings["fit_kwargs_by_estimator"]["transformer"]["label_list"] = [
"O",
"B-PER",
@ -38,13 +36,7 @@ def test_tokenclassification_idlabel():
]
try:
automl.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
except requests.exceptions.HTTPError:
return
@ -83,18 +75,10 @@ def test_tokenclassification_tokenlabel():
automl_settings = get_automl_settings()
automl_settings["task"] = "token-classification"
automl_settings[
"metric"
] = "seqeval:overall_f1" # evaluating based on the overall_f1 of seqeval
automl_settings["metric"] = "seqeval:overall_f1" # evaluating based on the overall_f1 of seqeval
try:
automl.fit(
X_train=X_train,
y_train=y_train,
X_val=X_val,
y_val=y_val,
**automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
except requests.exceptions.HTTPError:
return


@ -32,15 +32,10 @@ def test_starting_point_not_in_search_space():
automl = AutoML()
automl_settings = get_automl_settings(estimator_name=this_estimator_name)
automl_settings["starting_points"] = {
this_estimator_name: [{"learning_rate": 2e-3}]
}
automl_settings["starting_points"] = {this_estimator_name: [{"learning_rate": 2e-3}]}
automl.fit(X_train, y_train, **automl_settings)
assert (
automl._search_states[this_estimator_name].init_config[0]["learning_rate"]
!= 2e-3
)
assert automl._search_states[this_estimator_name].init_config[0]["learning_rate"] != 2e-3
"""
test starting_points located outside of the search space, and custom_hp is set
@ -80,10 +75,7 @@ def test_starting_point_not_in_search_space():
len(automl_settings["custom_hp"][this_estimator_name]),
)
)
assert (
automl._search_states[this_estimator_name].search_space["model_path"]
== "albert-base-v2"
)
assert automl._search_states[this_estimator_name].search_space["model_path"] == "albert-base-v2"
if os.path.exists("test/data/output/"):
try:
@ -103,11 +95,7 @@ def test_points_to_evaluate():
automl_settings["starting_points"] = "data:test/nlp/default/"
automl_settings["custom_hp"] = {
"transformer_ms": {
"model_path": {"domain": "google/electra-small-discriminator"}
}
}
automl_settings["custom_hp"] = {"transformer_ms": {"model_path": {"domain": "google/electra-small-discriminator"}}}
automl.fit(X_train, y_train, **automl_settings)
@ -137,13 +125,9 @@ def test_zero_shot_nomodel():
y_train,
_,
_,
) = preprocess_and_suggest_hyperparams(
"seq-classification", X_train, y_train, estimator_name, location=location
)
) = preprocess_and_suggest_hyperparams("seq-classification", X_train, y_train, estimator_name, location=location)
model = estimator_class(
**hyperparams
) # estimator_class is TransformersEstimatorModelSelection
model = estimator_class(**hyperparams) # estimator_class is TransformersEstimatorModelSelection
fit_kwargs = automl_settings.pop("fit_kwargs_by_estimator", {}).get(estimator_name)
fit_kwargs.update(automl_settings)


@ -314,8 +314,7 @@ def get_toy_data_multiplechoiceclassification():
" its false bottom. He stands and looks around, his eyes",
],
"sent1": [
"Someone leans out of the drive - thru "
"window, grinning at her, holding bags filled with fast food.",
"Someone leans out of the drive - thru " "window, grinning at her, holding bags filled with fast food.",
"Someone looks up suddenly when he hears.",
"Someone drives; someone sits beside her.",
"He opens the drawer in which we know"
@ -343,8 +342,7 @@ def get_toy_data_multiplechoiceclassification():
"ending2": [
"attempts to block her ransacked.",
"talks using the phone and walks away for a few seconds.",
"are too involved with each other to "
"notice someone watching them from the drive - thru window.",
"are too involved with each other to " "notice someone watching them from the drive - thru window.",
"finally landing on: the digicam and a stack of cassettes on a shelf.",
],
"ending3": [


@ -107,9 +107,7 @@ def main(args):
data_dir,
train=True,
download=True,
transform=transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
),
transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
),
batch_size=args["batch_size"],
shuffle=True,
@ -119,9 +117,7 @@ def main(args):
datasets.MNIST(
data_dir,
train=False,
transform=transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
),
transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
),
batch_size=1000,
shuffle=True,
@ -188,12 +184,8 @@ def get_params():
metavar="N",
help="number of epochs to train (default: 10)",
)
parser.add_argument(
"--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
)
parser.add_argument(
"--no_cuda", action="store_true", default=False, help="disables CUDA training"
)
parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
parser.add_argument("--no_cuda", action="store_true", default=False, help="disables CUDA training")
parser.add_argument(
"--log_interval",
type=int,


@ -8,9 +8,7 @@ import ray
data = fetch_california_housing(return_X_y=False, as_frame=True)
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
X_train_ref = ray.put(X_train)
print(isinstance(X_train_ref, ray.ObjectRef))
@ -33,22 +31,14 @@ def train_lgbm(config: dict) -> dict:
# load a built-in search space from flaml
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
# specify the search space as a dict from hp name to domain; you can define your own search space the same way
config_search_space = {
hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}
config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
# give guidance about hp values corresponding to low training cost, i.e., {"n_estimators": 4, "num_leaves": 4}
low_cost_partial_config = {
hp: space["low_cost_init_value"]
for hp, space in flaml_lgbm_search_space.items()
if "low_cost_init_value" in space
hp: space["low_cost_init_value"] for hp, space in flaml_lgbm_search_space.items() if "low_cost_init_value" in space
}
# initial points to evaluate
points_to_evaluate = [
{
hp: space["init_value"]
for hp, space in flaml_lgbm_search_space.items()
if "init_value" in space
}
{hp: space["init_value"] for hp, space in flaml_lgbm_search_space.items() if "init_value" in space}
]
# run the tuning, minimizing mse, with total time budget 3 seconds
analysis = tune.run(

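The hunk above comes from FLAML's tune-LightGBM-with-Ray example. Stripped of the Ray plumbing, the search-space machinery it reformats can be run locally, roughly as follows; the flaml.automl.model import path assumes a flaml 1.x layout, and LGBMEstimator(task="regression", **config).params is used to map a sampled flaml config onto lightgbm parameters:

from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor
from flaml import tune
from flaml.automl.model import LGBMEstimator

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
low_cost_partial_config = {
    hp: space["low_cost_init_value"]
    for hp, space in flaml_lgbm_search_space.items()
    if "low_cost_init_value" in space
}

def train_lgbm(config: dict) -> dict:
    # translate the sampled flaml config into lightgbm parameters
    params = LGBMEstimator(task="regression", **config).params
    model = LGBMRegressor(**params)
    model.fit(X_train, y_train)
    return {"mse": mean_squared_error(y_test, model.predict(X_test))}

analysis = tune.run(
    train_lgbm,
    metric="mse",
    mode="min",
    config=config_search_space,
    low_cost_partial_config=low_cost_partial_config,
    time_budget_s=3,
    num_samples=-1,
)
print(analysis.best_config)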

@ -17,9 +17,7 @@ from flaml.autogen.math_utils import eval_math_responses
reason="do not run on windows",
)
def test_humaneval(num_samples=1):
eval_with_generated_assertions = partial(
eval_function_completions, assertions=generate_assertions
)
eval_with_generated_assertions = partial(eval_function_completions, assertions=generate_assertions)
seed = 41
data = datasets.load_dataset("openai_humaneval")["test"].shuffle(seed=seed)
@ -165,9 +163,7 @@ def test_math(num_samples=-1):
"stop": "###",
}
test_data_sample = test_data[0:3]
result = oai.ChatCompletion.test(
test_data_sample, vanilla_config, eval_math_responses
)
result = oai.ChatCompletion.test(test_data_sample, vanilla_config, eval_math_responses)
test_data_sample = test_data[3:6]
result = oai.ChatCompletion.test(
test_data_sample,


@ -83,9 +83,7 @@ def build_and_submit_aml_pipeline(config):
################################################
# load component functions
################################################
data_prep_component = Component.from_yaml(
ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml"
)
data_prep_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml")
train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "train/train.yaml")
################################################


@ -24,9 +24,7 @@ def remote_run():
# load component functions
################################################
pipeline_tuning_func = Component.from_yaml(
ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml"
)
pipeline_tuning_func = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml")
################################################
# build pipeline
@ -61,9 +59,7 @@ if __name__ == "__main__":
help="your_subscription_id",
required=False,
)
parser.add_argument(
"--resource_group", type=str, help="your_resource_group", required=False
)
parser.add_argument("--resource_group", type=str, help="your_resource_group", required=False)
parser.add_argument("--workspace", type=str, help="your_workspace", required=False)
parser.add_argument("--remote", dest="remote", action="store_true")


@ -26,9 +26,7 @@ if __name__ == "__main__":
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
X_train_ref = ray.put(X_train)
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
config_search_space = {
hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}
config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
low_cost_partial_config = {
hp: space["low_cost_init_value"]
for hp, space in flaml_lgbm_search_space.items()


@ -28,9 +28,7 @@ settings = {
for trial_num in range(8):
automl = AutoML()
automl.add_learner(
learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded
)
automl.add_learner(learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded)
automl.fit(X_train=X_train, y_train=y_train, **settings)
print(automl.best_loss)
print(automl.best_config)


@ -7,16 +7,12 @@ ray_environment_name = "aml-ray-cpu"
ray_environment_dockerfile_path = "./Docker/Dockerfile-cpu"
# Build CPU image for Ray
ray_cpu_env = Environment.from_dockerfile(
name=ray_environment_name, dockerfile=ray_environment_dockerfile_path
)
ray_cpu_env = Environment.from_dockerfile(name=ray_environment_name, dockerfile=ray_environment_dockerfile_path)
ray_cpu_env.register(workspace=ws)
ray_cpu_build_details = ray_cpu_env.build(workspace=ws)
while ray_cpu_build_details.status not in ["Succeeded", "Failed"]:
print(
f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}"
)
print(f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}")
time.sleep(10)
command = ["python distribute_automl.py"]


@ -7,16 +7,12 @@ ray_environment_name = "aml-ray-cpu"
ray_environment_dockerfile_path = "./Docker/Dockerfile-cpu"
# Build CPU image for Ray
ray_cpu_env = Environment.from_dockerfile(
name=ray_environment_name, dockerfile=ray_environment_dockerfile_path
)
ray_cpu_env = Environment.from_dockerfile(name=ray_environment_name, dockerfile=ray_environment_dockerfile_path)
ray_cpu_env.register(workspace=ws)
ray_cpu_build_details = ray_cpu_env.build(workspace=ws)
while ray_cpu_build_details.status not in ["Succeeded", "Failed"]:
print(
f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}"
)
print(f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}")
time.sleep(10)
command = ["python distribute_tune.py"]


@ -36,9 +36,7 @@ else:
skip_spark = True
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
def _test_spark_synapseml_lightgbm(spark=None, task="classification"):
@ -83,9 +81,7 @@ def _test_spark_synapseml_lightgbm(spark=None, task="classification"):
columns = X_train.columns
feature_cols = [col for col in columns if col != "label"]
featurizer = VectorAssembler(inputCols=feature_cols, outputCol="features")
X_train = featurizer.transform(X_train.to_spark(index_col="index"))[
"index", "features"
]
X_train = featurizer.transform(X_train.to_spark(index_col="index"))["index", "features"]
X_train = to_pandas_on_spark(X_train)
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
@ -138,9 +134,7 @@ def test_spark_input_df():
spark.read.format("csv")
.option("header", True)
.option("inferSchema", True)
.load(
"wasbs://publicwasb@mmlspark.blob.core.windows.net/company_bankruptcy_prediction_data.csv"
)
.load("wasbs://publicwasb@mmlspark.blob.core.windows.net/company_bankruptcy_prediction_data.csv")
)
train, test = df.randomSplit([0.8, 0.2], seed=1)
feature_cols = df.columns[1:]
@ -151,9 +145,7 @@ def test_spark_input_df():
settings = {
"time_budget": 30, # total running time in seconds
"metric": "roc_auc",
"estimator_list": [
"lgbm_spark"
], # list of ML learners; we tune lightgbm in this example
"estimator_list": ["lgbm_spark"], # list of ML learners; we tune lightgbm in this example
"task": "classification", # task type
"log_file_name": "flaml_experiment.log", # flaml log file
"seed": 7654321, # random seed
@ -187,9 +179,7 @@ def test_spark_input_df():
settings = {
"time_budget": 10, # total running time in seconds
"metric": "roc_auc",
"estimator_list": [
"lgbm"
], # list of ML learners; we tune lightgbm in this example
"estimator_list": ["lgbm"], # list of ML learners; we tune lightgbm in this example
"task": "classification", # task type
}
with pytest.raises(ValueError) as excinfo:

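The Spark hunks above all reformat the same data path: assemble the feature columns into a single vector column, convert the Spark DataFrame with to_pandas_on_spark, then fit the Spark-native lgbm_spark learner. A condensed sketch of that flow, assuming pyspark plus flaml's synapseml-backed spark extra are installed and the flaml.automl.spark.utils import path of flaml 1.x:

import pandas as pd
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from flaml import AutoML
from flaml.automl.spark.utils import to_pandas_on_spark

spark = SparkSession.builder.getOrCreate()
pdf = pd.DataFrame({"f1": range(100), "f2": range(100), "label": [i % 2 for i in range(100)]})  # toy data
sdf = spark.createDataFrame(pdf)

featurizer = VectorAssembler(inputCols=["f1", "f2"], outputCol="features")
sdf = featurizer.transform(sdf)["label", "features"]
psdf = to_pandas_on_spark(sdf)

automl = AutoML()
automl.fit(
    dataframe=psdf,
    label="label",
    task="classification",
    estimator_list=["lgbm_spark"],  # the Spark-distributed LightGBM learner
    time_budget=30,
)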

@ -23,9 +23,7 @@ os.environ["FLAML_MAX_CONCURRENT"] = "2"
spark_available, _ = check_spark()
skip_spark = not spark_available
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
def test_parallel_xgboost(hpo_method=None, data_size=1000):
@ -59,9 +57,7 @@ def test_parallel_xgboost_others():
test_parallel_xgboost(hpo_method="random")
@pytest.mark.skip(
reason="currently not supporting too large data, will support spark dataframe in the future"
)
@pytest.mark.skip(reason="currently not supporting too large data, will support spark dataframe in the future")
def test_large_dataset():
test_parallel_xgboost(data_size=90000000)
@ -101,8 +97,6 @@ if __name__ == "__main__":
test_parallel_xgboost_others()
# test_large_dataset()
if skip_my_learner:
print(
"please run pytest in the root directory of FLAML, i.e., the directory that contains the setup.py file"
)
print("please run pytest in the root directory of FLAML, i.e., the directory that contains the setup.py file")
else:
test_custom_learner()


@ -7,17 +7,13 @@ import pytest
spark_available, _ = check_spark()
skip_spark = not spark_available
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
os.environ["FLAML_MAX_CONCURRENT"] = "2"
def base_automl(n_concurrent_trials=1, use_ray=False, use_spark=False, verbose=0):
X_train, X_test, y_train, y_test = load_openml_dataset(
dataset_id=537, data_dir="./"
)
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./")
automl = AutoML()
settings = {
"time_budget": 3, # total running time in seconds
@ -37,9 +33,7 @@ def base_automl(n_concurrent_trials=1, use_ray=False, use_spark=False, verbose=0
print("Best ML learner:", automl.best_estimator)
print("Best hyperparameter config:", automl.best_config)
print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
print(
"Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
)
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
def test_both_ray_spark():

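base_automl above differs from a plain fit in only two arguments: how many trials run at once and which backend runs them. A minimal sketch of the Spark variant (swap use_spark for use_ray=True to get the Ray backend; the budget is illustrative):

from flaml import AutoML
from flaml.automl.data import load_openml_dataset

X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./")

automl = AutoML()
automl.fit(
    X_train=X_train,
    y_train=y_train,
    task="regression",
    time_budget=3,
    n_concurrent_trials=2,  # trials running in parallel
    use_spark=True,  # or use_ray=True
)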

@ -107,9 +107,7 @@ class TestMultiClass(unittest.TestCase):
valid_loss_history,
config_history,
metric_history,
) = get_output_from_log(
filename=automl_settings["log_file_name"], time_budget=6
)
) = get_output_from_log(filename=automl_settings["log_file_name"], time_budget=6)
print(metric_history)
def test_classification(self, as_frame=False):
@ -167,12 +165,8 @@ class TestMultiClass(unittest.TestCase):
"use_spark": True,
}
X_train, y_train = load_iris(return_X_y=True)
automl_experiment_micro.fit(
X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings
)
automl_experiment_macro.fit(
X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings
)
automl_experiment_micro.fit(X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings)
automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings)
estimator = automl_experiment_macro.model
y_pred = estimator.predict(X_train)
y_pred_proba = estimator.predict_proba(X_train)
@ -280,9 +274,7 @@ class TestMultiClass(unittest.TestCase):
)
def _test_memory_limit(self):
automl_experiment = AutoML()
automl_experiment.add_learner(
learner_name="large_lgbm", learner_class=MyLargeLGBM
)
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
automl_settings = {
"time_budget": -1,
"task": "classification",
@ -296,9 +288,7 @@ class TestMultiClass(unittest.TestCase):
}
X_train, y_train = load_iris(return_X_y=True, as_frame=True)
automl_experiment.fit(
X_train=X_train, y_train=y_train, max_iter=1, **automl_settings
)
automl_experiment.fit(X_train=X_train, y_train=y_train, max_iter=1, **automl_settings)
print(automl_experiment.model)
@unittest.skipIf(
@ -307,12 +297,8 @@ class TestMultiClass(unittest.TestCase):
)
def test_time_limit(self):
automl_experiment = AutoML()
automl_experiment.add_learner(
learner_name="large_lgbm", learner_class=MyLargeLGBM
)
automl_experiment.add_learner(
learner_name="large_xgb", learner_class=MyLargeXGB
)
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
automl_experiment.add_learner(learner_name="large_xgb", learner_class=MyLargeXGB)
automl_settings = {
"time_budget": 0.5,
"task": "classification",
@ -356,11 +342,7 @@ class TestMultiClass(unittest.TestCase):
print("Best ML learner:", automl_experiment.best_estimator)
print("Best hyperparameter config:", automl_experiment.best_config)
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
print(
"Training duration of best run: {0:.4g} s".format(
automl_experiment.best_config_train_time
)
)
print("Training duration of best run: {0:.4g} s".format(automl_experiment.best_config_train_time))
starting_points = automl_experiment.best_config_per_estimator
print("starting_points", starting_points)
@ -379,21 +361,13 @@ class TestMultiClass(unittest.TestCase):
"use_spark": True,
}
new_automl_experiment = AutoML()
new_automl_experiment.fit(
X_train=X_train, y_train=y_train, **automl_settings_resume
)
new_automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings_resume)
new_automl_val_accuracy = 1.0 - new_automl_experiment.best_loss
print("Best ML learner:", new_automl_experiment.best_estimator)
print("Best hyperparameter config:", new_automl_experiment.best_config)
print(
"Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
)
print(
"Training duration of best run: {0:.4g} s".format(
new_automl_experiment.best_config_train_time
)
)
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
print("Training duration of best run: {0:.4g} s".format(new_automl_experiment.best_config_train_time))
def test_fit_w_starting_points_list(self, as_frame=True):
automl_experiment = AutoML()
@ -418,11 +392,7 @@ class TestMultiClass(unittest.TestCase):
print("Best ML learner:", automl_experiment.best_estimator)
print("Best hyperparameter config:", automl_experiment.best_config)
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
print(
"Training duration of best run: {0:.4g} s".format(
automl_experiment.best_config_train_time
)
)
print("Training duration of best run: {0:.4g} s".format(automl_experiment.best_config_train_time))
starting_points = {}
log_file_name = automl_settings["log_file_name"]
@ -453,16 +423,12 @@ class TestMultiClass(unittest.TestCase):
"use_spark": True,
}
new_automl_experiment = AutoML()
new_automl_experiment.fit(
X_train=X_train, y_train=y_train, **automl_settings_resume
)
new_automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings_resume)
new_automl_val_accuracy = 1.0 - new_automl_experiment.best_loss
# print('Best ML learner:', new_automl_experiment.best_estimator)
# print('Best hyperparameter config:', new_automl_experiment.best_config)
print(
"Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
)
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
# print('Training duration of best run: {0:.4g} s'.format(new_automl_experiment.best_config_train_time))


@ -8,9 +8,7 @@ import pytest
spark_available, _ = check_spark()
skip_spark = not spark_available
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
here = os.path.abspath(os.path.dirname(__file__))
os.environ["FLAML_MAX_CONCURRENT"] = "2"


@ -24,9 +24,7 @@ try:
except ImportError:
skip_spark = True
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
def test_overtime():
@ -56,11 +54,7 @@ def test_overtime():
start_time = time.time()
automl_experiment.fit(**automl_settings)
elapsed_time = time.time() - start_time
print(
"time budget: {:.2f}s, actual elapsed time: {:.2f}s".format(
time_budget, elapsed_time
)
)
print("time budget: {:.2f}s, actual elapsed time: {:.2f}s".format(time_budget, elapsed_time))
# assert abs(elapsed_time - time_budget) < 5 # cancel assertion because github VM sometimes is super slow, causing the test to fail
print(automl_experiment.predict(df))
print(automl_experiment.model)


@ -8,9 +8,7 @@ import pytest
spark_available, _ = check_spark()
skip_spark = not spark_available
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
os.environ["FLAML_MAX_CONCURRENT"] = "2"
@ -72,9 +70,7 @@ def run_automl(budget=3, dataset_format="dataframe", hpo_method=None):
print("Best ML learner:", automl.best_estimator)
print("Best hyperparameter config:", automl.best_config)
print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
print(
"Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
)
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
print(automl.model.estimator)
print(automl.best_config_per_estimator)
print("time taken to find best model:", automl.time_to_find_best_model)
@ -89,9 +85,7 @@ def run_automl(budget=3, dataset_format="dataframe", hpo_method=None):
accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
print("accuracy", "=", accuracy)
print(
"roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
)
print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
if performance_check_budget is None:
assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"


@ -12,9 +12,7 @@ import pytest
spark_available, _ = check_spark()
skip_spark = not spark_available
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
os.environ["FLAML_MAX_CONCURRENT"] = "2"
X, y = load_breast_cancer(return_X_y=True)
@ -35,9 +33,7 @@ def train_breast_cancer(config):
def test_tune_spark():
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
config_search_space = {
hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}
config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
analysis = tune.run(
train_breast_cancer,

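test_tune_spark above drives the same kind of search through flaml.tune, where Spark parallelism is again a single flag. A toy stand-in for the test's train_breast_cancer objective keeps the sketch self-contained (use_spark assumes pyspark and joblib-spark are available):

from flaml import tune

def objective(config):
    # toy objective standing in for train_breast_cancer
    return {"auc": 1 - abs(config["x"] - 0.3)}

analysis = tune.run(
    objective,
    config={"x": tune.uniform(0, 1)},
    metric="auc",
    mode="max",
    num_samples=8,
    use_spark=True,  # distribute trials over the Spark cluster
)
print(analysis.best_config)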

@ -34,9 +34,7 @@ except ImportError:
print("Spark is not installed. Skip all spark tests.")
skip_spark = True
pytestmark = pytest.mark.skipif(
skip_spark, reason="Spark is not installed. Skip all spark tests."
)
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
def test_with_parameters_spark():
@ -53,9 +51,7 @@ def test_with_parameters_spark():
spark = SparkSession.builder.getOrCreate()
rdd = spark.sparkContext.parallelize(list(range(2)))
t_partial = timeit(
lambda: rdd.map(lambda x: partial_train(config=x)).collect(), number=5
)
t_partial = timeit(lambda: rdd.map(lambda x: partial_train(config=x)).collect(), number=5)
print("python_partial_train: " + str(t_partial))
t_spark = timeit(
@ -139,12 +135,8 @@ def test_train_test_split_pyspark():
spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame(pdf).repartition(1)
psdf = to_pandas_on_spark(sdf).spark.repartition(1)
train_sdf, test_sdf = train_test_split_pyspark(
sdf, test_fraction=0.5, to_pandas_spark=False, seed=1
)
train_psdf, test_psdf = train_test_split_pyspark(
psdf, test_fraction=0.5, stratify_column="y", seed=1
)
train_sdf, test_sdf = train_test_split_pyspark(sdf, test_fraction=0.5, to_pandas_spark=False, seed=1)
train_psdf, test_psdf = train_test_split_pyspark(psdf, test_fraction=0.5, stratify_column="y", seed=1)
assert isinstance(train_sdf, pyspark.sql.dataframe.DataFrame)
assert isinstance(test_sdf, pyspark.sql.dataframe.DataFrame)
assert isinstance(train_psdf, ps.DataFrame)
@ -190,9 +182,7 @@ def test_unique_value_first_index():
def test_n_current_trials():
spark = SparkSession.builder.getOrCreate()
sc = spark._jsc.sc()
num_executors = (
len([executor.host() for executor in sc.statusTracker().getExecutorInfos()]) - 1
)
num_executors = len([executor.host() for executor in sc.statusTracker().getExecutorInfos()]) - 1
def get_n_current_trials(n_concurrent_trials=0, num_executors=num_executors):
try:


@ -18,9 +18,7 @@ NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
logger = logging.getLogger(__name__)
def oml_to_vw_w_grouping(
X, y, ds_dir, fname, orginal_dim, group_num, grouping_method="sequential"
):
def oml_to_vw_w_grouping(X, y, ds_dir, fname, orginal_dim, group_num, grouping_method="sequential"):
# split all_indexes into # group_num of groups
max_size_per_group = int(np.ceil(orginal_dim / float(group_num)))
# sequential grouping
@ -49,17 +47,11 @@ def oml_to_vw_w_grouping(
for i in range(len(X)):
NS_content = []
for zz in range(len(group_indexes)):
ns_features = " ".join(
"{}:{:.6f}".format(ind, X[i][ind])
for ind in group_indexes[zz]
)
ns_features = " ".join("{}:{:.6f}".format(ind, X[i][ind]) for ind in group_indexes[zz])
NS_content.append(ns_features)
ns_line = "{} |{}".format(
str(y[i]),
"|".join(
"{} {}".format(NS_LIST[j], NS_content[j])
for j in range(len(group_indexes))
),
"|".join("{} {}".format(NS_LIST[j], NS_content[j]) for j in range(len(group_indexes))),
)
f.write(ns_line)
f.write("\n")
@ -140,10 +132,7 @@ def load_vw_dataset(did, ds_dir, is_regression, max_ns_num):
fname = "ds_{}_{}_{}.vw".format(did, max_ns_num, 0)
vw_dataset_file = os.path.join(ds_dir, fname)
# if file does not exist, generate and save the datasets
if (
not os.path.exists(vw_dataset_file)
or os.stat(vw_dataset_file).st_size < 1000
):
if not os.path.exists(vw_dataset_file) or os.stat(vw_dataset_file).st_size < 1000:
get_oml_to_vw(did, max_ns_num)
print(ds_dir, vw_dataset_file)
if not os.path.exists(ds_dir):
@ -175,9 +164,7 @@ def get_data(
# Y = data.Y
if vw_format:
# vw_examples = data.vw_examples
vw_examples = load_vw_dataset(
did=data_id, ds_dir=VW_DS_DIR, is_regression=True, max_ns_num=max_ns_num
)
vw_examples = load_vw_dataset(did=data_id, ds_dir=VW_DS_DIR, is_regression=True, max_ns_num=max_ns_num)
Y = []
for i, e in enumerate(vw_examples):
Y.append(float(e.split("|")[0]))
@ -230,9 +217,7 @@ class VowpalWabbitNamesspaceTuningProblem:
}
self._problem_info.update(kwargs)
self._fixed_hp_config = kwargs.get("fixed_hp_config", {})
self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(
self.vw_examples[0]
)
self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(self.vw_examples[0])
self._raw_namespaces = list(self.namespace_feature_dim.keys())
self._setup_search()
@ -355,13 +340,9 @@ def get_vw_tuning_problem(tuning_hp="NamesapceInteraction"):
"fixed_hp_config": online_vw_exp_setting["fixed_hp_config"],
}
if tuning_hp == "NamesapceInteraction":
vw_online_aml_problem = VowpalWabbitNamesspaceTuningProblem(
**vw_oml_problem_args
)
vw_online_aml_problem = VowpalWabbitNamesspaceTuningProblem(**vw_oml_problem_args)
elif tuning_hp == "NamesapceInteraction+LearningRate":
vw_online_aml_problem = VowpalWabbitNamesspaceLRTuningProblem(
**vw_oml_problem_args
)
vw_online_aml_problem = VowpalWabbitNamesspaceLRTuningProblem(**vw_oml_problem_args)
else:
raise NotImplementedError
@ -382,13 +363,9 @@ class TestAutoVW(unittest.TestCase):
vw_online_aml_problem.max_iter_num,
vw_online_aml_problem.vw_examples,
vanilla_vw,
loss_func=vw_oml_problem_args["fixed_hp_config"].get(
"loss_function", "squared"
),
)
print(
"final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list)
loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
)
print("final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list))
def test_supervised_vw_tune_namespace(self):
# basic experiment setting
@ -405,13 +382,9 @@ class TestAutoVW(unittest.TestCase):
vw_online_aml_problem.max_iter_num,
vw_online_aml_problem.vw_examples,
autovw,
loss_func=vw_oml_problem_args["fixed_hp_config"].get(
"loss_function", "squared"
),
)
print(
"final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list)
loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
)
print("final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list))
def test_supervised_vw_tune_namespace_learningrate(self):
# basic experiment setting
@ -430,13 +403,9 @@ class TestAutoVW(unittest.TestCase):
vw_online_aml_problem.max_iter_num,
vw_online_aml_problem.vw_examples,
autovw,
loss_func=vw_oml_problem_args["fixed_hp_config"].get(
"loss_function", "squared"
),
)
print(
"final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list)
loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
)
print("final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list))
def test_bandit_vw_tune_namespace(self):
pass

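The Vowpal Wabbit hunks above reformat helpers that turn OpenML data into VW's namespaced text format and stream it through AutoVW. The online loop the tests drive is, approximately, the following progressive-validation pattern (vowpalwabbit must be installed; the two examples are made-up VW-format lines):

from flaml import AutoVW

vw_examples = ["1 |a x:0.1 |b y:0.2", "-1 |a x:0.3 |b y:0.4"]  # assumed iterable of VW-format lines

autovw = AutoVW(
    max_live_model_num=5,  # number of VW models tuned concurrently
    search_space={"interactions": AutoVW.AUTOMATIC},  # tune namespace interactions automatically
)
for example in vw_examples:
    prediction = autovw.predict(example)  # predict first (progressive validation)...
    autovw.learn(example)  # ...then learn from the same example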

@ -23,9 +23,7 @@ def test_xgboost():
gpu_per_trial=1,
)
train, label = make_moons(
n_samples=300000, shuffle=True, noise=0.3, random_state=None
)
train, label = make_moons(n_samples=300000, shuffle=True, noise=0.3, random_state=None)
automl = AutoML()
automl.fit(
train,
@ -89,18 +87,10 @@ def _test_hf_data():
}
}
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)
automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
automl = AutoML()
automl.retrain_from_log(
X_train=X_train,
y_train=y_train,
train_full=True,
record_id=0,
**automl_settings
)
automl.retrain_from_log(X_train=X_train, y_train=y_train, train_full=True, record_id=0, **automl_settings)
with open("automl.pkl", "wb") as f:
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
with open("automl.pkl", "rb") as f:


@ -31,9 +31,7 @@ def setup_searcher(searcher_name):
from flaml.tune.searcher.blendsearch import BlendSearch, CFO, RandomSearch
if "cfo" in searcher_name:
searcher = CFO(
space=config_search_space, low_cost_partial_config=low_cost_partial_config
)
searcher = CFO(space=config_search_space, low_cost_partial_config=low_cost_partial_config)
elif searcher_name == "bs":
searcher = BlendSearch(
metric="metric",
@ -48,9 +46,7 @@ def setup_searcher(searcher_name):
return searcher
def _test_flaml_raytune_consistency(
num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
):
def _test_flaml_raytune_consistency(num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"):
try:
from ray import tune as raytune, __version__ as ray_version
@ -59,9 +55,7 @@ def _test_flaml_raytune_consistency(
else:
from ray.tune.search import ConcurrencyLimiter
except ImportError:
print(
"skip _test_flaml_raytune_consistency because ray tune cannot be imported."
)
print("skip _test_flaml_raytune_consistency because ray tune cannot be imported.")
return
searcher = setup_searcher(searcher_name)
analysis = tune.run(
@ -110,21 +104,13 @@ def _test_flaml_raytune_consistency(
print("flaml config in results", searcher_name, flaml_config_in_results)
print("ray config in results", searcher_name, ray_config_in_results)
assert ray_best_config == flaml_best_config, "best config should be the same"
assert (
flaml_config_in_results == ray_config_in_results
), "results from raytune and flaml should be the same"
assert flaml_config_in_results == ray_config_in_results, "results from raytune and flaml should be the same"
def test_consistency():
_test_flaml_raytune_consistency(
num_samples=5, max_concurrent_trials=1, searcher_name="random"
)
_test_flaml_raytune_consistency(
num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
)
_test_flaml_raytune_consistency(
num_samples=5, max_concurrent_trials=1, searcher_name="bs"
)
_test_flaml_raytune_consistency(num_samples=5, max_concurrent_trials=1, searcher_name="random")
_test_flaml_raytune_consistency(num_samples=5, max_concurrent_trials=1, searcher_name="cfo")
_test_flaml_raytune_consistency(num_samples=5, max_concurrent_trials=1, searcher_name="bs")
if __name__ == "__main__":

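The consistency test above runs one search twice: natively through flaml.tune, and through Ray Tune with a flaml searcher plugged in via a ConcurrencyLimiter. The Ray-side wiring looks roughly like this (Ray 2.x import path assumed; the quadratic objective is a toy stand-in):

from flaml import tune
from flaml.tune.searcher.blendsearch import CFO
from ray import tune as raytune
from ray.tune.search import ConcurrencyLimiter

config_search_space = {"x": tune.uniform(0, 10)}
low_cost_partial_config = {"x": 0}

def evaluate(config):
    return {"metric": (config["x"] - 3) ** 2}  # toy objective

searcher = CFO(
    metric="metric",
    mode="min",
    space=config_search_space,
    low_cost_partial_config=low_cost_partial_config,
)
analysis = raytune.run(
    evaluate,
    search_alg=ConcurrencyLimiter(searcher, max_concurrent=1),
    num_samples=5,
)
print(analysis.get_best_config(metric="metric", mode="min"))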

@ -89,9 +89,7 @@ def test_lexiflow():
correct += pred.eq(target.view_as(pred)).sum().item()
accuracy = correct / N_VALID_EXAMPLES
flops, params = thop.profile(
model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
)
flops, params = thop.profile(model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False)
return np.log2(flops), 1 - accuracy, params
def evaluate_function(configuration):
@ -198,9 +196,7 @@ def test_lexiflow_performance():
print(analysis.best_config)
print(analysis.best_result)
assert (
analysis.best_result["currin"] <= 2.2
), "the value of currin function should be less than 2.2"
assert analysis.best_result["currin"] <= 2.2, "the value of currin function should be less than 2.2"
if __name__ == "__main__":


@ -48,17 +48,11 @@ except ImportError:
# __load_data_begin__
def load_data(data_dir="test/data"):
transform = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(
root=data_dir, train=True, download=True, transform=transform
)
trainset = torchvision.datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(
root=data_dir, train=False, download=True, transform=transform
)
testset = torchvision.datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transform)
return trainset, testset
@ -93,9 +87,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
trainset, testset = load_data(data_dir)
test_abs = int(len(trainset) * 0.8)
train_subset, val_subset = random_split(
trainset, [test_abs, len(trainset) - test_abs]
)
train_subset, val_subset = random_split(trainset, [test_abs, len(trainset) - test_abs])
trainloader = torch.utils.data.DataLoader(
train_subset,
@ -112,9 +104,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
from ray import tune
for epoch in range(
int(round(config["num_epochs"]))
): # loop over the dataset multiple times
for epoch in range(int(round(config["num_epochs"]))): # loop over the dataset multiple times
running_loss = 0.0
epoch_steps = 0
for i, data in enumerate(trainloader, 0):
@ -135,10 +125,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
running_loss += loss.item()
epoch_steps += 1
if i % 2000 == 1999: # print every 2000 mini-batches
print(
"[%d, %5d] loss: %.3f"
% (epoch + 1, i + 1, running_loss / epoch_steps)
)
print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / epoch_steps))
running_loss = 0.0
# Validation loss
@ -178,9 +165,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
def _test_accuracy(net, device="cpu"):
trainset, testset = load_data()
testloader = torch.utils.data.DataLoader(
testset, batch_size=4, shuffle=False, num_workers=2
)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)
correct = 0
total = 0
@ -200,9 +185,7 @@ def _test_accuracy(net, device="cpu"):
# __main_begin__
def cifar10_main(
method="BlendSearch", num_samples=10, max_num_epochs=100, gpus_per_trial=1
):
def cifar10_main(method="BlendSearch", num_samples=10, max_num_epochs=100, gpus_per_trial=1):
data_dir = os.path.abspath("test/data")
load_data(data_dir) # Download data for all trials before starting the run
if method == "BlendSearch":
@ -294,16 +277,8 @@ def cifar10_main(
logger.info(f"time={time.time()-start_time}")
best_trial = result.get_best_trial("loss", "min", "all")
logger.info("Best trial config: {}".format(best_trial.config))
logger.info(
"Best trial final validation loss: {}".format(
best_trial.metric_analysis["loss"]["min"]
)
)
logger.info(
"Best trial final validation accuracy: {}".format(
best_trial.metric_analysis["accuracy"]["max"]
)
)
logger.info("Best trial final validation loss: {}".format(best_trial.metric_analysis["loss"]["min"]))
logger.info("Best trial final validation accuracy: {}".format(best_trial.metric_analysis["accuracy"]["max"]))
best_trained_model = Net(2 ** best_trial.config["l1"], 2 ** best_trial.config["l2"])
device = "cpu"
@ -313,10 +288,7 @@ def cifar10_main(
best_trained_model = nn.DataParallel(best_trained_model)
best_trained_model.to(device)
checkpoint_value = (
getattr(best_trial.checkpoint, "dir_or_data", None)
or best_trial.checkpoint.value
)
checkpoint_value = getattr(best_trial.checkpoint, "dir_or_data", None) or best_trial.checkpoint.value
checkpoint_path = os.path.join(checkpoint_value, "checkpoint")
model_state, optimizer_state = torch.load(checkpoint_path)

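The 80/20 split in train_cifar is a direct use of torch.utils.data.random_split. A minimal sketch of the same pattern; the helper name, batch size, and worker count below are illustrative assumptions:

from torch.utils.data import DataLoader, random_split

def split_train_val(dataset, train_frac=0.8, batch_size=4):
    # hold out the tail of the dataset for validation, as train_cifar does above
    n_train = int(len(dataset) * train_frac)
    train_subset, val_subset = random_split(dataset, [n_train, len(dataset) - n_train])
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)
    return train_loader, val_loader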
View file

@ -41,9 +41,7 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
metric="mean_loss",
mode="min",
)
assert (
searcher.cost_attr == "time_total_s"
), "when time_budget_s is provided, cost_attr should be time_total_s"
assert searcher.cost_attr == "time_total_s", "when time_budget_s is provided, cost_attr should be time_total_s"
searcher = BlendSearch(
space=search_space,
@ -51,9 +49,7 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
metric="mean_loss",
mode="min",
)
assert (
searcher.cost_attr is None
), "when time_budget_s is not provided, cost_attr should be None."
assert searcher.cost_attr is None, "when time_budget_s is not provided, cost_attr should be None."
searcher = BlendSearch(
space=search_space,
@ -116,9 +112,7 @@ def test_reproducibility():
best_config_2 = test_tune(externally_setup_searcher=True)
print(best_config_1)
print(best_config_2)
assert (
best_config_1 == best_config_2
), "flaml.tune not reproducible when the searcher is set up externally"
assert best_config_1 == best_config_2, "flaml.tune not reproducible when the searcher is set up externally"
def test_gs_reproducibility():

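The two assertions above document BlendSearch's cost accounting: given a time_budget_s it treats Ray Tune's time_total_s as the trial cost, and without one it assumes no cost attribute. A minimal sketch of both constructions, assuming flaml's top-level BlendSearch export and a stand-in one-dimensional space:

from flaml import BlendSearch, tune

search_space = {"x": tune.uniform(0, 1)}  # illustrative stand-in space

searcher = BlendSearch(space=search_space, metric="mean_loss", mode="min", time_budget_s=10)
assert searcher.cost_attr == "time_total_s"  # budgeted: trial wall time is the cost

searcher = BlendSearch(space=search_space, metric="mean_loss", mode="min")
assert searcher.cost_attr is None  # unbudgeted: no cost attribute is assumed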
View file

@ -25,9 +25,7 @@ class AbstractWarmStartTest:
np.random.seed(162)
search_alg, cost = self.set_basic_conf()
search_alg = ConcurrencyLimiter(search_alg, 1)
results_exp_1 = tune.run(
cost, num_samples=5, search_alg=search_alg, verbose=0, local_dir=self.tmpdir
)
results_exp_1 = tune.run(cost, num_samples=5, search_alg=search_alg, verbose=0, local_dir=self.tmpdir)
checkpoint_path = os.path.join(self.tmpdir, self.experiment_name)
search_alg.save(checkpoint_path)
return results_exp_1, np.random.get_state(), checkpoint_path

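For context on the checkpoint step, a minimal sketch of the save-then-restore warm start this test exercises; the objective function and file name below are illustrative assumptions:

from flaml import BlendSearch, tune

def cost(config):
    tune.report(mean_loss=(config["x"] - 0.5) ** 2)  # illustrative objective

searcher = BlendSearch(metric="mean_loss", mode="min")
tune.run(cost, config={"x": tune.uniform(0, 1)}, search_alg=searcher, num_samples=5, verbose=0)
searcher.save("searcher_checkpoint.pkl")     # persist the searcher state
searcher.restore("searcher_checkpoint.pkl")  # a later run can resume from here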
View file

@ -120,9 +120,7 @@ def test_asha_scheduler(use_ray=False, time_budget_s=1):
except ImportError:
print("skip the test as ray tune cannot be imported.")
return
best_config = test_scheduler(
scheduler="asha", use_ray=use_ray, time_budget_s=time_budget_s
)
best_config = test_scheduler(scheduler="asha", use_ray=use_ray, time_budget_s=time_budget_s)
print("Auto ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
@ -132,9 +130,7 @@ def test_custom_scheduler():
except ImportError:
print("skip the test as ray tune cannot be imported.")
return
my_scheduler = HyperBandScheduler(
time_attr="samplesize", max_t=1000, reduction_factor=2
)
my_scheduler = HyperBandScheduler(time_attr="samplesize", max_t=1000, reduction_factor=2)
best_config = test_scheduler(scheduler=my_scheduler)
print("Custom ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))

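A minimal sketch of the custom-scheduler path above, assuming ray[tune] is installed; the objective function and the reported samplesize value are illustrative:

from ray.tune.schedulers import HyperBandScheduler
from flaml import tune

def evaluate(config):
    # illustrative objective; "samplesize" is the time_attr the scheduler watches
    tune.report(mean_loss=(config["z"] - 5) ** 2, samplesize=100)

my_scheduler = HyperBandScheduler(time_attr="samplesize", max_t=1000, reduction_factor=2)
analysis = tune.run(
    evaluate,
    config={"z": tune.uniform(0, 10)},
    metric="mean_loss",
    mode="min",
    scheduler=my_scheduler,
    num_samples=10,
)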
View file

@ -91,9 +91,7 @@ def test_searchers():
# 'set' object has no attribute 'keys'
pass
try:
searcher.add_evaluated_point(
{"a": 1, "b": 0.01}, None, intermediate_values=[0.1]
)
searcher.add_evaluated_point({"a": 1, "b": 0.01}, None, intermediate_values=[0.1])
except ValueError:
# `value` is supposed to be set for a complete trial.
pass
@ -113,16 +111,12 @@ def test_searchers():
# Dim of point {'a': 1} and parameter_names {'a': UniformDistribution(high=8.0, low=6.0), 'b': LogUniformDistribution(high=0.01, low=0.0001)} do not match.
pass
try:
searcher = OptunaSearch(
config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=1
)
searcher = OptunaSearch(config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=1)
except TypeError:
# evaluated_rewards expected to be a list, got <class 'int'>.
pass
try:
searcher = OptunaSearch(
config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=[1, 2]
)
searcher = OptunaSearch(config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=[1, 2])
except ValueError:
# Dim of evaluated_rewards [1, 2] and points_to_evaluate [{'a': 1, 'b': 0.01}] do not match.
pass
@ -197,9 +191,7 @@ def test_searchers():
searcher.save("test/tune/optuna.pkl")
searcher.restore("test/tune/optuna.pkl")
try:
searcher = BlendSearch(
metric="m", global_search_alg=searcher, metric_constraints=[("c", "<", 1)]
)
searcher = BlendSearch(metric="m", global_search_alg=searcher, metric_constraints=[("c", "<", 1)])
except AssertionError:
# sign of metric constraints must be <= or >=.
pass
@ -303,12 +295,8 @@ def test_searchers():
from flaml import tune
tune.run(lambda x: 1, config={}, use_ray=use_ray, log_file_name="logs/searcher.log")
searcher = BlendSearch(
space=config, cost_attr="cost", cost_budget=10, metric="m", mode="min"
)
analysis = tune.run(
lambda x: {"cost": 2, "m": x["b"]}, search_alg=searcher, num_samples=10
)
searcher = BlendSearch(space=config, cost_attr="cost", cost_budget=10, metric="m", mode="min")
analysis = tune.run(lambda x: {"cost": 2, "m": x["b"]}, search_alg=searcher, num_samples=10)
assert len(analysis.trials) == 5

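The closing assertion is plain arithmetic: each trial reports a cost of 2 against a cost_budget of 10, so tuning stops after 10 / 2 = 5 trials. The same pattern as a standalone sketch; the one-dimensional space is an illustrative assumption:

from flaml import BlendSearch, tune

def trainable(config):
    return {"cost": 2, "m": config["b"]}  # each trial consumes a cost of 2

searcher = BlendSearch(
    space={"b": tune.uniform(0, 1)}, cost_attr="cost", cost_budget=10, metric="m", mode="min"
)
analysis = tune.run(trainable, search_alg=searcher, num_samples=10)
assert len(analysis.trials) == 5  # budget 10 / cost 2 per trial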
View file

@ -85,9 +85,7 @@ def _test_xgboost(method="BlendSearch"):
else:
from ray import tune
search_space = {
"max_depth": tune.randint(1, 9)
if method in ["BlendSearch", "BOHB", "Optuna"]
else tune.randint(1, 9),
"max_depth": tune.randint(1, 9) if method in ["BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
"min_child_weight": tune.choice([1, 2, 3]),
"subsample": tune.uniform(0.5, 1.0),
"eta": tune.loguniform(1e-4, 1e-1),
@ -226,9 +224,7 @@ def test_nested_space():
}
def simple_func(config):
obj = (config["cost_related"]["a"] - 4) ** 2 + (
config["b"] - config["cost_related"]["a"]
) ** 2
obj = (config["cost_related"]["a"] - 4) ** 2 + (config["b"] - config["cost_related"]["a"]) ** 2
tune.report(obj=obj)
tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])
@ -291,8 +287,7 @@ def test_nested_space():
low_cost_partial_config={"cost_related": {"a": 1}},
points_to_evaluate=points_to_evaluate,
evaluated_rewards=[
(config["cost_related"]["a"] - 4) ** 2
+ (config["b"] - config["cost_related"]["a"]) ** 2
(config["cost_related"]["a"] - 4) ** 2 + (config["b"] - config["cost_related"]["a"]) ** 2
for config in points_to_evaluate[:-1]
],
metric="obj",

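The nested space above groups hyperparameters under a sub-dict so they can be addressed as a unit, e.g. as the low-cost starting point. A minimal sketch with illustrative domains:

from flaml import tune

search_space = {
    "cost_related": {"a": tune.randint(1, 8)},
    "b": tune.uniform(0.5, 3),
}

def simple_func(config):
    a, b = config["cost_related"]["a"], config["b"]
    tune.report(obj=(a - 4) ** 2 + (b - a) ** 2)

analysis = tune.run(
    simple_func,
    config=search_space,
    low_cost_partial_config={"cost_related": {"a": 1}},
    metric="obj",
    mode="min",
    num_samples=10,
)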
View file

@ -7,9 +7,7 @@ from sklearn.metrics import mean_squared_error
data = fetch_california_housing(return_X_y=False, as_frame=True)
df, X, y = data.frame, data.data, data.target
df_train, _, X_train, X_test, _, y_test = train_test_split(
df, X, y, test_size=0.33, random_state=42
)
df_train, _, X_train, X_test, _, y_test = train_test_split(df, X, y, test_size=0.33, random_state=42)
csv_file_name = "test/housing.csv"
df_train.to_csv(csv_file_name, index=False)
# X, y = fetch_california_housing(return_X_y=True, as_frame=True)
@ -24,9 +22,7 @@ def train_lgbm(config: dict) -> dict:
# train the model
# train_set = lightgbm.Dataset(X_train, y_train)
# LightGBM only accepts a csv with a valid number format, even if the string columns are set to be ignored.
train_set = lightgbm.Dataset(
csv_file_name, params={"label_column": "name:MedHouseVal", "header": True}
)
train_set = lightgbm.Dataset(csv_file_name, params={"label_column": "name:MedHouseVal", "header": True})
model = lightgbm.train(params, train_set)
# evaluate the model
pred = model.predict(X_test)
@ -39,9 +35,7 @@ def test_tune_lgbm_csv():
# load a built-in search space from flaml
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
# specify the search space as a dict from hp name to domain; you can define your own search space the same way
config_search_space = {
hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}
config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
# give guidance about hp values corresponding to low training cost, i.e., {"n_estimators": 4, "num_leaves": 4}
low_cost_partial_config = {
hp: space["low_cost_init_value"]
@ -50,11 +44,7 @@ def test_tune_lgbm_csv():
}
# initial points to evaluate
points_to_evaluate = [
{
hp: space["init_value"]
for hp, space in flaml_lgbm_search_space.items()
if "init_value" in space
}
{hp: space["init_value"] for hp, space in flaml_lgbm_search_space.items() if "init_value" in space}
]
# run the tuning, minimizing mse, with total time budget 3 seconds
analysis = tune.run(