Merge branch 'main' into dependabot/github_actions/actions/setup-python-4

2022-11-01 11:07:39 -04:00 · 2022-11-01 11:07:39 -04:00 · 06cff076f0
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@ -43,8 +43,12 @@ jobs:
          pip install -e .
          python -c "import flaml"
          pip install -e .[test]
-      - name: If linux or mac, install ray
-        if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest'
+      - name: If linux, install ray 2
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          pip install ray[tune]
+      - name: If mac, install ray
+        if: matrix.os == 'macOS-latest'
        run: |
          pip install -e .[ray]
      - name: If linux or mac, install prophet on python < 3.9
--- a/4
+++ b/4
@ -20,8 +20,8 @@ RUN sudo pip install -e .[test,notebook]
 RUN pre-commit install

 # For docs
-RUN npm install --global yarn
-RUN pip install pydoc-markdown
+RUN sudo npm install --global yarn
+RUN sudo pip install pydoc-markdown
 RUN cd website
 RUN yarn install --frozen-lockfile --ignore-engines

--- a/README.md
+++ b/README.md
@ -4,6 +4,8 @@
 ![Python Version](https://img.shields.io/badge/3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue)
 [![Downloads](https://pepy.tech/badge/flaml)](https://pepy.tech/project/flaml)
 [![Join the chat at https://gitter.im/FLAMLer/community](https://badges.gitter.im/FLAMLer/community.svg)](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![](https://img.shields.io/discord/1025786666260111483?logo=discord&style=flat)](https://discord.gg/Cppx2vSPVP)
+

 # A Fast Library for Automated Machine Learning & Tuning

--- a/flaml/init.py
+++ b/flaml/init.py
@ -1,4 +1,4 @@
-from flaml.searcher import CFO, BlendSearch, FLOW2, BlendSearchTuner
+from flaml.tune.searcher import CFO, BlendSearch, FLOW2, BlendSearchTuner, RandomSearch
 from flaml.automl import AutoML, logger_formatter
 from flaml.onlineml.autovw import AutoVW
 from flaml.version import __version__
--- a/flaml/automl.py
+++ b/flaml/automl.py
@ -160,7 +160,7 @@ class SearchState:
            if starting_point_len > len(starting_point):
                logger.warning(
                    "Starting points outside of the search space are removed. "
-                    f"Remaining starting points: {starting_point}"
+                    f"Remaining starting points for {learner_class}: {starting_point}"
                )
            starting_point = starting_point or None

@ -498,7 +498,7 @@ class AutoML(BaseEstimator):
                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
                'mape'. Default is 'auto'.
                If passing a customized metric function, the function needs to
-                have the follwing signature:
+                have the following input arguments:

        ```python
        def custom_metric(
@ -2175,7 +2175,7 @@ class AutoML(BaseEstimator):
                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
                'mape'. Default is 'auto'.
                If passing a customized metric function, the function needs to
-                have the following signature:
+                have the following input arguments:

        ```python
        def custom_metric(
@ -2370,15 +2370,13 @@ class AutoML(BaseEstimator):
        ```

            cv_score_agg_func: customized cross-validation scores aggregate function. Default to average metrics across folds. If specificed, this function needs to
-                have the following signature:
+                have the following input arguments:
+
+                * val_loss_folds: list of floats, the loss scores of each fold;
+                * log_metrics_folds: list of dicts/floats, the metrics of each fold to log.

-        ```python
-        def cv_score_agg_func(val_loss_folds, log_metrics_folds):
-            return metric_to_minimize, metrics_to_log
-        ```
-                “val_loss_folds” - list of floats, the loss scores of each fold; “log_metrics_folds” - list of dicts/floats, the metrics of each fold to log.
                This function should return the final aggregate result of all folds. A float number of the minimization objective, and a dictionary as the metrics to log or None.
-                E.g.,
+                    E.g.,

        ```python
        def cv_score_agg_func(val_loss_folds, log_metrics_folds):
@ -2393,16 +2391,16 @@ class AutoML(BaseEstimator):
                    metrics_to_log += single_fold
            if metrics_to_log:
                n = len(val_loss_folds)
-                metrics_to_log = {k: v / n for k, v in metrics_to_log.items()} if isinstance(metrics_to_log, dict) else metrics_to_log / n
+                metrics_to_log = (
+                    {k: v / n for k, v in metrics_to_log.items()}
+                    if isinstance(metrics_to_log, dict)
+                    else metrics_to_log / n
+                )
            return metric_to_minimize, metrics_to_log
        ```

+            skip_transform: boolean, default=False | Whether to skip pre-processing of the data prior to modeling.
            fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
-                    For TransformersEstimator, available fit_kwargs can be found from
-                    [TrainingArgumentsForAuto](nlp/huggingface/training_args).
-                    e.g.,
-        skip_transform: boolean, default=False | Whether to pre-process data prior to modeling.
-        fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
                For TransformersEstimator, available fit_kwargs can be found from
                [TrainingArgumentsForAuto](nlp/huggingface/training_args).
                e.g.,
@ -2935,8 +2933,11 @@ class AutoML(BaseEstimator):
            from ray import __version__ as ray_version

            assert ray_version >= "1.10.0"
+            if ray_version.startswith("1."):
+                from ray.tune.suggest import ConcurrencyLimiter
+            else:
+                from ray.tune.search import ConcurrencyLimiter
            import ray
-            from ray.tune.suggest import ConcurrencyLimiter
        except (ImportError, AssertionError):
            raise ImportError(
                "n_concurrent_trial>1 or use_ray=True requires installation of ray. "
@ -2947,93 +2948,74 @@ class AutoML(BaseEstimator):
        elif "bs" == self._hpo_method:
            from flaml import BlendSearch as SearchAlgo
        elif "random" == self._hpo_method:
-            from ray.tune.suggest import BasicVariantGenerator as SearchAlgo
-            from ray.tune.sample import Domain
+            from flaml import RandomSearch as SearchAlgo
        elif "optuna" == self._hpo_method:
            try:
                from ray import __version__ as ray_version

                assert ray_version >= "1.10.0"
-                from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
+                if ray_version.startswith("1."):
+                    from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
+                else:
+                    from ray.tune.search.optuna import OptunaSearch as SearchAlgo
            except (ImportError, AssertionError):
-                from .searcher.suggestion import OptunaSearch as SearchAlgo
+                from flaml.tune.searcher.suggestion import OptunaSearch as SearchAlgo
        else:
            raise NotImplementedError(
                f"hpo_method={self._hpo_method} is not recognized. "
                "'auto', 'cfo' and 'bs' are supported."
            )
        space = self.search_space
-        if self._hpo_method == "random":
-            # Any point in points_to_evaluate must consist of hyperparamters
-            # that are tunable, which can be identified by checking whether
-            # the corresponding value in the search space is an instance of
-            # the 'Domain' class from flaml or ray.tune
-            points_to_evaluate = self.points_to_evaluate.copy()
-            to_del = []
-            for k, v in space.items():
-                if not isinstance(v, Domain):
-                    to_del.append(k)
-            for k in to_del:
-                for p in points_to_evaluate:
-                    if k in p:
-                        del p[k]
+        self._state.time_from_start = time.time() - self._start_time_flag
+        time_left = self._state.time_budget - self._state.time_from_start
+        if self._hpo_method != "optuna":
+            min_resource = self.min_resource
+            if isinstance(min_resource, dict):
+                _min_resource_set = set(min_resource.values())
+                min_resource_all_estimator = min(_min_resource_set)
+                if len(_min_resource_set) > 1:
+                    logger.warning(
+                        "Using the min FLAML_sample_size of all the provided starting points as the starting sample size in the case of parallel search."
+                    )
+            else:
+                min_resource_all_estimator = min_resource
            search_alg = SearchAlgo(
-                max_concurrent=self._n_concurrent_trials,
-                points_to_evaluate=points_to_evaluate,
+                metric="val_loss",
+                space=space,
+                low_cost_partial_config=self.low_cost_partial_config,
+                points_to_evaluate=self.points_to_evaluate,
+                cat_hp_cost=self.cat_hp_cost,
+                resource_attr=self.resource_attr,
+                min_resource=min_resource_all_estimator,
+                max_resource=self.max_resource,
+                config_constraints=[
+                    (partial(size, self._state), "<=", self._mem_thres)
+                ],
+                metric_constraints=self.metric_constraints,
+                seed=self._seed,
+                time_budget_s=time_left,
            )
        else:
-            self._state.time_from_start = time.time() - self._start_time_flag
-            time_left = self._state.time_budget - self._state.time_from_start
-            if self._hpo_method != "optuna":
-                min_resource = self.min_resource
-                if isinstance(min_resource, dict):
-                    _min_resource_set = set(min_resource.values())
-                    min_resource_all_estimator = min(_min_resource_set)
-                    if len(_min_resource_set) > 1:
-                        logger.warning(
-                            "Using the min FLAML_sample_size of all the provided starting points as the starting sample size in the case of parallel search."
-                        )
-                else:
-                    min_resource_all_estimator = min_resource
-                search_alg = SearchAlgo(
-                    metric="val_loss",
-                    space=space,
-                    low_cost_partial_config=self.low_cost_partial_config,
-                    points_to_evaluate=self.points_to_evaluate,
-                    cat_hp_cost=self.cat_hp_cost,
-                    resource_attr=self.resource_attr,
-                    min_resource=min_resource_all_estimator,
-                    max_resource=self.max_resource,
-                    config_constraints=[
-                        (partial(size, self._state), "<=", self._mem_thres)
-                    ],
-                    metric_constraints=self.metric_constraints,
-                    seed=self._seed,
-                    time_budget_s=time_left,
-                )
-            else:
-                # if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match
-                # need to remove the extra keys from the search space to be consistent with the initial config
-                converted_space = SearchAlgo.convert_search_space(space)
+            # if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match
+            # need to remove the extra keys from the search space to be consistent with the initial config
+            converted_space = SearchAlgo.convert_search_space(space)

-                removed_keys = set(space.keys()).difference(converted_space.keys())
-                new_points_to_evaluate = []
-                for idx in range(len(self.points_to_evaluate)):
-                    r = self.points_to_evaluate[idx].copy()
-                    for each_key in removed_keys:
-                        r.pop(each_key)
-                    new_points_to_evaluate.append(r)
+            removed_keys = set(space.keys()).difference(converted_space.keys())
+            new_points_to_evaluate = []
+            for idx in range(len(self.points_to_evaluate)):
+                r = self.points_to_evaluate[idx].copy()
+                for each_key in removed_keys:
+                    r.pop(each_key)
+                new_points_to_evaluate.append(r)

-                search_alg = SearchAlgo(
-                    metric="val_loss",
-                    mode="min",
-                    points_to_evaluate=[
-                        p
-                        for p in new_points_to_evaluate
-                        if len(p) == len(converted_space)
-                    ],
-                )
-            search_alg = ConcurrencyLimiter(search_alg, self._n_concurrent_trials)
+            search_alg = SearchAlgo(
+                metric="val_loss",
+                mode="min",
+                points_to_evaluate=[
+                    p for p in new_points_to_evaluate if len(p) == len(converted_space)
+                ],
+            )
+        search_alg = ConcurrencyLimiter(search_alg, self._n_concurrent_trials)
        resources_per_trial = self._state.resources_per_trial

        analysis = ray.tune.run(
@ -3124,7 +3106,7 @@ class AutoML(BaseEstimator):
                mlflow.log_metric("trial_time", search_state.trial_time)
                mlflow.log_metric("wall_clock_time", self._state.time_from_start)
                mlflow.log_metric("validation_loss", search_state.val_loss)
-                mlflow.log_param("config", search_state.config)
+                mlflow.log_params(search_state.config)
                mlflow.log_param("learner", estimator)
                mlflow.log_param("sample_size", search_state.sample_size)
                mlflow.log_metric("best_validation_loss", search_state.best_loss)
@ -3136,9 +3118,12 @@ class AutoML(BaseEstimator):
            from ray import __version__ as ray_version

            assert ray_version >= "1.10.0"
-            from ray.tune.suggest import ConcurrencyLimiter
+            if ray_version.startswith("1."):
+                from ray.tune.suggest import ConcurrencyLimiter
+            else:
+                from ray.tune.search import ConcurrencyLimiter
        except (ImportError, AssertionError):
-            from .searcher.suggestion import ConcurrencyLimiter
+            from flaml.tune.searcher.suggestion import ConcurrencyLimiter
        if self._hpo_method in ("cfo", "grid"):
            from flaml import CFO as SearchAlgo
        elif "optuna" == self._hpo_method:
@ -3146,15 +3131,18 @@ class AutoML(BaseEstimator):
                from ray import __version__ as ray_version

                assert ray_version >= "1.10.0"
-                from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
+                if ray_version.startswith("1."):
+                    from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
+                else:
+                    from ray.tune.search.optuna import OptunaSearch as SearchAlgo
            except (ImportError, AssertionError):
-                from .searcher.suggestion import OptunaSearch as SearchAlgo
+                from flaml.tune.searcher.suggestion import OptunaSearch as SearchAlgo
        elif "bs" == self._hpo_method:
            from flaml import BlendSearch as SearchAlgo
        elif "random" == self._hpo_method:
-            from flaml.searcher import RandomSearch as SearchAlgo
+            from flaml.tune.searcher import RandomSearch as SearchAlgo
        elif "cfocat" == self._hpo_method:
-            from flaml.searcher.cfo_cat import CFOCat as SearchAlgo
+            from flaml.tune.searcher.cfo_cat import CFOCat as SearchAlgo
        else:
            raise NotImplementedError(
                f"hpo_method={self._hpo_method} is not recognized. "
--- a/flaml/model.py
+++ b/flaml/model.py
@ -1975,7 +1975,7 @@ class SARIMAX(ARIMA):
                train_df[[TS_VALUE_COL]],
                exog=train_df[regressors],
                order=(self.params["p"], self.params["d"], self.params["q"]),
-                seasonality_order=(
+                seasonal_order=(
                    self.params["P"],
                    self.params["D"],
                    self.params["Q"],
@ -1988,7 +1988,7 @@ class SARIMAX(ARIMA):
            model = SARIMAX_estimator(
                train_df,
                order=(self.params["p"], self.params["d"], self.params["q"]),
-                seasonality_order=(
+                seasonal_order=(
                    self.params["P"],
                    self.params["D"],
                    self.params["Q"],
@ -2287,7 +2287,7 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
            kwargs.get("log_dir", "lightning_logs")
        )  # logging results to a tensorboard
        default_trainer_kwargs = dict(
-            gpus=self._kwargs.get("gpu_per_trial", [0])
+            gpus=kwargs.get("gpu_per_trial", [0])
            if torch.cuda.is_available()
            else None,
            max_epochs=max_epochs,
--- a/flaml/nlp/huggingface/data_collator.py
+++ b/flaml/nlp/huggingface/data_collator.py
@ -6,7 +6,13 @@ from transformers.data.data_collator import (
 )
 from collections import OrderedDict

-from flaml.data import TOKENCLASSIFICATION, MULTICHOICECLASSIFICATION, SUMMARIZATION
+from flaml.data import (
+    TOKENCLASSIFICATION,
+    MULTICHOICECLASSIFICATION,
+    SUMMARIZATION,
+    SEQCLASSIFICATION,
+    SEQREGRESSION,
+)


@dataclass
@ -45,5 +51,7 @@ task_to_datacollator_class = OrderedDict(
        (TOKENCLASSIFICATION, DataCollatorForTokenClassification),
        (MULTICHOICECLASSIFICATION, DataCollatorForMultipleChoiceClassification),
        (SUMMARIZATION, DataCollatorForSeq2Seq),
+        (SEQCLASSIFICATION, DataCollatorWithPadding),
+        (SEQREGRESSION, DataCollatorWithPadding),
    ]
 )
--- a/flaml/nlp/huggingface/switch_head_auto.py
+++ b/flaml/nlp/huggingface/switch_head_auto.py
@ -1,64 +0,0 @@
-from collections import OrderedDict
-
-import transformers
-
-if transformers.__version__.startswith("3"):
-    from transformers.modeling_electra import ElectraClassificationHead
-    from transformers.modeling_roberta import RobertaClassificationHead
-
-else:
-    from transformers.models.electra.modeling_electra import ElectraClassificationHead
-    from transformers.models.roberta.modeling_roberta import RobertaClassificationHead
-
-MODEL_CLASSIFICATION_HEAD_MAPPING = OrderedDict(
-    [
-        ("electra", ElectraClassificationHead),
-        ("roberta", RobertaClassificationHead),
-    ]
-)
-
-
-class AutoSeqClassificationHead:
-    """
-    This is a class for getting classification head class based on the name of the LM
-    instantiated as one of the ClassificationHead classes of the library when
-    created with the `AutoSeqClassificationHead.from_model_type_and_config` method.
-
-    This class cannot be instantiated directly using ``__init__()`` (throws an error).
-    """
-
-    def __init__(self):
-        raise EnvironmentError(
-            "AutoSeqClassificationHead is designed to be instantiated "
-            "using the `AutoSeqClassificationHead.from_model_type_and_config(cls, model_type, config)` methods."
-        )
-
-    @classmethod
-    def from_model_type_and_config(
-        cls, model_type: str, config: transformers.PretrainedConfig
-    ):
-        """
-        Instantiate one of the classification head classes from the mode_type and model configuration.
-
-        Args:
-            model_type: A string, which desribes the model type, e.g., "electra".
-            config: The huggingface class of the model's configuration.
-
-        Example:
-
-        ```python
-        from transformers import AutoConfig
-        model_config = AutoConfig.from_pretrained("google/electra-base-discriminator")
-        AutoSeqClassificationHead.from_model_type_and_config("electra", model_config)
-        ```
-        """
-        if model_type in MODEL_CLASSIFICATION_HEAD_MAPPING.keys():
-            return MODEL_CLASSIFICATION_HEAD_MAPPING[model_type](config)
-        raise ValueError(
-            "Unrecognized configuration class {} for class {}.\n"
-            "Model type should be one of {}.".format(
-                config.__class__,
-                cls.__name__,
-                ", ".join(MODEL_CLASSIFICATION_HEAD_MAPPING.keys()),
-            )
-        )
--- a/flaml/nlp/huggingface/utils.py
+++ b/flaml/nlp/huggingface/utils.py
@ -404,10 +404,6 @@ def load_model(checkpoint_path, task, num_labels=None):
    transformers.logging.set_verbosity_error()

    from transformers import AutoConfig
-    from ..huggingface.switch_head_auto import (
-        AutoSeqClassificationHead,
-        MODEL_CLASSIFICATION_HEAD_MAPPING,
-    )
    from ...data import SEQCLASSIFICATION, SEQREGRESSION, TOKENCLASSIFICATION

    def get_this_model(checkpoint_path, task, model_config):
@ -418,7 +414,7 @@ def load_model(checkpoint_path, task, num_labels=None):

        if task in (SEQCLASSIFICATION, SEQREGRESSION):
            return AutoModelForSequenceClassification.from_pretrained(
-                checkpoint_path, config=model_config
+                checkpoint_path, config=model_config, ignore_mismatched_sizes=True
            )
        elif task == TOKENCLASSIFICATION:
            return AutoModelForTokenClassification.from_pretrained(
@ -433,9 +429,6 @@ def load_model(checkpoint_path, task, num_labels=None):
                checkpoint_path, config=model_config
            )

-    def is_pretrained_model_in_classification_head_list(model_type):
-        return model_type in MODEL_CLASSIFICATION_HEAD_MAPPING
-
    def _set_model_config(checkpoint_path):
        if task in (SEQCLASSIFICATION, SEQREGRESSION, TOKENCLASSIFICATION):
            model_config = AutoConfig.from_pretrained(
@ -448,40 +441,11 @@ def load_model(checkpoint_path, task, num_labels=None):
            return model_config

    current_config = AutoConfig.from_pretrained(checkpoint_path)
-    this_model_type, this_vocab_size = (
-        current_config.model_type,
-        current_config.vocab_size,
-    )
+    this_vocab_size = current_config.vocab_size

-    if task == SEQCLASSIFICATION:
-        num_labels_old = current_config.num_labels
-        if is_pretrained_model_in_classification_head_list(this_model_type):
-            model_config_num_labels = num_labels_old
-        else:
-            model_config_num_labels = num_labels
-        new_config = _set_model_config(checkpoint_path)
+    model_config_num_labels = num_labels
+    new_config = _set_model_config(checkpoint_path)

-        if is_pretrained_model_in_classification_head_list(this_model_type):
-            if num_labels != num_labels_old:
-                this_model = get_this_model(checkpoint_path, task, new_config)
-                new_config.num_labels = num_labels
-                this_model.num_labels = num_labels
-                this_model.classifier = (
-                    AutoSeqClassificationHead.from_model_type_and_config(
-                        this_model_type, new_config
-                    )
-                )
-            else:
-                this_model = get_this_model(checkpoint_path, task, new_config)
-        else:
-            this_model = get_this_model(checkpoint_path, task, new_config)
-        this_model.resize_token_embeddings(this_vocab_size)
-        return this_model
-    else:
-        if task == SEQREGRESSION:
-            model_config_num_labels = 1
-        elif task == TOKENCLASSIFICATION:
-            model_config_num_labels = num_labels
-        model_config = _set_model_config(checkpoint_path)
-        this_model = get_this_model(checkpoint_path, task, model_config)
-        return this_model
+    this_model = get_this_model(checkpoint_path, task, new_config)
+    this_model.resize_token_embeddings(this_vocab_size)
+    return this_model
--- a/flaml/onlineml/README.md
+++ b/flaml/onlineml/README.md
@ -1,6 +1,6 @@
 # ChaCha for Online AutoML

-FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time;  and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved.  
+FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time;  and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved.

 For more technical details about *ChaCha*, please check our paper.

--- a/flaml/onlineml/autovw.py
+++ b/flaml/onlineml/autovw.py
@ -8,8 +8,8 @@ from flaml.tune import (
    polynomial_expansion_set,
 )
 from flaml.onlineml import OnlineTrialRunner
-from flaml.scheduler import ChaChaScheduler
-from flaml.searcher import ChampionFrontierSearcher
+from flaml.tune.scheduler import ChaChaScheduler
+from flaml.tune.searcher import ChampionFrontierSearcher
 from flaml.onlineml.trial import get_ns_feature_dim_from_vw_example

 logger = logging.getLogger(__name__)
--- a/flaml/onlineml/trial_runner.py
+++ b/flaml/onlineml/trial_runner.py
@ -1,7 +1,7 @@
 import numpy as np
 import math
 from flaml.tune import Trial
-from flaml.scheduler import TrialScheduler
+from flaml.tune.scheduler import TrialScheduler

 import logging

--- a/flaml/tune/README.md
+++ b/flaml/tune/README.md
@ -20,7 +20,7 @@ def evaluate_config(config):
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config['x']/100000)
-    # use tune.report to report the metric to optimize  
+    # use tune.report to report the metric to optimize
    tune.report(metric=metric)

 analysis = tune.run(
@ -35,7 +35,7 @@ analysis = tune.run(
    num_samples=-1,    # the maximal number of configs to try, -1 means infinite
    time_budget_s=60,   # the time budget in seconds
    local_dir='logs/',  # the local directory to store logs
-    # verbose=0,          # verbosity  
+    # verbose=0,          # verbosity
    # use_ray=True, # uncomment when performing parallel tuning using ray
    )

@ -59,7 +59,7 @@ def evaluate_config(config):
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config['x']/100000)
-    # use tune.report to report the metric to optimize  
+    # use tune.report to report the metric to optimize
    tune.report(metric=metric)

 # provide a time budget (in seconds) for the tuning process
--- a/flaml/tune/init.py
+++ b/flaml/tune/init.py
@ -13,8 +13,12 @@ try:
        qloguniform,
        lograndint,
        qlograndint,
-        sample,
    )
+
+    if ray_version.startswith("1."):
+        from ray.tune import sample
+    else:
+        from ray.tune.search import sample
 except (ImportError, AssertionError):
    from .sample import (
        uniform,
--- a/flaml/tune/analysis.py
+++ b/flaml/tune/analysis.py
@ -18,7 +18,7 @@
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial
-
+from collections import defaultdict
 import logging

 logger = logging.getLogger(__name__)
@ -68,7 +68,6 @@ class ExperimentAnalysis:
    @property
    def results(self) -> Dict[str, Dict]:
        """Get the last result of all the trials of the experiment"""
-
        return {trial.trial_id: trial.last_result for trial in self.trials}

    def _validate_metric(self, metric: str) -> str:
@ -122,7 +121,6 @@ class ExperimentAnalysis:
        """
        metric = self._validate_metric(metric)
        mode = self._validate_mode(mode)
-
        if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
            raise ValueError(
                "ExperimentAnalysis: attempting to get best trial for "
@ -138,7 +136,6 @@ class ExperimentAnalysis:
        for trial in self.trials:
            if metric not in trial.metric_analysis:
                continue
-
            if scope in ["last", "avg", "last-5-avg", "last-10-avg"]:
                metric_score = trial.metric_analysis[metric][scope]
            else:
@ -158,7 +155,6 @@ class ExperimentAnalysis:
            elif (mode == "min") and (best_metric_score > metric_score):
                best_metric_score = metric_score
                best_trial = trial
-
        if not best_trial:
            logger.warning(
                "Could not find best trial. Did you pass the correct `metric` "
--- a/flaml/tune/cgmanifest.json
+++ b/flaml/tune/cgmanifest.json
@ -1,11 +1,12 @@
 {
-    "Registrations": [
-        {
-            "Component": {
-                "Type": "pip",
-                "pip": {"Name": "ray[tune]", "Version": "1.5.1" }
-            },
-            "DevelopmentDependency": false
-        }
-    ]
-}
+  "$schema": "https://json.schemastore.org/component-detection-manifest.json",
+  "Registrations": [
+    {
+      "Component": {
+        "Type": "pip",
+        "pip": { "Name": "ray[tune]", "Version": "1.5.1" }
+      },
+      "DevelopmentDependency": false
+    }
+  ]
+}
--- a/flaml/tune/sample.py
+++ b/flaml/tune/sample.py
@ -36,7 +36,12 @@ except AttributeError:
 logger = logging.getLogger(__name__)

 try:
-    from ray.tune.sample import _BackwardsCompatibleNumpyRng
+    from ray import __version__ as ray_version
+
+    if ray_version.startswith("1."):
+        from ray.tune.sample import _BackwardsCompatibleNumpyRng
+    else:
+        from ray.tune.search.sample import _BackwardsCompatibleNumpyRng
 except ImportError:

    class _BackwardsCompatibleNumpyRng:
--- a/flaml/tune/scheduler/init.py
+++ b/flaml/tune/scheduler/init.py
--- a/flaml/tune/scheduler/online_scheduler.py
+++ b/flaml/tune/scheduler/online_scheduler.py
@ -1,7 +1,7 @@
 import numpy as np
 import logging
 from typing import Dict
-from flaml.scheduler import TrialScheduler
+from flaml.tune.scheduler import TrialScheduler
 from flaml.tune import Trial

 logger = logging.getLogger(__name__)
--- a/flaml/tune/scheduler/trial_scheduler.py
+++ b/flaml/tune/scheduler/trial_scheduler.py
--- a/flaml/tune/searcher/init.py
+++ b/flaml/tune/searcher/init.py
--- a/flaml/tune/searcher/blendsearch.py
+++ b/flaml/tune/searcher/blendsearch.py
@ -11,17 +11,21 @@ try:
    from ray import __version__ as ray_version

    assert ray_version >= "1.10.0"
-    from ray.tune.suggest import Searcher
-    from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
+    if ray_version.startswith("1."):
+        from ray.tune.suggest import Searcher
+        from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
+    else:
+        from ray.tune.search import Searcher
+        from ray.tune.search.optuna import OptunaSearch as GlobalSearch
 except (ImportError, AssertionError):
    from .suggestion import Searcher
    from .suggestion import OptunaSearch as GlobalSearch
-from ..tune.trial import unflatten_dict, flatten_dict
-from ..tune import INCUMBENT_RESULT
+from ..trial import unflatten_dict, flatten_dict
+from .. import INCUMBENT_RESULT
 from .search_thread import SearchThread
 from .flow2 import FLOW2
-from ..tune.space import add_cost_to_space, indexof, normalize, define_by_run_func
-from ..tune.result import TIME_TOTAL_S
+from ..space import add_cost_to_space, indexof, normalize, define_by_run_func
+from ..result import TIME_TOTAL_S

 import logging

@ -59,6 +63,7 @@ class BlendSearch(Searcher):
        seed: Optional[int] = 20,
        cost_attr: Optional[str] = "auto",
        experimental: Optional[bool] = False,
+        lexico_objectives: Optional[dict] = None,
        use_incumbent_result_in_evaluation=False,
    ):
        """Constructor.
@ -108,6 +113,27 @@ class BlendSearch(Searcher):
                Default is "auto", which means that we will automatically chose the cost attribute to use (depending
                on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
                in our search algorithm.
+            lexico_objectives: dict, default=None | It specifies the information needed to perform multi-objective
+                optimization with lexicographic preferences. This is only supported in CFO currently.
+                When lexico_objectives is not None, the arguments metric and mode will be invalid.
+                This dictionary shall contain the following fields of key-value pairs:
+                - "metrics": a list of optimization objectives with the order reflecting the priorities/preferences of the
+                objectives.
+                - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+                objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+                - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical target values.
+                - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical tolerance values.
+                E.g.,
+                ```python
+                lexico_objectives = {
+                    "metrics": ["error_rate", "pred_time"],
+                    "modes": ["min", "min"],
+                    "tolerances": {"error_rate": 0.01, "pred_time": 0.0},
+                    "targets": {"error_rate": 0.0},
+                }
+                ```
            experimental: A bool of whether to use experimental features.
        """
        self._eps = SEARCH_THREAD_EPS
@ -123,6 +149,7 @@ class BlendSearch(Searcher):
        self.penalty = PENALTY  # penalty term for constraints
        self._metric, self._mode = metric, mode
        self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation
+        self.lexico_objectives = lexico_objectives
        init_config = low_cost_partial_config or {}
        if not init_config:
            logger.info(
@ -173,6 +200,7 @@ class BlendSearch(Searcher):
            reduction_factor,
            self.cost_attr,
            seed,
+            self.lexico_objectives,
        )
        if global_search_alg is not None:
            self._gs = global_search_alg
@ -476,11 +504,15 @@ class BlendSearch(Searcher):
            del self._subspace[trial_id]

    def _create_thread(self, config, result, space):
+        if self.lexico_objectives is None:
+            obj = result[self._ls.metric]
+        else:
+            obj = {k: result[k] for k in self.lexico_objectives["metrics"]}
        self._search_thread_pool[self._thread_count] = SearchThread(
            self._ls.mode,
            self._ls.create(
                config,
-                result[self._ls.metric],
+                obj,
                cost=result.get(self.cost_attr, 1),
                space=space,
            ),
@ -935,7 +967,7 @@ try:
        qloguniform,
    )
 except (ImportError, AssertionError):
-    from ..tune.sample import (
+    from ..sample import (
        uniform,
        quniform,
        choice,
@ -1041,6 +1073,7 @@ class BlendSearchTuner(BlendSearch, NNITuner):
            self._ls.resource_multiple_factor,
            cost_attr=self.cost_attr,
            seed=self._ls.seed,
+            lexico_objectives=self.lexico_objectives,
        )
        if self._gs is not None:
            self._gs = GlobalSearch(
--- a/flaml/tune/searcher/cfo_cat.py
+++ b/flaml/tune/searcher/cfo_cat.py
--- a/flaml/tune/searcher/flow2.py
+++ b/flaml/tune/searcher/flow2.py
@ -5,21 +5,24 @@
 from typing import Dict, Optional, Tuple
 import numpy as np
 import logging
+from collections import defaultdict

 try:
    from ray import __version__ as ray_version

    assert ray_version >= "1.0.0"
-    from ray.tune.suggest import Searcher
-    from ray.tune import sample
+    if ray_version.startswith("1."):
+        from ray.tune.suggest import Searcher
+        from ray.tune import sample
+    else:
+        from ray.tune.search import Searcher, sample
    from ray.tune.utils.util import flatten_dict, unflatten_dict
 except (ImportError, AssertionError):
    from .suggestion import Searcher
-    from ..tune import sample
-    from ..tune.trial import flatten_dict, unflatten_dict
-from flaml.tune.sample import _BackwardsCompatibleNumpyRng
+    from flaml.tune import sample
+    from ..trial import flatten_dict, unflatten_dict
 from flaml.config import SAMPLE_MULTIPLY_FACTOR
-from ..tune.space import (
+from ..space import (
    complete_config,
    denormalize,
    normalize,
@ -47,6 +50,7 @@ class FLOW2(Searcher):
        resource_multiple_factor: Optional[float] = None,
        cost_attr: Optional[str] = "time_total_s",
        seed: Optional[int] = 20,
+        lexico_objectives=None,
    ):
        """Constructor.

@ -67,6 +71,26 @@ class FLOW2(Searcher):
                used for increasing resource.
            cost_attr: A string of the attribute used for cost.
            seed: An integer of the random seed.
+            lexico_objectives: dict, default=None | It specifies the information needed to perform multi-objective
+                optimization with lexicographic preferences. When lexico_objectives is not None, the arguments metric
+                and mode will be invalid. This dictionary shall contain the following fields of key-value pairs:
+                - "metrics": a list of optimization objectives with the order reflecting the priorities/preferences of the
+                objectives.
+                - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+                objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+                - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical target values.
+                - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical tolerance values.
+                E.g.,
+                ```python
+                lexico_objectives = {
+                    "metrics": ["error_rate", "pred_time"],
+                    "modes": ["min", "min"],
+                    "tolerances": {"error_rate": 0.01, "pred_time": 0.0},
+                    "targets": {"error_rate": 0.0},
+                }
+                ```
        """
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
@ -82,19 +106,36 @@ class FLOW2(Searcher):
        self.space = space or {}
        self._space = flatten_dict(self.space, prevent_delimiter=True)
        self._random = np.random.RandomState(seed)
-        self.rs_random = _BackwardsCompatibleNumpyRng(seed + 19823)
+        self.rs_random = sample._BackwardsCompatibleNumpyRng(seed + 19823)
        self.seed = seed
        self.init_config = init_config
        self.best_config = flatten_dict(init_config)
        self.resource_attr = resource_attr
        self.min_resource = min_resource
+        self.lexico_objectives = lexico_objectives
+        if self.lexico_objectives is not None:
+            if "modes" not in self.lexico_objectives.keys():
+                self.lexico_objectives["modes"] = ["min"] * len(
+                    self.lexico_objectives["metrics"]
+                )
+            for t_metric, t_mode in zip(
+                self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
+            ):
+                if t_metric not in self.lexico_objectives["tolerances"].keys():
+                    self.lexico_objectives["tolerances"][t_metric] = 0
+                if t_metric not in self.lexico_objectives["targets"].keys():
+                    self.lexico_objectives["targets"][t_metric] = (
+                        -float("inf") if t_mode == "min" else float("inf")
+                    )
        self.resource_multiple_factor = (
            resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
        )
        self.cost_attr = cost_attr
        self.max_resource = max_resource
        self._resource = None
+        self._f_best = None  # only used by lexico_compare. It represents the best value achieved by lexico_flow.
        self._step_lb = np.Inf
+        self._histories = None  # only used by lexico_compare. It records the results of historical configurations.
        if space is not None:
            self._init_search()

@ -262,8 +303,21 @@ class FLOW2(Searcher):
            self.resource_multiple_factor,
            self.cost_attr,
            self.seed + 1,
+            self.lexico_objectives,
        )
-        flow2.best_obj = obj * self.metric_op  # minimize internally
+        if self.lexico_objectives is not None:
+            flow2.best_obj = {}
+            for k, v in obj.items():
+                flow2.best_obj[k] = (
+                    -v
+                    if self.lexico_objectives["modes"][
+                        self.lexico_objectives["metrics"].index(k)
+                    ]
+                    == "max"
+                    else v
+                )
+        else:
+            flow2.best_obj = obj * self.metric_op  # minimize internally
        flow2.cost_incumbent = cost
        self.seed += 1
        return flow2
@ -301,6 +355,80 @@ class FLOW2(Searcher):
            self._init_search()
        return True

+    def update_fbest(  # recompute self._f_best for every metric from self._histories
+        self,
+    ):
+        # TODO: Improve the efficiency
+        obj_initial = self.lexico_objectives["metrics"][0]
+        feasible_index = [*range(len(self._histories[obj_initial]))]  # start from every recorded entry
+        for k_metric in self.lexico_objectives["metrics"]:  # metrics are visited in list order
+            k_values = np.array(self._histories[k_metric])
+            self._f_best[k_metric] = np.min(k_values.take(feasible_index))  # minimum over entries still feasible
+            feasible_index_prior = np.where(
+                k_values
+                <= max(
+                    [
+                        self._f_best[k_metric]
+                        + self.lexico_objectives["tolerances"][k_metric],
+                        self.lexico_objectives["targets"][k_metric],  # NOTE(review): target is used as given here, while lexico_compare negates it for "max" metrics; confirm intended
+                    ]
+                )
+            )[0].tolist()
+            feasible_index = [
+                val for val in feasible_index if val in feasible_index_prior
+            ]  # keep only entries within the bound for this metric before moving to the next one
+
+    def lexico_compare(self, result) -> bool:  # callers replace self.best_obj with `result` when this returns True
+        if self._histories is None:  # first call: create the history and accept the result
+            self._histories, self._f_best = defaultdict(list), {}
+            for k in self.lexico_objectives["metrics"]:
+                self._histories[k].append(result[k])
+            self.update_fbest()
+            return True
+        else:
+            for k in self.lexico_objectives["metrics"]:
+                self._histories[k].append(result[k])  # record the new result before comparing
+            self.update_fbest()
+            for k_metric, k_mode in zip(
+                self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
+            ):
+                k_target = (
+                    self.lexico_objectives["targets"][k_metric]
+                    if k_mode == "min"
+                    else -self.lexico_objectives["targets"][k_metric]
+                )  # the target is negated for "max" metrics
+                if (
+                    result[k_metric]
+                    < max(
+                        [
+                            self._f_best[k_metric]
+                            + self.lexico_objectives["tolerances"][k_metric],
+                            k_target,
+                        ]
+                    )
+                ) and (
+                    self.best_obj[k_metric]
+                    < max(
+                        [
+                            self._f_best[k_metric]
+                            + self.lexico_objectives["tolerances"][k_metric],
+                            k_target,
+                        ]
+                    )
+                ):
+                    continue  # both values are below the bound for this metric; decide on a later metric
+                elif result[k_metric] < self.best_obj[k_metric]:
+                    return True
+                else:
+                    return False
+            for k_metr in self.lexico_objectives["metrics"]:  # reached only when every metric hit `continue` above
+                if result[k_metr] == self.best_obj[k_metr]:
+                    continue  # if every metric ties, the loop ends and the method returns None implicitly
+                elif result[k_metr] < self.best_obj[k_metr]:
+                    return True
+                else:
+                    return False
+
    def on_trial_complete(
        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
    ):
@ -311,10 +439,28 @@ class FLOW2(Searcher):
        """
        self.trial_count_complete += 1
        if not error and result:
-            obj = result.get(self._metric)
+            obj = (
+                result.get(self._metric)
+                if self.lexico_objectives is None
+                else {k: result[k] for k in self.lexico_objectives["metrics"]}
+            )
            if obj:
-                obj *= self.metric_op
-                if self.best_obj is None or obj < self.best_obj:
+                obj = (
+                    {
+                        k: -obj[k] if m == "max" else obj[k]
+                        for k, m in zip(
+                            self.lexico_objectives["metrics"],
+                            self.lexico_objectives["modes"],
+                        )
+                    }
+                    if isinstance(obj, dict)
+                    else obj * self.metric_op
+                )
+                if (
+                    self.best_obj is None
+                    or (self.lexico_objectives is None and obj < self.best_obj)
+                    or (self.lexico_objectives is not None and self.lexico_compare(obj))
+                ):
                    self.best_obj = obj
                    self.best_config, self.step = self._configs[trial_id]
                    self.incumbent = self.normalize(self.best_config)
@ -327,7 +473,6 @@ class FLOW2(Searcher):
                    self._num_allowed4incumbent = 2 * self.dim
                    self._proposed_by.clear()
                    if self._K > 0:
-                        # self._oldK must have been set when self._K>0
                        self.step *= np.sqrt(self._K / self._oldK)
                    self.step = min(self.step, self.step_ub)
                    self._iter_best_config = self.trial_count_complete
@ -338,7 +483,6 @@ class FLOW2(Searcher):
                    self._trunc = max(self._trunc >> 1, 1)
        proposed_by = self._proposed_by.get(trial_id)
        if proposed_by == self.incumbent:
-            # proposed by current incumbent and no better
            self._num_complete4incumbent += 1
            cost = (
                result.get(self.cost_attr, 1)
@ -355,17 +499,34 @@ class FLOW2(Searcher):
            if self._num_complete4incumbent == self.dir and (
                not self._resource or self._resource == self.max_resource
            ):
-                # check stuck condition if using max resource
                self._num_complete4incumbent -= 2
                self._num_allowed4incumbent = max(self._num_allowed4incumbent, 2)

    def on_trial_result(self, trial_id: str, result: Dict):
        """Early update of incumbent."""
        if result:
-            obj = result.get(self._metric)
+            obj = (
+                result.get(self._metric)
+                if self.lexico_objectives is None
+                else {k: result[k] for k in self.lexico_objectives["metrics"]}
+            )
            if obj:
-                obj *= self.metric_op
-                if self.best_obj is None or obj < self.best_obj:
+                obj = (
+                    {
+                        k: -obj[k] if m == "max" else obj[k]
+                        for k, m in zip(
+                            self.lexico_objectives["metrics"],
+                            self.lexico_objectives["modes"],
+                        )
+                    }
+                    if isinstance(obj, dict)
+                    else obj * self.metric_op
+                )
+                if (
+                    self.best_obj is None
+                    or (self.lexico_objectives is None and obj < self.best_obj)
+                    or (self.lexico_objectives is not None and self.lexico_compare(obj))
+                ):
                    self.best_obj = obj
                    config = self._configs[trial_id][0]
                    if self.best_config != config:
--- a/flaml/tune/searcher/online_searcher.py
+++ b/flaml/tune/searcher/online_searcher.py
@ -2,10 +2,9 @@ import numpy as np
 import logging
 import itertools
 from typing import Dict, Optional, List
-from flaml.tune import Categorical, Float, PolynomialExpansionSet
-from flaml.tune import Trial
+from flaml.tune import Categorical, Float, PolynomialExpansionSet, Trial
 from flaml.onlineml import VowpalWabbitTrial
-from flaml.searcher import CFO
+from flaml.tune.searcher import CFO

 logger = logging.getLogger(__name__)

--- a/flaml/tune/searcher/search_thread.py
+++ b/flaml/tune/searcher/search_thread.py
@ -9,11 +9,14 @@ try:
    from ray import __version__ as ray_version

    assert ray_version >= "1.10.0"
-    from ray.tune.suggest import Searcher
+    if ray_version.startswith("1."):
+        from ray.tune.suggest import Searcher
+    else:
+        from ray.tune.search import Searcher
 except (ImportError, AssertionError):
    from .suggestion import Searcher
 from .flow2 import FLOW2
-from ..tune.space import add_cost_to_space, unflatten_hierarchical
+from ..space import add_cost_to_space, unflatten_hierarchical
 import logging

 logger = logging.getLogger(__name__)
@ -134,7 +137,11 @@ class SearchThread:
        if result:
            self.cost_last = result.get(self.cost_attr, 1)
            self.cost_total += self.cost_last
-            if self._search_alg.metric in result:
+            if self._search_alg.metric in result and (
+                getattr(self._search_alg, "lexico_objectives", None) is None
+            ):
+                # TODO: Improve this behavior. When lexico_objectives is provided to CFO,
+                # related variables are not callable.
                obj = result[self._search_alg.metric] * self._metric_op
                if obj < self.obj_best1 or self.best_result is None:
                    self.cost_best2 = self.cost_best1
@ -143,7 +150,10 @@ class SearchThread:
                    self.obj_best1 = obj
                    self.cost_best = self.cost_last
                    self.best_result = result
-            self._update_speed()
+            if getattr(self._search_alg, "lexico_objectives", None) is None:
+                # TODO: Improve this behavior. When lexico_objectives is provided to CFO,
+                # related variables are not callable.
+                self._update_speed()
        self.running -= 1
        assert self.running >= 0

--- a/flaml/tune/searcher/suggestion.py
+++ b/flaml/tune/searcher/suggestion.py
@ -23,7 +23,7 @@ import logging
 from typing import Any, Dict, Optional, Union, List, Tuple, Callable
 import pickle
 from .variant_generator import parse_spec_vars
-from ..tune.sample import (
+from ..sample import (
    Categorical,
    Domain,
    Float,
@ -32,7 +32,7 @@ from ..tune.sample import (
    Quantized,
    Uniform,
 )
-from ..tune.trial import flatten_dict, unflatten_dict
+from ..trial import flatten_dict, unflatten_dict

 logger = logging.getLogger(__name__)

@ -179,7 +179,7 @@ class ConcurrencyLimiter(Searcher):
            to finish before updating the underlying searcher.
    Example:
    ```python
-    from ray.tune.suggest import ConcurrencyLimiter
+    from ray.tune.suggest import ConcurrencyLimiter  # ray version < 2
    search_alg = HyperOptSearch(metric="accuracy")
    search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2)
    tune.run(trainable, search_alg=search_alg)
@ -411,7 +411,7 @@ class OptunaSearch(Searcher):
        Tune automatically converts search spaces to Optuna's format:

    ````python
-    from ray.tune.suggest.optuna import OptunaSearch
+    from ray.tune.suggest.optuna import OptunaSearch  # ray version < 2
    config = { "a": tune.uniform(6, 8),
               "b": tune.loguniform(1e-4, 1e-2)}
    optuna_search = OptunaSearch(metric="loss", mode="min")
@ -422,7 +422,7 @@ class OptunaSearch(Searcher):
        look like this:

    ```python
-    from ray.tune.suggest.optuna import OptunaSearch
+    from ray.tune.suggest.optuna import OptunaSearch  # ray version < 2
    import optuna
    config = { "a": optuna.distributions.UniformDistribution(6, 8),
               "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2)}
--- a/flaml/tune/searcher/variant_generator.py
+++ b/flaml/tune/searcher/variant_generator.py
@ -20,10 +20,15 @@ import logging
 from typing import Any, Dict, Generator, List, Tuple
 import numpy
 import random
-from ..tune.sample import Categorical, Domain, RandomState
+from ..sample import Categorical, Domain, RandomState

 try:
-    from ray.tune.sample import Domain as RayDomain
+    from ray import __version__ as ray_version
+
+    if ray_version.startswith("1."):
+        from ray.tune.sample import Domain as RayDomain
+    else:
+        from ray.tune.search.sample import Domain as RayDomain
 except ImportError:
    RayDomain = Domain

--- a/flaml/tune/space.py
+++ b/flaml/tune/space.py
@ -2,11 +2,15 @@ try:
    from ray import __version__ as ray_version

    assert ray_version >= "1.10.0"
-    from ray.tune import sample
-    from ray.tune.suggest.variant_generator import generate_variants
+    if ray_version.startswith("1."):
+        from ray.tune import sample
+        from ray.tune.suggest.variant_generator import generate_variants
+    else:
+        from ray.tune.search import sample
+        from ray.tune.search.variant_generator import generate_variants
 except (ImportError, AssertionError):
    from . import sample
-    from ..searcher.variant_generator import generate_variants
+    from .searcher.variant_generator import generate_variants
 from typing import Dict, Optional, Any, Tuple, Generator
 import numpy as np
 import logging
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@ -2,11 +2,12 @@
 #  * Copyright (c) FLAML authors. All rights reserved.
 #  * Licensed under the MIT License. See LICENSE file in the
 #  * project root for license information.
-from typing import Optional, Union, List, Callable, Tuple
+from typing import Optional, Union, List, Callable, Tuple, Dict
 import numpy as np
 import datetime
 import time
 import os
+from collections import defaultdict

 try:
    from ray import __version__ as ray_version
@ -36,13 +37,92 @@ INCUMBENT_RESULT = "__incumbent_result__"
 class ExperimentAnalysis(EA):
     """Class for storing the experiment results."""

-    def __init__(self, trials, metric, mode):
+    def __init__(self, trials, metric, mode, lexico_objectives=None):
         try:
             super().__init__(self, None, trials, metric, mode)
+            self.lexico_objectives = lexico_objectives
         except (TypeError, ValueError):
             self.trials = trials
             self.default_metric = metric or DEFAULT_METRIC
             self.default_mode = mode
+            self.lexico_objectives = lexico_objectives
+
+    @property
+    def best_trial(self) -> Trial:  # goes through get_best_trial when lexico_objectives is set
+        if self.lexico_objectives is None:
+            return super().best_trial
+        else:
+            return self.get_best_trial(self.default_metric, self.default_mode)
+
+    @property
+    def best_config(self) -> Dict:  # goes through get_best_config when lexico_objectives is set
+        if self.lexico_objectives is None:
+            return super().best_config
+        else:
+            return self.get_best_config(self.default_metric, self.default_mode)
+
+    def lexico_best(self, trials):  # pick one trial from `trials` according to self.lexico_objectives
+        results = {index: trial.last_result for index, trial in enumerate(trials)}
+        metrics = self.lexico_objectives["metrics"]
+        modes = self.lexico_objectives["modes"]
+        f_best = {}
+        keys = list(results.keys())
+        length = len(keys)
+        histories = defaultdict(list)
+        for time_index in range(length):
+            for objective, mode in zip(metrics, modes):
+                histories[objective].append(  # every objective is stored in minimization form
+                    results[keys[time_index]][objective]
+                    if mode == "min"
+                    else -results[keys[time_index]][objective]  # negate "max" metrics; read the last_result dict, not the Trial
+                )
+        obj_initial = self.lexico_objectives["metrics"][0]
+        feasible_index = [*range(len(histories[obj_initial]))]  # start with every trial as a candidate
+        for k_metric, k_mode in zip(
+            self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
+        ):
+            k_values = np.array(histories[k_metric])
+            k_target = (
+                -self.lexico_objectives["targets"][k_metric]
+                if k_mode == "max"
+                else self.lexico_objectives["targets"][k_metric]
+            )  # the target is negated for "max" metrics, matching the negated history values
+            f_best[k_metric] = np.min(k_values.take(feasible_index))  # best value among the remaining candidates
+            feasible_index_prior = np.where(
+                k_values
+                <= max(
+                    [
+                        f_best[k_metric]
+                        + self.lexico_objectives["tolerances"][k_metric],
+                        k_target,
+                    ]
+                )
+            )[0].tolist()
+            feasible_index = [
+                val for val in feasible_index if val in feasible_index_prior
+            ]  # drop candidates outside the bound before looking at the next metric
+        best_trial = trials[feasible_index[-1]]  # the last remaining index is the one returned
+        return best_trial
+
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
+        if self.lexico_objectives is not None:  # metric, mode, scope and filter_nan_and_inf are not used on this branch
+            best_trial = self.lexico_best(self.trials)
+        else:
+            best_trial = super().get_best_trial(metric, mode, scope, filter_nan_and_inf)
+        return best_trial
+
+    @property
+    def best_result(self) -> Dict:  # last_result of best_trial when lexico_objectives is set
+        if self.lexico_objectives is None:  # test the attribute; the bound method lexico_best is never None
+            return super().best_result
+        else:
+            return self.best_trial.last_result


 def report(_metric=None, **kwargs):
@ -149,6 +229,7 @@ def run(
    use_ray: Optional[bool] = False,
    use_incumbent_result_in_evaluation: Optional[bool] = None,
    log_file_name: Optional[str] = None,
+    lexico_objectives: Optional[dict] = None,
    **ray_args,
 ):
    """The trigger for HPO.
@ -305,6 +386,28 @@ def run(
                if local_dir is not given, no log file is created;
                if local_dir is given, the log file name will be autogenerated under local_dir.
            Only valid when verbose > 0 or use_ray is True.
+        lexico_objectives: dict, default=None | It specifies the information needed to perform multi-objective
+            optimization with lexicographic preferences. When lexico_objectives is not None, the arguments metric
+            and mode will be invalid, and flaml's tune uses CFO
+            as the `search_alg`, which makes the input (if provided) `search_alg` invalid.
+            This dictionary shall contain the following fields of key-value pairs:
+            - "metrics": a list of optimization objectives with the order reflecting the priorities/preferences of the
+            objectives.
+            - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+            objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+            - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical target values.
+            - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical tolerance values.
+            E.g.,
+            ```python
+            lexico_objectives = {
+                "metrics": ["error_rate", "pred_time"],
+                "modes": ["min", "min"],
+                "tolerances": {"error_rate": 0.01, "pred_time": 0.0},
+                "targets": {"error_rate": 0.0},
+            }
+            ```
        **ray_args: keyword arguments to pass to ray.tune.run().
            Only valid when use_ray=True.
    """
@ -355,8 +458,13 @@ def run(
        else:
            logger.setLevel(logging.CRITICAL)

-    from ..searcher.blendsearch import BlendSearch, CFO
+    from .searcher.blendsearch import BlendSearch, CFO

+    if lexico_objectives is not None:
+        logger.warning(
+            "If lexico_objectives is not None, search_alg is forced to be CFO"
+        )
+        search_alg = None
    if search_alg is None:
        flaml_scheduler_resource_attr = (
            flaml_scheduler_min_resource
@ -371,18 +479,30 @@ def run(
            flaml_scheduler_max_resource = max_resource
            flaml_scheduler_reduction_factor = reduction_factor
            scheduler = None
-        try:
-            import optuna as _
+        if lexico_objectives is None:
+            try:
+                import optuna as _

-            SearchAlgorithm = BlendSearch
-        except ImportError:
+                SearchAlgorithm = BlendSearch
+                logger.info(
+                    "Using search algorithm {}.".format(
+                        SearchAlgorithm.__class__.__name__
+                    )
+                )
+            except ImportError:
+                SearchAlgorithm = CFO
+                logger.warning(
+                    "Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]"
+                )
+            metric = metric or DEFAULT_METRIC
+        else:
            SearchAlgorithm = CFO
-            logger.warning(
-                "Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]"
+            logger.info(
+                "Using search algorithm {}.".format(SearchAlgorithm.__class__.__name__)
            )
-
+            metric = lexico_objectives["metrics"][0] or DEFAULT_METRIC
        search_alg = SearchAlgorithm(
-            metric=metric or DEFAULT_METRIC,
+            metric=metric,
            mode=mode,
            space=config,
            points_to_evaluate=points_to_evaluate,
@ -398,15 +518,19 @@ def run(
            config_constraints=config_constraints,
            metric_constraints=metric_constraints,
            use_incumbent_result_in_evaluation=use_incumbent_result_in_evaluation,
+            lexico_objectives=lexico_objectives,
        )
    else:
        if metric is None or mode is None:
            metric = metric or search_alg.metric or DEFAULT_METRIC
            mode = mode or search_alg.mode
        if ray_import:
-            from ray.tune.suggest import ConcurrencyLimiter
+            if ray_version.startswith("1."):
+                from ray.tune.suggest import ConcurrencyLimiter
+            else:
+                from ray.tune.search import ConcurrencyLimiter
        else:
-            from flaml.searcher.suggestion import ConcurrencyLimiter
+            from flaml.tune.searcher.suggestion import ConcurrencyLimiter
        if (
            search_alg.__class__.__name__
            in [
@ -529,7 +653,12 @@ def run(
            logger.warning(
                f"fail to sample a trial for {max_failure} times in a row, stopping."
            )
-        analysis = ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
+        analysis = ExperimentAnalysis(
+            _runner.get_trials(),
+            metric=metric,
+            mode=mode,
+            lexico_objectives=lexico_objectives,
+        )
        return analysis
    finally:
        # recover the global variables in case of nested run
--- a/flaml/tune/utils.py
+++ b/flaml/tune/utils.py
@ -4,7 +4,10 @@ try:
    from ray import __version__ as ray_version

    assert ray_version >= "1.10.0"
-    from ray.tune import sample
+    if ray_version.startswith("1."):
+        from ray.tune import sample
+    else:
+        from ray.tune.search import sample
 except (ImportError, AssertionError):
    from . import sample

--- a/flaml/version.py
+++ b/flaml/version.py
@ -1 +1 @@
-__version__ = "1.0.12"
+__version__ = "1.0.13"
--- a/notebook/tune_lexicographic.ipynb
+++ b/notebook/tune_lexicographic.ipynb
@ -0,0 +1,285 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tune neural networks with lexicographic preference across objectives\n",
+    "This example tunes a neural network model with two objectives, \"error_rate\" and \"flops\", on the FashionMNIST dataset. \n",
+    "\n",
+    "**Requirements.** This notebook requires:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %pip install torch torchvision flaml[blendsearch,ray] thop"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import thop\n",
+    "import torch.nn as nn\n",
+    "from flaml import tune\n",
+    "import torch.nn.functional as F\n",
+    "import torchvision\n",
+    "import numpy as np\n",
+    "import os\n",
+    "\n",
+    "DEVICE = torch.device(\"cpu\")\n",
+    "BATCHSIZE = 128\n",
+    "N_TRAIN_EXAMPLES = BATCHSIZE * 30\n",
+    "N_VALID_EXAMPLES = BATCHSIZE * 10\n",
+    "data_dir = os.path.abspath(\"data\")\n",
+    "\n",
+    "train_dataset = torchvision.datasets.FashionMNIST(\n",
+    "    data_dir,\n",
+    "    train=True,\n",
+    "    download=True,\n",
+    "    transform=torchvision.transforms.ToTensor(),\n",
+    ")\n",
+    "\n",
+    "train_loader = torch.utils.data.DataLoader(\n",
+    "    torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),\n",
+    "    batch_size=BATCHSIZE,\n",
+    "    shuffle=True,\n",
+    ")\n",
+    "\n",
+    "val_dataset = torchvision.datasets.FashionMNIST(\n",
+    "    data_dir, train=False, transform=torchvision.transforms.ToTensor()\n",
+    ")\n",
+    "\n",
+    "val_loader = torch.utils.data.DataLoader(\n",
+    "    torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),\n",
+    "    batch_size=BATCHSIZE,\n",
+    "    shuffle=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Specify the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def define_model(configuration):\n",
+    "    n_layers = configuration[\"n_layers\"]\n",
+    "    layers = []\n",
+    "    in_features = 28 * 28\n",
+    "    for i in range(n_layers):\n",
+    "        out_features = configuration[\"n_units_l{}\".format(i)]\n",
+    "        layers.append(nn.Linear(in_features, out_features))\n",
+    "        layers.append(nn.ReLU())\n",
+    "        p = configuration[\"dropout_{}\".format(i)]\n",
+    "        layers.append(nn.Dropout(p))\n",
+    "        in_features = out_features\n",
+    "    layers.append(nn.Linear(in_features, 10))\n",
+    "    layers.append(nn.LogSoftmax(dim=1))\n",
+    "    return nn.Sequential(*layers)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_model(model, optimizer, train_loader):\n",
+    "    model.train()\n",
+    "    for batch_idx, (data, target) in enumerate(train_loader):\n",
+    "        data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)\n",
+    "        optimizer.zero_grad()\n",
+    "        F.nll_loss(model(data), target).backward()\n",
+    "        optimizer.step()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Metrics "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def eval_model(model, valid_loader):\n",
+    "    model.eval()\n",
+    "    correct = 0\n",
+    "    with torch.no_grad():\n",
+    "        for batch_idx, (data, target) in enumerate(valid_loader):\n",
+    "            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)\n",
+    "            pred = model(data).argmax(dim=1, keepdim=True)\n",
+    "            correct += pred.eq(target.view_as(pred)).sum().item()\n",
+    "\n",
+    "    accuracy = correct / N_VALID_EXAMPLES\n",
+    "    flops, params = thop.profile(\n",
+    "        model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False\n",
+    "    )\n",
+    "    return np.log2(flops), 1 - accuracy, params"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate_function(configuration):\n",
+    "    model = define_model(configuration).to(DEVICE)\n",
+    "    optimizer = torch.optim.Adam(model.parameters(), configuration[\"lr\"])\n",
+    "    n_epoch = configuration[\"n_epoch\"]\n",
+    "    for epoch in range(n_epoch):\n",
+    "        train_model(model, optimizer, train_loader)\n",
+    "    flops, error_rate, params = eval_model(model, val_loader)\n",
+    "    return {\"error_rate\": error_rate, \"flops\": flops, \"params\": params}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lexicographic information across objectives"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lexico_objectives = {}\n",
+    "lexico_objectives[\"metrics\"] = [\"error_rate\", \"flops\"]\n",
+    "lexico_objectives[\"tolerances\"] = {\"error_rate\": 0.02, \"flops\": 0.0}\n",
+    "lexico_objectives[\"targets\"] = {\"error_rate\": 0.0, \"flops\": 0.0}\n",
+    "lexico_objectives[\"modes\"] = [\"min\", \"min\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Search space"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "search_space = {\n",
+    "    \"n_layers\": tune.randint(lower=1, upper=3),\n",
+    "    \"n_units_l0\": tune.randint(lower=4, upper=128),\n",
+    "    \"n_units_l1\": tune.randint(lower=4, upper=128),\n",
+    "    \"n_units_l2\": tune.randint(lower=4, upper=128),\n",
+    "    \"dropout_0\": tune.uniform(lower=0.2, upper=0.5),\n",
+    "    \"dropout_1\": tune.uniform(lower=0.2, upper=0.5),\n",
+    "    \"dropout_2\": tune.uniform(lower=0.2, upper=0.5),\n",
+    "    \"lr\": tune.loguniform(lower=1e-5, upper=1e-1),\n",
+    "    \"n_epoch\": tune.randint(lower=1, upper=20),\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Launch the tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "low_cost_partial_config = {\n",
+    "    \"n_layers\": 1,\n",
+    "    \"n_units_l0\": 4,\n",
+    "    \"n_units_l1\": 4,\n",
+    "    \"n_units_l2\": 4,\n",
+    "    \"n_epoch\": 1,\n",
+    "}\n",
+    "\n",
+    "analysis = tune.run(\n",
+    "    evaluate_function,\n",
+    "    num_samples=-1,\n",
+    "    time_budget_s=100,\n",
+    "    config=search_space,\n",
+    "    use_ray=False,\n",
+    "    lexico_objectives=lexico_objectives,\n",
+    "    low_cost_partial_config=low_cost_partial_config,\n",
+    ")\n",
+    "result = analysis.best_result\n",
+    "print(result)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.14 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.14"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/setup.py
+++ b/setup.py
@ -48,9 +48,12 @@ setuptools.setup(
        ],
        "test": [
            "flake8>=3.8.4",
+            "thop",
            "pytest>=6.1.1",
            "coverage>=5.3",
            "pre-commit",
+            "torch",
+            "torchvision",
            "catboost>=0.26",
            "rgf-python",
            "optuna==2.8.0",
--- a/test/nlp/test_autohf.py
+++ b/test/nlp/test_autohf.py
@ -2,6 +2,8 @@ import sys
 import pytest
 import requests
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil


@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@ -56,7 +58,7 @@ def test_hf_data():
        record_id=0,
        **automl_settings
    )
-    automl.predict(X_test)
+    automl.predict(X_test, **{"per_device_eval_batch_size": 2})
    automl.predict(["test test", "test test"])
    automl.predict(
        [
@ -71,6 +73,9 @@ def test_hf_data():

    del automl

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
    test_hf_data()
--- a/test/nlp/test_autohf_classificationhead.py
+++ b/test/nlp/test_autohf_classificationhead.py
@ -1,14 +1,105 @@
-from utils import get_toy_data_multiclassclassification, get_automl_settings
+from utils import (
+    get_toy_data_regression,
+    get_toy_data_binclassification,
+    get_toy_data_multiclassclassification,
+    get_automl_settings,
+)
+import sys
+import pytest
+import os
+import shutil
+
+data_list = [
+    "get_toy_data_regression",
+    "get_toy_data_binclassification",
+    "get_toy_data_multiclassclassification",
+]
+model_path_list = [
+    "textattack/bert-base-uncased-STS-B",
+    "textattack/bert-base-uncased-SST-2",
+    "textattack/bert-base-uncased-MNLI",
+]


-def test_classification_head():
+def test_switch_1_1():
+    data_idx, model_path_idx = 0, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_1_2():
+    data_idx, model_path_idx = 0, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_1_3():
+    data_idx, model_path_idx = 0, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_2_1():
+    data_idx, model_path_idx = 1, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_2_2():
+    data_idx, model_path_idx = 1, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_2_3():
+    data_idx, model_path_idx = 1, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_3_1():
+    data_idx, model_path_idx = 2, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_3_2():
+    data_idx, model_path_idx = 2, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def test_switch_3_3():
+    data_idx, model_path_idx = 2, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+
+
+def _test_switch_classificationhead(each_data, each_model_path):
    from flaml import AutoML
    import requests

-    X_train, y_train, X_val, y_val = get_toy_data_multiclassclassification()
    automl = AutoML()

+    X_train, y_train, X_val, y_val = globals()[each_data]()
    automl_settings = get_automl_settings()
+    automl_settings["model_path"] = each_model_path
+
+    if each_data == "get_toy_data_regression":
+        automl_settings["task"] = "seq-regression"
+        automl_settings["metric"] = "pearsonr"
+    else:
+        automl_settings["task"] = "seq-classification"
+        automl_settings["metric"] = "accuracy"

    try:
        automl.fit(
@ -21,6 +112,9 @@ def test_classification_head():
    except requests.exceptions.HTTPError:
        return

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
-    test_classification_head()
+    _test_switch_classificationhead(data_list[0], model_path_list[0])
--- a/test/nlp/test_autohf_custom_metric.py
+++ b/test/nlp/test_autohf_custom_metric.py
@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil


 def custom_metric(
@ -81,6 +83,9 @@ def test_custom_metric():

    del automl

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
    test_custom_metric()
--- a/test/nlp/test_autohf_cv.py
+++ b/test/nlp/test_autohf_cv.py
@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil


@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@ -19,6 +21,9 @@ def test_cv():
    except requests.exceptions.HTTPError:
        return

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
    test_cv()
--- a/test/nlp/test_autohf_multichoice_classification.py
+++ b/test/nlp/test_autohf_multichoice_classification.py
@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_multiplechoiceclassification, get_automl_settings
+import os
+import shutil


@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@ -46,6 +48,9 @@ def test_mcc():
    accuracy = round(true_count / len(y_pred), 5)
    print("Accuracy: " + str(accuracy))

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
    test_mcc()
--- a/test/nlp/test_autohf_regression.py
+++ b/test/nlp/test_autohf_regression.py
@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqregression, get_automl_settings
+import os
+import shutil


@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@ -32,6 +34,9 @@ def test_regression():
    )
    automl.predict(X_val)

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
    test_regression()
--- a/test/nlp/test_autohf_summarization.py
+++ b/test/nlp/test_autohf_summarization.py
@ -2,6 +2,8 @@ import sys
 import pytest
 import requests
 from utils import get_toy_data_summarization, get_automl_settings
+import os
+import shutil


@pytest.mark.skipif(
@ -48,6 +50,9 @@ def test_summarization():
    )
    automl.predict(X_test)

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
    test_summarization()
--- a/test/nlp/test_autohf_tokenclassification.py
+++ b/test/nlp/test_autohf_tokenclassification.py
@ -1,6 +1,8 @@
 import sys
 import pytest
 import requests
+import os
+import shutil
 from utils import (
    get_toy_data_tokenclassification_idlabel,
    get_toy_data_tokenclassification_tokenlabel,
@ -62,6 +64,9 @@ def test_tokenclassification_idlabel():
                if min_inter_result != sys.maxsize:
                    assert val_loss == min_inter_result

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

@pytest.mark.skipif(
    sys.platform == "darwin" or sys.version < "3.7",
@ -106,6 +111,9 @@ def test_tokenclassification_tokenlabel():
                if min_inter_result != sys.maxsize:
                    assert val_loss == min_inter_result

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 if __name__ == "__main__":
    test_tokenclassification_idlabel()
--- a/test/nlp/test_default.py
+++ b/test/nlp/test_default.py
@ -1,6 +1,8 @@
 from utils import get_toy_data_seqclassification, get_automl_settings
 import sys
 from flaml.default import portfolio
+import os
+import shutil


 def pop_args(fit_kwargs):
@ -80,6 +82,9 @@ def test_starting_point_not_in_search_space():
        == "albert-base-v2"
    )

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 def test_points_to_evaluate():
    from flaml import AutoML
@ -99,6 +104,9 @@ def test_points_to_evaluate():

    automl.fit(X_train, y_train, **automl_settings)

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 # TODO: implement _test_zero_shot_model
 def test_zero_shot_nomodel():
@ -131,6 +139,9 @@ def test_zero_shot_nomodel():
    pop_args(fit_kwargs)
    model.fit(X_train, y_train, **fit_kwargs)

+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
+

 def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
    import os
@ -159,3 +170,9 @@ def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
        )
    except ValueError:
        print("Feature not implemented")
+
+    import os
+    import shutil
+
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
--- a/test/nlp/utils.py
+++ b/test/nlp/utils.py
@ -70,23 +70,19 @@ def get_toy_data_seqclassification():
    return X_train, y_train, X_val, y_val, X_test


-def get_toy_data_multiclassclassification():
+def get_toy_data_binclassification():
    train_data = {
        "text": [
            "i didnt feel humiliated",
            "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
-            "im grabbing a minute to post i feel greedy wrong",
            "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
-            "i am feeling grouchy",
            "ive been feeling a little burdened lately wasnt sure why that was",
-            "ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny",
-            "i feel as confused about life as a teenager or as jaded as a year old man",
            "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
            "i feel romantic too",
            "i feel like i have to make the suffering i m seeing mean something",
            "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
        ],
-        "label": [0, 0, 3, 2, 3, 0, 5, 4, 1, 2, 0, 1],
+        "label": [0, 0, 1, 0, 1, 1, 0, 1],
    }
    train_dataset = pd.DataFrame(train_data)

@ -95,9 +91,84 @@ def get_toy_data_multiclassclassification():
            "i think it s the easiest time of year to feel dissatisfied",
            "i feel low energy i m just thirsty",
            "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
-            "i do not feel reassured anxiety is on each side",
        ],
-        "label": [3, 0, 1, 1],
+        "label": [0, 1, 1],
+    }
+    dev_dataset = pd.DataFrame(dev_data)
+
+    custom_sent_keys = ["text"]
+    label_key = "label"
+
+    X_train = train_dataset[custom_sent_keys]
+    y_train = train_dataset[label_key]
+
+    X_val = dev_dataset[custom_sent_keys]
+    y_val = dev_dataset[label_key]
+
+    return X_train, y_train, X_val, y_val
+
+
+def get_toy_data_regression():
+    train_data = {
+        "text": [
+            "i didnt feel humiliated",
+            "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
+            "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
+            "ive been feeling a little burdened lately wasnt sure why that was",
+            "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
+            "i feel romantic too",
+            "i feel like i have to make the suffering i m seeing mean something",
+            "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
+        ],
+        "label": [1.0, 1.0, 3.0, 1.0, 5.0, 5.0, 1.0, 3.0],
+    }
+    train_dataset = pd.DataFrame(train_data)
+
+    dev_data = {
+        "text": [
+            "i think it s the easiest time of year to feel dissatisfied",
+            "i feel low energy i m just thirsty",
+            "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
+        ],
+        "label": [1.0, 3.0, 3.0],
+    }
+    dev_dataset = pd.DataFrame(dev_data)
+
+    custom_sent_keys = ["text"]
+    label_key = "label"
+
+    X_train = train_dataset[custom_sent_keys]
+    y_train = train_dataset[label_key]
+
+    X_val = dev_dataset[custom_sent_keys]
+    y_val = dev_dataset[label_key]
+
+    return X_train, y_train, X_val, y_val
+
+
+def get_toy_data_multiclassclassification():
+    train_data = {
+        "text": [
+            "i didnt feel humiliated",
+            "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
+            "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
+            "ive been feeling a little burdened lately wasnt sure why that was",
+            "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
+            "i feel romantic too",
+            "i feel like i have to make the suffering i m seeing mean something",
+            "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
+        ],
+        "label": [0, 0, 2, 0, 1, 2, 0, 1],
+    }
+    train_dataset = pd.DataFrame(train_data)
+
+    dev_data = {
+        "text": [
+            "i think it s the easiest time of year to feel dissatisfied",
+            "i feel low energy i m just thirsty",
+            "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
+        ],
+        "label": [0, 1, 1],
    }
    dev_dataset = pd.DataFrame(dev_data)

--- a/test/nni/flaml_nni_wrap.py
+++ b/test/nni/flaml_nni_wrap.py
@ -1,4 +1,4 @@
-from flaml.searcher.blendsearch import BlendSearchTuner as BST
+from flaml.tune.searcher.blendsearch import BlendSearchTuner as BST


 class BlendSearchTuner(BST):
--- a/test/pipeline_tuning_example/data_prep/data_prep.py
+++ b/test/pipeline_tuning_example/data_prep/data_prep.py
@ -20,7 +20,7 @@ def main():

    logger.info(" ".join(f"{k}={v}" for k, v in vars(args).items()))

-    data_path = os.path.join(args.data, 'data.csv')
+    data_path = os.path.join(args.data, "data.csv")
    df = pd.read_csv(data_path)

    train_df, test_df = train_test_split(
--- a/test/pipeline_tuning_example/data_prep/data_prep.yaml
+++ b/test/pipeline_tuning_example/data_prep/data_prep.yaml
@ -19,7 +19,7 @@ environment:
  os: Linux

 command: >-
-  python data_prep.py 
+  python data_prep.py
  --data {inputs.data}
  --test_train_ratio {inputs.test_train_ratio}
  --train_data {outputs.train_data}
--- a/test/pipeline_tuning_example/submit_train_pipeline.py
+++ b/test/pipeline_tuning_example/submit_train_pipeline.py
@ -83,10 +83,10 @@ def build_and_submit_aml_pipeline(config):
    ################################################
    # load component functions
    ################################################
-    data_prep_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                              / "data_prep/data_prep.yaml")
-    train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                          / "train/train.yaml")
+    data_prep_component = Component.from_yaml(
+        ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml"
+    )
+    train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "train/train.yaml")

    ################################################
    # build pipeline
--- a/test/pipeline_tuning_example/submit_tuner_pipeline.py
+++ b/test/pipeline_tuning_example/submit_tuner_pipeline.py
@ -14,16 +14,19 @@ def remote_run():
    ################################################
    # connect to your Azure ML workspace
    ################################################
-    ws = Workspace(subscription_id=args.subscription_id,
-                   resource_group=args.resource_group,
-                   workspace_name=args.workspace)
+    ws = Workspace(
+        subscription_id=args.subscription_id,
+        resource_group=args.resource_group,
+        workspace_name=args.workspace,
+    )

    ################################################
    # load component functions
    ################################################

-    pipeline_tuning_func = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                               / "tuner/component_spec.yaml")
+    pipeline_tuning_func = Component.from_yaml(
+        ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml"
+    )

    ################################################
    # build pipeline
@ -44,6 +47,7 @@ def remote_run():
 def local_run():
    logger.info("Run tuner locally.")
    from tuner import tuner_func
+
    tuner_func.tune_pipeline(concurrent_run=2)


@ -52,15 +56,18 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_mutually_exclusive_group(required=False)
    parser.add_argument(
-        "--subscription_id", type=str, help="your_subscription_id", required=False,
+        "--subscription_id",
+        type=str,
+        help="your_subscription_id",
+        required=False,
    )
    parser.add_argument(
-        "--resource_group", type=str, help="your_resource_group", required=False)
-    parser.add_argument(
-        "--workspace", type=str, help="your_workspace", required=False)
+        "--resource_group", type=str, help="your_resource_group", required=False
+    )
+    parser.add_argument("--workspace", type=str, help="your_workspace", required=False)

-    parser.add_argument('--remote', dest='remote', action='store_true')
-    parser.add_argument('--local', dest='remote', action='store_false')
+    parser.add_argument("--remote", dest="remote", action="store_true")
+    parser.add_argument("--local", dest="remote", action="store_false")
    parser.set_defaults(remote=True)
    args = parser.parse_args()

--- a/test/pipeline_tuning_example/train/train.py
+++ b/test/pipeline_tuning_example/train/train.py
@ -5,7 +5,7 @@ import pandas as pd
 from azureml.core import Run


-class LightGBMCallbackHandler():
+class LightGBMCallbackHandler:
    def __init__(self):
        pass

@ -24,16 +24,22 @@ class LightGBMCallbackHandler():
 def main(args):
    """Main function of the script."""

-    train_path = os.path.join(args.train_data, 'data.csv')
+    train_path = os.path.join(args.train_data, "data.csv")
    print("traning_path:", train_path)

-    test_path = os.path.join(args.test_data, 'data.csv')
+    test_path = os.path.join(args.test_data, "data.csv")

    train_set = lgb.Dataset(train_path)
    test_set = lgb.Dataset(test_path)
    callbacks_handler = LightGBMCallbackHandler()
-    config = {"header": True, "objective": "binary", "label_column": 30, "metric": "binary_error",
-              "n_estimators": args.n_estimators, "learning_rate": args.learning_rate}
+    config = {
+        "header": True,
+        "objective": "binary",
+        "label_column": 30,
+        "metric": "binary_error",
+        "n_estimators": args.n_estimators,
+        "learning_rate": args.learning_rate,
+    }
    gbm = lgb.train(
        config,
        train_set,
@ -44,9 +50,9 @@ def main(args):
        ],
    )

-    print('Saving model...')
+    print("Saving model...")
    # save model to file
-    gbm.save_model(os.path.join(args.model, 'model.txt'))
+    gbm.save_model(os.path.join(args.model, "model.txt"))


 if __name__ == "__main__":
--- a/test/pipeline_tuning_example/train/train.yaml
+++ b/test/pipeline_tuning_example/train/train.yaml
@ -4,9 +4,9 @@ name: classifier
 version: 0.0.1
 display_name: Train lgbm classifier
 inputs:
-  train_data: 
+  train_data:
    type: path
-  test_data: 
+  test_data:
    type: path
  learning_rate:
    type: float
@ -20,8 +20,8 @@ environment:
  conda_dependencies_file: env.yaml
 os: Linux
 command: >-
-  python train.py 
-  --train_data {inputs.train_data} 
+  python train.py
+  --train_data {inputs.train_data}
  --test_data {inputs.test_data}
  --learning_rate {inputs.learning_rate}
  --n_estimators {inputs.n_estimators}
--- a/test/pipeline_tuning_example/tuner/component_spec.yaml
+++ b/test/pipeline_tuning_example/tuner/component_spec.yaml
@ -9,4 +9,4 @@ environment:
  conda_dependencies_file: env.yaml
 os: Linux
 command: >-
-  python tuner/tuner_func.py 
+  python tuner/tuner_func.py
--- a/test/pipeline_tuning_example/tuner/tuner_func.py
+++ b/test/pipeline_tuning_example/tuner/tuner_func.py
@ -8,8 +8,7 @@ logger = logging.getLogger(__name__)


 def run_with_config(config: dict):
-    """Run the pipeline with a given config dict
-    """
+    """Run the pipeline with a given config dict"""

    # pass the hyperparameters to AzureML jobs by overwriting the config file.
    overrides = [f"{key}={value}" for key, value in config.items()]
@ -24,25 +23,25 @@ def run_with_config(config: dict):
    while not stop:
        # get status
        status = run._core_run.get_status()
-        print(f'status: {status}')
+        print(f"status: {status}")

        # get metrics
        metrics = run._core_run.get_metrics(recursive=True)
        if metrics:
            run_metrics = list(metrics.values())

-            new_metric = run_metrics[0]['eval_binary_error']
+            new_metric = run_metrics[0]["eval_binary_error"]

            if type(new_metric) == list:
                new_metric = new_metric[-1]

-            print(f'eval_binary_error: {new_metric}')
+            print(f"eval_binary_error: {new_metric}")

            tune.report(eval_binary_error=new_metric)

        time.sleep(5)

-        if status == 'FAILED' or status == 'Completed':
+        if status == "FAILED" or status == "Completed":
            stop = True

    print("The run is terminated.")
--- a/test/tune/example.py
+++ b/test/tune/example.py
@ -22,9 +22,15 @@ def easy_objective(config):
 def test_blendsearch_tune(smoke_test=True):
    try:
        from ray import tune
-        from ray.tune.suggest import ConcurrencyLimiter
        from ray.tune.schedulers import AsyncHyperBandScheduler
-        from ray.tune.suggest.flaml import BlendSearch
+        from ray import __version__ as ray_version
+
+        if ray_version.startswith("1."):
+            from ray.tune.suggest import ConcurrencyLimiter
+            from ray.tune.suggest.flaml import BlendSearch
+        else:
+            from ray.tune.search import ConcurrencyLimiter
+            from ray.tune.search.flaml import BlendSearch
    except ImportError:
        print("ray[tune] is not installed, skipping test")
        return
--- a/test/tune/example_scheduler.py
+++ b/test/tune/example_scheduler.py
@ -26,7 +26,7 @@ def easy_objective(use_raytune, config):

 def test_tune_scheduler(smoke_test=True, use_ray=True, use_raytune=False):
    import numpy as np
-    from flaml.searcher.blendsearch import BlendSearch
+    from flaml.tune.searcher.blendsearch import BlendSearch

    np.random.seed(100)
    easy_objective_custom_tune = partial(easy_objective, use_raytune)
--- a/test/tune/test_flaml_raytune_consistency.py
+++ b/test/tune/test_flaml_raytune_consistency.py
@ -28,7 +28,7 @@ low_cost_partial_config = {"x": 1}


 def setup_searcher(searcher_name):
-    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch
+    from flaml.tune.searcher.blendsearch import BlendSearch, CFO, RandomSearch

    if "cfo" in searcher_name:
        searcher = CFO(
@ -52,7 +52,12 @@ def _test_flaml_raytune_consistency(
    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
 ):
    try:
-        from ray import tune as raytune
+        from ray import tune as raytune, __version__ as ray_version
+
+        if ray_version.startswith("1."):
+            from ray.tune.suggest import ConcurrencyLimiter
+        else:
+            from ray.tune.search import ConcurrencyLimiter
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
@ -78,7 +83,6 @@ def _test_flaml_raytune_consistency(
    print(analysis.best_trial.last_result)  # the best trial's result

    searcher = setup_searcher(searcher_name)
-    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
--- a/test/tune/test_lexiflow.py
+++ b/test/tune/test_lexiflow.py
@ -0,0 +1,142 @@
+import torch
+import thop
+import torch.nn as nn
+from flaml import tune
+import torch.nn.functional as F
+import torchvision
+import numpy as np
+
+DEVICE = torch.device("cpu")
+BATCHSIZE = 128
+N_TRAIN_EXAMPLES = BATCHSIZE * 30
+N_VALID_EXAMPLES = BATCHSIZE * 10
+
+
+def test_lexiflow():
+    train_dataset = torchvision.datasets.FashionMNIST(
+        "test/data",
+        train=True,
+        download=True,
+        transform=torchvision.transforms.ToTensor(),
+    )
+
+    train_loader = torch.utils.data.DataLoader(
+        torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),
+        batch_size=BATCHSIZE,
+        shuffle=True,
+    )
+
+    val_dataset = torchvision.datasets.FashionMNIST(
+        "test/data", train=False, transform=torchvision.transforms.ToTensor()
+    )
+
+    val_loader = torch.utils.data.DataLoader(
+        torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),
+        batch_size=BATCHSIZE,
+        shuffle=True,
+    )
+
+    def define_model(configuration):
+        n_layers = configuration["n_layers"]
+        layers = []
+        in_features = 28 * 28
+        for i in range(n_layers):
+            out_features = configuration["n_units_l{}".format(i)]
+            layers.append(nn.Linear(in_features, out_features))
+            layers.append(nn.ReLU())
+            p = configuration["dropout_{}".format(i)]
+            layers.append(nn.Dropout(p))
+            in_features = out_features
+        layers.append(nn.Linear(in_features, 10))
+        layers.append(nn.LogSoftmax(dim=1))
+        return nn.Sequential(*layers)
+
+    def train_model(model, optimizer, train_loader):
+        model.train()
+        for batch_idx, (data, target) in enumerate(train_loader):
+            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+            optimizer.zero_grad()
+            F.nll_loss(model(data), target).backward()
+            optimizer.step()
+
+    def eval_model(model, valid_loader):
+        model.eval()
+        correct = 0
+        with torch.no_grad():
+            for batch_idx, (data, target) in enumerate(valid_loader):
+                data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+                pred = model(data).argmax(dim=1, keepdim=True)
+                correct += pred.eq(target.view_as(pred)).sum().item()
+
+        accuracy = correct / N_VALID_EXAMPLES
+        flops, params = thop.profile(
+            model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
+        )
+        return np.log2(flops), 1 - accuracy, params
+
+    def evaluate_function(configuration):
+        model = define_model(configuration).to(DEVICE)
+        optimizer = torch.optim.Adam(model.parameters(), configuration["lr"])
+        n_epoch = configuration["n_epoch"]
+        for epoch in range(n_epoch):
+            train_model(model, optimizer, train_loader)
+        flops, error_rate, params = eval_model(model, val_loader)
+        return {"error_rate": error_rate, "flops": flops, "params": params}
+
+    lexico_objectives = {}
+    lexico_objectives["metrics"] = ["error_rate", "flops"]
+    lexico_objectives["tolerances"] = {"error_rate": 0.02, "flops": 0.0}
+    lexico_objectives["targets"] = {"error_rate": 0.0, "flops": 0.0}
+    lexico_objectives["modes"] = ["min", "min"]
+
+    search_space = {
+        "n_layers": tune.randint(lower=1, upper=3),
+        "n_units_l0": tune.randint(lower=4, upper=128),
+        "n_units_l1": tune.randint(lower=4, upper=128),
+        "n_units_l2": tune.randint(lower=4, upper=128),
+        "dropout_0": tune.uniform(lower=0.2, upper=0.5),
+        "dropout_1": tune.uniform(lower=0.2, upper=0.5),
+        "dropout_2": tune.uniform(lower=0.2, upper=0.5),
+        "lr": tune.loguniform(lower=1e-5, upper=1e-1),
+        "n_epoch": tune.randint(lower=1, upper=20),
+    }
+
+    low_cost_partial_config = {
+        "n_layers": 1,
+        "n_units_l0": 4,
+        "n_units_l1": 4,
+        "n_units_l2": 4,
+        "n_epoch": 1,
+    }
+
+    # lexico tune
+    analysis = tune.run(
+        evaluate_function,
+        num_samples=5,
+        config=search_space,
+        use_ray=False,
+        lexico_objectives=lexico_objectives,
+        low_cost_partial_config=low_cost_partial_config,
+    )
+    print(analysis.best_trial)
+    print(analysis.best_config)
+    print(analysis.best_result)
+
+    # Non lexico tune
+    analysis = tune.run(
+        evaluate_function,
+        metric="error_rate",
+        mode="min",
+        num_samples=5,
+        config=search_space,
+        use_ray=False,
+        lexico_objectives=None,
+        low_cost_partial_config=low_cost_partial_config,
+    )
+    print(analysis.best_trial)
+    print(analysis.best_config)
+    print(analysis.best_result)
+
+
+if __name__ == "__main__":
+    test_lexiflow()
--- a/test/tune/test_record_incumbent.py
+++ b/test/tune/test_record_incumbent.py
@ -7,7 +7,7 @@ def rosenbrock_function(config: dict):
    funcLoss = 50
    for key, value in config.items():
        if key in ["x1", "x2", "x3", "x4", "x5"]:
-            funcLoss += value ** 2 - 10 * np.cos(2 * np.pi * value)
+            funcLoss += value**2 - 10 * np.cos(2 * np.pi * value)
    if INCUMBENT_RESULT in config.keys():
        print("----------------------------------------------")
        print("incumbent result", config[INCUMBENT_RESULT])
@ -62,7 +62,7 @@ def test_record_incumbent(method="BlendSearch"):
            use_incumbent_result_in_evaluation=True,
        )
    elif method == "CFOCat":
-        from flaml.searcher.cfo_cat import CFOCat
+        from flaml.tune.searcher.cfo_cat import CFOCat

        algo = CFOCat(
            use_incumbent_result_in_evaluation=True,
--- a/test/tune/test_reproducibility.py
+++ b/test/tune/test_reproducibility.py
@ -26,7 +26,7 @@ def _easy_objective(use_raytune, config):

 def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False):
    from flaml import tune
-    from flaml.searcher.blendsearch import BlendSearch
+    from flaml.tune.searcher.blendsearch import BlendSearch

    easy_objective_custom_tune = partial(_easy_objective, use_raytune)
    search_space = {
--- a/test/tune/test_restore.py
+++ b/test/tune/test_restore.py
@ -3,7 +3,7 @@ import shutil
 import tempfile
 import unittest
 import numpy as np
-from flaml.searcher.suggestion import ConcurrencyLimiter
+from flaml.tune.searcher.suggestion import ConcurrencyLimiter
 from flaml import tune
 from flaml import CFO

--- a/test/tune/test_scheduler.py
+++ b/test/tune/test_scheduler.py
@ -1,6 +1,6 @@
 """Require: pip install flaml[test,ray]
 """
-from flaml.scheduler.trial_scheduler import TrialScheduler
+from flaml.tune.scheduler.trial_scheduler import TrialScheduler
 import numpy as np
 from flaml import tune

--- a/test/tune/test_searcher.py
+++ b/test/tune/test_searcher.py
@ -5,7 +5,10 @@ try:
    from ray import __version__ as ray_version

    assert ray_version >= "1.10.0"
-    from ray.tune import sample
+    if ray_version.startswith("1."):
+        from ray.tune import sample
+    else:
+        from ray.tune.search import sample

    use_ray = True
 except (ImportError, AssertionError):
@ -29,8 +32,12 @@ def wrong_define_search_space(trial):


 def test_searcher():
-    from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter
-    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch
+    from flaml.tune.searcher.suggestion import (
+        OptunaSearch,
+        Searcher,
+        ConcurrencyLimiter,
+    )
+    from flaml.tune.searcher.blendsearch import BlendSearch, CFO, RandomSearch
    from flaml.tune import sample as flamlsample

    searcher = Searcher()
@ -303,6 +310,6 @@ def test_no_optuna():
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "optuna"])
-    import flaml.searcher.suggestion
+    import flaml.tune.searcher.suggestion

    subprocess.check_call([sys.executable, "-m", "pip", "install", "optuna==2.8.0"])
--- a/test/tune/test_space.py
+++ b/test/tune/test_space.py
@ -69,7 +69,7 @@ def test_define_by_run():


 def test_grid():
-    from flaml.searcher.variant_generator import (
+    from flaml.tune.searcher.variant_generator import (
        generate_variants,
        grid_search,
        TuneError,
--- a/test/tune/test_tune.py
+++ b/test/tune/test_tune.py
@ -1,6 +1,6 @@
 """Require: pip install flaml[test,ray]
 """
-from flaml.searcher.blendsearch import BlendSearch
+from flaml import BlendSearch
 import time
 import os
 from sklearn.model_selection import train_test_split
@ -146,7 +146,7 @@ def _test_xgboost(method="BlendSearch"):
                        },
                    )
                elif "CFOCat" == method:
-                    from flaml.searcher.cfo_cat import CFOCat
+                    from flaml.tune.searcher.cfo_cat import CFOCat

                    algo = CFOCat(
                        low_cost_partial_config={
--- a/website/docs/Contribute.md
+++ b/website/docs/Contribute.md
@ -1,12 +1,12 @@
 # Contributing

-This project welcomes (and encourages) all forms of contributions, including but not limited to:
+This project welcomes and encourages all forms of contributions, including but not limited to:

 -  Pushing patches.
 -  Code review of pull requests.
 -  Documentation, examples and test cases.
 -  Readability improvement, e.g., improvement on docstr and comments.
-  Community participation in [issues](https://github.com/microsoft/FLAML/issues), [discussions](https://github.com/microsoft/FLAML/discussions), and [gitter](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge).
+-  Community participation in [issues](https://github.com/microsoft/FLAML/issues), [discussions](https://github.com/microsoft/FLAML/discussions), and [discord](https://discord.gg/7ZVfhbTQZ5).
 -  Tutorials, blog posts, talks that promote the project.
 -  Sharing application scenarios and/or related research.

@ -26,6 +26,31 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope
 For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
 contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.

+## How to make a good bug report
+
+When you submit an issue to [GitHub](https://github.com/microsoft/FLAML/issues), please do your best to
+follow these guidelines! This will make it a lot easier to provide you with good
+feedback:
+
+- The ideal bug report contains a short reproducible code snippet. This way
+  anyone can try to reproduce the bug easily (see [this](https://stackoverflow.com/help/mcve) for more details). If your snippet is
+  longer than around 50 lines, please link to a [gist](https://gist.github.com) or a GitHub repo.
+
+- If an exception is raised, please **provide the full traceback**.
+
+- Please include your **operating system type and version number**, as well as
+  your **Python, flaml, scikit-learn versions**. The version of flaml
+  can be found by running the following code snippet:
+```python
+import flaml
+print(flaml.__version__)
+```
+
+- Please ensure all **code snippets and error messages are formatted in
+  appropriate code blocks**.  See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks)
+  for more details.
+
+
 ## Becoming a Reviewer

 There is currently no formal reviewer solicitation process. Current reviewers identify reviewers from active contributors. If you are willing to become a reviewer, you are welcome to let us know on gitter.
@ -39,12 +64,14 @@ git clone https://github.com/microsoft/FLAML.git
 pip install -e FLAML[test,notebook]
 ```

+In case the `pip install` command fails, try escaping the brackets such as `pip install -e FLAML\[test,notebook\]`
+
 ### Docker

 We provide a simple [Dockerfile](https://github.com/microsoft/FLAML/blob/main/Dockerfile).

 ```bash
-docker build git://github.com/microsoft/FLAML -t flaml-dev
+docker build https://github.com/microsoft/FLAML.git#main -t flaml-dev
 docker run -it flaml-dev
 ```

@ -68,7 +95,6 @@ coverage run -m pytest test

 Then you can see the coverage report by
 `coverage report -m` or `coverage html`.
-If all the tests are passed, please also test run [notebook/automl_classification](https://github.com/microsoft/FLAML/blob/main/notebook/automl_classification.ipynb) to make sure your commit does not break the notebook example.

 ### Documentation

@ -91,3 +117,6 @@ yarn start

 The last command starts a local development server and opens up a browser window.
 Most changes are reflected live without having to restart the server.
+
+Note:
+Some tips in this guide are based on the contributor guides of [ray](https://docs.ray.io/en/latest/ray-contribute/getting-involved.html), [scikit-learn](https://scikit-learn.org/stable/developers/contributing.html), and [hummingbird](https://github.com/microsoft/hummingbird/blob/main/CONTRIBUTING.md).
--- a/website/docs/Examples/AutoML-NLP.md
+++ b/website/docs/Examples/AutoML-NLP.md
@ -38,6 +38,13 @@ automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_
 automl.predict(X_test)
 ```

+Notice that after you run `automl.fit`, the intermediate checkpoints are saved under the specified output_dir `data/output`. You can use the following code to clean these outputs if they consume a large storage space:
+
+```python
+if os.path.exists("data/output/"):
+    shutil.rmtree("data/output/")
+```
+
 #### Sample output

 ```
--- a/website/docs/Examples/Integrate
+++ b/website/docs/Examples/Integrate
@ -37,7 +37,7 @@ automl = AutoML()
 settings = {
    "time_budget": 60,  # total running time in seconds
    "metric": "accuracy",  # metric to optimize
-    "task": "classification",  # task type  
+    "task": "classification",  # task type
    "log_file_name": "airlines_experiment.log",  # flaml log file
 }
 experiment = mlflow.set_experiment("flaml")  # the experiment name in AzureML workspace
--- a/website/docs/Examples/Tune-AzureML-pipeline.md
+++ b/website/docs/Examples/Tune-AzureML-pipeline.md
@ -34,7 +34,7 @@ The pipeline consists of two steps: (1) data preparation and (2) model training.

 ![png](images/AzureML_train_pipeline.png).

-The code example discussed in the page is included in
+The [code example](https://github.com/microsoft/FLAML/tree/main/test/pipeline_tuning_example) discussed in the page is included in
 `test/pipeline_tuning_example/`.
 We will use the relative path in the rest of the page.

@ -205,7 +205,7 @@ Overall, to tune the hyperparameters of the AzureML pipeline, run:

 ```bash
 # the training job will run remotely as an AzureML job in both choices
-# run the tuning job locally 
+# run the tuning job locally
 python submit_tune.py --local
 # run the tuning job remotely
 python submit_tune.py --remote --subscription_id <your subscription_id> --resource_group <your resource_group> --workspace <your workspace>
--- a/website/docs/Examples/Tune-Lexicographic-objectives.md
+++ b/website/docs/Examples/Tune-Lexicographic-objectives.md
@ -0,0 +1,165 @@
+# Tune - Lexicographic Objectives
+
+## Requirements
+
+```python
+pip install flaml thop torchvision torch
+```
+
+## Tuning accurate and efficient neural networks with lexicographic preference
+
+### Data
+
+```python
+import torch
+import thop
+import torch.nn as nn
+from flaml import tune
+import torch.nn.functional as F
+import torchvision
+import numpy as np
+import os
+
+DEVICE = torch.device("cpu")
+BATCHSIZE = 128
+N_TRAIN_EXAMPLES = BATCHSIZE * 30
+N_VALID_EXAMPLES = BATCHSIZE * 10
+data_dir = os.path.abspath("data")
+
+train_dataset = torchvision.datasets.FashionMNIST(
+    data_dir,
+    train=True,
+    download=True,
+    transform=torchvision.transforms.ToTensor(),
+)
+
+train_loader = torch.utils.data.DataLoader(
+    torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),
+    batch_size=BATCHSIZE,
+    shuffle=True,
+)
+
+val_dataset = torchvision.datasets.FashionMNIST(
+    data_dir, train=False, transform=torchvision.transforms.ToTensor()
+)
+
+val_loader = torch.utils.data.DataLoader(
+    torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),
+    batch_size=BATCHSIZE,
+    shuffle=True,
+)
+```
+
+### Specify the model
+
+```python
+def define_model(configuration):
+    n_layers = configuration["n_layers"]
+    layers = []
+    in_features = 28 * 28
+    for i in range(n_layers):
+        out_features = configuration["n_units_l{}".format(i)]
+        layers.append(nn.Linear(in_features, out_features))
+        layers.append(nn.ReLU())
+        p = configuration["dropout_{}".format(i)]
+        layers.append(nn.Dropout(p))
+        in_features = out_features
+    layers.append(nn.Linear(in_features, 10))
+    layers.append(nn.LogSoftmax(dim=1))
+    return nn.Sequential(*layers)
+```
+
+### Train
+
+```python
+def train_model(model, optimizer, train_loader):
+    model.train()
+    for batch_idx, (data, target) in enumerate(train_loader):
+        data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+        optimizer.zero_grad()
+        F.nll_loss(model(data), target).backward()
+        optimizer.step()
+```
+
+### Metrics
+
+```python
+def eval_model(model, valid_loader):
+    model.eval()
+    correct = 0
+    with torch.no_grad():
+        for batch_idx, (data, target) in enumerate(valid_loader):
+            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+            pred = model(data).argmax(dim=1, keepdim=True)
+            correct += pred.eq(target.view_as(pred)).sum().item()
+
+    accuracy = correct / N_VALID_EXAMPLES
+    flops, params = thop.profile(
+        model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
+    )
+    return np.log2(flops), 1 - accuracy, params
+```
+
+
+
+### Evaluation function
+
+```python
+def evaluate_function(configuration):
+    model = define_model(configuration).to(DEVICE)
+    optimizer = torch.optim.Adam(model.parameters(), configuration["lr"])
+    n_epoch = configuration["n_epoch"]
+    for epoch in range(n_epoch):
+        train_model(model, optimizer, train_loader)
+    flops, error_rate, params = eval_model(model, val_loader)
+    return {"error_rate": error_rate, "flops": flops, "params": params}
+```
+
+### Search space
+```python
+search_space = {
+    "n_layers": tune.randint(lower=1, upper=3),
+    "n_units_l0": tune.randint(lower=4, upper=128),
+    "n_units_l1": tune.randint(lower=4, upper=128),
+    "n_units_l2": tune.randint(lower=4, upper=128),
+    "dropout_0": tune.uniform(lower=0.2, upper=0.5),
+    "dropout_1": tune.uniform(lower=0.2, upper=0.5),
+    "dropout_2": tune.uniform(lower=0.2, upper=0.5),
+    "lr": tune.loguniform(lower=1e-5, upper=1e-1),
+    "n_epoch": tune.randint(lower=1, upper=20),
+}
+```
+
+### Launch the tuning process
+
+```python
+
+# Low cost initial point
+low_cost_partial_config = {
+    "n_layers": 1,
+    "n_units_l0": 4,
+    "n_units_l1": 4,
+    "n_units_l2": 4,
+    "n_epoch": 1,
+}
+
+# Specify lexicographic preference
+lexico_objectives = {}
+lexico_objectives["metrics"] = ["error_rate", "flops"]
+lexico_objectives["tolerances"] = {"error_rate": 0.02, "flops": 0.0}
+lexico_objectives["targets"] = {"error_rate": 0.0, "flops": 0.0}
+lexico_objectives["modes"] = ["min", "min"]
+
+# launch the tuning process
+analysis = tune.run(
+    evaluate_function,
+    num_samples=-1,
+    time_budget_s=100,
+    config=search_space, # search space of NN
+    use_ray=False,
+    lexico_objectives=lexico_objectives,
+    low_cost_partial_config=low_cost_partial_config, # low cost initial point
+)
+```
+
+
+[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/tune_lexicographic.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/tune_lexicographic.ipynb)
--- a/website/docs/Getting-Started.md
+++ b/website/docs/Getting-Started.md
@ -95,3 +95,5 @@ Then, you can use it just like you use the original `LGMBClassifier`. Your other
 * Refer to [SDK](reference/automl) and [FAQ](FAQ).

 If you like our project, please give it a [star](https://github.com/microsoft/FLAML/stargazers) on GitHub. If you are interested in contributing, please read [Contributor's Guide](Contribute).
+
+<iframe src="https://ghbtns.com/github-btn.html?user=microsoft&amp;repo=FLAML&amp;type=star&amp;count=true&amp;size=large" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>
--- a/website/docs/Installation.md
+++ b/website/docs/Installation.md
@ -79,10 +79,10 @@ You can use FLAML in .NET in the following ways:
 **Low-code**

 - [*Model Builder*](https://dotnet.microsoft.com/apps/machinelearning-ai/ml-dotnet/model-builder) - A Visual Studio extension for training ML models using FLAML. For more information on how to install the, see the [install Model Builder](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-model-builder?tabs=visual-studio-2022) guide.
- [*ML.NET CLI*](https://docs.microsoft.com/dotnet/machine-learning/automate-training-with-cli) - A dotnet CLI tool for training machine learning models using FLAML on Windows, MacOS, and Linux. For more information on how to install the ML.NET CLI, see the [install the ML.NET CLI](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-ml-net-cli?tabs=windows) guide. 
+- [*ML.NET CLI*](https://docs.microsoft.com/dotnet/machine-learning/automate-training-with-cli) - A dotnet CLI tool for training machine learning models using FLAML on Windows, MacOS, and Linux. For more information on how to install the ML.NET CLI, see the [install the ML.NET CLI](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-ml-net-cli?tabs=windows) guide.

 **Code-first**

- [*Microsoft.ML.AutoML*](https://www.nuget.org/packages/Microsoft.ML.AutoML/0.20.0-preview.22313.1) - NuGet package that provides direct access to the FLAML AutoML APIs that power low-code solutions like Model Builder and the ML.NET CLI. For more information on installing NuGet packages, see the install and use a NuGet package in [Visual Studio](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-in-visual-studio) or [dotnet CLI](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-using-the-dotnet-cli) guides. 
+- [*Microsoft.ML.AutoML*](https://www.nuget.org/packages/Microsoft.ML.AutoML/0.20.0-preview.22313.1) - NuGet package that provides direct access to the FLAML AutoML APIs that power low-code solutions like Model Builder and the ML.NET CLI. For more information on installing NuGet packages, see the install and use a NuGet package in [Visual Studio](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-in-visual-studio) or [dotnet CLI](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-using-the-dotnet-cli) guides.

 To get started with the ML.NET API and AutoML, see the [csharp-notebooks](https://github.com/dotnet/csharp-notebooks#machine-learning).
--- a/website/docs/Research.md
+++ b/website/docs/Research.md
@ -1,4 +1,4 @@
-# Research in FLAML
+# Research

 For technical details, please check our research publications.

@ -17,7 +17,5 @@ For technical details, please check our research publications.
 * [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.
 * [An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models](https://aclanthology.org/2021.acl-long.178.pdf). Susan Xueqing Liu, Chi Wang. ACL 2021.
 * [ChaCha for Online AutoML](https://www.microsoft.com/en-us/research/publication/chacha-for-online-automl/). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. ICML 2021.
-* [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021). 
+* [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021).
 * [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. ArXiv preprint arXiv:2202.09927 (2022).
-
-Many researchers and engineers have contributed to the technology development. In alphabetical order: Vijay Aski, Sebastien Bubeck, Surajit Chaudhuri, Kevin Chen, Yi Wei Chen, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Niklas Gustafsson, Silu Huang, Moe Kayali, Dongwoo Kim, Christian Konig, John Langford, Menghao Li, Mingqin Li, Susan Xueqing Liu, Zhe Liu, Naveen Gaur, Paul Mineiro, Vivek Narasayya, Jake Radzikowski, Marco Rossi, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Chi Wang, Yue Wang, Markus Weimer, Qingyun Wu, Qiufeng Yin, Haozhe Zhang, Minjia Zhang, XiaoYun Zhang, Eric Zhu, Rui Zhuang.
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@ -422,7 +422,6 @@ automl2.fit(X_train, y_train, time_budget=7200, starting_points=automl1.best_con

 `starting_points` is a dictionary or a str to specify the starting hyperparameter config. (1) When it is a dictionary, the keys are the estimator names. If you do not need to specify starting points for an estimator, exclude its name from the dictionary. The value for each key can be either a dictionary of a list of dictionaries, corresponding to one hyperparameter configuration, or multiple hyperparameter configurations, respectively. (2) When it is a str: if "data", use data-dependent defaults; if "data:path", use data-dependent defaults which are stored at path; if "static", use data-independent defaults. Please find more details about data-dependent defaults in [zero shot AutoML](Zero-Shot-AutoML#combine-zero-shot-automl-and-hyperparameter-tuning).

-
 ### Log the trials

 The trials are logged in a file if a `log_file_name` is passed.
@ -574,7 +573,7 @@ The curve suggests that increasing the time budget may further improve the accur
 2. set t2 as the time budget, and also set `early_stop=True`. If the early stopping is triggered, you will see a warning like
 > WARNING - All estimator hyperparameters local search has converged at least once, and the total search time exceeds 10 times the time taken to find the best model.

-> WARNING - Stopping search as early_stop is set to True.
+ > WARNING - Stopping search as early_stop is set to True.

 ### How much time is needed to find the best model

--- a/website/docs/Use-Cases/Tune-User-Defined-Function.md
+++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md
@ -515,6 +515,31 @@ analysis = tune.run(
 )
 ```

+### Lexicographic Objectives
+We support tuning multiple objectives with lexicographic preference by providing the argument `lexico_objectives` to `tune.run()`.
+`lexico_objectives` is a dictionary that contains the following fields of key-value pairs:
+ - `metrics`: a list of optimization objectives, with the order reflecting the priorities/preferences of the objectives.
+ - `modes`: (optional) a list of optimization modes (each mode either "min" or "max") corresponding to the objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+ - `tolerances`: (optional) a dictionary to specify the optimality tolerances on objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical tolerance values.
+ - `targets`: (optional) a dictionary to specify the optimization targets on the objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical target values.
+
+In the following example, we want to minimize `val_loss` and `pred_time` of the model, where `val_loss` has the higher priority. The tolerances for `val_loss` and `pred_time` are 0.02 and 0 respectively. We do not have specific targets for these two objectives, so we set the target to -inf for both of them.
+
+```python
+lexico_objectives = {}
+lexico_objectives["metrics"] = ["val_loss", "pred_time"]
+lexico_objectives["modes"] = ["min", "min"]
+lexico_objectives["tolerances"] = {"val_loss": 0.02, "pred_time": 0.0}
+lexico_objectives["targets"] = {"val_loss": -float('inf'), "pred_time": -float('inf')}
+
+# provide the lexico_objectives to tune.run
+tune.run(..., search_alg=None, lexico_objectives=lexico_objectives)
+```
+NOTE:
+
+1. When `lexico_objectives` is not None, the arguments `metric` and `mode` are ignored, and flaml's tune uses CFO as the `search_alg`, so any `search_alg` provided as input is ignored as well.
+
+2. This is a new feature that will be released in version 1.1.0 and is subject to change in future versions.

 ## Hyperparameter Optimization Algorithm