* iter per learner

* code cleanup
This commit is contained in:
Chi Wang 2021-04-08 09:29:55 -07:00 committed by GitHub
Parent b7a91e0385
Commit 97a7c114ee
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
40 changed files: 1829 additions and 2122 deletions

View file

@ -6,4 +6,3 @@ import logging
# Set the root logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

View file

@ -12,11 +12,13 @@ from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, \
RepeatedKFold
from sklearn.utils import shuffle
import pandas as pd
import os
import contextlib
from .ml import compute_estimator, train_estimator, get_estimator_class, \
get_classification_objective
from .config import (
MIN_SAMPLE_TRAIN, MEM_THRES, RANDOM_SEED,
SMALL_LARGE_THRES, CV_HOLDOUT_THRESHOLD, SPLIT_RATIO, N_SPLITS,
SAMPLE_MULTIPLY_FACTOR)
from .data import concat
@ -31,21 +33,20 @@ logger_formatter = logging.Formatter(
try:
import mlflow
except ImportError:
mlflow = None
class SearchState:
@property
def search_space(self):
return self._search_space_domain
@property
def estimated_cost4improvement(self):
return max(self.time_best_found - self.time_best_found_old,
self.total_time_used - self.time_best_found)
def __init__(self, learner_class, data_size, task):
self.init_eci = learner_class.cost_relative2lgbm()
@ -59,7 +60,7 @@ class SearchState:
for name, space in search_space.items():
assert 'domain' in space
self._search_space_domain[name] = space['domain']
if 'init_value' in space:
self.init_config[name] = space['init_value']
if 'low_cost_init_value' in space:
self.low_cost_partial_config[name] = space[
@ -67,7 +68,7 @@ class SearchState:
if 'cat_hp_cost' in space:
self.cat_hp_cost[name] = space['cat_hp_cost']
self._hp_names = list(self._search_space_domain.keys())
self.search_alg = None
self.best_loss = self.best_loss_old = np.inf
self.total_time_used = 0
self.total_iter = 0
@ -75,21 +76,20 @@ class SearchState:
self.time_best_found = 0
self.time2eval_best = 0
self.time2eval_best_old = 0
self.update_count = 0
self.trained_estimator = None
self.sample_size = None
self.trial_time = 0
def update(self, analysis, time_used, save_model_history=False):
self.update_count += 1
if not analysis.trials:
return
result = analysis.trials[-1].last_result
if result:
config = result['config']
# logger.info(config)
if config and 'FLAML_sample_size' in config:
self.sample_size = config['FLAML_sample_size']
else:
self.sample_size = self.data_size
obj = result['val_loss']
train_loss = result['train_loss']
time2eval = result['time2eval']
@ -101,27 +101,28 @@ class SearchState:
self.trial_time = time2eval
self.total_time_used += time_used
self.total_iter += 1
if self.base_eci is None:
self.base_eci = time_used
if (obj is not None) and (self.best_loss is None or obj < self.best_loss):
self.best_loss_old = self.best_loss if self.best_loss < np.inf \
else 2 * obj
self.best_loss = obj
self.time_best_found_old = self.time_best_found
self.time_best_found = self.total_time_used
self.iter_best_found = self.total_iter
self.best_config = config
self.best_config_sample_size = self.sample_size
self.best_config_train_time = time_used
if time2eval:
self.time2eval_best_old = self.time2eval_best
self.time2eval_best = time2eval
if self.trained_estimator and trained_estimator and \
self.trained_estimator != trained_estimator and \
not save_model_history:
self.trained_estimator.cleanup()
if trained_estimator:
self.trained_estimator = trained_estimator
self.train_loss, self.val_loss, self.config = train_loss, obj, config
def get_hist_config_sig(self, sample_size, config):
@ -132,13 +133,12 @@ class SearchState:
def est_retrain_time(self, retrain_sample_size):
assert self.best_config_sample_size is not None, \
'need to first get best_config_sample_size'
return (self.time2eval_best * retrain_sample_size
/ self.best_config_sample_size)
class AutoMLState:
def _prepare_sample_train_data(self, sample_size):
full_size = len(self.y_train)
sampled_weight = None
@ -152,8 +152,8 @@ class AutoMLState:
if weight is not None:
sampled_weight = weight[:sample_size]
else:
sampled_X_train = concat(self.X_train, self.X_val)
sampled_y_train = np.concatenate([self.y_train, self.y_val])
weight = self.fit_kwargs.get('sample_weight')
if weight is not None:
sampled_weight = np.concatenate([weight, self.weight_val])
@ -165,54 +165,60 @@ class AutoMLState:
compute_start_time = time.time()
if 'FLAML_sample_size' in config_w_resource:
sample_size = int(config_w_resource['FLAML_sample_size'])
else:
sample_size = self.data_size
sampled_X_train, sampled_y_train, sampled_weight = \
self._prepare_sample_train_data(sample_size)
if sampled_weight is not None:
weight = self.fit_kwargs['sample_weight']
self.fit_kwargs['sample_weight'] = sampled_weight
else:
weight = None
config = config_w_resource.copy()
if 'FLAML_sample_size' in config:
del config['FLAML_sample_size']
time_left = self.time_budget - self.time_from_start
budget = time_left if sample_size == self.data_size else \
time_left / 2 * sample_size / self.data_size
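A trial on a subsample gets a throttled budget: half the remaining time, scaled by the sampled fraction; only a full-size trial may use everything left. The same arithmetic as a standalone sketch (function name is ours):

def trial_budget(time_left, sample_size, data_size):
    if sample_size == data_size:
        return time_left
    return time_left / 2 * sample_size / data_size

assert trial_budget(100.0, 10000, 10000) == 100.0
assert trial_budget(100.0, 2500, 10000) == 12.5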
trained_estimator, val_loss, train_loss, time2eval, _ = \
compute_estimator(
sampled_X_train,
sampled_y_train,
self.X_val,
self.y_val,
self.weight_val,
budget,
self.kf,
config,
self.task,
estimator,
self.eval_method,
self.metric,
self.best_loss,
self.n_jobs,
self.learner_classes.get(estimator),
self.log_training_metric,
self.fit_kwargs)
result = {
'total_time': time.time() - compute_start_time,
'time2eval': time2eval,
'train_loss': train_loss,
'val_loss': val_loss,
'trained_estimator': trained_estimator
}
with open(os.devnull, "w") as f, contextlib.redirect_stdout(f):
tune.report(**result)
if sampled_weight is not None:
self.fit_kwargs['sample_weight'] = weight
def _train_with_config(
self, estimator, config_w_resource, sample_size=None
):
config = config_w_resource.copy()
if 'FLAML_sample_size' in config:
if not sample_size:
sample_size = config['FLAML_sample_size']
del config['FLAML_sample_size']
assert sample_size is not None
sampled_X_train, sampled_y_train, sampled_weight = \
@ -220,9 +226,10 @@ class AutoMLState:
if sampled_weight is not None:
weight = self.fit_kwargs['sample_weight']
self.fit_kwargs['sample_weight'] = sampled_weight
else:
weight = None
budget = None if self.time_budget is None else (
self.time_budget - self.time_from_start)
estimator, train_time = train_estimator(
sampled_X_train,
sampled_y_train,
@ -276,7 +283,7 @@ class AutoML:
'''A dictionary of iter->(estimator, config, time),
storing the best estimator, config, and the time when the best
model is updated each time.
'''
return self._config_history
@property
@ -294,7 +301,7 @@ class AutoML:
Args:
estimator_name: a str of the estimator's name
Returns:
An object with `predict()` and `predict_proba()` method (for
classification), storing the best trained model for estimator_name.
@ -307,18 +314,18 @@ class AutoML:
@property
def best_estimator(self):
'''A string indicating the best estimator found.'''
return self._best_estimator
@property
def best_iteration(self):
'''An integer of the iteration number where the best
config is found.'''
return self._best_iteration
@property
def best_config(self):
'''A dictionary of the best configuration.'''
return self._search_states[self._best_estimator].best_config
@property
@ -335,8 +342,8 @@ class AutoML:
@property
def classes_(self):
'''A list of n_classes elements for class labels.'''
if self._label_transformer:
return self._label_transformer.classes_.tolist()
if self._trained_estimator:
return self._trained_estimator.model.classes_.tolist()
@ -358,7 +365,8 @@ class AutoML:
return None
X_test = self._preprocess(X_test)
y_pred = self._trained_estimator.predict(X_test)
if y_pred.ndim > 1:
y_pred = y_pred.flatten()
if self._label_transformer:
return self._label_transformer.inverse_transform(pd.Series(
y_pred))
@ -381,7 +389,7 @@ class AutoML:
return proba
def _preprocess(self, X):
if issparse(X):
X = X.tocsr()
if self._transformer:
X = self._transformer.transform(X)
@ -390,10 +398,8 @@ class AutoML:
def _validate_data(self, X_train_all, y_train_all, dataframe, label,
X_val=None, y_val=None):
if X_train_all is not None and y_train_all is not None:
if not (isinstance(X_train_all, np.ndarray) or issparse(X_train_all)
or isinstance(X_train_all, pd.DataFrame)):
raise ValueError(
"X_train_all must be a numpy array, a pandas dataframe, "
"or Scipy sparse matrix.")
@ -407,14 +413,14 @@ class AutoML:
y_train_all = y_train_all.flatten()
if X_train_all.shape[0] != y_train_all.shape[0]:
raise ValueError(
"# rows in X_train must match length of y_train.")
"# rows in X_train must match length of y_train.")
self._df = isinstance(X_train_all, pd.DataFrame)
self._nrow, self._ndim = X_train_all.shape
X, y = X_train_all, y_train_all
elif dataframe is not None and label is not None:
if not isinstance(dataframe, pd.DataFrame):
raise ValueError("dataframe must be a pandas DataFrame")
if label not in dataframe.columns:
raise ValueError("label must be a column name in dataframe")
self._df = True
X = dataframe.drop(columns=label)
@ -422,8 +428,8 @@ class AutoML:
y = dataframe[label]
else:
raise ValueError(
"either X_train_all+y_train_all or dataframe+label need to be provided")
if issparse(X_train_all):
"either X_train+y_train or dataframe+label are required")
if issparse(X_train_all):
self._transformer = self._label_transformer = False
self._X_train_all, self._y_train_all = X, y
else:
@ -432,12 +438,10 @@ class AutoML:
self._X_train_all, self._y_train_all = \
self._transformer.fit_transform(X, y, self._state.task)
self._label_transformer = self._transformer.label_transformer
self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
if X_val is not None and y_val is not None:
if not (isinstance(X_val, np.ndarray) or issparse(X_val)
or isinstance(X_val, pd.DataFrame)):
raise ValueError(
"X_val must be None, a numpy array, a pandas dataframe, "
"or Scipy sparse matrix.")
@ -452,8 +456,7 @@ class AutoML:
if isinstance(y_val, np.ndarray):
y_val = y_val.flatten()
if X_val.shape[0] != y_val.shape[0]:
raise ValueError("# rows in X_val must match length of y_val.")
if self._transformer:
self._state.X_val = self._transformer.transform(X_val)
else:
@ -470,14 +473,14 @@ class AutoML:
split_ratio,
n_splits):
X_val, y_val = self._state.X_val, self._state.y_val
if issparse(X_val):
X_val = X_val.tocsr()
X_train_all, y_train_all = \
self._X_train_all, self._y_train_all
if issparse(X_train_all):
X_train_all = X_train_all.tocsr()
if self._state.task != 'regression' and self._state.fit_kwargs.get(
'sample_weight') is None:
# logger.info(f"label {pd.unique(y_train_all)}")
label_set, counts = np.unique(y_train_all, return_counts=True)
# augment rare classes
@ -534,7 +537,7 @@ class AutoML:
X_rest = X_train_all.iloc[rest] if self._df else X_train_all[rest]
y_rest = y_train_all.iloc[rest] if isinstance(
y_train_all, pd.Series) else y_train_all[rest]
stratify = y_rest if self._split_type == 'stratified' else \
None
if 'sample_weight' in self._state.fit_kwargs:
X_train, X_val, y_train, y_val, weight_train, weight_val = \
@ -556,21 +559,17 @@ class AutoML:
stratify=stratify,
random_state=RANDOM_SEED)
X_train = concat(X_first, X_train)
y_train = concat(
label_set, y_train) if self._df else np.concatenate(
[label_set, y_train])
X_val = concat(X_first, X_val)
y_val = concat(label_set, y_val) if self._df else \
np.concatenate([label_set, y_val])
_, y_train_counts_elements = np.unique(y_train,
return_counts=True)
_, y_val_counts_elements = np.unique(y_val,
return_counts=True)
elif eval_method == 'holdout' and self._state.task == 'regression':
if 'sample_weight' in self._state.fit_kwargs:
X_train, X_val, y_train, y_val, self._state.fit_kwargs[
'sample_weight'], self._state.weight_val = \
@ -587,8 +586,10 @@ class AutoML:
test_size=split_ratio,
random_state=RANDOM_SEED)
self._state.data_size = X_train.shape[0]
if X_val is None:
self.data_size_full = self._state.data_size
else:
self.data_size_full = self._state.data_size + X_val.shape[0]
self._state.X_train, self._state.y_train, self._state.X_val, \
self._state.y_val = (X_train, y_train, X_val, y_val)
if self._split_type == "stratified":
@ -596,15 +597,15 @@ class AutoML:
assert y_train_all.size >= n_splits, (
f"{n_splits}-fold cross validation"
f" requires input data with at least {n_splits} examples.")
assert y_train_all.size >= 2 * n_splits, (
f"{n_splits}-fold cross validation with metric=r2 "
f"requires input data with at least {n_splits*2} examples.")
self._state.kf = RepeatedStratifiedKFold(
n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED)
else:
logger.info("Using RepeatedKFold")
self._state.kf = RepeatedKFold(
n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED)
def add_learner(self,
learner_name,
@ -624,7 +625,7 @@ class AutoML:
log_file_name: A string of the log file name
record_id: An integer of the record ID in the file,
0 corresponds to the first trial
task: A string of the task type,
'binary', 'multi', or 'regression'
Returns:
@ -638,8 +639,7 @@ class AutoML:
estimator, _ = train_estimator(
None, None, config, task, estimator,
estimator_class=self._state.learner_classes.get(estimator))
return estimator
def retrain_from_log(self,
@ -724,7 +724,8 @@ class AutoML:
self._trained_estimator = Estimator()
self._trained_estimator.model = None
return training_duration
if not best:
return
best_estimator = best.learner
best_config = best.config
sample_size = len(self._y_train_all) if train_full \
@ -756,7 +757,8 @@ class AutoML:
return training_duration
def _decide_eval_method(self, time_budget):
if self._state.X_val is not None:
return 'holdout'
nrow, dim = self._nrow, self._ndim
if nrow * dim / 0.9 < SMALL_LARGE_THRES * (
time_budget / 3600) and nrow < CV_HOLDOUT_THRESHOLD:
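This rule prefers cross validation only when the projected data footprint fits the hourly-scaled budget and the row count is under the holdout threshold. A sketch of the decision using the constants from config.py (the returned strings mirror the code above):

SMALL_LARGE_THRES = 10000000
CV_HOLDOUT_THRESHOLD = 100000

def decide_eval_method(nrow, dim, time_budget, X_val=None):
    # prefer holdout when validation data is given, or when the data
    # is large relative to the time budget
    if X_val is not None:
        return 'holdout'
    if nrow * dim / 0.9 < SMALL_LARGE_THRES * (
            time_budget / 3600) and nrow < CV_HOLDOUT_THRESHOLD:
        return 'cv'
    return 'holdout'

assert decide_eval_method(nrow=10000, dim=20, time_budget=3600) == 'cv'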
@ -824,7 +826,7 @@ class AutoML:
n_jobs: An integer of the number of threads for training
log_file_name: A string of the log file name
estimator_list: A list of strings for estimator names, or 'auto'
e.g.,
.. code-block:: python
@ -863,12 +865,12 @@ class AutoML:
self._state.fit_kwargs = fit_kwargs
self._state.weight_val = sample_weight_val
self._validate_data(X_train, y_train, dataframe, label, X_val, y_val)
self._search_states = {}  # key: estimator name; value: SearchState
self._random = np.random.RandomState(RANDOM_SEED)
self._learner_selector = learner_selector
old_level = logger.getEffectiveLevel()
self.verbose = verbose
if verbose == 0:
logger.setLevel(logging.WARNING)
if self._state.task == 'classification':
self._state.task = get_classification_objective(
@ -884,11 +886,11 @@ class AutoML:
# Add the console handler.
_ch = logging.StreamHandler()
_ch.setFormatter(logger_formatter)
logger.addHandler(_ch)
logger.info("Evaluation method: {}".format(eval_method))
self._retrain_full = retrain_full and (
eval_method == 'holdout' and self._state.X_val is None)
self._prepare_data(eval_method, split_ratio, n_splits)
self._sample = sample and eval_method != 'cv' and (
MIN_SAMPLE_TRAIN * SAMPLE_MULTIPLY_FACTOR < self._state.data_size)
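Subsampling is enabled only outside cross validation and when the data comfortably exceeds the smallest training sample. A sketch of the gate; the SAMPLE_MULTIPLY_FACTOR value below is an assumption for illustration, as its definition is not shown in this diff:

MIN_SAMPLE_TRAIN = 10000
SAMPLE_MULTIPLY_FACTOR = 4      # assumed growth factor, not from this diff

def use_sampling(sample, eval_method, data_size):
    return (sample and eval_method != 'cv'
            and MIN_SAMPLE_TRAIN * SAMPLE_MULTIPLY_FACTOR < data_size)

assert use_sampling(True, 'holdout', 100000)
assert not use_sampling(True, 'cv', 100000)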
@ -911,18 +913,17 @@ class AutoML:
if 'auto' == estimator_list:
estimator_list = ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree']
if 'regression' != self._state.task:
estimator_list += ['lrl1']
for estimator_name in estimator_list:
if estimator_name not in self._state.learner_classes:
self.add_learner(
estimator_name,
get_estimator_class(self._state.task, estimator_name))
# set up learner search space
for estimator_name in estimator_list:
estimator_class = self._state.learner_classes[estimator_name]
self._search_states[estimator_name] = SearchState(
learner_class=estimator_class,
data_size=self._state.data_size, task=self._state.task,
)
logger.info("List of ML learners in AutoML Run: {}".format(
@ -941,7 +942,7 @@ class AutoML:
self._state.n_jobs = n_jobs
self._search()
logger.info("fit succeeded")
if verbose == 0:
logger.setLevel(old_level)
def _search(self):
@ -953,18 +954,19 @@ class AutoML:
self._best_iteration = 0
self._model_history = {}
self._config_history = {}
self._max_iter_per_learner = 1000000  # TODO
self._iter_per_learner = dict([(e, 0) for e in self.estimator_list])
self._fullsize_reached = False
self._trained_estimator = None
self._best_estimator = None
self._retrained_config = {}
est_retrain_time = next_trial_time = 0
best_config_sig = None
# use ConcurrencyLimiter to limit the amount of concurrency when
# using a search algorithm
better = True  # whether we find a better model in one trial
if self._ensemble:
self.best_model = {}
try:
from ray.tune.suggest import ConcurrencyLimiter
except ImportError:
@ -978,7 +980,8 @@ class AutoML:
from .searcher.suggestion import OptunaSearch as SearchAlgo
elif 'bs' == self._hpo_method:
from flaml import BlendSearch as SearchAlgo
else:
raise NotImplementedError
for self._track_iter in range(self._max_iter):
if self._estimator_index is None:
@ -987,18 +990,18 @@ class AutoML:
estimator = self._select_estimator(self.estimator_list)
if not estimator:
break
logger.info(f"iteration {self._track_iter}"
f" current learner {estimator}")
logger.info(
f"iteration {self._track_iter}, current learner {estimator}")
search_state = self._search_states[estimator]
self._state.time_from_start = time.time() - self._start_time_flag
time_left = self._state.time_budget - self._state.time_from_start
budget_left = time_left if not self._retrain_full or better or (
not self.best_estimator) or self._search_states[
self.best_estimator].sample_size < self._state.data_size \
else time_left - est_retrain_time
if not search_state.search_alg:
search_state.training_function = partial(
AutoMLState._compute_with_config_base,
self._state, estimator)
search_space = search_state.search_space
if self._sample:
@ -1008,14 +1011,13 @@ class AutoML:
else:
prune_attr = min_resource = max_resource = None
learner_class = self._state.learner_classes.get(estimator)
if 'grid' == self._hpo_method:  # for synthetic exp only
points_to_evaluate = []
space = search_space
keys = list(space.keys())
domain0, domain1 = space[keys[0]], space[keys[1]]
for x1 in range(domain0.lower, domain0.upper + 1):
for x2 in range(domain1.lower, domain1.upper + 1):
points_to_evaluate.append({
keys[0]: x1,
keys[1]: x2,
@ -1023,27 +1025,27 @@ class AutoML:
self._max_iter_per_learner = len(points_to_evaluate)
low_cost_partial_config = None
else:
points_to_evaluate = [search_state.init_config]
low_cost_partial_config = search_state.low_cost_partial_config
if self._hpo_method in ('bs', 'cfo', 'grid'):
algo = SearchAlgo(
metric='val_loss', mode='min', space=search_space,
points_to_evaluate=points_to_evaluate,
low_cost_partial_config=low_cost_partial_config,
cat_hp_cost=search_state.cat_hp_cost,
prune_attr=prune_attr,
min_resource=min_resource,
max_resource=max_resource,
resources_per_trial={"cpu": self._state.n_jobs,
"mem": self._mem_thres},
"mem": self._mem_thres},
mem_size=learner_class.size)
else:
algo = SearchAlgo(
metric='val_loss', mode='min', space=search_space,
points_to_evaluate=points_to_evaluate,
)
search_state.search_alg = ConcurrencyLimiter(algo,
max_concurrent=1)
else:
search_space = None
if self._hpo_method in ('bs', 'cfo'):
@ -1053,25 +1055,23 @@ class AutoML:
},
)
start_run_time = time.time()
analysis = tune.run(
search_state.training_function,
search_alg=search_state.search_alg,
time_budget_s=budget_left,
verbose=max(self.verbose - 1, 0),
use_ray=False)
time_used = time.time() - start_run_time
better = False
if analysis.trials:
search_state.update(analysis, time_used=time_used,
save_model_history=self._save_model_history)
if self._estimator_index is None:
eci_base = search_state.init_eci
self._eci.append(search_state.estimated_cost4improvement)
for e in self.estimator_list[1:]:
self._eci.append(self._search_states[e].init_eci
/ eci_base * self._eci[0])
self._estimator_index = 0
self._state.time_from_start = time.time() - self._start_time_flag
# logger.info(f"{self._search_states[estimator].sample_size}, {data_size}")
@ -1081,7 +1081,7 @@ class AutoML:
self._fullsize_reached = True
if search_state.best_loss < self._state.best_loss:
best_config_sig = estimator + search_state.get_hist_config_sig(
self.data_size_full,
search_state.best_config)
self._state.best_loss = search_state.best_loss
self._best_estimator = estimator
@ -1101,100 +1101,102 @@ class AutoML:
self._trained_estimator = search_state.trained_estimator
self._best_iteration = self._track_iter
better = True
next_trial_time = search_state.time2eval_best
if better or self._log_type == 'all':
self._training_log.append(
self._iter_per_learner[estimator],
search_state.train_loss,
search_state.trial_time,
self._state.time_from_start,
search_state.val_loss,
search_state.config,
search_state.best_loss,
search_state.best_config,
estimator,
search_state.sample_size)
if mlflow is not None and mlflow.active_run():
with mlflow.start_run(nested=True):
mlflow.log_metric('iter_counter',
self._iter_per_learner[estimator])
mlflow.log_param('train_loss',
search_state.train_loss)
mlflow.log_metric('trial_time',
search_state.trial_time)
mlflow.log_metric('total_search_time',
self._state.time_from_start)
mlflow.log_metric('validation_loss',
search_state.val_loss)
mlflow.log_param('config',
search_state.config)
mlflow.log_param('learner',
estimator)
mlflow.log_param('sample_size',
search_state.sample_size)
mlflow.log_metric('best_validation_loss',
search_state.best_loss)
mlflow.log_param('best_config',
search_state.best_config)
mlflow.log_param('best_learner',
self._best_estimator)
logger.info(
" at {:.1f}s,\tbest {}'s error={:.4f},\tbest {}'s error={:.4f}".format(
self._state.time_from_start,
estimator,
search_state.best_loss,
self._best_estimator,
self._state.best_loss))
" at {:.1f}s,\tbest {}'s error={:.4f},\tbest {}'s error={:.4f}".format(
self._state.time_from_start,
estimator,
search_state.best_loss,
self._best_estimator,
self._state.best_loss))
else:
logger.info(f"no enough budget for learner {estimator}")
if self._estimator_index is not None:
self.estimator_list.remove(estimator)
self._estimator_index -= 1
if self._retrain_full and best_config_sig and not better and (
self._search_states[
self._best_estimator].sample_size == self._state.data_size
) and (est_retrain_time
<= self._state.time_budget - self._state.time_from_start
<= est_retrain_time + next_trial_time):
self._trained_estimator, \
retrain_time = self._state._train_with_config(
self._best_estimator,
self._search_states[self._best_estimator].best_config,
self.data_size_full)
logger.info("retrain {} for {:.1f}s".format(
estimator, retrain_time))
self._retrained_config[best_config_sig] = retrain_time
est_retrain_time = 0
self._state.time_from_start = time.time() - self._start_time_flag
if (self._state.time_from_start >= self._state.time_budget
or not self.estimator_list):
break
if self._ensemble and self._best_estimator:
time_left = self._state.time_budget - self._state.time_from_start
time_ensemble = self._search_states[
self._best_estimator].time2eval_best
if time_left < time_ensemble < 2 * time_left:
break
# Add a checkpoint for the current best config to the log.
self._training_log.checkpoint()
if self._best_estimator:
self._selected = self._search_states[self._best_estimator]
self._trained_estimator = self._selected.trained_estimator
self.modelcount = sum(
search_state.total_iter
for search_state in self._search_states.values())
if self._trained_estimator:
logger.info(f'selected model: {self._trained_estimator.model}')
if self._ensemble:
search_states = list(x for x in self._search_states.items()
if x[1].trained_estimator)
search_states.sort(key=lambda x: x[1].best_loss)
estimators = [(x[0], x[1].trained_estimator)
for x in search_states[:2]]
estimators += [
(x[0], x[1].trained_estimator) for x in search_states[2:]
if x[1].best_loss < 4 * self._selected.best_loss]
logger.info(estimators)
if len(estimators) <= 1:
return
if self._state.task != "regression":
from sklearn.ensemble import StackingClassifier as Stacker
for e in estimators:
@ -1202,14 +1204,13 @@ class AutoML:
else:
from sklearn.ensemble import StackingRegressor as Stacker
best_m = self._trained_estimator
stacker = Stacker(estimators, best_m, n_jobs=self._state.n_jobs,
passthrough=True)
if self._sample_weight_full is not None:
self._state.fit_kwargs[
'sample_weight'] = self._sample_weight_full
stacker.fit(self._X_train_all, self._y_train_all,
**self._state.fit_kwargs)
logger.info(f'ensemble: {stacker}')
self._trained_estimator = stacker
self._trained_estimator.model = stacker
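The ensemble path stacks the top trained learners with the single best model as the final estimator, and passthrough=True lets the meta-learner see the raw features as well. A minimal standalone sketch with plain sklearn estimators standing in for FLAML's wrappers:

from sklearn.datasets import make_classification
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, random_state=1)
estimators = [('lr', LogisticRegression(max_iter=1000)),
              ('tree', DecisionTreeClassifier())]
stacker = StackingClassifier(
    estimators, final_estimator=LogisticRegression(max_iter=1000),
    n_jobs=1, passthrough=True)  # meta-learner also sees raw features
stacker.fit(X, y)
print(stacker.predict(X[:5]))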
@ -1233,42 +1234,35 @@ class AutoML:
inv = []
untried_exists = False
for i, estimator in enumerate(estimator_list):
if estimator in self._search_states and (
self._search_states[estimator].sample_size
):  # sample_size=none meaning no result
search_state = self._search_states[estimator]
if (self._search_states[estimator].time2eval_best
> self._state.time_budget - self._state.time_from_start
or self._iter_per_learner[estimator]
>= self._max_iter_per_learner):
inv.append(0)
continue
estimated_cost = search_state.estimated_cost4improvement
if search_state.sample_size < self._state.data_size:
estimated_cost = min(
estimated_cost,
search_state.time2eval_best * min(
SAMPLE_MULTIPLY_FACTOR,
self._state.data_size / search_state.sample_size))
gap = search_state.best_loss - self._state.best_loss
if gap > 0 and not self._ensemble:
delta_loss = (search_state.best_loss_old
- search_state.best_loss) or search_state.best_loss
delta_time = (search_state.total_time_used
- search_state.time_best_found_old) or 1e-10
speed = delta_loss / delta_time
if speed:
estimated_cost = max(2 * gap / speed, estimated_cost)
if estimated_cost == 0:
estimated_cost = 1e-10
inv.append(1 / estimated_cost)
else:
estimated_cost = self._eci[i]
inv.append(0)
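Each entry of inv is 1/estimated_cost (or 0 for a learner to skip), so learners that look cheaper to improve are proposed more often. A self-contained sketch of turning those weights into a pick; the actual sampling step lives outside this diff:

import numpy as np

estimator_list = ['lgbm', 'rf', 'xgboost']
inv = np.array([0.5, 0.0, 0.125])   # 1 / estimated_cost per learner; 0 = skip
rng = np.random.RandomState(1)
choice = rng.choice(estimator_list, p=inv / inv.sum())
print(choice)   # cheaper-to-improve learners are sampled more often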

View file

@ -1,12 +1,12 @@
'''!
* Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
'''
N_SPLITS = 5
RANDOM_SEED = 1
SPLIT_RATIO = 0.1
MEM_THRES = 4 * (1024 ** 3)
SMALL_LARGE_THRES = 10000000
MIN_SAMPLE_TRAIN = 10000
CV_HOLDOUT_THRESHOLD = 100000

View file

@ -1,6 +1,6 @@
'''!
* Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
'''
import numpy as np
@ -10,7 +10,7 @@ from .training_log import training_log_reader
def load_openml_dataset(dataset_id, data_dir=None, random_state=0):
'''Load dataset from open ML.
If the file is not cached locally, download it from open ML.
@ -23,7 +23,7 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0):
X_train: A 2d numpy array of training data
X_test: A 2d numpy array of test data
y_train: A 1d numpy array of labels for training data
y_test: A 1d numpy array of labels for test data
'''
import os
import openml
@ -58,9 +58,9 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0):
def load_openml_task(task_id, data_dir):
'''Load task from open ML.
Use the first fold of the task.
If the file is not cached locally, download it from open ML.
Args:
@ -71,7 +71,7 @@ def load_openml_task(task_id, data_dir):
X_train: A 2d numpy array of training data
X_test: A 2d numpy array of test data
y_train: A 1d numpy array of labels for training data
y_test: A 1d numpy array of labels for test data
'''
import os
import openml
@ -115,12 +115,12 @@ def get_output_from_log(filename, time_budget):
Returns:
training_time_list: A list of the finished time of each logged iter
best_error_list:
A list of the best validation error after each logged iter
error_list: A list of the validation error of each logged iter
config_list:
A list of the estimator, sample size and config of each logged iter
logged_metric_list: A list of the logged metric of each logged iter
'''
best_config = None
@ -186,7 +186,6 @@ class DataTransformer:
'''transform X, y
'''
def fit_transform(self, X, y, task):
if isinstance(X, pd.DataFrame):
X = X.copy()
@ -223,17 +222,18 @@ class DataTransformer:
X_num = X[num_columns]
if drop and np.issubdtype(X_num.columns.dtype, np.integer):
X_num.columns = range(X_num.shape[1])
else:
drop = False
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
self.transformer = ColumnTransformer([(
'continuous',
SimpleImputer(missing_values=np.nan, strategy='median'),
X_num.columns)])
X[num_columns] = self.transformer.fit_transform(X_num)
self._cat_columns, self._num_columns = cat_columns, num_columns
self._drop = drop
if task == 'regression':
self.label_transformer = None
else:

View file

@ -1,15 +1,19 @@
'''!
* Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
'''
import time
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
accuracy_score, mean_absolute_error, log_loss, average_precision_score, \
f1_score
from sklearn.model_selection import RepeatedStratifiedKFold
from .model import (
XGBoostEstimator, XGBoostSklearnEstimator, RandomForestEstimator,
LGBMEstimator, LRL1Classifier, LRL2Classifier, CatBoostEstimator,
ExtraTreeEstimator, KNeighborsEstimator)
import logging
logger = logging.getLogger(__name__)
@ -18,7 +22,6 @@ logger = logging.getLogger(__name__)
def get_estimator_class(task, estimator_name):
''' when adding a new learner, need to add an elif branch '''
if 'xgboost' in estimator_name:
if 'regression' in task:
estimator_class = XGBoostEstimator
@ -31,7 +34,7 @@ def get_estimator_class(task, estimator_name):
elif 'lrl1' in estimator_name:
estimator_class = LRL1Classifier
elif 'lrl2' in estimator_name:
estimator_class = LRL2Classifier
elif 'catboost' in estimator_name:
estimator_class = CatBoostEstimator
elif 'extra_tree' in estimator_name:
@ -39,22 +42,24 @@ def get_estimator_class(task, estimator_name):
elif 'kneighbor' in estimator_name:
estimator_class = KNeighborsEstimator
else:
raise ValueError(
estimator_name + ' is not a built-in learner. '
'Please use AutoML.add_learner() to add a customized learner.')
return estimator_class
def sklearn_metric_loss_score(
metric_name, y_predict, y_true, labels=None, sample_weight=None
):
'''Loss using the specified metric
Args:
metric_name: A string of the metric name, one of
'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
'f1', 'ap'
y_predict: A 1d or 2d numpy array of the predictions which can be
used to calculate the metric. E.g., 2d for log_loss and 1d
for others.
y_true: A 1d numpy array of the true labels
labels: A 1d numpy array of the unique labels
sample_weight: A 1d numpy array of the sample weight
@ -66,69 +71,69 @@ def sklearn_metric_loss_score(metric_name, y_predict, y_true, labels=None,
if 'r2' in metric_name:
score = 1.0 - r2_score(y_true, y_predict, sample_weight=sample_weight)
elif metric_name == 'rmse':
score = np.sqrt(mean_squared_error(
y_true, y_predict, sample_weight=sample_weight))
elif metric_name == 'mae':
score = mean_absolute_error(
y_true, y_predict, sample_weight=sample_weight)
elif metric_name == 'mse':
score = mean_squared_error(
y_true, y_predict, sample_weight=sample_weight)
elif metric_name == 'accuracy':
score = 1.0 - accuracy_score(
y_true, y_predict, sample_weight=sample_weight)
elif 'roc_auc' in metric_name:
score = 1.0 - roc_auc_score(
y_true, y_predict, sample_weight=sample_weight)
elif 'log_loss' in metric_name:
score = log_loss(
y_true, y_predict, labels=labels, sample_weight=sample_weight)
elif 'f1' in metric_name:
score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
elif 'ap' in metric_name:
score = 1 - average_precision_score(
y_true, y_predict, sample_weight=sample_weight)
else:
raise ValueError(
metric_name + ' is not a built-in metric, '
'currently built-in metrics are: '
'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, ap. '
'please pass a customized metric function to AutoML.fit(metric=func)')
return score
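Every built-in metric is returned as a loss (1 - score for scores, the raw error otherwise), so the tuner always minimizes. A usage sketch of the function above:

import numpy as np

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
# accuracy is 0.75, so the returned loss is 0.25
print(sklearn_metric_loss_score('accuracy', y_pred, y_true))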
def get_y_pred(estimator, X, eval_metric, obj):
if eval_metric in ['roc_auc', 'ap'] and 'binary' in obj:
y_pred_classes = estimator.predict_proba(X)
y_pred = y_pred_classes[
:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
elif eval_metric in ['log_loss', 'roc_auc']:
y_pred = estimator.predict_proba(X)
else:
y_pred = estimator.predict(X)
return y_pred
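For binary roc_auc and ap, only the positive-class column of predict_proba is scored. The slicing in isolation:

import numpy as np

proba = np.array([[0.9, 0.1], [0.2, 0.8]])   # shape (n, 2) from predict_proba
y_pred = proba[:, 1] if proba.ndim > 1 else proba
print(y_pred)   # [0.1 0.8], scores for the positive class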
def get_test_loss(
estimator, X_train, y_train, X_test, y_test, weight_test,
eval_metric, obj, labels=None, budget=None, train_loss=False, fit_kwargs={}
):
start = time.time()
train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
if isinstance(eval_metric, str):
test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
labels, weight_test)
if train_loss is not False:
test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
train_loss = sklearn_metric_loss_score(
eval_metric, test_pred_y,
y_train, labels, fit_kwargs.get('sample_weight'))
else:  # customized metric function
test_loss, train_loss = eval_metric(
X_test, y_test, estimator, labels, X_train, y_train,
weight_test, fit_kwargs.get('sample_weight'))
train_time = time.time() - start
return test_loss, train_time, train_loss
@ -137,9 +142,11 @@ def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
return train_time
def evaluate_model(
estimator, X_train, y_train, X_val, y_val, weight_val,
budget, kf, task, eval_method, eval_metric, best_val_loss, train_loss=False,
fit_kwargs={}
):
if 'holdout' in eval_method:
val_loss, train_loss, train_time = evaluate_model_holdout(
estimator, X_train, y_train, X_val, y_val, weight_val, budget,
@ -147,33 +154,37 @@ def evaluate_model(estimator, X_train, y_train, X_val, y_val, weight_val,
fit_kwargs=fit_kwargs)
else:
val_loss, train_loss, train_time = evaluate_model_CV(
estimator, X_train, y_train, budget, kf, task,
eval_metric, best_val_loss, train_loss=train_loss,
fit_kwargs=fit_kwargs)
return val_loss, train_loss, train_time
def evaluate_model_holdout(
estimator, X_train, y_train, X_val, y_val,
weight_val, budget, task, eval_metric, best_val_loss, train_loss=False,
fit_kwargs={}
):
val_loss, train_time, train_loss = get_test_loss(
estimator, X_train, y_train, X_val, y_val, weight_val, eval_metric,
task, budget=budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
return val_loss, train_loss, train_time
def evaluate_model_CV(
estimator, X_train_all, y_train_all, budget, kf,
task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}
):
start_time = time.time()
total_val_loss = total_train_loss = 0
train_time = 0
valid_fold_num = 0
n = kf.get_n_splits()
X_train_split, y_train_split = X_train_all, y_train_all
if task == 'regression':
labels = None
else:
labels = np.unique(y_train_all)
if isinstance(kf, RepeatedStratifiedKFold):
kf = kf.split(X_train_split, y_train_split)
@ -181,7 +192,7 @@ def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
kf = kf.split(X_train_split)
rng = np.random.RandomState(2020)
val_loss_list = []
budget_per_train = budget / (n + 1)
if 'sample_weight' in fit_kwargs:
weight = fit_kwargs['sample_weight']
weight_val = None
@ -207,24 +218,27 @@ def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
train_index], weight[val_index]
val_loss_i, train_time_i, train_loss_i = get_test_loss(
estimator, X_train, y_train, X_val, y_val, weight_val,
eval_metric, task, labels, budget_per_train,
train_loss=train_loss, fit_kwargs=fit_kwargs)
if weight is not None:
fit_kwargs['sample_weight'] = weight
valid_fold_num += 1
total_val_loss += val_loss_i
if train_loss is not False:
if total_train_loss != 0:
total_train_loss += train_loss_i
else:
total_train_loss = train_loss_i
train_time += train_time_i
if valid_fold_num == n:
val_loss_list.append(total_val_loss / valid_fold_num)
total_val_loss = valid_fold_num = 0
elif time.time() - start_time >= budget:
val_loss_list.append(total_val_loss / valid_fold_num)
break
val_loss = np.max(val_loss_list)
if train_loss is not False:
train_loss = total_train_loss / n
budget -= time.time() - start_time
if val_loss < best_val_loss and budget > budget_per_train:
estimator.cleanup()
@ -232,15 +246,17 @@ def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
return val_loss, train_loss, train_time
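evaluate_model_CV reserves time for one extra training beyond the folds (budget / (n + 1)), averages fold losses per repeat, stops early when the budget runs out, and reports the worst repeat mean as val_loss. The accounting in miniature (timings assumed):

budget, n = 60.0, 5
budget_per_train = budget / (n + 1)   # leaves room for one final refit
assert budget_per_train == 10.0
repeat_means = [0.21, 0.24]           # mean val loss of each completed repeat
val_loss = max(repeat_means)          # pessimistic estimate across repeats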
def compute_estimator(
X_train, y_train, X_val, y_val, weight_val, budget, kf,
config_dic, task, estimator_name, eval_method, eval_metric,
best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
fit_kwargs={}
):
start_time = time.time()
estimator_class = estimator_class or get_estimator_class(
task, estimator_name)
estimator = estimator_class(
**config_dic, task=task, n_jobs=n_jobs)
val_loss, train_loss, train_time = evaluate_model(
estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf, task,
eval_method, eval_metric, best_val_loss, train_loss=train_loss,
@ -249,16 +265,17 @@ def compute_estimator(X_train, y_train, X_val, y_val, weight_val, budget, kf,
return estimator, val_loss, train_loss, train_time, all_time
def train_estimator(
X_train, y_train, config_dic, task,
estimator_name, n_jobs=1, estimator_class=None, budget=None, fit_kwargs={}
):
start_time = time.time()
estimator_class = estimator_class or get_estimator_class(
task, estimator_name)
estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
if X_train is not None:
train_time = train_model(
estimator, X_train, y_train, budget, fit_kwargs)
else:
estimator = estimator.estimator_class(**estimator.params)
train_time = time.time() - start_time

View file

@ -1,6 +1,6 @@
'''!
* Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
'''
import numpy as np
@ -24,13 +24,13 @@ class BaseEstimator:
Typical example:
XGBoostEstimator: for regression
XGBoostSklearnEstimator: for classification
LGBMEstimator, RandomForestEstimator, LRL1Classifier, LRL2Classifier:
for both regression and classification
'''
def __init__(self, task='binary:logistic', **params):
'''Constructor
Args:
task: A string of the task type, one of
'binary:logistic', 'multi:softmax', 'regression'
@ -43,8 +43,8 @@ class BaseEstimator:
if '_estimator_type' in params:
self._estimator_type = params['_estimator_type']
else:
self._estimator_type = "regressor" if task=='regression' \
else "classifier"
self._estimator_type = "regressor" if task == 'regression' \
else "classifier"
def get_params(self, deep=False):
params = self.params.copy()
@ -58,7 +58,7 @@ class BaseEstimator:
return self._model.classes_
@property
def n_features_in_(self):
return self.model.n_features_in_
@property
@ -70,7 +70,7 @@ class BaseEstimator:
def _preprocess(self, X):
return X
def _fit(self, X_train, y_train, **kwargs):
current_time = time.time()
X_train = self._preprocess(X_train)
@ -82,7 +82,7 @@ class BaseEstimator:
def fit(self, X_train, y_train, budget=None, **kwargs):
'''Train the model from given training data
Args:
X_train: A numpy array of training data in shape n*m
y_train: A numpy array of labels in shape n*1
@ -95,14 +95,14 @@ class BaseEstimator:
def predict(self, X_test):
'''Predict label from features
Args:
X_test: A numpy array of featurized instances, shape n*m
Returns:
A numpy array of shape n*1.
Each element is the label for an instance
'''
X_test = self._preprocess(X_test)
return self._model.predict(X_test)
@ -127,25 +127,26 @@ class BaseEstimator:
X_test = self._preprocess(X_test)
return self._model.predict_proba(X_test)
def cleanup(self):
pass
@classmethod
def search_space(cls, **params):
'''[required method] search space
Returns:
A dictionary of the search space.
Each key is the name of a hyperparameter, and value is a dict with
its domain and init_value (optional), cat_hp_cost (optional)
e.g.,
{'domain': tune.randint(lower=1, upper=10), 'init_value': 1}
'''
return {}
@classmethod
def size(cls, config):
'''[optional method] memory size of the estimator in bytes
Args:
config - the dict of the hyperparameter config
@ -163,7 +164,6 @@ class BaseEstimator:
class SKLearnEstimator(BaseEstimator):
def _preprocess(self, X):
if isinstance(X, pd.DataFrame):
X = X.copy()
@ -174,63 +174,64 @@ class SKLearnEstimator(BaseEstimator):
class LGBMEstimator(BaseEstimator):
@classmethod
def search_space(cls, data_size, **params):
upper = min(32768, int(data_size))
return {
'n_estimators': {
'domain': tune.qloguniform(lower=4, upper=upper, q=1),
'init_value': 4,
'low_cost_init_value': 4,
},
'num_leaves': {
'domain': tune.qloguniform(lower=4, upper=upper, q=1),
'init_value': 4,
'low_cost_init_value': 4,
},
'min_child_samples': {
'domain': tune.qloguniform(lower=2, upper=2**7, q=1),
'init_value': 20,
},
'learning_rate': {
'domain': tune.loguniform(lower=1 / 1024, upper=1.0),
'init_value': 0.1,
},
'subsample': {
'domain': tune.uniform(lower=0.1, upper=1.0),
'init_value': 1.0,
},
'log_max_bin': {
'domain': tune.qloguniform(lower=3, upper=10, q=1),
'init_value': 8,
},
'colsample_bytree': {
'domain': tune.uniform(lower=0.01, upper=1.0),
'init_value': 1.0,
},
'reg_alpha': {
'domain': tune.loguniform(lower=1 / 1024, upper=1024),
'init_value': 1 / 1024,
},
'reg_lambda': {
'domain': tune.loguniform(lower=1 / 1024, upper=1024),
'init_value': 1.0,
},
}
@classmethod
def size(cls, config):
num_leaves = int(round(config.get('num_leaves') or config['max_leaves']))
n_estimators = int(round(config['n_estimators']))
return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
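The size estimate prices each tree by its leaves: num_leaves leaf records plus num_leaves - 1 internal nodes, in 8-byte units. A worked example, taking the formula at face value:

num_leaves, n_estimators = 32, 100
size_bytes = (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
assert size_bytes == 176800.0   # roughly 0.17 MB for this config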
def __init__(
self, task='binary:logistic', n_jobs=1,
n_estimators=2, num_leaves=2, min_child_samples=20, learning_rate=0.1,
subsample=1.0, reg_lambda=1.0, reg_alpha=0.0,
colsample_bytree=1.0, log_max_bin=8, **params
):
super().__init__(task, **params)
# Default: regression for LGBMRegressor,
# binary or multiclass for LGBMClassifier
if 'regression' in task:
objective = 'regression'
@ -238,21 +239,22 @@ class LGBMEstimator(BaseEstimator):
objective = 'binary'
elif 'multi' in task:
objective = 'multiclass'
else:
objective = 'regression'
self.params = {
"n_estimators": int(round(n_estimators)),
"num_leaves": int(round(num_leaves)),
'objective': params.get("objective", objective),
'n_jobs': n_jobs,
'learning_rate': float(learning_rate),
'reg_alpha': float(reg_alpha),
'reg_lambda': float(reg_lambda),
'min_child_samples': int(round(min_child_samples)),
'colsample_bytree': float(colsample_bytree),
'subsample': float(subsample),
}
self.params['max_bin'] = params['max_bin'] if 'max_bin' in params else (
1 << int(round(log_max_bin))) - 1
if 'regression' in task:
self.estimator_class = LGBMRegressor
else:
@ -261,33 +263,35 @@ class LGBMEstimator(BaseEstimator):
self._train_size = 0
def _preprocess(self, X):
if not isinstance(X, pd.DataFrame) and issparse(X) and np.issubdtype(
X.dtype, np.integer):
X = X.astype(float)
return X
def fit(self, X_train, y_train, budget=None, **kwargs):
start_time = time.time()
n_iter = self.params["n_estimators"]
if (not self._time_per_iter or
abs(self._train_size-X_train.shape[0])>4) and budget is not None:
if (not self._time_per_iter or abs(
self._train_size - X_train.shape[0]) > 4) and budget is not None:
self.params["n_estimators"] = 1
self._t1 = self._fit(X_train, y_train, **kwargs)
if self._t1 >= budget:
if self._t1 >= budget:
self.params["n_estimators"] = n_iter
return self._t1
self.params["n_estimators"] = 4
self._t2 = self._fit(X_train, y_train, **kwargs)
            self._time_per_iter = (self._t2 - self._t1) / (
                self.params["n_estimators"] - 1) if self._t2 > self._t1 \
else self._t1 if self._t1 else 0.001
self._train_size = X_train.shape[0]
if self._t1+self._t2>=budget or n_iter==self.params["n_estimators"]:
if self._t1 + self._t2 >= budget or n_iter == self.params[
"n_estimators"]:
self.params["n_estimators"] = n_iter
return time.time() - start_time
if budget is not None:
self.params["n_estimators"] = min(n_iter, int((budget-time.time()+
start_time-self._t1)/self._time_per_iter+1))
self.params["n_estimators"] = min(n_iter, int(
(budget - time.time() + start_time - self._t1)
/ self._time_per_iter + 1))
if self.params["n_estimators"] > 0:
self._fit(X_train, y_train, **kwargs)
self.params["n_estimators"] = n_iter
@@ -298,10 +302,9 @@ class LGBMEstimator(BaseEstimator):
class XGBoostEstimator(SKLearnEstimator):
''' not using sklearn API, used for regression '''
@classmethod
    def search_space(cls, data_size, **params):
        upper = min(32768, int(data_size))
return {
'n_estimators': {
'domain': tune.qloguniform(lower=4, upper=upper, q=1),
@@ -318,31 +321,31 @@ class XGBoostEstimator(SKLearnEstimator):
'init_value': 1,
},
'learning_rate': {
            'domain': tune.loguniform(lower=1 / 1024, upper=1.0),
'init_value': 0.1,
},
        'subsample': {
            'domain': tune.uniform(lower=0.1, upper=1.0),
            'init_value': 1.0,
        },
        'colsample_bylevel': {
            'domain': tune.uniform(lower=0.01, upper=1.0),
            'init_value': 1.0,
        },
        'colsample_bytree': {
            'domain': tune.uniform(lower=0.01, upper=1.0),
            'init_value': 1.0,
        },
        'reg_alpha': {
            'domain': tune.loguniform(lower=1 / 1024, upper=1024),
            'init_value': 1 / 1024,
        },
        'reg_lambda': {
            'domain': tune.loguniform(lower=1 / 1024, upper=1024),
            'init_value': 1.0,
        },
    }
@classmethod
def size(cls, config):
return LGBMEstimator.size(config)
@@ -351,10 +354,12 @@ class XGBoostEstimator(SKLearnEstimator):
def cost_relative2lgbm(cls):
return 1.6
    def __init__(
        self, task='regression', all_thread=False, n_jobs=1,
        n_estimators=4, max_leaves=4, subsample=1.0, min_child_weight=1,
        learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0, colsample_bylevel=1.0,
        colsample_bytree=1.0, tree_method='auto', **params
    ):
super().__init__(task, **params)
self._n_estimators = int(round(n_estimators))
self._max_leaves = int(round(max_leaves))
@@ -372,8 +377,8 @@ class XGBoostEstimator(SKLearnEstimator):
'min_child_weight': float(min_child_weight),
'booster': params.get('booster', 'gbtree'),
'colsample_bylevel': float(colsample_bylevel),
            'colsample_bytree': float(colsample_bytree),
        }
if all_thread:
del self.params['nthread']
@@ -383,7 +388,7 @@ class XGBoostEstimator(SKLearnEstimator):
return params
def fit(self, X_train, y_train, budget=None, **kwargs):
        start_time = time.time()
if not issparse(X_train):
self.params['tree_method'] = 'hist'
X_train = self._preprocess(X_train)
@@ -392,8 +397,8 @@ class XGBoostEstimator(SKLearnEstimator):
'sample_weight'])
else:
dtrain = xgb.DMatrix(X_train, label=y_train)
        if self._max_leaves > 0:
self._model = xgb.train(self.params, dtrain, self._n_estimators)
del dtrain
train_time = time.time() - start_time
@@ -411,37 +416,38 @@
class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
''' using sklearn API, used for classification '''
@classmethod
    def search_space(cls, data_size, **params):
return XGBoostEstimator.search_space(data_size)
@classmethod
def cost_relative2lgbm(cls):
return XGBoostEstimator.cost_relative2lgbm()
    def __init__(
        self, task='binary:logistic', n_jobs=1,
        n_estimators=4, max_leaves=4, subsample=1.0,
        min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0,
        colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist',
        **params
    ):
super().__init__(task, **params)
self.params = {
"n_estimators": int(round(n_estimators)),
'max_leaves': int(round(max_leaves)),
'max_depth': 0,
'grow_policy': params.get("grow_policy", 'lossguide'),
'tree_method': tree_method,
'verbosity': 0,
'n_jobs': n_jobs,
'learning_rate': float(learning_rate),
'subsample': float(subsample),
'reg_alpha': float(reg_alpha),
'reg_lambda': float(reg_lambda),
'min_child_weight': float(min_child_weight),
'booster': params.get('booster', 'gbtree'),
'colsample_bylevel': float(colsample_bylevel),
'colsample_bytree': float(colsample_bytree),
"n_estimators": int(round(n_estimators)),
'max_leaves': int(round(max_leaves)),
'max_depth': 0,
'grow_policy': params.get("grow_policy", 'lossguide'),
'tree_method': tree_method,
'verbosity': 0,
'n_jobs': n_jobs,
'learning_rate': float(learning_rate),
'subsample': float(subsample),
'reg_alpha': float(reg_alpha),
'reg_lambda': float(reg_lambda),
'min_child_weight': float(min_child_weight),
'booster': params.get('booster', 'gbtree'),
'colsample_bylevel': float(colsample_bylevel),
'colsample_bytree': float(colsample_bytree),
}
if 'regression' in task:
@@ -455,13 +461,12 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
if issparse(X_train):
self.params['tree_method'] = 'auto'
return super().fit(X_train, y_train, budget, **kwargs)
class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
@classmethod
    def search_space(cls, data_size, task, **params):
upper = min(2048, int(data_size))
space = {
'n_estimators': {
@@ -489,13 +494,15 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
def cost_relative2lgbm(cls):
return 2.0
    def __init__(
        self, task='binary:logistic', n_jobs=1,
        n_estimators=4, max_features=1.0, criterion='gini', **params
    ):
super().__init__(task, **params)
self.params = {
"n_estimators": int(round(n_estimators)),
"n_jobs": n_jobs,
'max_features': float(max_features),
"n_estimators": int(round(n_estimators)),
"n_jobs": n_jobs,
'max_features': float(max_features),
}
if 'regression' in task:
self.estimator_class = RandomForestRegressor
@@ -512,12 +519,11 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
class ExtraTreeEstimator(RandomForestEstimator):
@classmethod
def cost_relative2lgbm(cls):
return 1.9
    def __init__(self, task='binary:logistic', **params):
super().__init__(task, **params)
if 'regression' in task:
self.estimator_class = ExtraTreesRegressor
@@ -527,9 +533,8 @@ class ExtraTreeEstimator(RandomForestEstimator):
class LRL1Classifier(SKLearnEstimator):
@classmethod
    def search_space(cls, **params):
return {
'C': {
'domain': tune.loguniform(lower=0.03125, upper=32768.0),
@@ -541,8 +546,10 @@ class LRL1Classifier(SKLearnEstimator):
def cost_relative2lgbm(cls):
return 160
    def __init__(
        self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
        **params
    ):
super().__init__(task, **params)
self.params = {
'penalty': params.get("penalty", 'l1'),
@@ -553,25 +560,25 @@ class LRL1Classifier(SKLearnEstimator):
}
if 'regression' in task:
self.estimator_class = None
            raise NotImplementedError('LR does not support regression task')
else:
self.estimator_class = LogisticRegression
class LRL2Classifier(SKLearnEstimator):
@classmethod
def search_space(cls, **params):
def search_space(cls, **params):
return LRL1Classifier.search_space(**params)
@classmethod
def cost_relative2lgbm(cls):
return 25
    def __init__(
        self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
        **params
    ):
super().__init__(task, **params)
self.params = {
'penalty': params.get("penalty", 'l2'),
@@ -582,21 +589,19 @@ class LRL2Classifier(SKLearnEstimator):
}
if 'regression' in task:
self.estimator_class = None
            raise NotImplementedError('LR does not support regression task')
else:
self.estimator_class = LogisticRegression
class CatBoostEstimator(BaseEstimator):
_time_per_iter = None
_train_size = 0
@classmethod
    def search_space(cls, data_size, **params):
        upper = max(min(round(1500000 / data_size), 150), 11)
return {
'early_stopping_rounds': {
'domain': tune.qloguniform(lower=10, upper=upper, q=1),
@@ -613,18 +618,20 @@ class CatBoostEstimator(BaseEstimator):
def size(cls, config):
n_estimators = 8192
max_leaves = 64
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8
@classmethod
def cost_relative2lgbm(cls):
return 15
    def __init__(
        self, task='binary:logistic', n_jobs=1,
        n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
    ):
super().__init__(task, **params)
self.params = {
"early_stopping_rounds": int(round(early_stopping_rounds)),
"n_estimators": n_estimators,
"n_estimators": n_estimators,
'learning_rate': learning_rate,
'thread_count': n_jobs,
'verbose': params.get('verbose', False),
@@ -650,65 +657,69 @@ class CatBoostEstimator(BaseEstimator):
include='category').columns)
else:
cat_features = []
        if (not CatBoostEstimator._time_per_iter or abs(
                CatBoostEstimator._train_size - len(y_train)) > 4) and budget:
# measure the time per iteration
self.params["n_estimators"] = 1
CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
            CatBoostEstimator._smallmodel.fit(
                X_train, y_train, cat_features=cat_features, **kwargs)
CatBoostEstimator._t1 = time.time() - start_time
            if CatBoostEstimator._t1 >= budget:
self.params["n_estimators"] = n_iter
self._model = CatBoostEstimator._smallmodel
return CatBoostEstimator._t1
self.params["n_estimators"] = 4
CatBoostEstimator._smallmodel = self.estimator_class(**self.params)
            CatBoostEstimator._smallmodel.fit(
                X_train, y_train, cat_features=cat_features, **kwargs)
            CatBoostEstimator._time_per_iter = (
                time.time() - start_time - CatBoostEstimator._t1) / (
                self.params["n_estimators"] - 1)
            if CatBoostEstimator._time_per_iter <= 0:
CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
CatBoostEstimator._train_size = len(y_train)
            if time.time() - start_time >= budget or n_iter == self.params[
                    "n_estimators"]:
self.params["n_estimators"] = n_iter
self._model = CatBoostEstimator._smallmodel
                return time.time() - start_time
if budget:
            train_times = 1
            self.params["n_estimators"] = min(n_iter, int(
                (budget - time.time() + start_time - CatBoostEstimator._t1)
                / train_times / CatBoostEstimator._time_per_iter + 1))
self._model = CatBoostEstimator._smallmodel
if self.params["n_estimators"] > 0:
            n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
            X_tr, y_tr = X_train[:n], y_train[:n]
if 'sample_weight' in kwargs:
weight = kwargs['sample_weight']
                if weight is not None:
                    kwargs['sample_weight'] = weight[:n]
            else:
                weight = None
from catboost import Pool
model = self.estimator_class(**self.params)
            model.fit(
                X_tr, y_tr, cat_features=cat_features,
                eval_set=Pool(
                    data=X_train[n:], label=y_train[n:],
                    cat_features=cat_features),
                **kwargs)  # model.get_best_iteration()
            if weight is not None:
                kwargs['sample_weight'] = weight
self._model = model
self.params["n_estimators"] = n_iter
train_time = time.time() - start_time
return train_time
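The final fit above holds out the tail of the training data as the eval set for early stopping: at most 1000 rows, or 10% of the data, whichever is smaller. A worked instance with a hypothetical size:

# With 50,000 training rows:
n = max(int(50000 * 0.9), 50000 - 1000)  # 49000
# rows [:49000] train the model; rows [49000:] feed the eval_set Pool,
# so early_stopping_rounds acts on a 1000-row holdout.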
class KNeighborsEstimator(BaseEstimator):
@classmethod
    def search_space(cls, data_size, **params):
        upper = min(512, int(data_size / 2))
return {
'n_neighbors': {
'domain': tune.qloguniform(lower=1, upper=upper, q=1),
@@ -721,10 +732,11 @@ class KNeighborsEstimator(BaseEstimator):
def cost_relative2lgbm(cls):
return 30
    def __init__(
        self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params
    ):
super().__init__(task, **params)
        self.params = {
'n_neighbors': int(round(n_neighbors)),
'weights': params.get('weights', 'distance'),
'n_jobs': n_jobs,
@@ -739,10 +751,8 @@ class KNeighborsEstimator(BaseEstimator):
def _preprocess(self, X):
if isinstance(X, pd.DataFrame):
cat_columns = X.select_dtypes(['category']).columns
if X.shape[1] == len(cat_columns):
raise ValueError(
"kneighbor requires at least one numeric feature")
X = X.drop(cat_columns, axis=1)
"kneighbor requires at least one numeric feature")
X = X.drop(cat_columns, axis=1)
return X

View file

@@ -1,2 +1,2 @@
from .blendsearch import CFO, BlendSearch, BlendSearchTuner
from .flow2 import FLOW2

View file

@@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the
* project root for license information.
'''
from typing import Dict, Optional, List, Tuple, Callable
import numpy as np
import time
import pickle
@@ -26,7 +26,7 @@ class BlendSearch(Searcher):
'''class for BlendSearch algorithm
'''
cost_attr = "time_total_s" # cost attribute in result
cost_attr = "time_total_s" # cost attribute in result
def __init__(self,
metric: Optional[str] = None,
@@ -41,7 +41,7 @@ class BlendSearch(Searcher):
reduction_factor: Optional[float] = None,
resources_per_trial: Optional[dict] = None,
global_search_alg: Optional[Searcher] = None,
                 mem_size: Callable[[dict], float] = None,
seed: Optional[int] = 20):
'''Constructor
@@ -50,35 +50,35 @@ class BlendSearch(Searcher):
            mode: A string in ['min', 'max'] to specify the objective as
                minimization or maximization.
            space: A dictionary to specify the search space.
            points_to_evaluate: Initial parameter suggestions to be run first.
            low_cost_partial_config: A dictionary from a subset of
controlled dimensions to the initial low-cost values.
                e.g.,
.. code-block:: python
{'n_estimators': 4, 'max_leaves': 4}
cat_hp_cost: A dictionary from a subset of categorical dimensions
                to the relative cost of each choice.
e.g.,
.. code-block:: python
{'tree_method': [1, 1, 2]}
                i.e., the relative cost of the
three choices of 'tree_method' is 1, 1 and 2 respectively.
            prune_attr: A string of the attribute used for pruning.
Not necessarily in space.
                When prune_attr is in space, it is a hyperparameter, e.g.,
'n_iters', and the best value is unknown.
                When prune_attr is not in space, it is a resource dimension,
e.g., 'sample_size', and the peak performance is assumed
to be at the max_resource.
            min_resource: A float of the minimal resource to use for the
prune_attr; only valid if prune_attr is not in space.
            max_resource: A float of the maximal resource to use for the
prune_attr; only valid if prune_attr is not in space.
reduction_factor: A float of the reduction factor used for
incremental pruning.
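            For illustration, a hypothetical construction wiring these
            arguments together (the space and values are made up; the import
            path follows the package layout shown in this diff):

            .. code-block:: python

                from flaml import tune
                from flaml.searcher import BlendSearch
                algo = BlendSearch(
                    metric='val_loss', mode='min',
                    space={'n_estimators': tune.qloguniform(4, 1000, 1),
                           'learning_rate': tune.loguniform(1 / 1024, 1.0)},
                    low_cost_partial_config={'n_estimators': 4},
                    prune_attr='sample_size',
                    min_resource=1000, max_resource=100000)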
@@ -101,14 +101,15 @@ class BlendSearch(Searcher):
self._gs = GlobalSearch(space=space, metric=metric, mode=mode)
else:
self._gs = None
        self._ls = LocalSearch(
            init_config, metric, mode, cat_hp_cost, space,
            prune_attr, min_resource, max_resource, reduction_factor, seed)
self._resources_per_trial = resources_per_trial
self._mem_size = mem_size
self._mem_threshold = resources_per_trial.get(
'mem') if resources_per_trial else None
self._init_search()
def set_search_properties(self,
metric: Optional[str] = None,
mode: Optional[str] = None,
@@ -119,8 +120,10 @@ class BlendSearch(Searcher):
if 'metric_target' in config:
self._metric_target = config.get('metric_target')
else:
            if metric:
                self._metric = metric
            if mode:
                self._mode = mode
self._ls.set_search_properties(metric, mode, config)
if self._gs is not None:
self._gs.set_search_properties(metric, mode, config)
@@ -134,22 +137,22 @@ class BlendSearch(Searcher):
self._search_thread_pool = {
# id: int -> thread: SearchThread
0: SearchThread(self._ls.mode, self._gs)
        }
        self._thread_count = 1  # total # threads created
self._init_used = self._ls.init_config is None
        self._trial_proposed_by = {}  # trial_id: str -> thread_id: int
self._ls_bound_min = self._ls.normalize(self._ls.init_config)
self._ls_bound_max = self._ls_bound_min.copy()
self._gs_admissible_min = self._ls_bound_min.copy()
self._gs_admissible_max = self._ls_bound_max.copy()
        self._result = {}  # config_signature: tuple -> result: Dict
self._deadline = np.inf
def save(self, checkpoint_path: str):
save_object = self
with open(checkpoint_path, "wb") as outputFile:
pickle.dump(save_object, outputFile)
def restore(self, checkpoint_path: str):
with open(checkpoint_path, "rb") as inputFile:
state = pickle.load(inputFile)
@@ -180,24 +183,23 @@ class BlendSearch(Searcher):
''' search thread updater and cleaner
'''
thread_id = self._trial_proposed_by.get(trial_id)
        if thread_id in self._search_thread_pool:
self._search_thread_pool[thread_id].on_trial_complete(
                trial_id, result, error)
del self._trial_proposed_by[trial_id]
# if not thread_id: logger.info(f"result {result}")
if result:
config = {}
for key, value in result.items():
if key.startswith('config/'):
config[key[7:]] = value
            if error:  # remove from result cache
del self._result[self._ls.config_signature(config)]
            else:  # add to result cache
self._result[self._ls.config_signature(config)] = result
# update target metric if improved
                if (result[self._metric] - self._metric_target) * self._ls.metric_op < 0:
self._metric_target = result[self._metric]
                if not thread_id and self._create_condition(result):
# thread creator
self._search_thread_pool[self._thread_count] = SearchThread(
self._ls.mode,
@@ -206,14 +208,12 @@
)
thread_id = self._thread_count
self._thread_count += 1
            self._update_admissible_region(
                config, self._ls_bound_min, self._ls_bound_max)
# reset admissible region to ls bounding box
self._gs_admissible_min.update(self._ls_bound_min)
self._gs_admissible_max.update(self._ls_bound_max)
# cleaner
# logger.info(f"thread {thread_id} in search thread pool="
# f"{thread_id in self._search_thread_pool}")
if thread_id and thread_id in self._search_thread_pool:
# local search thread
self._clean(thread_id)
@@ -231,9 +231,11 @@
def _create_condition(self, result: Dict) -> bool:
''' create thread condition
'''
        if len(self._search_thread_pool) < 2:
            return True
        obj_median = np.median(
            [thread.obj_best1 for id, thread in self._search_thread_pool.items()
             if id])
return result[self._metric] * self._ls.metric_op < obj_median
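In words: a new local-search thread is created only when the latest global-search result beats the median best objective of the existing local threads. A worked instance (numbers hypothetical):

# mode='min' so metric_op == 1; local threads' bests: 0.30, 0.20, 0.26.
# obj_median = 0.26; a fresh result of 0.24 satisfies 0.24 * 1 < 0.26,
# so a new local search thread is started from that config.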
def _clean(self, thread_id: int):
@@ -243,21 +245,19 @@
assert thread_id
todelete = set()
for id in self._search_thread_pool:
            if id and id != thread_id:
if self._inferior(id, thread_id):
todelete.add(id)
for id in self._search_thread_pool:
            if id and id != thread_id:
if self._inferior(thread_id, id):
todelete.add(thread_id)
                    break
if self._search_thread_pool[thread_id].converged:
todelete.add(thread_id)
for key in self._ls_bound_max:
self._ls_bound_max[key] += self._ls.STEPSIZE
                self._ls_bound_min[key] -= self._ls.STEPSIZE
for id in todelete:
del self._search_thread_pool[id]
@@ -266,15 +266,20 @@
'''
t1 = self._search_thread_pool[id1]
t2 = self._search_thread_pool[id2]
        if t1.obj_best1 < t2.obj_best2:
            return False
        elif t1.resource and t1.resource < t2.resource:
            return False
        elif t2.reach(t1):
            return True
        return False
def on_trial_result(self, trial_id: str, result: Dict):
        if trial_id not in self._trial_proposed_by:
            return
        thread_id = self._trial_proposed_by[trial_id]
        if thread_id not in self._search_thread_pool:
            return
self._search_thread_pool[thread_id].on_trial_result(trial_id, result)
def suggest(self, trial_id: str) -> Optional[Dict]:
@@ -282,92 +287,85 @@
'''
if self._init_used and not self._points_to_evaluate:
choice, backup = self._select_thread()
# print(f"choice={choice}, backup={backup}")
if choice < 0: return None # timeout
if choice < 0: # timeout
return None
self._use_rs = False
config = self._search_thread_pool[choice].suggest(trial_id)
# preliminary check; not checking config validation
skip = self._should_skip(choice, trial_id, config)
if skip:
                if choice:
                    return None
# use rs when BO fails to suggest a config
self._use_rs = True
                for _, generated in generate_variants({'config': self._ls.space}):
config = generated['config']
                    break  # get one random config
skip = self._should_skip(choice, trial_id, config)
                if skip:
                    return None
            if choice or self._valid(config):
# LS or valid or no backup choice
self._trial_proposed_by[trial_id] = choice
            else:  # invalid config proposed by GS
self._use_rs = False
if choice == backup:
# use CFO's init point
init_config = self._ls.init_config
                    config = self._ls.complete_config(
                        init_config, self._ls_bound_min, self._ls_bound_max)
self._trial_proposed_by[trial_id] = choice
else:
config = self._search_thread_pool[backup].suggest(trial_id)
skip = self._should_skip(backup, trial_id, config)
                    if skip:
return None
self._trial_proposed_by[trial_id] = backup
choice = backup
            if not choice:  # global search
                if self._ls._resource:
                    # TODO: min or median?
config[self._ls.prune_attr] = self._ls.min_resource
# temporarily relax admissible region for parallel proposals
                self._update_admissible_region(
                    config, self._gs_admissible_min, self._gs_admissible_max)
else:
                self._update_admissible_region(
                    config, self._ls_bound_min, self._ls_bound_max)
self._gs_admissible_min.update(self._ls_bound_min)
self._gs_admissible_max.update(self._ls_bound_max)
self._result[self._ls.config_signature(config)] = {}
        else:  # use init config
init_config = self._points_to_evaluate.pop(
0) if self._points_to_evaluate else self._ls.init_config
            config = self._ls.complete_config(
                init_config, self._ls_bound_min, self._ls_bound_max)
config_signature = self._ls.config_signature(config)
result = self._result.get(config_signature)
            if result:  # tried before
return None
            elif result is None:  # not tried before
self._result[config_signature] = {}
            else:  # running but no result yet
                return None
self._init_used = True
self._trial_proposed_by[trial_id] = 0
# logger.info(f"config={config}")
return config
def _should_skip(self, choice, trial_id, config) -> bool:
''' if config is None or config's result is known or above mem threshold
return True; o.w. return False
'''
        if config is None:
            return True
config_signature = self._ls.config_signature(config)
exists = config_signature in self._result
# check mem constraint
        if not exists and self._mem_threshold and self._mem_size(
                config) > self._mem_threshold:
            self._result[config_signature] = {
                self._metric: np.inf * self._ls.metric_op, 'time_total_s': 1
            }
exists = True
if exists:
if not self._use_rs:
@@ -381,7 +379,7 @@ class BlendSearch(Searcher):
# else:
# # tell the thread there is an error
# self._search_thread_pool[choice].on_trial_complete(
                #         trial_id, {}, error=True)
return True
return False
@@ -390,19 +388,21 @@
'''
# update priority
min_eci = self._deadline - time.time()
        if min_eci <= 0:
            return -1, -1
max_speed = 0
        for thread in self._search_thread_pool.values():
            if thread.speed > max_speed:
                max_speed = thread.speed
        for thread in self._search_thread_pool.values():
thread.update_eci(self._metric_target, max_speed)
            if thread.eci < min_eci:
                min_eci = thread.eci
for thread in self._search_thread_pool.values():
thread.update_priority(min_eci)
top_thread_id = backup_thread_id = 0
priority1 = priority2 = self._search_thread_pool[0].priority
# print(f"priority of thread 0={priority1}, obj_best1={self._search_thread_pool[0].obj_best1}")
for thread_id, thread in self._search_thread_pool.items():
# if thread_id:
# print(
@@ -411,7 +411,7 @@
# f"thread {thread_id}.can_suggest={thread.can_suggest}")
if thread_id and thread.can_suggest:
priority = thread.priority
                if priority > priority1:
priority1 = priority
top_thread_id = thread_id
if priority > priority2 or backup_thread_id == 0:
@@ -426,30 +426,29 @@
for key in self._gs_admissible_min:
if key in config:
value = normalized_config[key]
                if value + self._ls.STEPSIZE < self._gs_admissible_min[key] \
                        or value > self._gs_admissible_max[key] + self._ls.STEPSIZE:
return False
return True
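A numeric reading of the check above (STEPSIZE and bounds hypothetical): with STEPSIZE = 0.1 and an admissible normalized interval [0.2, 0.6] for a key, a proposal at 0.15 is accepted (0.15 + 0.1 >= 0.2), while 0.05 is rejected (0.05 + 0.1 < 0.2).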
try:
from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform)
except ImportError:
from ..tune.sample import (uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform)
try:
from nni.tuner import Tuner as NNITuner
from nni.utils import extract_scalar_reward
class BlendSearchTuner(BlendSearch, NNITuner):
'''Tuner class for NNI
'''
def receive_trial_result(self, parameter_id, parameters, value,
                                  **kwargs):
'''
Receive trial's final result.
parameter_id: int
@@ -458,10 +457,10 @@ try:
'''
result = {}
for key, value in parameters.items():
                result['config/' + key] = value
reward = extract_scalar_reward(value)
result[self._metric] = reward
            # if nni does not report training cost,
# using sequence as an approximation.
# if no sequence, using a constant 1
result[self.cost_attr] = value.get(self.cost_attr, value.get(
@@ -473,7 +472,7 @@ try:
'''
Returns a set of trial (hyper-)parameters, as a serializable object
parameter_id: int
            '''
return self.suggest(str(parameter_id))
...
@@ -491,7 +490,7 @@ try:
if _type == 'choice':
config[key] = choice(v)
elif _type == 'randint':
                    config[key] = randint(v[0], v[1] - 1)
elif _type == 'uniform':
config[key] = uniform(v[0], v[1])
elif _type == 'quniform':
@@ -506,14 +505,15 @@ try:
config[key] = qrandn(v[1], v[2], v[3])
else:
raise ValueError(
                        f'unsupported type in search_space {_type}')
self._ls.set_search_properties(None, None, config)
if self._gs is not None:
self._gs.set_search_properties(None, None, config)
self._init_search()
except ImportError:
    class BlendSearchTuner(BlendSearch):
        pass
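To make the `update_search_space` conversion above concrete, a hypothetical NNI-style search space and the tune domains it maps to:

# NNI definition (illustrative values):
# {'dropout':    {'_type': 'uniform', '_value': [0.1, 0.5]},
#  'optimizer':  {'_type': 'choice',  '_value': ['sgd', 'adam']},
#  'batch_size': {'_type': 'randint', '_value': [16, 256]}}
# becomes, per the branches above:
# {'dropout': uniform(0.1, 0.5), 'optimizer': choice(['sgd', 'adam']),
#  'batch_size': randint(16, 255)}  # note the v[1] - 1 for randint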
class CFO(BlendSearchTuner):
@@ -524,7 +524,7 @@ class CFO(BlendSearchTuner):
def suggest(self, trial_id: str) -> Optional[Dict]:
# Number of threads is 1 or 2. Thread 0 is a vacuous thread
        assert len(self._search_thread_pool) < 3, len(self._search_thread_pool)
if len(self._search_thread_pool) < 2:
# When a local converges, the number of threads is 1
# Need to restart
@@ -533,7 +533,8 @@ class CFO(BlendSearchTuner):
def _select_thread(self) -> Tuple:
for key in self._search_thread_pool:
            if key:
                return key, key
def _create_condition(self, result: Dict) -> bool:
''' create thread condition
@@ -542,19 +543,15 @@ class CFO(BlendSearchTuner):
def create_next(client):
    ''' functional API for HPO
'''
state = client.get_state()
setting = client.get_settings_dict()
if state is None:
# first time call
        try:
            from ray.tune.trial import Trial
        except ImportError:
            from ..tune.trial import Trial
method = setting.get('method', 'BlendSearch')
mode = client.get_optimization_mode()
@@ -575,7 +572,7 @@ def create_next(client):
space[key] = randint(value["min_val"], value["max_val"])
elif t == 'quantized_continuous':
space[key] = quniform(value["min_val"], value["max_val"],
value["step"])
value["step"])
init_config = setting.get('init_config', None)
if init_config:
points_to_evaluate = [init_config]
@@ -588,12 +585,12 @@ def create_next(client):
elif method == 'CFO':
Algo = CFO
algo = Algo(
        mode=mode,
        metric=metric,
space=space,
points_to_evaluate=points_to_evaluate,
cat_hp_cost=cat_hp_cost,
    )
time_budget_s = setting.get('time_budget_s', None)
if time_budget_s:
algo._deadline = time_budget_s + time.time()
@@ -616,7 +613,7 @@ def create_next(client):
result[algo.metric] = trial.metrics[algo.metric].values[-1]
result[algo.cost_attr] = (end_time - trial.start_time).total_seconds()
for key, value in trial.hp_sample.items():
        result['config/' + key] = value
algo.on_trial_complete(trial_id, result=result)
# propose new trial
trial_id = Trial.generate_id()

View file

@@ -44,32 +44,32 @@ class FLOW2(Searcher):
Args:
init_config: a dictionary of a partial or full initial config,
e.g. from a subset of controlled dimensions
                to the initial low-cost values.
                e.g. {'epochs': 1}
metric: A string of the metric name to optimize for.
        mode: A string in ['min', 'max'] to specify the objective as
            minimization or maximization.
cat_hp_cost: A dictionary from a subset of categorical dimensions
            to the relative cost of each choice.
e.g.,
.. code-block:: python
{'tree_method': [1, 1, 2]}
            i.e., the relative cost of the
three choices of 'tree_method' is 1, 1 and 2 respectively.
space: A dictionary to specify the search space.
        prune_attr: A string of the attribute used for pruning.
Not necessarily in space.
            When prune_attr is in space, it is a hyperparameter, e.g.,
'n_iters', and the best value is unknown.
            When prune_attr is not in space, it is a resource dimension,
e.g., 'sample_size', and the peak performance is assumed
to be at the max_resource.
        min_resource: A float of the minimal resource to use for the
prune_attr; only valid if prune_attr is not in space.
        max_resource: A float of the maximal resource to use for the
prune_attr; only valid if prune_attr is not in space.
resource_multiple_factor: A float of the multiplicative factor
used for increasing resource.
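        A hypothetical instantiation tying these arguments together
        (all values illustrative only):

        .. code-block:: python

            FLOW2(init_config={'n_estimators': 4, 'max_leaves': 4},
                  metric='val_loss', mode='min',
                  space=space,  # a dict of tune domains
                  prune_attr='sample_size', min_resource=10000,
                  max_resource=1000000, resource_multiple_factor=4)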
@@ -98,7 +98,7 @@ class FLOW2(Searcher):
"For cost-frugal search, "
"consider providing init values for cost-related hps via "
"'init_config'."
            )
self.init_config = init_config
self.best_config = flatten_dict(init_config)
self.cat_hp_cost = cat_hp_cost
@@ -114,11 +114,11 @@ class FLOW2(Searcher):
def _init_search(self):
self._tunable_keys = []
self._bounded_keys = []
        # choices of numeric values. integer encoding.
# value: (ordered list of choices,
# dict from choice to index in the ordered list)
        self._ordered_choice_hp = {}
        # choices with given cost. integer encoding.
# value: (array of choices ordered by cost,
# dict from choice to index in the ordered array)
self._ordered_cat_hp = {}
@@ -126,8 +126,8 @@ class FLOW2(Searcher):
self._unordered_cat_hp = {}
self._cat_hp_cost = {}
for key, domain in self.space.items():
            assert not (isinstance(domain, dict) and 'grid_search' in domain), \
                f"{key}'s domain is grid search, not supported in FLOW^2."
if callable(getattr(domain, 'get_sampler', None)):
self._tunable_keys.append(key)
sampler = domain.get_sampler()
@@ -145,53 +145,50 @@ class FLOW2(Searcher):
if cat_hp_cost and key in cat_hp_cost:
cost = np.array(cat_hp_cost[key])
ind = np.argsort(cost)
                    ordered = np.array(domain.categories)[ind]
cost = self._cat_hp_cost[key] = cost[ind]
d = {}
                    for i, choice in enumerate(ordered):
d[choice] = i
                    self._ordered_cat_hp[key] = (ordered, d)
elif all(isinstance(x, int) or isinstance(x, float)
                         for x in domain.categories):
                    ordered = sorted(domain.categories)
d = {}
                    for i, choice in enumerate(ordered):
d[choice] = i
                    self._ordered_choice_hp[key] = (ordered, d)
else:
                    self._unordered_cat_hp[key] = len(domain.categories)
if str(sampler) != 'Normal':
self._bounded_keys.append(key)
self._space_keys = list(self.space.keys())
        if (self.prune_attr and self.prune_attr not in self.space
                and self.max_resource):
self._space_keys.append(self.prune_attr)
self.min_resource = self.min_resource or self._min_resource()
self._resource = self._round(self.min_resource)
        else:
            self._resource = None
self.incumbent = {}
        self.incumbent = self.normalize(self.best_config)  # flattened
self.best_obj = self.cost_incumbent = None
self.dim = len(self._tunable_keys) # total # tunable dimensions
        self._direction_tried = None
self._num_complete4incumbent = self._cost_complete4incumbent = 0
self._num_allowed4incumbent = 2 * self.dim
self._proposed_by = {} # trial_id: int -> incumbent: Dict
self.step = self.STEPSIZE * np.sqrt(self.dim)
lb = self.step_lower_bound
        if lb > self.step:
            self.step = lb * 2
# upper bound
self.step_ub = np.sqrt(self.dim)
        if self.step > self.step_ub:
            self.step = self.step_ub
# maximal # consecutive no improvements
self.dir = 2**(self.dim)
        self._configs = {}  # dict from trial_id to config
self._K = 0
self._iter_best_config = self.trial_count = 1
self._reset_times = 0
@@ -202,24 +199,26 @@ class FLOW2(Searcher):
def step_lower_bound(self) -> float:
step_lb = self._step_lb
for key in self._tunable_keys:
            if key not in self.best_config:
                continue
domain = self.space[key]
sampler = domain.get_sampler()
if isinstance(sampler, sample.Quantized):
sampler_inner = sampler.get_sampler()
if str(sampler_inner) == 'LogUniform':
                    step_lb = min(
                        step_lb, np.log(1.0 + sampler.q / self.best_config[key])
                        / np.log(domain.upper / domain.lower))
            elif isinstance(domain, sample.Integer) and str(sampler) == 'LogUniform':
                step_lb = min(
                    step_lb, np.log(1.0 + 1.0 / self.best_config[key])
                    / np.log(domain.upper / domain.lower))
        if np.isinf(step_lb):
            step_lb = self.STEP_LOWER_BOUND
        else:
            step_lb *= np.sqrt(self.dim)
return step_lb
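A worked instance of the quantized LogUniform branch (numbers hypothetical): for a dimension with q = 1, best value 4, and domain loguniform(4, 1000),

# contribution: np.log(1.0 + 1 / 4) / np.log(1000 / 4) ~= 0.0404,
# i.e., any step smaller than this cannot move the point by one grid unit.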
@property
def resource(self) -> float:
return self._resource
@@ -236,60 +235,61 @@ class FLOW2(Searcher):
return self.max_resource
return resource
    def rand_vector_gaussian(self, dim, std=1.0):
vec = self._random.normal(0, std, dim)
return vec
    def complete_config(
        self, partial_config: Dict,
        lower: Optional[Dict] = None, upper: Optional[Dict] = None
    ) -> Dict:
''' generate a complete config from the partial config input
add minimal resource to config if available
'''
        if self._reset_times and partial_config == self.init_config:
# not the first time to complete init_config, use random gaussian
normalized = self.normalize(partial_config)
for key in normalized:
                # don't change unordered cat choice
if key not in self._unordered_cat_hp:
if upper and lower:
                        up, low = upper[key], lower[key]
                        gauss_std = up - low or self.STEPSIZE
                        # allowed bound
                        up += self.STEPSIZE
                        low -= self.STEPSIZE
                    elif key in self._bounded_keys:
                        up, low, gauss_std = 1, 0, 1.0
                    else:
                        up, low, gauss_std = np.Inf, -np.Inf, 1.0
                    if key in self._bounded_keys:
                        up = min(up, 1)
                        low = max(low, 0)
                    delta = self.rand_vector_gaussian(1, gauss_std)[0]
                    normalized[key] = max(low, min(up, normalized[key] + delta))
# use best config for unordered cat choice
config = self.denormalize(normalized)
else:
# first time init_config, or other configs, take as is
config = partial_config.copy()
            if partial_config == self.init_config:
                self._reset_times += 1
config = flatten_dict(config)
for key, value in self.space.items():
if key not in config:
config[key] = value
for _, generated in generate_variants({'config': config}):
config = generated['config']
break
if self._resource:
config[self.prune_attr] = self.min_resource
return unflatten_dict(config)
def create(self, init_config: Dict, obj: float, cost: float) -> Searcher:
flow2 = FLOW2(init_config, self.metric, self.mode, self._cat_hp_cost,
                      unflatten_dict(self.space), self.prune_attr,
                      self.min_resource, self.max_resource,
                      self.resource_multiple_factor, self._seed + 1)
flow2.best_obj = obj * self.metric_op # minimize internally
flow2.cost_incumbent = cost
return flow2
@@ -309,16 +309,17 @@ class FLOW2(Searcher):
# normalize categorical
if key in self._ordered_cat_hp:
l, d = self._ordered_cat_hp[key]
                    config_norm[key] = (d[value] + 0.5) / len(l)  # center
elif key in self._ordered_choice_hp:
l, d = self._ordered_choice_hp[key]
                    config_norm[key] = (d[value] + 0.5) / len(l)  # center
elif key in self.incumbent:
config_norm[key] = self.incumbent[
key] if value == self.best_config[
key] else (self.incumbent[
                        key] + 1) % self._unordered_cat_hp[key]
                else:
                    config_norm[key] = 0
continue
# Uniform/LogUniform/Normal/Base
sampler = domain.get_sampler()
@@ -326,11 +327,11 @@ class FLOW2(Searcher):
# sampler is sample.Quantized
sampler = sampler.get_sampler()
if str(sampler) == 'LogUniform':
                config_norm[key] = np.log(value / domain.lower) / np.log(
                    domain.upper / domain.lower)
elif str(sampler) == 'Uniform':
config_norm[key] = (
                    value - domain.lower) / (domain.upper - domain.lower)
elif str(sampler) == 'Normal':
# N(mean, sd) -> N(0,1)
config_norm[key] = (value - sampler.mean) / sampler.sd
@@ -338,7 +339,6 @@ class FLOW2(Searcher):
# TODO? elif str(sampler) == 'Base': # sample.Function._CallSampler
# e.g., {test: sample_from(lambda spec: randn(10, 2).sample() * 0.01)}
config_norm[key] = value
else: # prune_attr
config_norm[key] = value
return config_norm
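A quick numeric check of the LogUniform branch (values hypothetical): learning_rate = 0.1 in loguniform(1/1024, 1.0) normalizes to

# np.log(0.1 / (1 / 1024)) / np.log(1.0 / (1 / 1024))
# = np.log(102.4) / np.log(1024) ~= 0.67,
# and denormalize() below inverts it: (upper / lower) ** 0.67 * lower ~= 0.1.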
@@ -359,19 +359,19 @@ class FLOW2(Searcher):
if key in self._ordered_cat_hp:
l, _ = self._ordered_cat_hp[key]
n = len(l)
                    config_denorm[key] = l[min(n - 1, int(np.floor(value * n)))]
elif key in self._ordered_choice_hp:
l, _ = self._ordered_choice_hp[key]
n = len(l)
                    config_denorm[key] = l[min(n - 1, int(np.floor(value * n)))]
else:
assert key in self.incumbent
if round(value) == self.incumbent[key]:
config_denorm[key] = self.best_config[key]
                    else:  # ****random value each time!****
                        config_denorm[key] = self._random.choice(
                            [x for x in domain.categories
                             if x != self.best_config[key]])
continue
# Uniform/LogUniform/Normal/Base
sampler = domain.get_sampler()
@@ -381,10 +381,10 @@ class FLOW2(Searcher):
# Handle Log/Uniform
if str(sampler) == 'LogUniform':
config_denorm[key] = (
                    domain.upper / domain.lower) ** value * domain.lower
elif str(sampler) == 'Uniform':
config_denorm[key] = value * (
                    domain.upper - domain.lower) + domain.lower
elif str(sampler) == 'Normal':
# denormalization for 'Normal'
config_denorm[key] = value * sampler.sd + sampler.mean
@@ -398,8 +398,6 @@ class FLOW2(Searcher):
# Handle int (4.6 -> 5)
if isinstance(domain, sample.Integer):
config_denorm[key] = int(round(config_denorm[key]))
else: # prune_attr
config_denorm[key] = value
return config_denorm
@@ -431,7 +429,7 @@ class FLOW2(Searcher):
self.trial_count += 1
if not error and result:
obj = result.get(self._metric)
            if obj:
obj *= self.metric_op
if self.best_obj is None or obj < self.best_obj:
self.best_obj, self.best_config = obj, self._configs[
@@ -444,10 +442,11 @@ class FLOW2(Searcher):
self._cost_complete4incumbent = 0
self._num_allowed4incumbent = 2 * self.dim
self._proposed_by.clear()
                if self._K > 0:
                    # self._oldK must have been set when self._K>0
                    self.step *= np.sqrt(self._K / self._oldK)
                    if self.step > self.step_ub:
                        self.step = self.step_ub
self._iter_best_config = self.trial_count
return
proposed_by = self._proposed_by.get(trial_id)
@@ -456,31 +455,30 @@ class FLOW2(Searcher):
self._num_complete4incumbent += 1
cost = result.get(
self.cost_attr) if result else self._trial_cost.get(trial_id)
            if cost:
                self._cost_complete4incumbent += cost
            if self._num_complete4incumbent >= 2 * self.dim and \
                    self._num_allowed4incumbent == 0:
self._num_allowed4incumbent = 2
            if self._num_complete4incumbent == self.dir and (
                    not self._resource or self._resource == self.max_resource):
                # check stuck condition if using max resource
if self.step >= self.step_lower_bound:
# decrease step size
self._oldK = self._K if self._K else self._iter_best_config
                    self._K = self.trial_count + 1
                    self.step *= np.sqrt(self._oldK / self._K)
self._num_complete4incumbent -= 2
if self._num_allowed4incumbent < 2:
self._num_allowed4incumbent = 2
        # elif proposed_by: del self._proposed_by[trial_id]
def on_trial_result(self, trial_id: str, result: Dict):
''' early update of incumbent
'''
if result:
obj = result.get(self._metric)
            if obj:
obj *= self.metric_op
if self.best_obj is None or obj < self.best_obj:
self.best_obj = obj
@@ -503,7 +501,7 @@ class FLOW2(Searcher):
def rand_vector_unit_sphere(self, dim) -> np.ndarray:
vec = self._random.normal(0, 1, dim)
mag = np.linalg.norm(vec)
        return vec / mag
def suggest(self, trial_id: str) -> Optional[Dict]:
''' suggest a new config, one of the following cases:
@@ -513,8 +511,8 @@
'''
if self._num_complete4incumbent > 0 and self.cost_incumbent and \
self._resource and self._resource < self.max_resource and (
                    self._cost_complete4incumbent
                    >= self.cost_incumbent * self.resource_multiple_factor):
# consider increasing resource using sum eval cost of complete
# configs
self._resource = self._round(
@@ -529,7 +527,7 @@ class FLOW2(Searcher):
if self._direction_tried is not None:
# return negative direction
for i, key in enumerate(self._tunable_keys):
                move[key] -= self._direction_tried[i]
self._direction_tried = None
# propose a new direction
self._direction_tried = self.rand_vector_unit_sphere(
@@ -548,7 +546,8 @@ class FLOW2(Searcher):
for key in self._bounded_keys:
value = config[key]
config[key] = max(0, min(1, value))
        if self._resource:
            config[self.prune_attr] = self._resource
@property
def can_suggest(self) -> bool:
@@ -583,22 +582,23 @@ class FLOW2(Searcher):
def converged(self) -> bool:
''' return whether the local search has converged
'''
        if self._num_complete4incumbent < self.dir - 2:
            return False
# check stepsize after enough configs are completed
return self.step < self.step_lower_bound
def reach(self, other: Searcher) -> bool:
''' whether the incumbent can reach the incumbent of other
'''
        config1, config2 = self.best_config, other.best_config
incumbent1, incumbent2 = self.incumbent, other.incumbent
        if self._resource and config1[self.prune_attr] > config2[self.prune_attr]:
# resource will not decrease
return False
for key in self._unordered_cat_hp:
# unordered cat choice is hard to reach by chance
            if config1[key] != config2[key]:
                return False
        delta = np.array(
            [incumbent1[key] - incumbent2[key] for key in self._tunable_keys])
return np.linalg.norm(delta) <= self.step

View file

@@ -22,24 +22,24 @@ class SearchThread:
cost_attr = 'time_total_s'
eps = 1e-10
def __init__(self, mode: str = "min",
def __init__(self, mode: str = "min",
search_alg: Optional[Searcher] = None):
''' When search_alg is omitted, use local search FLOW2
'''
self._search_alg = search_alg
self._is_ls = isinstance(search_alg, FLOW2)
self._mode = mode
        self._metric_op = 1 if mode == 'min' else -1
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = \
getattr(search_alg, 'cost_incumbent', 0)
self.cost_best2 = 0
self.obj_best1 = self.obj_best2 = getattr(
            search_alg, 'best_obj', np.inf)  # inherently minimize
        # eci: estimated cost for improvement
self.eci = self.cost_best
self.priority = self.speed = 0
        self._init_config = True
def suggest(self, trial_id: str) -> Optional[Dict]:
''' use the suggest() of the underlying search algorithm
'''
@@ -48,9 +48,9 @@ class SearchThread:
else:
try:
config = self._search_alg.suggest(trial_id)
            except FloatingPointError:
                logger.warning(
                    'The global search method raises FloatingPointError. '
'Ignoring for this iteration.')
config = None
return config
@@ -60,33 +60,36 @@ class SearchThread:
self.priority = eci * self.speed - self.obj_best1
def update_eci(self, metric_target: float,
                   max_speed: Optional[float] = np.inf):
        # calculate eci: estimated cost for improvement over metric_target
best_obj = metric_target * self._metric_op
        if not self.speed:
            self.speed = max_speed
self.eci = max(self.cost_total - self.cost_best1,
                       self.cost_best1 - self.cost_best2)
if self.obj_best1 > best_obj and self.speed > 0:
            self.eci = max(self.eci, 2 * (self.obj_best1 - best_obj) / self.speed)
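A small worked instance of the ECI update (all numbers hypothetical):

# Thread history: cost_total=10s, cost_best1=6s, cost_best2=2s
# -> baseline eci = max(10 - 6, 6 - 2) = 4s.
# With obj_best1=0.30, target best_obj=0.25 and speed=0.01 obj/s:
# eci = max(4, 2 * (0.30 - 0.25) / 0.01) = 10s to plausibly reach the target.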
def _update_speed(self):
# calculate speed; use 0 for invalid speed temporarily
        if self.obj_best2 > self.obj_best1:
self.speed = (self.obj_best2 - self.obj_best1) / (
self.cost_total - self.cost_best2 + self.eps)
        else:
            self.speed = 0
def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None,
error: bool = False):
''' update the statistics of the thread
'''
        if not self._search_alg:
            return
        if not hasattr(self._search_alg, '_ot_trials') or (
                not error and trial_id in self._search_alg._ot_trials):
# optuna doesn't handle error
if self._is_ls or not self._init_config:
self._search_alg.on_trial_complete(trial_id, result, error)
            else:
# init config is not proposed by self._search_alg
# under this thread
self._init_config = False
@@ -94,8 +97,6 @@ class SearchThread:
if self.cost_attr in result:
self.cost_last = result[self.cost_attr]
self.cost_total += self.cost_last
if self._search_alg.metric in result:
obj = result[self._search_alg.metric] * self._metric_op
if obj < self.obj_best1:
@@ -106,14 +107,14 @@ class SearchThread:
self.obj_best1 = obj
self.cost_best = self.cost_last
self._update_speed()
def on_trial_result(self, trial_id: str, result: Dict):
''' TODO update the statistics of the thread with partial result?
'''
        if not self._search_alg:
            return
if not hasattr(self._search_alg, '_ot_trials') or (
                trial_id in self._search_alg._ot_trials):
self._search_alg.on_trial_result(trial_id, result)
if self.cost_attr in result and self.cost_last < result[self.cost_attr]:
self.cost_last = result[self.cost_attr]
@@ -137,4 +138,3 @@ class SearchThread:
''' whether the thread can suggest new configs
'''
return self._search_alg.can_suggest

View file

@@ -21,6 +21,11 @@ import logging
import os
import time
from typing import Dict, Optional, Union, List, Tuple
import pickle
from .variant_generator import parse_spec_vars
from ..tune.sample import Categorical, Domain, Float, Integer, LogUniform, \
Quantized, Uniform
from ..tune.trial import flatten_dict, unflatten_dict
logger = logging.getLogger(__name__)
@@ -72,7 +77,7 @@ def log_once(key):
return False
else:
return False
class Searcher:
"""Abstract class for wrapping suggesting algorithms.
@@ -407,12 +412,6 @@ class ConcurrencyLimiter(Searcher):
return self.searcher.set_search_properties(metric, mode, config)
try:
import optuna as ot
from optuna.samplers import BaseSampler
@ -689,4 +688,4 @@ class OptunaSearch(Searcher):
for path, domain in domain_vars
]
return values
return values

View file

@ -66,8 +66,8 @@ def unflatten_dict(dt, delimiter="/"):
item = item.setdefault(k, dict_type())
item[path[-1]] = val
return out
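As a quick illustration of what these helpers do (a sketch, assuming the vendored copies behave like their Ray counterparts):

from flaml.tune.trial import flatten_dict, unflatten_dict

nested = {'cost_related': {'a': 1}, 'b': 0.7}
flat = flatten_dict(nested)            # {'cost_related/a': 1, 'b': 0.7}
assert unflatten_dict(flat) == nested  # round-trips with the '/' delimiter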
class TuneError(Exception):
"""General error class raised by ray.tune."""
pass
@ -433,4 +433,4 @@ class _UnresolvedAccessGuard(dict):
class RecursiveDependencyError(Exception):
def __init__(self, msg: str):
Exception.__init__(self, msg)
Exception.__init__(self, msg)

View file

@ -1,6 +1,6 @@
'''!
* Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
* Licensed under the MIT License.
'''
import json
@ -118,7 +118,7 @@ class TrainingLogWriter(object):
def close(self):
self.file.close()
self.file = None # for pickle
self.file = None # for pickle
class TrainingLogReader(object):
@ -142,7 +142,7 @@ class TrainingLogReader(object):
def close(self):
self.file.close()
self.file = None # for pickle
self.file = None # for pickle
def get_record(self, record_id) -> TrainingLogRecord:
if self.file is None:

View file

@ -1,7 +1,7 @@
try:
from ray.tune import (uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform)
except:
qrandn, loguniform, qloguniform)
except ImportError:
from .sample import (uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform)
from .tune import run, report
qrandn, loguniform, qloguniform)
from .tune import run, report

View file

@ -532,4 +532,4 @@ def qrandn(mean: float, sd: float, q: float):
q (float): Quantization number. The result will be rounded to an
integer increment of this value.
"""
return Float(None, None).normal(mean, sd).quantized(q)
return Float(None, None).normal(mean, sd).quantized(q)
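Usage mirrors the other sampling helpers; a small sketch (assuming the vendored sampler exposes Ray's `.sample()` API):

from flaml.tune.sample import qrandn

dist = qrandn(mean=10.0, sd=2.0, q=0.5)
print(dist.sample())  # a draw from N(10, 2), rounded to a multiple of 0.5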

View file

@ -121,8 +121,7 @@ class Trial:
self.metric_analysis[metric]["min"] = min(
value, self.metric_analysis[metric]["min"])
self.metric_analysis[metric]["avg"] = 1 / step * (
value +
(step - 1) * self.metric_analysis[metric]["avg"])
value + (step - 1) * self.metric_analysis[metric]["avg"])
self.metric_analysis[metric]["last"] = value
for n in self.n_steps:
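The `avg` update above is the standard incremental-mean recurrence; a quick standalone check with illustrative values:

values = [2.0, 4.0, 9.0]               # metric values reported at steps 1..3
avg = 0.0
for step, value in enumerate(values, start=1):
    avg = 1 / step * (value + (step - 1) * avg)
assert avg == sum(values) / len(values)  # 5.0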

View file

@ -6,7 +6,7 @@
from typing import Optional
try:
from ray.tune.trial import Trial
except:
except ImportError:
from .trial import Trial
import logging
logger = logging.getLogger(__name__)
@ -16,14 +16,15 @@ class Nologger():
'''Logger without logging
'''
def on_result(self, result): pass
def on_result(self, result):
pass
class SimpleTrial(Trial):
'''A simple trial class
'''
def __init__(self, config, trial_id = None):
def __init__(self, config, trial_id=None):
self.trial_id = Trial.generate_id() if trial_id is None else trial_id
self.config = config or {}
self.status = Trial.PENDING
@ -46,17 +47,16 @@ class BaseTrialRunner:
Note that the caller usually should not mutate trial state directly.
"""
def __init__(self,
search_alg = None,
scheduler = None,
metric: Optional[str] = None,
mode: Optional[str] = 'min'):
def __init__(self,
search_alg=None, scheduler=None,
metric: Optional[str] = None,
mode: Optional[str] = 'min'):
self._search_alg = search_alg
self._scheduler_alg = scheduler
self._scheduler_alg = scheduler
self._trials = []
self._metric = metric
self._mode = mode
def get_trials(self):
"""Returns the list of trials managed by this TrialRunner.
@ -81,22 +81,22 @@ class BaseTrialRunner:
self._search_alg.on_trial_result(trial.trial_id, result)
if self._scheduler_alg:
decision = self._scheduler_alg.on_trial_result(self, trial, result)
if decision == "STOP": trial.set_status(Trial.TERMINATED)
elif decision == "PAUSE": trial.set_status(Trial.PAUSED)
if decision == "STOP":
trial.set_status(Trial.TERMINATED)
elif decision == "PAUSE":
trial.set_status(Trial.PAUSED)
def stop_trial(self, trial):
"""Stops trial.
"""
if not trial.status in [Trial.ERROR, Trial.TERMINATED]:
if trial.status not in [Trial.ERROR, Trial.TERMINATED]:
if self._scheduler_alg:
self._scheduler_alg.on_trial_complete(self,
trial.trial_id, trial.last_result)
self._search_alg.on_trial_complete(
trial.trial_id, trial.last_result)
self._scheduler_alg.on_trial_complete(
self, trial.trial_id, trial.last_result)
self._search_alg.on_trial_complete(trial.trial_id, trial.last_result)
trial.set_status(Trial.TERMINATED)
else:
if self._scheduler_alg:
self._scheduler_alg.on_trial_remove(self, trial)
elif self._scheduler_alg:
self._scheduler_alg.on_trial_remove(self, trial)
class SequentialTrialRunner(BaseTrialRunner):
@ -112,10 +112,11 @@ class SequentialTrialRunner(BaseTrialRunner):
"""
trial_id = Trial.generate_id()
config = self._search_alg.suggest(trial_id)
if config:
if config:
trial = SimpleTrial(config, trial_id)
self.add_trial(trial)
trial.set_status(Trial.RUNNING)
else: trial = None
else:
trial = None
self.running_trial = trial
return trial

View file

@ -3,13 +3,13 @@
* Licensed under the MIT License. See LICENSE file in the
* project root for license information.
'''
from typing import Optional, Union, List
import datetime, time
from typing import Optional, Union, List, Callable
import datetime
import time
try:
from ray.tune.analysis import ExperimentAnalysis as EA
except:
except ImportError:
from .analysis import ExperimentAnalysis as EA
import logging
logger = logging.getLogger(__name__)
@ -28,7 +28,7 @@ class ExperimentAnalysis(EA):
def __init__(self, trials, metric, mode):
try:
super().__init__(self, None, trials, metric, mode)
except:
except (TypeError, ValueError):
self.trials = trials
self.default_metric = metric
self.default_mode = mode
@ -59,7 +59,7 @@ def report(_metric=None, **kwargs):
},
metric='metric2minimize', mode='min',
num_samples=1000000, time_budget_s=60, use_ray=False)
print(analysis.trials[-1].last_result)
Args:
@ -78,7 +78,8 @@ def report(_metric=None, **kwargs):
result = kwargs
if _verbose == 2:
logger.info(f"result: {kwargs}")
if _metric: result['_default_anonymous_metric'] = _metric
if _metric:
result['_default_anonymous_metric'] = _metric
trial = _runner.running_trial
if _running_trial == trial:
_training_iteration += 1
@ -88,14 +89,15 @@ def report(_metric=None, **kwargs):
result["training_iteration"] = _training_iteration
result['config'] = trial.config
for key, value in trial.config.items():
result['config/'+key] = value
result['config/' + key] = value
_runner.process_trial_result(_runner.running_trial, result)
result['time_total_s'] = trial.last_update_time - trial.start_time
if _verbose > 2:
logger.info(f"result: {result}")
if _runner.running_trial.is_finished():
return None
else: return True
else:
return True
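Pieced together from the docstring fragments above, a runnable sketch of the report/run round trip (the toy quadratic objective follows the docstring example):

from flaml import tune

def compute_with_config(config):
    # each call to report() feeds one result back into the running trial
    tune.report(metric2minimize=(config['x'] - 85000) ** 2)

analysis = tune.run(
    compute_with_config,
    config={'x': tune.qloguniform(lower=1, upper=100000, q=1)},
    metric='metric2minimize', mode='min',
    num_samples=100, time_budget_s=60, use_ray=False)
print(analysis.trials[-1].last_result)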
def run(training_function,
@ -111,14 +113,13 @@ def run(training_function,
max_resource: Optional[float] = None,
reduction_factor: Optional[float] = None,
report_intermediate_result: Optional[bool] = False,
search_alg = None,
verbose: Optional[int] = 2,
search_alg=None,
verbose: Optional[int] = 2,
local_dir: Optional[str] = None,
num_samples: Optional[int] = 1,
resources_per_trial: Optional[dict] = None,
mem_size = None,
use_ray: Optional[bool] = False,
):
mem_size: Callable[[dict], float] = None,
use_ray: Optional[bool] = False):
'''The trigger for HPO.
Example:
@ -142,53 +143,53 @@ def run(training_function,
},
metric='metric2minimize', mode='min',
num_samples=-1, time_budget_s=60, use_ray=False)
print(analysis.trials[-1].last_result)
Args:
training_function: A user-defined training function.
training_function: A user-defined training function.
config: A dictionary to specify the search space.
points_to_evaluate: A list of initial hyperparameter
configurations to run first.
low_cost_partial_config: A dictionary from a subset of
low_cost_partial_config: A dictionary from a subset of
controlled dimensions to the initial low-cost values.
e.g.,
e.g.,
.. code-block:: python
{'n_estimators': 4, 'max_leaves': 4}
cat_hp_cost: A dictionary from a subset of categorical dimensions
to the relative cost of each choice.
to the relative cost of each choice.
e.g.,
.. code-block:: python
{'tree_method': [1, 1, 2]}
i.e., the relative cost of the
i.e., the relative cost of the
three choices of 'tree_method' is 1, 1 and 2 respectively
metric: A string of the metric name to optimize for.
mode: A string in ['min', 'max'] to specify the objective as
minimization or maximization.
time_budget_s: A float of the time budget in seconds.
prune_attr: A string of the attribute used for pruning.
prune_attr: A string of the attribute used for pruning.
Not necessarily in space.
When prune_attr is in space, it is a hyperparameter, e.g.,
When prune_attr is in space, it is a hyperparameter, e.g.,
'n_iters', and the best value is unknown.
When prune_attr is not in space, it is a resource dimension,
When prune_attr is not in space, it is a resource dimension,
e.g., 'sample_size', and the peak performance is assumed
to be at the max_resource.
min_resource: A float of the minimal resource to use for the
min_resource: A float of the minimal resource to use for the
prune_attr; only valid if prune_attr is not in space.
max_resource: A float of the maximal resource to use for the
max_resource: A float of the maximal resource to use for the
prune_attr; only valid if prune_attr is not in space.
reduction_factor: A float of the reduction factor used for incremental
pruning.
report_intermediate_result: A boolean of whether intermediate results
are reported. If so, early stopping and pruning can be used.
search_alg: An instance of BlendSearch as the search algorithm
to be used. The same instance can be used for iterative tuning.
to be used. The same instance can be used for iterative tuning.
e.g.,
.. code-block:: python
@ -201,7 +202,7 @@ def run(training_function,
analysis = tune.run(compute_with_config,
search_alg=algo, use_ray=False)
print(analysis.trials[-1].last_result)
verbose: 0, 1, 2, or 3. Verbosity mode for ray if ray backend is used.
0 = silent, 1 = only status updates, 2 = status and brief trial
results, 3 = status and detailed trial results. Defaults to 2.
@ -215,7 +216,7 @@ def run(training_function,
mem_size: A function to estimate the memory size for a given config.
It is used to skip configs which do not fit in memory; a sketch follows this function.
use_ray: A boolean of whether to use ray as the backend
'''
'''
global _use_ray
global _verbose
if not use_ray:
@ -224,8 +225,8 @@ def run(training_function,
import os
if local_dir:
os.makedirs(local_dir, exist_ok=True)
logger.addHandler(logging.FileHandler(local_dir+'/tune_'+str(
datetime.datetime.now()).replace(':', '-')+'.log'))
logger.addHandler(logging.FileHandler(local_dir + '/tune_' + str(
datetime.datetime.now()).replace(':', '-') + '.log'))
elif not logger.handlers:
# Add the console handler.
_ch = logging.StreamHandler()
@ -233,8 +234,8 @@ def run(training_function,
'[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
'%m-%d %H:%M:%S')
_ch.setFormatter(logger_formatter)
logger.addHandler(_ch)
if verbose<=2:
logger.addHandler(_ch)
if verbose <= 2:
logger.setLevel(logging.INFO)
else:
logger.setLevel(logging.DEBUG)
@ -243,55 +244,54 @@ def run(training_function,
if search_alg is None:
from ..searcher.blendsearch import BlendSearch
search_alg = BlendSearch(metric=metric, mode=mode,
points_to_evaluate=points_to_evaluate,
low_cost_partial_config=low_cost_partial_config,
cat_hp_cost=cat_hp_cost,
space=config, prune_attr=prune_attr,
min_resource=min_resource,
max_resource=max_resource,
reduction_factor=reduction_factor,
resources_per_trial=resources_per_trial,
mem_size=mem_size)
search_alg = BlendSearch(
metric=metric, mode=mode, space=config,
points_to_evaluate=points_to_evaluate,
low_cost_partial_config=low_cost_partial_config,
cat_hp_cost=cat_hp_cost,
prune_attr=prune_attr,
min_resource=min_resource, max_resource=max_resource,
reduction_factor=reduction_factor,
resources_per_trial=resources_per_trial,
mem_size=mem_size)
if time_budget_s:
search_alg.set_search_properties(metric, mode, config={
'time_budget_s':time_budget_s})
'time_budget_s': time_budget_s})
scheduler = None
if report_intermediate_result:
params = {}
# scheduler resource_dimension=prune_attr
if prune_attr: params['time_attr'] = prune_attr
if max_resource: params['max_t'] = max_resource
if min_resource: params['grace_period'] = min_resource
if reduction_factor: params['reduction_factor'] = reduction_factor
if prune_attr:
params['time_attr'] = prune_attr
if max_resource:
params['max_t'] = max_resource
if min_resource:
params['grace_period'] = min_resource
if reduction_factor:
params['reduction_factor'] = reduction_factor
try:
from ray.tune.schedulers import ASHAScheduler
scheduler = ASHAScheduler(**params)
except:
scheduler = None
else:
scheduler = None
except ImportError:
pass
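For instance, with a resource dimension named 'sample_size' the mapping above amounts to the following (hypothetical values; requires ray[tune]):

from ray.tune.schedulers import ASHAScheduler

scheduler = ASHAScheduler(
    time_attr='sample_size',   # params['time_attr'] = prune_attr
    max_t=100000,              # params['max_t'] = max_resource
    grace_period=1000,         # params['grace_period'] = min_resource
    reduction_factor=4)        # params['reduction_factor'] = reduction_factor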
if use_ray:
try:
from ray import tune
except:
except ImportError:
raise ImportError("Failed to import ray tune. "
"Please install ray[tune] or set use_ray=False")
"Please install ray[tune] or set use_ray=False")
_use_ray = True
return tune.run(training_function,
metric=metric,
mode=mode,
search_alg=search_alg,
scheduler=scheduler,
time_budget_s=time_budget_s,
verbose=verbose,
local_dir=local_dir,
num_samples=num_samples,
resources_per_trial=resources_per_trial
)
metric=metric, mode=mode,
search_alg=search_alg,
scheduler=scheduler,
time_budget_s=time_budget_s,
verbose=verbose, local_dir=local_dir,
num_samples=num_samples,
resources_per_trial=resources_per_trial)
# simple sequential run without using tune.run() from ray
time_start = time.time()
time_start = time.time()
_use_ray = False
if scheduler:
scheduler.set_search_properties(metric=metric, mode=mode)
@ -302,10 +302,10 @@ def run(training_function,
scheduler=scheduler,
metric=metric,
mode=mode,
)
)
num_trials = 0
while time.time()-time_start<time_budget_s and (
num_samples<0 or num_trials<num_samples):
while time.time() - time_start < time_budget_s and (
num_samples < 0 or num_trials < num_samples):
trial_to_run = _runner.step()
if trial_to_run:
num_trials += 1
@ -313,4 +313,4 @@ def run(training_function,
logger.info(f'trial {num_trials} config: {trial_to_run.config}')
training_function(trial_to_run.config)
_runner.stop_trial(trial_to_run)
return ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
return ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
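To close the loop on the `prune_attr` and `mem_size` arguments documented above, a hypothetical end-to-end sketch; it assumes the resource dimension is injected into each suggested config (as FLAML's AutoML does with 'sample_size') and simply passes `mem_size` through to the search algorithm as documented:

from flaml import tune

def train(config):
    sample_size = config.get('sample_size', 0)  # injected resource (assumed)
    tune.report(loss=abs(config['n_estimators'] - 50) + 1.0 / (1 + sample_size))

def mem_size(config):
    # hypothetical estimate: 8 bytes per tree node
    return config['n_estimators'] * 8

analysis = tune.run(
    train,
    config={'n_estimators': tune.qloguniform(lower=4, upper=1000, q=1)},
    low_cost_partial_config={'n_estimators': 4},
    prune_attr='sample_size', min_resource=1000, max_resource=100000,
    reduction_factor=4, mem_size=mem_size,
    metric='loss', mode='min',
    time_budget_s=5, num_samples=-1, use_ray=False)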

File diff hidden because one or more lines are too long

File diff hidden because one or more lines are too long

View file

@ -474,7 +474,7 @@
"args = TrainingArguments(\n",
" output_dir='output',\n",
" do_eval=True,\n",
" )"
")"
]
},
{
@ -569,7 +569,7 @@
" flaml.tune.report(\n",
" loss=eval_output[\"eval_loss\"],\n",
" matthews_correlation=eval_output[\"eval_matthews_correlation\"],\n",
" )"
" )"
]
},
{
@ -599,7 +599,7 @@
" \"adam_epsilon\": flaml.tune.loguniform(1e-9, 1e-7),\n",
" \"adam_beta1\": flaml.tune.uniform(0.8, 0.99),\n",
" \"adam_beta2\": flaml.tune.loguniform(98e-2, 9999e-4),\n",
" }"
"}"
]
},
{

File diff hidden because one or more lines are too long

File diff hidden because one or more lines are too long

View file

@ -54,7 +54,7 @@ setuptools.setup(
],
"ray": [
"ray[tune]==1.2.0",
"pyyaml<5.3.1",
"pyyaml<5.3.1",
],
"azureml": [
"azureml-mlflow",
@ -66,7 +66,7 @@ setuptools.setup(
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Operating System :: OS Independent",
],
python_requires=">=3.6",
)

View file

@ -2,6 +2,7 @@
'''
import time
import numpy as np
import os
try:
import ray
@ -15,6 +16,7 @@ try:
Trainer,
TrainingArguments,
)
import flaml
MODEL_CHECKPOINT = "microsoft/deberta-base"
task_to_keys = {
"cola": ("sentence", None),
@ -27,9 +29,9 @@ try:
"stsb": ("sentence1", "sentence2"),
"wnli": ("sentence1", "sentence2"),
}
max_seq_length=128
overwrite_cache=False
pad_to_max_length=True
max_seq_length = 128
overwrite_cache = False
pad_to_max_length = True
padding = "max_length"
TASK = "qnli"
@ -46,19 +48,17 @@ try:
examples[sentence1_key], examples[sentence2_key])
)
return tokenizer(*args, padding=padding, max_length=max_seq_length,
truncation=True)
truncation=True)
except:
except ImportError:
print("pip install torch transformers datasets flaml[blendsearch,ray]")
import logging
logger = logging.getLogger(__name__)
import os
os.makedirs('logs', exist_ok=True)
logger.addHandler(logging.FileHandler('logs/tune_deberta.log'))
logger.setLevel(logging.INFO)
import flaml
def train_deberta(config: dict):
@ -76,7 +76,6 @@ def train_deberta(config: dict):
predictions = np.argmax(predictions, axis=1)
return metric.compute(predictions=predictions, references=labels)
model = AutoModelForSequenceClassification.from_pretrained(
MODEL_CHECKPOINT, num_labels=NUM_LABELS
)
@ -109,7 +108,7 @@ def train_deberta(config: dict):
flaml.tune.report(
loss=eval_output["eval_loss"],
accuracy=eval_output["eval_accuracy"],
)
)
try:
from azureml.core import Run
@ -117,10 +116,12 @@ def train_deberta(config: dict):
run.log('accuracy', eval_output["eval_accuracy"])
run.log('loss', eval_output["eval_loss"])
run.log('config', config)
except: pass
except ImportError:
pass
def _test_deberta(method='BlendSearch'):
max_num_epoch = 100
num_samples = -1
time_budget_s = 3600

View file

@ -2,6 +2,8 @@
'''
import time
import numpy as np
import logging
import os
try:
import ray
@ -15,6 +17,7 @@ try:
Trainer,
TrainingArguments,
)
import flaml
MODEL_CHECKPOINT = "distilbert-base-uncased"
TASK = "cola"
NUM_LABELS = 2
@ -26,20 +29,18 @@ try:
# Define tokenize method
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_fast=True)
def tokenize(examples):
return tokenizer(examples[COLUMN_NAME], truncation=True)
except:
except ImportError:
print("pip install torch transformers datasets flaml[blendsearch,ray]")
import logging
logger = logging.getLogger(__name__)
import os
os.makedirs('logs', exist_ok=True)
logger.addHandler(logging.FileHandler('logs/tune_distilbert.log'))
logger.setLevel(logging.INFO)
import flaml
def train_distilbert(config: dict):
@ -87,11 +88,11 @@ def train_distilbert(config: dict):
flaml.tune.report(
loss=eval_output["eval_loss"],
matthews_correlation=eval_output["eval_matthews_correlation"],
)
)
def _test_distillbert(method='BlendSearch'):
max_num_epoch = 64
num_samples = -1
time_budget_s = 3600

View file

@ -2,6 +2,7 @@
'''
import time
import numpy as np
import os
try:
import ray
@ -15,6 +16,7 @@ try:
Trainer,
TrainingArguments,
)
import flaml
MODEL_CHECKPOINT = "google/electra-base-discriminator"
task_to_keys = {
"cola": ("sentence", None),
@ -27,9 +29,9 @@ try:
"stsb": ("sentence1", "sentence2"),
"wnli": ("sentence1", "sentence2"),
}
max_seq_length=128
overwrite_cache=False
pad_to_max_length=True
max_seq_length = 128
overwrite_cache = False
pad_to_max_length = True
padding = "max_length"
TASK = "qnli"
@ -46,19 +48,17 @@ try:
examples[sentence1_key], examples[sentence2_key])
)
return tokenizer(*args, padding=padding, max_length=max_seq_length,
truncation=True)
truncation=True)
except:
except ImportError:
print("pip install torch transformers datasets flaml[blendsearch,ray]")
import logging
logger = logging.getLogger(__name__)
import os
os.makedirs('logs', exist_ok=True)
logger.addHandler(logging.FileHandler('logs/tune_electra.log'))
logger.setLevel(logging.INFO)
import flaml
def train_electra(config: dict):
@ -76,7 +76,6 @@ def train_electra(config: dict):
predictions = np.argmax(predictions, axis=1)
return metric.compute(predictions=predictions, references=labels)
model = AutoModelForSequenceClassification.from_pretrained(
MODEL_CHECKPOINT, num_labels=NUM_LABELS
)
@ -109,7 +108,7 @@ def train_electra(config: dict):
flaml.tune.report(
loss=eval_output["eval_loss"],
accuracy=eval_output["eval_accuracy"],
)
)
try:
from azureml.core import Run
@ -117,10 +116,12 @@ def train_electra(config: dict):
run.log('accuracy', eval_output["eval_accuracy"])
run.log('loss', eval_output["eval_loss"])
run.log('config', config)
except: pass
except ImportError:
pass
def _test_electra(method='BlendSearch'):
max_num_epoch = 9
num_samples = -1
time_budget_s = 3600
@ -247,4 +248,4 @@ def _test_electra_bohb():
if __name__ == "__main__":
_test_electra()
_test_electra()

View file

@ -2,6 +2,7 @@
'''
import time
import numpy as np
import os
try:
import ray
@ -15,6 +16,7 @@ try:
Trainer,
TrainingArguments,
)
import flaml
MODEL_CHECKPOINT = "roberta-base"
task_to_keys = {
"cola": ("sentence", None),
@ -27,9 +29,9 @@ try:
"stsb": ("sentence1", "sentence2"),
"wnli": ("sentence1", "sentence2"),
}
max_seq_length=128
overwrite_cache=False
pad_to_max_length=True
max_seq_length = 128
overwrite_cache = False
pad_to_max_length = True
padding = "max_length"
TASK = "qnli"
@ -46,19 +48,17 @@ try:
examples[sentence1_key], examples[sentence2_key])
)
return tokenizer(*args, padding=padding, max_length=max_seq_length,
truncation=True)
truncation=True)
except:
except ImportError:
print("pip install torch transformers datasets flaml[blendsearch,ray]")
import logging
logger = logging.getLogger(__name__)
import os
os.makedirs('logs', exist_ok=True)
logger.addHandler(logging.FileHandler('logs/tune_roberta.log'))
logger.setLevel(logging.INFO)
import flaml
def train_roberta(config: dict):
@ -76,7 +76,6 @@ def train_roberta(config: dict):
predictions = np.argmax(predictions, axis=1)
return metric.compute(predictions=predictions, references=labels)
model = AutoModelForSequenceClassification.from_pretrained(
MODEL_CHECKPOINT, num_labels=NUM_LABELS
)
@ -109,7 +108,7 @@ def train_roberta(config: dict):
flaml.tune.report(
loss=eval_output["eval_loss"],
accuracy=eval_output["eval_accuracy"],
)
)
try:
from azureml.core import Run
@ -117,10 +116,12 @@ def train_roberta(config: dict):
run.log('accuracy', eval_output["eval_accuracy"])
run.log('loss', eval_output["eval_loss"])
run.log('config', config)
except: pass
except ImportError:
pass
def _test_roberta(method='BlendSearch'):
max_num_epoch = 100
num_samples = -1
time_budget_s = 3600
@ -248,4 +249,3 @@ def _test_roberta_bohb():
if __name__ == "__main__":
_test_roberta()

View file

@ -2,6 +2,6 @@ from flaml.searcher.blendsearch import BlendSearchTuner as BST
class BlendSearchTuner(BST):
# for best performance pass low cost initial parameters here
def __init__(self, low_cost_partial_config={"hidden_size":128}):
# for best performance pass low cost initial parameters here
def __init__(self, low_cost_partial_config={"hidden_size": 128}):
super().__init__(low_cost_partial_config=low_cost_partial_config)

View file

@ -27,7 +27,7 @@ class Net(nn.Module):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
self.fc1 = nn.Linear(4*4*50, hidden_size)
self.fc1 = nn.Linear(4 * 4 * 50, hidden_size)
self.fc2 = nn.Linear(hidden_size, 10)
def forward(self, x):
@ -35,7 +35,7 @@ class Net(nn.Module):
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 4*4*50)
x = x.view(-1, 4 * 4 * 50)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
@ -151,7 +151,6 @@ def get_params():
parser.add_argument('--log_interval', type=int, default=1000, metavar='N',
help='how many batches to wait before logging training status')
args, _ = parser.parse_known_args()
return args

View file

@ -15,5 +15,5 @@ config = ScriptRunConfig(
exp = Experiment(ws, 'test-electra')
run = exp.submit(config)
print(run.get_portal_url()) # link to ml.azure.com
run.wait_for_completion(show_output=True)
print(run.get_portal_url()) # link to ml.azure.com
run.wait_for_completion(show_output=True)

View file

@ -14,10 +14,9 @@ from flaml import tune
class MyRegularizedGreedyForest(SKLearnEstimator):
def __init__(self, task = 'binary:logistic', n_jobs = 1, max_leaf = 4,
n_iter = 1, n_tree_search = 1, opt_interval = 1, learning_rate = 1.0,
min_samples_leaf = 1, **params):
def __init__(self, task='binary:logistic', n_jobs=1, max_leaf=4,
n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0,
min_samples_leaf=1, **params):
super().__init__(task, **params)
@ -34,24 +33,24 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
'n_tree_search': int(round(n_tree_search)),
'opt_interval': int(round(opt_interval)),
'learning_rate': learning_rate,
'min_samples_leaf':int(round(min_samples_leaf))
}
'min_samples_leaf': int(round(min_samples_leaf))
}
@classmethod
def search_space(cls, data_size, task):
space = {
'max_leaf': {'domain': tune.qloguniform(
lower = 4, upper = data_size, q = 1), 'init_value': 4},
'n_iter': {'domain': tune.qloguniform(
lower = 1, upper = data_size, q = 1), 'init_value': 1},
'n_tree_search': {'domain': tune.qloguniform(
lower = 1, upper = 32768, q = 1), 'init_value': 1},
'opt_interval': {'domain': tune.qloguniform(
lower = 1, upper = 10000, q = 1), 'init_value': 100},
'learning_rate': {'domain': tune.loguniform(
lower = 0.01, upper = 20.0)},
'min_samples_leaf': {'domain': tune.qloguniform(
lower = 1, upper = 20, q = 1), 'init_value': 20},
'max_leaf': {'domain': tune.qloguniform(
lower=4, upper=data_size, q=1), 'init_value': 4},
'n_iter': {'domain': tune.qloguniform(
lower=1, upper=data_size, q=1), 'init_value': 1},
'n_tree_search': {'domain': tune.qloguniform(
lower=1, upper=32768, q=1), 'init_value': 1},
'opt_interval': {'domain': tune.qloguniform(
lower=1, upper=10000, q=1), 'init_value': 100},
'learning_rate': {'domain': tune.loguniform(
lower=0.01, upper=20.0)},
'min_samples_leaf': {'domain': tune.qloguniform(
lower=1, upper=20, q=1), 'init_value': 20},
}
return space
@ -59,22 +58,22 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
def size(cls, config):
max_leaves = int(round(config['max_leaf']))
n_estimators = int(round(config['n_iter']))
return (max_leaves*3 + (max_leaves-1)*4 + 1.0)*n_estimators*8
return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8
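For a sense of scale, with max_leaf=4 and n_iter=1 the estimate above evaluates to (4 * 3 + 3 * 4 + 1) * 1 * 8 = 200, i.e. about 200 bytes for a single 4-leaf tree; the trailing factor of 8 presumably accounts for 8-byte values per node (our reading, not stated in the source).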
@classmethod
def cost_relative2lgbm(cls):
return 1.0
return 1.0
def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
weight_test=None, weight_train=None):
weight_test=None, weight_train=None):
from sklearn.metrics import log_loss
y_pred = estimator.predict_proba(X_test)
test_loss = log_loss(y_test, y_pred, labels=labels,
sample_weight=weight_test)
sample_weight=weight_test)
y_pred = estimator.predict_proba(X_train)
train_loss = log_loss(y_train, y_pred, labels=labels,
sample_weight=weight_train)
sample_weight=weight_train)
alpha = 0.5
return test_loss * (1 + alpha) - alpha * train_loss, [test_loss, train_loss]
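A metric with this signature can be plugged into `AutoML.fit` by passing the callable itself (a sketch; assumes this FLAML version accepts a callable `metric`, and the log path is hypothetical):

from flaml import AutoML
from sklearn.datasets import load_wine

automl = AutoML()
X_train, y_train = load_wine(return_X_y=True)
# custom_metric is the function defined above
automl.fit(X_train=X_train, y_train=y_train, metric=custom_metric,
           task='classification', time_budget=10, n_jobs=1,
           log_file_name='test/wine_custom_metric.log')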
@ -83,43 +82,42 @@ class TestAutoML(unittest.TestCase):
def test_custom_learner(self):
automl = AutoML()
automl.add_learner(learner_name = 'RGF',
learner_class = MyRegularizedGreedyForest)
automl.add_learner(learner_name='RGF',
learner_class=MyRegularizedGreedyForest)
X_train, y_train = load_wine(return_X_y=True)
settings = {
"time_budget": 10, # total running time in seconds
"estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
"task": 'classification', # task type
"sample": True, # whether to subsample training data
"time_budget": 10, # total running time in seconds
"estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
"task": 'classification', # task type
"sample": True, # whether to subsample training data
"log_file_name": "test/wine.log",
"log_training_metric": True, # whether to log training metric
"log_training_metric": True, # whether to log training metric
"n_jobs": 1,
}
'''The main flaml automl API'''
automl.fit(X_train = X_train, y_train = y_train, **settings)
automl.fit(X_train=X_train, y_train=y_train, **settings)
# print the best model found for RGF
print(automl.best_model_for_estimator("RGF"))
def test_ensemble(self):
automl = AutoML()
automl.add_learner(learner_name = 'RGF',
learner_class = MyRegularizedGreedyForest)
automl.add_learner(learner_name='RGF',
learner_class=MyRegularizedGreedyForest)
X_train, y_train = load_wine(return_X_y=True)
settings = {
"time_budget": 10, # total running time in seconds
# "estimator_list": ['lgbm', 'xgboost'],
"estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
"task": 'classification', # task type
"sample": True, # whether to subsample training data
"time_budget": 10, # total running time in seconds
"estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
"task": 'classification', # task type
"sample": True, # whether to subsample training data
"log_file_name": "test/wine.log",
"log_training_metric": True, # whether to log training metric
"log_training_metric": True, # whether to log training metric
"ensemble": True,
"n_jobs": 1,
}
'''The main flaml automl API'''
automl.fit(X_train = X_train, y_train = y_train, **settings)
automl.fit(X_train=X_train, y_train=y_train, **settings)
def test_dataframe(self):
self.test_classification(True)
@ -210,7 +208,7 @@ class TestAutoML(unittest.TestCase):
"model_history": True
}
X_train, y_train = load_boston(return_X_y=True)
n = int(len(y_train)*9//10)
n = int(len(y_train) * 9 // 10)
automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
X_val=X_train[n:], y_val=y_train[n:],
**automl_settings)

View file

@ -39,8 +39,8 @@ class TestLogging(unittest.TestCase):
X_train, y_train = load_boston(return_X_y=True)
n = len(y_train) >> 1
automl.fit(X_train=X_train[:n], y_train=y_train[:n],
X_val=X_train[n:], y_val=y_train[n:],
**automl_settings)
X_val=X_train[n:], y_val=y_train[n:],
**automl_settings)
# Check if the log buffer is populated.
self.assertTrue(len(buf.getvalue()) > 0)
@ -48,4 +48,4 @@ class TestLogging(unittest.TestCase):
import pickle
with open('automl.pkl', 'wb') as f:
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
print(automl.__version__)
print(automl.__version__)

View file

@ -9,23 +9,6 @@ logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_pytorch_cifar10.log'))
# __load_data_begin__
def load_data(data_dir="./data"):
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = torchvision.datasets.CIFAR10(
root=data_dir, train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(
root=data_dir, train=False, download=True, transform=transform)
return trainset, testset
# __load_data_end__
try:
import torch
import torch.nn as nn
@ -35,9 +18,9 @@ try:
import torchvision
import torchvision.transforms as transforms
# __net_begin__
class Net(nn.Module):
def __init__(self, l1=120, l2=84):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
@ -79,7 +62,7 @@ def load_data(data_dir="test/data"):
# __train_begin__
def train_cifar(config, checkpoint_dir=None, data_dir=None):
if not "l1" in config:
if "l1" not in config:
logger.warning(config)
net = Net(2 ** config["l1"], 2 ** config["l2"])
@ -200,8 +183,9 @@ def _test_accuracy(net, device="cpu"):
# __main_begin__
def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
gpus_per_trial=2):
def cifar10_main(
method='BlendSearch', num_samples=10, max_num_epochs=100, gpus_per_trial=2
):
data_dir = os.path.abspath("test/data")
load_data(data_dir) # Download data for all trials before starting the run
if method == 'BlendSearch':
@ -214,15 +198,15 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
"l2": tune.randint(2, 8),
"lr": tune.loguniform(1e-4, 1e-1),
"num_epochs": tune.qloguniform(1, max_num_epochs, q=1),
"batch_size": tune.randint(1, 4)#tune.choice([2, 4, 8, 16])
"batch_size": tune.randint(1, 4)
}
else:
config = {
"l1": tune.randint(2, 9),
"l2": tune.randint(2, 9),
"lr": tune.loguniform(1e-4, 1e-1),
"num_epochs": tune.qloguniform(1, max_num_epochs+1, q=1),
"batch_size": tune.randint(1, 5)#tune.choice([2, 4, 8, 16])
"num_epochs": tune.qloguniform(1, max_num_epochs + 1, q=1),
"batch_size": tune.randint(1, 5)
}
import ray
time_budget_s = 3600
@ -274,7 +258,7 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
from ray.tune.schedulers import ASHAScheduler
scheduler = ASHAScheduler(
max_t=max_num_epochs,
grace_period=1)
grace_period=1)
result = tune.run(
tune.with_parameters(train_cifar, data_dir=data_dir),
resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
@ -297,7 +281,7 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
best_trial.metric_analysis["accuracy"]["max"]))
best_trained_model = Net(2**best_trial.config["l1"],
2**best_trial.config["l2"])
2**best_trial.config["l2"])
device = "cpu"
if torch.cuda.is_available():
device = "cuda:0"
@ -315,8 +299,8 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
# __main_end__
gpus_per_trial=0#.5
num_samples=500
gpus_per_trial = 0 # 0.5 on GPU server
num_samples = 500
def _test_cifar10_bs():
@ -325,27 +309,27 @@ def _test_cifar10_bs():
def _test_cifar10_cfo():
cifar10_main('CFO',
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
def _test_cifar10_optuna():
cifar10_main('Optuna',
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
def _test_cifar10_asha():
cifar10_main('ASHA',
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
def _test_cifar10_bohb():
cifar10_main('BOHB',
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
def _test_cifar10_nevergrad():
cifar10_main('Nevergrad',
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
num_samples=num_samples, gpus_per_trial=gpus_per_trial)
if __name__ == "__main__":

View file

@ -24,7 +24,7 @@ def _test(split_type):
X, y = fetch_openml(name=dataset, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
random_state=42)
random_state=42)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
pred = automl.predict(X_test)
@ -32,6 +32,7 @@ def _test(split_type):
print(acc)
def _test_uniform():
_test(split_type="uniform")

View file

@ -23,12 +23,12 @@ class TestTrainingLog(unittest.TestCase):
"task": 'regression',
"log_file_name": filename,
"log_training_metric": True,
"mem_thres": 1024*1024,
"mem_thres": 1024 * 1024,
"n_jobs": 1,
"model_history": True,
"verbose": 2,
}
X_train, y_train = load_boston(return_X_y=True)
X_train, y_train = load_boston(return_X_y=True)
automl_experiment.fit(X_train=X_train, y_train=y_train,
**automl_settings)

View file

@ -57,7 +57,7 @@ def _test_xgboost(method='BlendSearch'):
}
max_iter = 10
for num_samples in [128]:
time_budget_s = 60 #None
time_budget_s = 60
for n_cpu in [8]:
start_time = time.time()
ray.init(num_cpus=n_cpu, num_gpus=0)
@ -79,7 +79,7 @@ def _test_xgboost(method='BlendSearch'):
# You can add "gpu": 0.1 to allocate GPUs
resources_per_trial={"cpu": 1},
local_dir='logs/',
num_samples=num_samples*n_cpu,
num_samples=num_samples * n_cpu,
time_budget_s=time_budget_s,
use_ray=True)
else:
@ -112,7 +112,7 @@ def _test_xgboost(method='BlendSearch'):
algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
elif 'ZOOpt' == method:
from ray.tune.suggest.zoopt import ZOOptSearch
algo = ZOOptSearch(budget=num_samples*n_cpu)
algo = ZOOptSearch(budget=num_samples * n_cpu)
elif 'Ax' == method:
from ray.tune.suggest.ax import AxSearch
algo = AxSearch()
@ -132,7 +132,8 @@ def _test_xgboost(method='BlendSearch'):
# You can add "gpu": 0.1 to allocate GPUs
resources_per_trial={"cpu": 1},
config=search_space, local_dir='logs/',
num_samples=num_samples*n_cpu, time_budget_s=time_budget_s,
num_samples=num_samples * n_cpu,
time_budget_s=time_budget_s,
scheduler=scheduler, search_alg=algo)
ray.shutdown()
# # Load the best model checkpoint
@ -140,7 +141,7 @@ def _test_xgboost(method='BlendSearch'):
# best_bst = xgb.Booster()
# best_bst.load_model(os.path.join(analysis.best_checkpoint,
# "model.xgb"))
best_trial = analysis.get_best_trial("eval-logloss","min","all")
best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
logloss = best_trial.metric_analysis["eval-logloss"]["min"]
logger.info(f"method={method}")
@ -162,14 +163,14 @@ def test_nested():
}
def simple_func(config):
tune.report(
metric=(config["cost_related"]["a"]-4)**2 * (config["b"]-0.7)**2)
tune.report(metric=(config["cost_related"]["a"] - 4)**2
* (config["b"] - 0.7)**2)
analysis = tune.run(
tune.run(
simple_func,
config=search_space,
low_cost_partial_config={
"cost_related": {"a": 1,}
"cost_related": {"a": 1}
},
metric="metric",
mode="min",
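The toy objective above is minimized exactly when the nested values hit a=4 and b=0.7, which a successful nested-space run should approach; a quick check:

config = {'cost_related': {'a': 4}, 'b': 0.7}
assert (config['cost_related']['a'] - 4) ** 2 * (config['b'] - 0.7) ** 2 == 0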

View file

@ -4,7 +4,6 @@ import flaml
class TestVersion(unittest.TestCase):
def test_version(self):
self.assertTrue(hasattr(flaml, '__version__'))
self.assertTrue(len(flaml.__version__) > 0)

View file

@ -15,7 +15,7 @@ class XGBoost2D(XGBoostSklearnEstimator):
@classmethod
def search_space(cls, data_size, task):
upper = min(32768,int(data_size))
upper = min(32768, int(data_size))
return {
'n_estimators': {
'domain': tune.qloguniform(lower=4, upper=upper, q=1),
@ -30,29 +30,25 @@ class XGBoost2D(XGBoostSklearnEstimator):
def test_simple(method=None):
automl = AutoML()
automl.add_learner(learner_name = 'XGBoost2D',
learner_class = XGBoost2D)
automl.add_learner(learner_name='XGBoost2D',
learner_class=XGBoost2D)
automl_settings = {
"estimator_list": ['XGBoost2D'],
# "metric": 'accuracy',
"task": 'classification',
"log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
# "model_history": True,
# "log_training_metric": True,
# "split_type": split_type,
"n_jobs": 1,
"hpo_method": method,
"log_type": "all",
"time_budget": 3#6000,
"time_budget": 3
}
try:
X, y = fetch_openml(name=dataset, return_X_y=True)
except:
except FileNotFoundError:
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.33, random_state=42)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

View file

@ -15,7 +15,7 @@ class XGBoost2D(XGBoostSklearnEstimator):
@classmethod
def search_space(cls, data_size, task):
upper = min(32768,int(data_size))
upper = min(32768, int(data_size))
return {
'n_estimators': {
'domain': tune.qloguniform(lower=4, upper=upper, q=1),
@ -30,19 +30,14 @@ class XGBoost2D(XGBoostSklearnEstimator):
def _test_simple(method=None, size_ratio=1.0):
automl = AutoML()
automl.add_learner(learner_name = 'XGBoost2D',
learner_class = XGBoost2D)
automl.add_learner(learner_name='XGBoost2D',
learner_class=XGBoost2D)
try:
X, y = fetch_openml(name=dataset, return_X_y=True)
except:
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
X, y = fetch_openml(name=dataset, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
random_state=42)
random_state=42)
final_size = int(len(y_train)*size_ratio)
final_size = int(len(y_train) * size_ratio)
X_train = X_train[:final_size]
y_train = y_train[:final_size]
automl_settings = {
@ -62,14 +57,17 @@ def _test_simple(method=None, size_ratio=1.0):
def _test_grid_1():
_test_simple(method="grid", size_ratio=1.0/3.0)
_test_simple(method="grid", size_ratio=1.0 / 3.0)
def _test_grid_2():
_test_simple(method="grid", size_ratio=2.0/3.0)
_test_simple(method="grid", size_ratio=2.0 / 3.0)
def _test_grid_4():
_test_simple(method="grid", size_ratio=0.5)
def _test_grid_3():
_test_simple(method="grid", size_ratio=1.0)