Mirror of https://github.com/microsoft/FLAML.git

v0.1.0

Commit 492990655d
.coveragerc
@@ -0,0 +1,5 @@
[run]
branch = True
source = flaml
omit =
    *tests*
.flake8
@@ -0,0 +1,5 @@
[flake8]
ignore = E203, E266, E501, W503, F403, F401, C901
max-line-length = 127
max-complexity = 10
select = B,C,E,F,W,T4,B9
.github/workflows/python-package.yml
@@ -0,0 +1,59 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python package

on:
  push:
    branches: ['*']
  pull_request:
    branches: ['*']

jobs:
  build:

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]
        python-version: [3.6, 3.7, 3.8]

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: If mac, install libomp to facilitate lgbm install
      if: matrix.os == 'macos-latest'
      run: |
        brew install libomp
        export CC=/usr/bin/clang
        export CXX=/usr/bin/clang++
        export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp"
        export CFLAGS="$CFLAGS -I/usr/local/opt/libomp/include"
        export CXXFLAGS="$CXXFLAGS -I/usr/local/opt/libomp/include"
        export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
    - name: Install packages and dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest coverage
        pip install -e .
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pytest test
    - name: Coverage
      run: |
        coverage run -a -m pytest test
        coverage xml
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v1
      with:
        file: ./coverage.xml
        flags: unittests
.gitignore
@@ -0,0 +1,150 @@
# Project
/.vs
.vscode

# Log files
*.log

# Python virtualenv
.venv

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/
/catboost_info
notebook/*.pkl
CODE_OF_CONDUCT.md
@@ -0,0 +1,9 @@
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) Microsoft Corporation.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
@@ -0,0 +1,123 @@
# FLAML - Fast and Lightweight AutoML

FLAML is a Python library designed to automatically produce accurate machine
learning models with low computational cost. It frees users from selecting
learners and hyperparameters for each learner, and it is fast and economical.
Its simple and lightweight design makes it easy to extend, for example by
adding customized learners or metrics. FLAML is powered by a new, cost-effective
hyperparameter optimization and learner selection method invented by
Microsoft Research.
FLAML is easy to use:

1. With three lines of code, you can start using this economical and fast
AutoML engine as a scikit-learn style estimator.
```python
from flaml import AutoML
automl = AutoML()
automl.fit(X_train, y_train, task="classification")
```

2. You can restrict the learners and use FLAML as a fast hyperparameter tuning
tool for XGBoost, LightGBM, Random Forest, etc., or for a customized learner.
```python
automl.fit(X_train, y_train, task="classification", estimator_list=["lgbm"])
```

3. You can embed FLAML in self-tuning software for just-in-time tuning with
low latency & resource consumption.
```python
automl.fit(X_train, y_train, task="regression", time_budget=60)
```

## Installation

FLAML requires **Python version >= 3.6**. It can be installed from pip:

```bash
pip install flaml
```

To run the [`notebook example`](https://github.com/microsoft/FLAML/tree/main/notebook),
install flaml with the [notebook] option:

```bash
pip install flaml[notebook]
```
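
(In shells such as zsh, where square brackets are glob characters, you may need to quote the argument: `pip install "flaml[notebook]"`.)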

## Examples

A basic classification example:

```python
from flaml import AutoML
from sklearn.datasets import load_iris
# Initialize the FLAML learner.
automl = AutoML()
# Provide configurations.
automl_settings = {
    "time_budget": 10,  # in seconds
    "metric": 'accuracy',
    "task": 'classification',
    "log_file_name": "test/iris.log",
}
X_train, y_train = load_iris(return_X_y=True)
# Train with labeled input data.
automl.fit(X_train=X_train, y_train=y_train,
           **automl_settings)
# Predict
print(automl.predict_proba(X_train))
# Export the best model.
print(automl.model)
```

A basic regression example:

```python
from flaml import AutoML
from sklearn.datasets import load_boston
# Initialize the FLAML learner.
automl = AutoML()
# Provide configurations.
automl_settings = {
    "time_budget": 10,  # in seconds
    "metric": 'r2',
    "task": 'regression',
    "log_file_name": "test/boston.log",
}
X_train, y_train = load_boston(return_X_y=True)
# Train with labeled input data.
automl.fit(X_train=X_train, y_train=y_train,
           **automl_settings)
# Predict
print(automl.predict(X_train))
# Export the best model.
print(automl.model)
```
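
FLAML also accepts a customized metric function in place of a metric name. Below is a minimal sketch following the signature documented in `AutoML.fit`; the error computation inside is illustrative only:

```python
from sklearn.metrics import mean_squared_error

def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
    # Return the value to minimize and a tuple of metrics to log.
    test_loss = mean_squared_error(y_test, estimator.predict(X_test))
    train_loss = mean_squared_error(y_train, estimator.predict(X_train))
    return test_loss, (test_loss, train_loss)

automl.fit(X_train=X_train, y_train=y_train, task='regression',
           metric=custom_metric, time_budget=10)
```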

More examples: see the [notebook](https://github.com/microsoft/FLAML/tree/main/notebook/flaml_demo.ipynb).

## Contributing

This project welcomes contributions and suggestions. Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the rights to use your contribution. For details, visit <https://cla.opensource.microsoft.com>.

When you submit a pull request, a CLA bot will automatically determine whether you need to provide
a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
provided by the bot. You will only need to do this once across all repos using our CLA.

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.

## Authors

* Chi Wang
* Qingyun Wu
* Erkang Zhu

Contributors: Markus Weimer, Silu Huang, Haozhe Zhang, Alex Deng.

## License

[MIT License](LICENSE)
SECURITY.md
@@ -0,0 +1,41 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->

## Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.

## Preferred Languages

We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).

<!-- END MICROSOFT SECURITY.MD BLOCK -->
flaml/__init__.py
@@ -0,0 +1,70 @@
from flaml.automl import AutoML
from flaml.model import BaseEstimator
from flaml.data import get_output_from_log

from flaml.version import __version__

import logging
from os.path import join, exists
import datetime as dt
from os import listdir, remove, mkdir
import pathlib
import json

root = pathlib.Path(__file__).parent.parent.absolute()
jsonfilepath = join(root, "settings.json")

with open(jsonfilepath) as f:
    settings = json.load(f)

logging_level = settings["logging_level"]

if logging_level == "info":
    logging_level = logging.INFO
elif logging_level == "debug":
    logging_level = logging.DEBUG
elif logging_level == "error":
    logging_level = logging.ERROR
elif logging_level == "warning":
    logging_level = logging.WARNING
elif logging_level == "critical":
    logging_level = logging.CRITICAL
else:
    logging_level = logging.NOTSET

keep_max_logfiles = settings["keep_max_logfiles"]

log_dir = join(root, "logs")

if not exists(log_dir):
    mkdir(log_dir)

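# Log rotation: keep only the newest `keep_max_logfiles` log files;
# each file name starts with an integer timestamp.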
del_logs = sorted(
    [int(x.split("_")[0]) for x in listdir(log_dir) if ".log" in x],
    reverse=True)[keep_max_logfiles:]

for log_id in del_logs:
    try:
        remove(join(log_dir, str(log_id) + "_flaml.log"))
    except Exception:
        continue

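# Name the new log file by the number of seconds elapsed since a fixed
# reference date (2020-04-01), so file names sort chronologically.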
b = dt.datetime.now()
a = dt.datetime(2020, 4, 1, 0, 0, 0)
secs = int((b - a).total_seconds())
name = str(secs)

logger = logging.getLogger(__name__)
logger.setLevel(logging_level)
fh = logging.FileHandler(join(log_dir, name + "_" + __name__ + ".log"))
fh.setLevel(logging_level)
ch = logging.StreamHandler()
ch.setLevel(logging_level)
# formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
formatter = logging.Formatter(
    '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
    '%m-%d %H:%M:%S')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
logger.addHandler(ch)
logger.addHandler(fh)
logger.propagate = True
flaml/automl.py
@@ -0,0 +1,897 @@
'''!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the
 * project root for license information.
'''
import time
import warnings
from functools import partial
import ast
import numpy as np
import scipy.sparse
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, \
    RepeatedKFold
from sklearn.utils import shuffle
import pandas as pd

from .ml import compute_estimator, train_estimator, get_classification_objective
from .config import MIN_SAMPLE_TRAIN, MEM_THRES, ETI_INI, \
    SMALL_LARGE_THRES, CV_HOLDOUT_THRESHOLD, SPLIT_RATIO, N_SPLITS
from .data import concat
from .search import ParamSearch
from .training_log import training_log_reader, training_log_writer

import logging
logger = logging.getLogger(__name__)


class AutoML:
    '''The AutoML class

    Attributes:
        model: An object with predict() and predict_proba() method (for
            classification), storing the best trained model.
        model_history: A dictionary of iter->model, storing the models when
            the best model is updated each time.
        config_history: A dictionary of iter->(estimator, config, time),
            storing the best estimator, config, and the time when the best
            model is updated each time.
        classes_: A list of n_classes elements for class labels.
        best_iteration: An integer of the iteration number where the best
            config is found.
        best_estimator: A string indicating the best estimator found.
        best_config: A dictionary of the best configuration.
        best_config_train_time: A float of the seconds taken by training the
            best config.

    Typical usage example:

        automl = AutoML()
        automl_settings = {
            "time_budget": 60,
            "metric": 'accuracy',
            "task": 'classification',
            "log_file_name": 'test/mylog.log',
        }
        automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    '''

    def __init__(self):
        self._eti_ini = ETI_INI
        self._custom_learners = {}
        self._config_space_info = {}
        self._custom_size_estimate = {}
        self._track_iter = 0

    @property
    def model_history(self):
        return self._model_history

    @property
    def config_history(self):
        return self._config_history

    @property
    def model(self):
        if self._trained_estimator:
            return self._trained_estimator.model
        else:
            return None

    @property
    def best_estimator(self):
        return self._best_estimator

    @property
    def best_iteration(self):
        return self._best_iteration

    @property
    def best_config(self):
        return self._selected.best_config[0]

    @property
    def best_loss(self):
        return self._best_loss

    @property
    def best_config_train_time(self):
        return self.best_train_time

    @property
    def classes_(self):
        if self.label_transformer:
            return self.label_transformer.classes_.tolist()
        if self._trained_estimator:
            return self._trained_estimator.model.classes_.tolist()
        return None

    def predict(self, X_test):
        '''Predict label from features.

        Args:
            X_test: A numpy array of featurized instances, shape n*m.

        Returns:
            A numpy array of shape n*1 -- each element is a predicted class
            label for an instance.
        '''
        X_test = self.preprocess(X_test)
        y_pred = self._trained_estimator.predict(X_test)
        if y_pred.ndim > 1:
            y_pred = y_pred.flatten()
        if self.label_transformer:
            return self.label_transformer.inverse_transform(pd.Series(
                y_pred))
        else:
            return y_pred

    def predict_proba(self, X_test):
        '''Predict the probability of each class from features; only works for
        classification problems.

        Args:
            X_test: A numpy array of featurized instances, shape n*m.

        Returns:
            A numpy array of shape n*c. c is the # classes. Each element at
            (i, j) is the probability for instance i to be in class j.
        '''
        X_test = self.preprocess(X_test)
        proba = self._trained_estimator.predict_proba(X_test)
        return proba

    def preprocess(self, X):
        if scipy.sparse.issparse(X):
            X = X.tocsr()
        if self.transformer:
            X = self.transformer.transform(X)
        return X

    def _validate_data(self, X_train_all, y_train_all, dataframe, label,
                       X_val=None, y_val=None):
        if X_train_all is not None and y_train_all is not None:
            if not (isinstance(X_train_all, np.ndarray)
                    or scipy.sparse.issparse(X_train_all)
                    or isinstance(X_train_all, pd.DataFrame)
                    ):
                raise ValueError(
                    "X_train_all must be a numpy array, a pandas dataframe, "
                    "or Scipy sparse matrix.")
            if not (isinstance(y_train_all, np.ndarray)
                    or isinstance(y_train_all, pd.Series)):
                raise ValueError(
                    "y_train_all must be a numpy array or a pandas series.")
            if X_train_all.size == 0 or y_train_all.size == 0:
                raise ValueError("Input data must not be empty.")
            if isinstance(y_train_all, np.ndarray):
                y_train_all = y_train_all.flatten()
            if X_train_all.shape[0] != y_train_all.shape[0]:
                raise ValueError(
                    "# rows in X_train must match length of y_train.")
            self.df = isinstance(X_train_all, pd.DataFrame)
            self.nrow, self.ndim = X_train_all.shape
            X, y = X_train_all, y_train_all
        elif dataframe is not None and label is not None:
            if not isinstance(dataframe, pd.DataFrame):
                raise ValueError("dataframe must be a pandas DataFrame.")
            if label not in dataframe.columns:
                raise ValueError("label must be a column name in dataframe.")
            self.df = True
            self.dataframe, self.label = dataframe, label
            X = dataframe.drop(columns=label)
            self.nrow, self.ndim = X.shape
            y = dataframe[label]
        else:
            raise ValueError(
                "either X_train_all+y_train_all or dataframe+label need to be provided.")
        if scipy.sparse.issparse(X_train_all):
            self.transformer = self.label_transformer = False
            self.X_train_all, self.y_train_all = X, y
        else:
            from .data import DataTransformer
            self.transformer = DataTransformer()
            self.X_train_all, self.y_train_all = self.transformer.fit_transform(
                X, y, self.task)
            self.label_transformer = self.transformer.label_transformer

        if X_val is not None and y_val is not None:
            if not (isinstance(X_val, np.ndarray)
                    or scipy.sparse.issparse(X_val)
                    or isinstance(X_val, pd.DataFrame)
                    ):
                raise ValueError(
                    "X_val must be None, a numpy array, a pandas dataframe, "
                    "or Scipy sparse matrix.")
            if not (isinstance(y_val, np.ndarray)
                    or isinstance(y_val, pd.Series)):
                raise ValueError(
                    "y_val must be None, a numpy array or a pandas series.")
            if X_val.size == 0 or y_val.size == 0:
                raise ValueError(
                    "Validation data are expected to be nonempty. "
                    "Use None for X_val and y_val if no validation data.")
            if isinstance(y_val, np.ndarray):
                y_val = y_val.flatten()
            if X_val.shape[0] != y_val.shape[0]:
                raise ValueError(
                    "# rows in X_val must match length of y_val.")
            if self.transformer:
                self.X_val = self.transformer.transform(X_val)
            else:
                self.X_val = X_val
            if self.label_transformer:
                self.y_val = self.label_transformer.transform(y_val)
            else:
                self.y_val = y_val
        else:
            self.X_val = self.y_val = None

    def _prepare_data(self,
                      eval_method,
                      split_ratio,
                      n_splits):
        X_val, y_val = self.X_val, self.y_val
        if scipy.sparse.issparse(X_val):
            X_val = X_val.tocsr()
        X_train_all, y_train_all = self.X_train_all, self.y_train_all
        if scipy.sparse.issparse(X_train_all):
            X_train_all = X_train_all.tocsr()

        if self.task != 'regression':
            # logger.info(f"label {pd.unique(y_train_all)}")
            label_set, counts = np.unique(y_train_all, return_counts=True)
            # augment rare classes
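            # (e.g., a class with 7 instances is re-appended on each pass of
            # the loop below -- 7 -> 14 -> 21 -- until it reaches the threshold)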
            rare_threshold = 20
            rare = counts < rare_threshold
            rare_label, rare_counts = label_set[rare], counts[rare]
            for i, label in enumerate(rare_label):
                count = rare_count = rare_counts[i]
                rare_index = y_train_all == label
                n = len(y_train_all)
                while count < rare_threshold:
                    if self.df:
                        X_train_all = concat(X_train_all,
                                             X_train_all.iloc[:n].loc[rare_index])
                    else:
                        X_train_all = concat(X_train_all,
                                             X_train_all[:n][rare_index, :])
                    if isinstance(y_train_all, pd.Series):
                        y_train_all = concat(y_train_all,
                                             y_train_all.iloc[:n].loc[rare_index])
                    else:
                        y_train_all = np.concatenate([y_train_all,
                                                      y_train_all[:n][rare_index]])
                    count += rare_count
                logger.debug(
                    f"class {label} augmented from {rare_count} to {count}")
        X_train_all, y_train_all = shuffle(
            X_train_all, y_train_all, random_state=202020)
        if self.df:
            X_train_all.reset_index(drop=True, inplace=True)
            if isinstance(y_train_all, pd.Series):
                y_train_all.reset_index(drop=True, inplace=True)

        X_train, y_train = X_train_all, y_train_all
        if X_val is None:
            if self.task != 'regression' and eval_method == 'holdout':
                label_set, first = np.unique(y_train_all, return_index=True)
                rest = []
                last = 0
                first.sort()
                for i in range(len(first)):
                    rest.extend(range(last, first[i]))
                    last = first[i] + 1
                rest.extend(range(last, len(y_train_all)))
                X_first = X_train_all.iloc[first] if self.df else X_train_all[
                    first]
                X_rest = X_train_all.iloc[rest] if self.df else X_train_all[rest]
                y_rest = y_train_all.iloc[rest] if isinstance(
                    y_train_all, pd.Series) else y_train_all[rest]
                stratify = y_rest if self.split_type == 'stratified' else None
                X_train, X_val, y_train, y_val = train_test_split(
                    X_rest,
                    y_rest,
                    test_size=split_ratio,
                    stratify=stratify,
                    random_state=1)
                X_train = concat(X_first, X_train)
                y_train = concat(label_set,
                                 y_train) if self.df else np.concatenate([label_set, y_train])
                X_val = concat(X_first, X_val)
                y_val = concat(label_set,
                               y_val) if self.df else np.concatenate([label_set, y_val])
                _, y_train_counts_elements = np.unique(y_train,
                                                       return_counts=True)
                _, y_val_counts_elements = np.unique(y_val,
                                                     return_counts=True)
                logger.debug(
                    f"""{self.split_type} split for y_train \
{y_train_counts_elements}, \
y_val {y_val_counts_elements}""")
            elif eval_method == 'holdout' and self.task == 'regression':
                X_train, X_val, y_train, y_val = train_test_split(
                    X_train_all,
                    y_train_all,
                    test_size=split_ratio,
                    random_state=1)
        self.data_size = X_train.shape[0]
        self.X_train, self.y_train, self.X_val, self.y_val = (
            X_train, y_train, X_val, y_val)
        if self.split_type == "stratified":
            logger.info("Using StratifiedKFold")
            self.kf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=1,
                                              random_state=202020)
        else:
            logger.info("Using RepeatedKFold")
            self.kf = RepeatedKFold(n_splits=n_splits, n_repeats=1,
                                    random_state=202020)

    def prepare_sample_train_data(self, sample_size):
        full_size = len(self.y_train)
        if sample_size <= full_size:
            if isinstance(self.X_train, pd.DataFrame):
                sampled_X_train = self.X_train.iloc[:sample_size]
            else:
                sampled_X_train = self.X_train[:sample_size]
            sampled_y_train = self.y_train[:sample_size]
        else:
            sampled_X_train = concat(self.X_train, self.X_val)
            sampled_y_train = np.concatenate([self.y_train, self.y_val])
        return sampled_X_train, sampled_y_train

    def _compute_with_config_base(self,
                                  metric,
                                  compute_train_loss,
                                  estimator,
                                  config,
                                  sample_size):
        sampled_X_train, sampled_y_train = self.prepare_sample_train_data(
            sample_size)
        time_left = self.time_budget - self.time_from_start
        budget = time_left if sample_size == self.data_size else \
            time_left / 2 * sample_size / self.data_size
        return compute_estimator(sampled_X_train,
                                 sampled_y_train,
                                 self.X_val,
                                 self.y_val,
                                 budget,
                                 self.kf,
                                 config,
                                 self.task,
                                 estimator,
                                 self.eval_method,
                                 metric,
                                 self._best_loss,
                                 self.n_jobs,
                                 self._custom_learners.get(estimator),
                                 compute_train_loss)

    def _train_with_config(self, estimator, config, sample_size):
        sampled_X_train, sampled_y_train = self.prepare_sample_train_data(
            sample_size)
        budget = None if self.time_budget is None else (self.time_budget
                                                        - self.time_from_start)
        model, train_time = train_estimator(
            sampled_X_train,
            sampled_y_train,
            config,
            self.task,
            estimator,
            self.n_jobs,
            self._custom_learners.get(estimator),
            budget)
        return model, train_time

    def add_learner(self,
                    learner_name,
                    learner_class,
                    size_estimate=lambda config: 'unknown',
                    cost_relative2lgbm=1):
        '''Add a customized learner.

        Args:
            learner_name: A string of the learner's name.
            learner_class: A subclass of BaseEstimator.
            size_estimate: A function from a config to its memory size in float.
            cost_relative2lgbm: A float number for the training cost ratio with
                respect to lightgbm (when both use the initial config).
        '''
        self._custom_learners[learner_name] = learner_class
        self._eti_ini[learner_name] = cost_relative2lgbm
        self._config_space_info[learner_name] = \
            learner_class.params_configsearch_info
        self._custom_size_estimate[learner_name] = size_estimate

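    # A sketch of registering a customized learner (the `MyEstimator` subclass
    # of BaseEstimator is hypothetical, not defined in this file):
    #     automl = AutoML()
    #     automl.add_learner(learner_name='my_learner',
    #                        learner_class=MyEstimator,
    #                        cost_relative2lgbm=1.5)
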
    def get_estimator_from_log(self, log_file_name, record_id, objective):
        '''Get the estimator from log file.

        Args:
            log_file_name: A string of the log file name.
            record_id: An integer of the record ID in the file,
                0 corresponds to the first trial.
            objective: A string of the objective name,
                'binary', 'multi', or 'regression'.

        Returns:
            An estimator object for the given configuration.
        '''

        with training_log_reader(log_file_name) as reader:
            record = reader.get_record(record_id)
            estimator = record.learner
            config = record.config

        estimator, _ = train_estimator(
            None, None, config, objective, estimator,
            estimator_class=self._custom_learners.get(estimator)
        )
        return estimator

    def retrain_from_log(self,
                         log_file_name,
                         X_train=None,
                         y_train=None,
                         dataframe=None,
                         label=None,
                         time_budget=0,
                         task='classification',
                         eval_method='auto',
                         split_ratio=SPLIT_RATIO,
                         n_splits=N_SPLITS,
                         split_type="stratified",
                         n_jobs=1,
                         train_best=True,
                         train_full=False,
                         record_id=-1):
        '''Retrain from log file.

        Args:
            time_budget: A float number of the time budget in seconds.
            log_file_name: A string of the log file name.
            X_train: A numpy array of training data in shape n*m.
            y_train: A numpy array of labels in shape n*1.
            task: A string of the task type, e.g.,
                'classification', 'regression'.
            eval_method: A string of resampling strategy, one of
                ['auto', 'cv', 'holdout'].
            split_ratio: A float of the validation data percentage for holdout.
            n_splits: An integer of the number of folds for cross-validation.
            n_jobs: An integer of the number of threads for training.
            train_best: A boolean of whether to train the best config in the
                time budget; if false, train the last config in the budget.
            train_full: A boolean of whether to train on the full data. If true,
                eval_method and sample_size in the log file will be ignored.
            record_id: the ID of the training log record from which the model will
                be retrained. By default `record_id = -1`, which means this will be
                ignored. `record_id = 0` corresponds to the first trial, and
                when `record_id >= 0`, `time_budget` will be ignored.
        '''
        self.task = task
        self._validate_data(X_train, y_train, dataframe, label)

        logger.info('log file name {}'.format(log_file_name))

        best_config = None
        best_val_loss = float('+inf')
        best_estimator = None
        sample_size = None
        time_used = 0.0
        training_duration = 0
        best = None
        with training_log_reader(log_file_name) as reader:
            if record_id >= 0:
                best = reader.get_record(record_id)
            else:
                for record in reader.records():
                    time_used = record.total_search_time
                    if time_used > time_budget:
                        break
                    training_duration = time_used
                    val_loss = record.validation_loss
                    if val_loss <= best_val_loss or not train_best:
                        if val_loss == best_val_loss and train_best:
                            size = record.sample_size
                            if size > sample_size:
                                best = record
                                best_val_loss = val_loss
                                sample_size = size
                        else:
                            best = record
                            size = record.sample_size
                            best_val_loss = val_loss
                            sample_size = size
                if not training_duration:
                    from .model import BaseEstimator
                    self._trained_estimator = BaseEstimator()
                    self._trained_estimator.model = None
                    return training_duration
        if not best:
            return
        best_estimator = best.learner
        best_config = best.config
        sample_size = len(self.y_train_all) if train_full \
            else best.sample_size

        logger.info(
            'estimator = {}, config = {}, #training instances = {}'.format(
                best_estimator, best_config, sample_size))
        # Partially copied from the fit() function.
        # Initialize some attributes required for retrain_from_log.
        np.random.seed(0)
        self.task = task
        if self.task == 'classification':
            self.task = get_classification_objective(
                len(np.unique(self.y_train_all)))
            assert split_type in ["stratified", "uniform"]
            self.split_type = split_type
        else:
            self.split_type = "uniform"
        if record_id >= 0:
            eval_method = 'cv'
        elif eval_method == 'auto':
            eval_method = self._decide_eval_method(time_budget)
        self.modelcount = 0
        self._prepare_data(eval_method, split_ratio, n_splits)
        self.time_budget = None
        self.n_jobs = n_jobs
        self._trained_estimator = self._train_with_config(
            best_estimator, best_config, sample_size)[0]
        return training_duration

    def _decide_eval_method(self, time_budget):
        if self.X_val is not None:
            return 'holdout'
        nrow, dim = self.nrow, self.ndim
        if nrow * dim / 0.9 < SMALL_LARGE_THRES * (
                time_budget / 3600) and nrow < CV_HOLDOUT_THRESHOLD:
            # time allows or sampling can be used and cv is necessary
            return 'cv'
        else:
            return 'holdout'

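    # Worked example for _decide_eval_method above: with nrow=10000, dim=10,
    # and time_budget=60, nrow * dim / 0.9 ~= 111,111 is below
    # SMALL_LARGE_THRES * (60 / 3600) ~= 166,667 and nrow < CV_HOLDOUT_THRESHOLD
    # (100,000), so 'cv' is chosen; larger data or a smaller budget yields 'holdout'.
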
    def fit(self,
            X_train=None,
            y_train=None,
            dataframe=None,
            label=None,
            metric='auto',
            task='classification',
            n_jobs=-1,
            log_file_name='default.log',
            estimator_list='auto',
            time_budget=60,
            max_iter=1000000,
            sample=True,
            ensemble=False,
            eval_method='auto',
            log_type='better',
            model_history=False,
            split_ratio=SPLIT_RATIO,
            n_splits=N_SPLITS,
            log_training_metric=False,
            mem_thres=MEM_THRES,
            X_val=None,
            y_val=None,
            retrain_full=True,
            split_type="stratified",
            learner_selector='sample',
            ):
        '''Find a model for a given task.

        Args:
            X_train: A numpy array or a pandas dataframe of training data in
                shape n*m.
            y_train: A numpy array or a pandas series of labels in shape n*1.
            dataframe: A dataframe of training data including the label column.
            label: A str of the label column name.
                Note: If X_train and y_train are provided,
                dataframe and label are ignored;
                if not, dataframe and label must be provided.
            metric: A string of the metric name or a function,
                e.g., 'accuracy', 'roc_auc', 'f1', 'log_loss', 'mae', 'mse', 'r2'.
                If passing a customized metric function, the function needs to
                have the following signature

                def metric(X_test, y_test, estimator, labels, X_train, y_train):
                    return metric_to_minimize, metrics_to_log

                which returns a float number as the minimization objective,
                and a tuple of floats as the metrics to log.
            task: A string of the task type, e.g.,
                'classification', 'regression'.
            n_jobs: An integer of the number of threads for training.
            log_file_name: A string of the log file name.
            estimator_list: A list of strings for estimator names, or 'auto',
                e.g., ['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree'].
            time_budget: A float number of the time budget in seconds.
            max_iter: An integer of the maximal number of iterations.
            sample: A boolean of whether to sample the training data during
                search.
            eval_method: A string of resampling strategy, one of
                ['auto', 'cv', 'holdout'].
            split_ratio: A float of the validation data percentage for holdout.
            n_splits: An integer of the number of folds for cross-validation.
            log_type: A string of the log type, one of ['better', 'all', 'new'];
                'better' only logs configs with better loss than previous iters,
                'all' logs all the tried configs,
                'new' only logs non-redundant configs.
            model_history: A boolean of whether to keep the history of best
                models in the history property. Make sure memory is large
                enough if setting to True.
            log_training_metric: A boolean of whether to log the training
                metric for each model.
            mem_thres: A float of the memory size constraint in bytes.
            X_val: None | a numpy array or a pandas dataframe of validation data.
            y_val: None | a numpy array or a pandas series of validation labels.
        '''
        self.task = task
        self._validate_data(X_train, y_train, dataframe, label, X_val, y_val)
        self.start_time_flag = time.time()
        np.random.seed(0)
        self.learner_selector = learner_selector

        if self.task == 'classification':
            self.task = get_classification_objective(
                len(np.unique(self.y_train_all)))
            assert split_type in ["stratified", "uniform"]
            self.split_type = split_type
        else:
            self.split_type = "uniform"

        if 'auto' == estimator_list:
            estimator_list = ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree']
            if 'regression' != self.task:
                estimator_list += ['lrl1', ]
        logger.info(
            "List of ML learners in AutoML Run: {}".format(estimator_list))

        if eval_method == 'auto' or self.X_val is not None:
            eval_method = self._decide_eval_method(time_budget)
        self.eval_method = eval_method
        logger.info("Evaluation method: {}".format(eval_method))

        self.retrain_full = retrain_full and (eval_method == 'holdout'
                                              and self.X_val is None)
        self.sample = sample and (eval_method != 'cv')
        if 'auto' == metric:
            if 'binary' in task:
                metric = 'roc_auc'
            elif 'multi' in task:
                metric = 'log_loss'
            else:
                metric = 'r2'
        if metric in ['r2', 'accuracy', 'roc_auc', 'f1', 'ap']:
            error_metric = f"1-{metric}"
        elif isinstance(metric, str):
            error_metric = metric
        else:
            error_metric = 'customized metric'
        logger.info(f'Minimizing error metric: {error_metric}')

        with training_log_writer(log_file_name) as save_helper:
            self.save_helper = save_helper
            self._prepare_data(eval_method, split_ratio, n_splits)
            self._compute_with_config = partial(AutoML._compute_with_config_base,
                                                self,
                                                metric,
                                                log_training_metric)
            self.time_budget = time_budget
            self.estimator_list = estimator_list
            self.ensemble = ensemble
            self.max_iter = max_iter
            self.mem_thres = mem_thres
            self.log_type = log_type
            self.split_ratio = split_ratio
            self.save_model_history = model_history
            self.n_jobs = n_jobs
            self.search()
            logger.info("fit succeeded")

    def search(self):
        self.searchers = {}
        # initialize the searchers
        self.eti = []
        self._best_loss = float('+inf')
        self.best_train_time = 0
        self.time_from_start = 0
        self.estimator_index = -1
        self._best_iteration = 0
        self._model_history = {}
        self._config_history = {}
        self.max_iter_per_learner = 10000  # TODO
        self.iter_per_learner = dict([(e, 0) for e in self.estimator_list])
        self.fullsize = False
        self._trained_estimator = None
        if self.ensemble:
            self.best_model = {}
        for self._track_iter in range(self.max_iter):
            if self.estimator_index == -1:
                estimator = self.estimator_list[0]
            else:
                estimator = self._select_estimator(self.estimator_list)
                if not estimator:
                    break
            logger.info(f"iteration {self._track_iter}"
                        f" current learner {estimator}")
            if estimator in self.searchers:
                model = self.searchers[estimator].trained_estimator
                improved = self.searchers[estimator].search1step(
                    global_best_loss=self._best_loss,
                    retrain_full=self.retrain_full,
                    mem_thres=self.mem_thres)
            else:
                model = improved = None
                self.searchers[estimator] = ParamSearch(
                    estimator,
                    self.data_size,
                    self._compute_with_config,
                    self._train_with_config,
                    self.save_helper,
                    MIN_SAMPLE_TRAIN if self.sample else self.data_size,
                    self.task,
                    self.log_type,
                    self._config_space_info.get(estimator),
                    self._custom_size_estimate.get(estimator),
                    self.split_ratio)
                self.searchers[estimator].search_begin(self.time_budget,
                                                       self.start_time_flag)
                if self.estimator_index == -1:
                    eti_base = self._eti_ini[estimator]
                    self.eti.append(
                        self.searchers[estimator]
                        .expected_time_improvement_search())
                    for e in self.estimator_list[1:]:
                        self.eti.append(
                            self._eti_ini[e] / eti_base * self.eti[0])
                    self.estimator_index = 0
            self.time_from_start = time.time() - self.start_time_flag
            # logger.info(f"{self.searchers[estimator].sample_size}, {data_size}")
            if self.searchers[estimator].sample_size == self.data_size:
                self.iter_per_learner[estimator] += 1
                if not self.fullsize:
                    self.fullsize = True
            if self.searchers[estimator].best_loss < self._best_loss:
                self._best_loss = self.searchers[estimator].best_loss
                self._best_estimator = estimator
                self.best_train_time = self.searchers[estimator].train_time
                self._config_history[self._track_iter] = (
                    estimator,
                    self.searchers[estimator].best_config[0],
                    self.time_from_start)
                if self.save_model_history:
                    self._model_history[self._track_iter] = self.searchers[
                        estimator].trained_estimator.model
                elif self._trained_estimator:
                    del self._trained_estimator
                    self._trained_estimator = None
                self._trained_estimator = self.searchers[
                    estimator].trained_estimator
                self._best_iteration = self._track_iter
            if model and improved and not self.save_model_history:
                model.cleanup()

            logger.info(
                " at {:.1f}s,\tbest {}'s error={:.4f},\tbest {}'s error={:.4f}".format(
                    self.time_from_start,
                    estimator,
                    self.searchers[estimator].best_loss,
                    self._best_estimator,
                    self._best_loss))

            if self.time_from_start >= self.time_budget:
                break
            if self.ensemble:
                time_left = self.time_budget - self.time_from_start
                time_ensemble = self.searchers[self._best_estimator].train_time
                if time_left < time_ensemble < 2 * time_left:
                    break
            if self.searchers[
                    estimator].train_time > self.time_budget - self.time_from_start:
                self.iter_per_learner[estimator] = self.max_iter_per_learner

        # Add a checkpoint for the current best config to the log.
        self.save_helper.checkpoint()

        if self.searchers:
            self._selected = self.searchers[self._best_estimator]
            self._trained_estimator = self._selected.trained_estimator
            self.modelcount = sum(self.searchers[estimator].model_count
                                  for estimator in self.searchers)
            logger.info(self._trained_estimator.model)
            if self.ensemble:
                searchers = list(self.searchers.items())
                searchers.sort(key=lambda x: x[1].best_loss)
                estimators = [(x[0], x[1].trained_estimator) for x in searchers[
                    :2]]
                estimators += [(x[0], x[1].trained_estimator) for x in searchers[
                    2:] if x[1].best_loss < 4 * self._selected.best_loss]
                logger.info(estimators)
                if self.task != "regression":
                    from sklearn.ensemble import StackingClassifier as Stacker
                    for e in estimators:
                        e[1]._estimator_type = 'classifier'
                else:
                    from sklearn.ensemble import StackingRegressor as Stacker
                best_m = self._trained_estimator
                stacker = Stacker(estimators, best_m, n_jobs=self.n_jobs,
                                  passthrough=True)
                stacker.fit(self.X_train_all, self.y_train_all)
                self._trained_estimator = stacker
                self._trained_estimator.model = stacker
        else:
            self._selected = self._trained_estimator = None
            self.modelcount = 0

    def __del__(self):
        if hasattr(self, '_trained_estimator') and self._trained_estimator \
                and hasattr(self._trained_estimator, 'cleanup'):
            self._trained_estimator.cleanup()
            del self._trained_estimator

    def _select_estimator(self, estimator_list):
        time_left = self.time_budget - self.time_from_start
        if self.best_train_time < time_left < 2 * self.best_train_time:
            best_searcher = self.searchers[self._best_estimator]
            config_sig = best_searcher.get_hist_config_sig(
                best_searcher.sample_size_full,
                best_searcher.best_config[0])
            if config_sig not in best_searcher.config_tried:
                # train the best config on the full data
                return self._best_estimator
        if self.learner_selector == 'roundrobin':
            self.estimator_index += 1
            if self.estimator_index == len(estimator_list):
                self.estimator_index = 0
            return estimator_list[self.estimator_index]
        min_expected_time, selected = np.inf, None
        inv = []
        for i, estimator in enumerate(estimator_list):
            if estimator in self.searchers:
                searcher = self.searchers[estimator]
                if self.iter_per_learner[estimator] >= self.max_iter_per_learner:
                    inv.append(0)
                    continue
                eti_searcher = min(2 * searcher.train_time,
                                   searcher.expected_time_improvement_search())
                gap = searcher.best_loss - self._best_loss
                if gap > 0 and not self.ensemble:
                    delta_loss = searcher.old_loss - searcher.new_loss
                    delta_time = searcher.old_loss_time + \
                        searcher.new_loss_time - searcher.old_train_time
                    speed = delta_loss / float(delta_time)
                    try:
                        expected_time = max(gap / speed, searcher.train_time)
                    except ZeroDivisionError:
                        warnings.warn("ZeroDivisionError: need to debug, "
                                      "speed: {0}, "
                                      "old_loss: {1}, "
                                      "new_loss: {2}"
                                      .format(speed,
                                              searcher.old_loss,
                                              searcher.new_loss))
                        expected_time = 0.0
                    expected_time = 2 * max(expected_time, eti_searcher)
                else:
                    expected_time = eti_searcher
                if expected_time == 0:
                    expected_time = 1e-10
                inv.append(1 / expected_time)
            else:
                expected_time = self.eti[i]
                inv.append(0)
            if expected_time < min_expected_time:
                min_expected_time = expected_time
                selected = estimator
        if len(self.searchers) < len(estimator_list) or not selected:
            if selected not in self.searchers:
                # print('select', selected, 'eti', min_expected_time)
                return selected
        s = sum(inv)
        p = np.random.random()
        q = 0
        for i in range(len(inv)):
            if inv[i]:
                q += inv[i] / s
                if p < q:
                    return estimator_list[i]
flaml/config.py
@@ -0,0 +1,31 @@
'''!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
'''

N_SPLITS = 5
RANDOM_SEED = 1
SPLIT_RATIO = 0.1
HISTORY_SIZE = 10000000
MEM_THRES = 4 * (1024 ** 3)
SMALL_LARGE_THRES = 10000000
MIN_SAMPLE_TRAIN = 10000
MIN_SAMPLE_VAL = 10000
CV_HOLDOUT_THRESHOLD = 100000

BASE_Const = 2
BASE_LOWER_BOUND = 2 ** (0.01)

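# Initial relative training cost of each learner with respect to lgbm
# (cf. the cost_relative2lgbm argument of AutoML.add_learner).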
ETI_INI = {
    'lgbm': 1,
    'xgboost': 1.6,
    'xgboost_nb': 1.6,
    'rf': 2,
    'lrl1': 160,
    'lrl2': 25,
    'linear_svc': 16,
    'kneighbor': 30,
    'catboost': 15,
    'extra_tree': 1.9,
}
flaml/data.py
@@ -0,0 +1,256 @@
'''!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
'''

import numpy as np
from scipy.sparse import vstack, issparse
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from .training_log import training_log_reader


def load_openml_dataset(dataset_id, data_dir=None, random_state=0):
    '''Load dataset from OpenML.

    If the file is not cached locally, download it from OpenML.

    Args:
        dataset_id: An integer of the dataset id in openml.
        data_dir: A string of the path to store and load the data.
        random_state: An integer of the random seed for splitting data.

    Returns:
        X_train: A 2d numpy array of training data.
        X_test: A 2d numpy array of test data.
        y_train: A 1d numpy array of labels for training data.
        y_test: A 1d numpy array of labels for test data.
    '''
    import os
    import openml
    import pickle
    from sklearn.model_selection import train_test_split

    filename = 'openml_ds' + str(dataset_id) + '.pkl'
    filepath = os.path.join(data_dir, filename)
    if os.path.isfile(filepath):
        print('load dataset from', filepath)
        with open(filepath, 'rb') as f:
            dataset = pickle.load(f)
    else:
        print('download dataset from openml')
        dataset = openml.datasets.get_dataset(dataset_id)
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        with open(filepath, 'wb') as f:
            pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
    print('Dataset name:', dataset.name)
    X, y, *__ = dataset.get_data(
        target=dataset.default_target_attribute, dataset_format='array')
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=random_state)
    print(
        'X_train.shape: {}, y_train.shape: {};\nX_test.shape: {}, y_test.shape: {}'.format(
            X_train.shape, y_train.shape, X_test.shape, y_test.shape,
        )
    )
    return X_train, X_test, y_train, y_test

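# A usage sketch (the dataset id and directory are hypothetical):
#     X_train, X_test, y_train, y_test = load_openml_dataset(
#         dataset_id=1169, data_dir='./data/')
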
def load_openml_task(task_id, data_dir):
|
||||
'''Load task from open ML.
|
||||
|
||||
Use the first fold of the task.
|
||||
If the file is not cached locally, download it from open ML.
|
||||
|
||||
Args:
|
||||
task_id: An integer of the task id in openml
|
||||
data_dir: A string of the path to store and load the data
|
||||
|
||||
Returns:
|
||||
X_train: A 2d numpy array of training data
|
||||
X_test: A 2d numpy array of test data
|
||||
y_train: A 1d numpy arrya of labels for training data
|
||||
y_test: A 1d numpy arrya of labels for test data
|
||||
'''
|
||||
import os
|
||||
import openml
|
||||
import pickle
|
||||
task = openml.tasks.get_task(task_id)
|
||||
filename = 'openml_task' + str(task_id) + '.pkl'
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
if os.path.isfile(filepath):
|
||||
print('load dataset from', filepath)
|
||||
with open(filepath, 'rb') as f:
|
||||
dataset = pickle.load(f)
|
||||
else:
|
||||
print('download dataset from openml')
|
||||
dataset = task.get_dataset()
|
||||
with open(filepath, 'wb') as f:
|
||||
pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
|
||||
X, y, _, _ = dataset.get_data(task.target_name, dataset_format='array')
|
||||
train_indices, test_indices = task.get_train_test_split_indices(
|
||||
repeat=0,
|
||||
fold=0,
|
||||
sample=0,
|
||||
)
|
||||
X_train = X[train_indices]
|
||||
y_train = y[train_indices]
|
||||
X_test = X[test_indices]
|
||||
y_test = y[test_indices]
|
||||
print(
|
||||
'X_train.shape: {}, y_train.shape: {},\nX_test.shape: {}, y_test.shape: {}'.format(
|
||||
X_train.shape, y_train.shape, X_test.shape, y_test.shape,
|
||||
)
|
||||
)
|
||||
return X_train, X_test, y_train, y_test
|
||||
|
||||
|
||||
def get_output_from_log(filename, time_budget):
|
||||
'''Get output from log file
|
||||
|
||||
Args:
|
||||
filename: A string of the log file name
|
||||
time_budget: A float of the time budget in seconds
|
||||
|
||||
Returns:
|
||||
training_time_list: A list of the finished time of each logged iter
|
||||
best_error_list:
|
||||
A list of the best validation error after each logged iter
|
||||
error_list: A list of the validation error of each logged iter
|
||||
config_list:
|
||||
A list of the estimator, sample size and config of each logged iter
|
||||
logged_metric_list: A list of the logged metric of each logged iter
|
||||
'''
|
||||
import ast
|
||||
|
||||
best_config = None
|
||||
best_learner = None
|
||||
best_val_loss = float('+inf')
|
||||
training_duration = 0.0
|
||||
|
||||
training_time_list = []
|
||||
config_list = []
|
||||
best_error_list = []
|
||||
error_list = []
|
||||
logged_metric_list = []
|
||||
best_config_list = []
|
||||
with training_log_reader(filename) as reader:
|
||||
for record in reader.records():
|
||||
time_used = record.total_search_time
|
||||
training_duration = time_used
|
||||
val_loss = record.validation_loss
|
||||
config = record.config
|
||||
learner = record.learner.split('_')[0]
|
||||
sample_size = record.sample_size
|
||||
train_loss = record.logged_metric
|
||||
|
||||
if time_used < time_budget:
|
||||
if val_loss < best_val_loss:
|
||||
best_val_loss = val_loss
|
||||
best_config = config
|
||||
best_learner = learner
|
||||
best_config_list.append(best_config)
|
||||
training_time_list.append(training_duration)
|
||||
best_error_list.append(best_val_loss)
|
||||
logged_metric_list.append(train_loss)
|
||||
error_list.append(val_loss)
|
||||
config_list.append({"Current Learner": learner,
|
||||
"Current Sample": sample_size,
|
||||
"Current Hyper-parameters": record.config,
|
||||
"Best Learner": best_learner,
|
||||
"Best Hyper-parameters": best_config})
|
||||
|
||||
return (training_time_list, best_error_list, error_list, config_list,
|
||||
logged_metric_list)
|
||||
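# Illustrative usage only: a minimal sketch of plotting the best validation
# error over time from a training log, assuming matplotlib is installed and
# 'flaml.log' was produced by an earlier run with a 60s budget.
def _example_plot_learning_curve():
    import matplotlib.pyplot as plt
    time_list, best_error_list, _, _, _ = get_output_from_log(
        filename='flaml.log', time_budget=60)
    plt.step(time_list, best_error_list, where='post')
    plt.xlabel('wall clock time (s)')
    plt.ylabel('best validation error')
    plt.show()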
|
||||
|
||||
def concat(X1, X2):
|
||||
'''concatenate two matrices vertically
|
||||
'''
|
||||
if isinstance(X1, pd.DataFrame) or isinstance(X1, pd.Series):
|
||||
if isinstance(X1, pd.DataFrame):
|
||||
cat_columns = X1.select_dtypes(
|
||||
include='category').columns
|
||||
df = pd.concat([X1, X2], sort=False)
|
||||
df.reset_index(drop=True, inplace=True)
|
||||
if isinstance(X1, pd.DataFrame) and len(cat_columns):
|
||||
df[cat_columns] = df[cat_columns].astype('category')
|
||||
return df
|
||||
if issparse(X1):
|
||||
return vstack((X1, X2))
|
||||
else:
|
||||
return np.concatenate([X1, X2])
|
||||
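# Illustrative usage only: concat() dispatches on the input type; a sketch
# with plain numpy arrays and with scipy sparse matrices.
def _example_concat():
    import numpy as np
    from scipy.sparse import csr_matrix
    dense = concat(np.ones((2, 3)), np.zeros((1, 3)))        # (3, 3) ndarray
    sparse = concat(csr_matrix((2, 3)), csr_matrix((1, 3)))  # (3, 3) sparse
    return dense, sparse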
|
||||
|
||||
class DataTransformer:
|
||||
'''transform X, y
|
||||
'''
|
||||
|
||||
def fit_transform(self, X, y, objective):
|
||||
if isinstance(X, pd.DataFrame):
|
||||
X = X.copy()
|
||||
n = X.shape[0]
|
||||
cat_columns, num_columns = [], []
|
||||
for column in X.columns:
|
||||
if X[column].dtype.name in ('object', 'category'):
|
||||
if X[column].nunique() == 1 or X[column].nunique(
|
||||
dropna=True) == n - X[column].isnull().sum():
|
||||
X.drop(columns=column, inplace=True)
|
||||
elif X[column].dtype.name == 'category':
|
||||
current_categories = X[column].cat.categories
|
||||
if '__NAN__' not in current_categories:
|
||||
X[column] = X[column].cat.add_categories(
|
||||
'__NAN__').fillna('__NAN__')
|
||||
cat_columns.append(column)
|
||||
else:
|
||||
X[column].fillna('__NAN__', inplace=True)
|
||||
cat_columns.append(column)
|
||||
else:
|
||||
# print(X[column].dtype.name)
|
||||
if X[column].nunique(dropna=True) < 2:
|
||||
X.drop(columns=column, inplace=True)
|
||||
else:
|
||||
X[column].fillna(np.nan, inplace=True)
|
||||
num_columns.append(column)
|
||||
X = X[cat_columns + num_columns]
|
||||
if cat_columns:
|
||||
X[cat_columns] = X[cat_columns].astype('category')
|
||||
if num_columns:
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.compose import ColumnTransformer
|
||||
self.transformer = ColumnTransformer([(
|
||||
'continuous',
|
||||
SimpleImputer(missing_values=np.nan, strategy='median'),
|
||||
num_columns)])
|
||||
X[num_columns] = self.transformer.fit_transform(X)
|
||||
self.cat_columns, self.num_columns = cat_columns, num_columns
|
||||
|
||||
if objective == 'regression':
|
||||
self.label_transformer = None
|
||||
else:
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
self.label_transformer = LabelEncoder()
|
||||
y = self.label_transformer.fit_transform(y)
|
||||
return X, y
|
||||
|
||||
def transform(self, X):
|
||||
if isinstance(X, pd.DataFrame):
|
||||
cat_columns, num_columns = self.cat_columns, self.num_columns
|
||||
X = X[cat_columns + num_columns].copy()
|
||||
for column in cat_columns:
|
||||
# print(column, X[column].dtype.name)
|
||||
if X[column].dtype.name == 'object':
|
||||
X[column].fillna('__NAN__', inplace=True)
|
||||
elif X[column].dtype.name == 'category':
|
||||
current_categories = X[column].cat.categories
|
||||
if '__NAN__' not in current_categories:
|
||||
X[column] = X[column].cat.add_categories(
|
||||
'__NAN__').fillna('__NAN__')
|
||||
if cat_columns:
|
||||
X[cat_columns] = X[cat_columns].astype('category')
|
||||
if num_columns:
|
||||
X[num_columns].fillna(np.nan, inplace=True)
|
||||
X[num_columns] = self.transformer.transform(X)
|
||||
return X
|
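# Illustrative usage only: a minimal sketch of the fit/transform contract of
# DataTransformer. The column names and values below are made up.
def _example_data_transformer():
    dt = DataTransformer()
    X = pd.DataFrame({'color': ['red', 'blue', 'red', None],
                      'size': [1.0, None, 3.0, 4.0]})
    y = pd.Series(['a', 'b', 'a', 'b'])
    X1, y1 = dt.fit_transform(X, y, objective='binary:logistic')
    # the same encoding and imputation are replayed on new data
    X2 = dt.transform(X.head(2))
    return X1, y1, X2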
|
@ -0,0 +1,241 @@
|
|||
'''!
|
||||
* Copyright (c) 2020 Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License.
|
||||
'''
|
||||
|
||||
from .model import *
|
||||
import time
|
||||
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
|
||||
accuracy_score, mean_absolute_error, log_loss, average_precision_score, \
|
||||
f1_score
|
||||
import numpy as np
|
||||
from sklearn.model_selection import RepeatedStratifiedKFold
|
||||
|
||||
|
||||
def get_estimator_class(objective_name, estimator_name):
|
||||
''' when adding a new learner, need to add an elif branch '''
|
||||
|
||||
|
||||
if 'xgboost' in estimator_name:
|
||||
if 'regression' in objective_name:
|
||||
estimator_class = XGBoostEstimator
|
||||
else:
|
||||
estimator_class = XGBoostSklearnEstimator
|
||||
elif 'rf' in estimator_name:
|
||||
estimator_class = RandomForestEstimator
|
||||
elif 'lgbm' in estimator_name:
|
||||
estimator_class = LGBMEstimator
|
||||
elif 'lrl1' in estimator_name:
|
||||
estimator_class = LRL1Classifier
|
||||
elif 'lrl2' in estimator_name:
|
||||
estimator_class = LRL2Classifier
|
||||
elif 'catboost' in estimator_name:
|
||||
estimator_class = CatBoostEstimator
|
||||
elif 'extra_tree' in estimator_name:
|
||||
estimator_class = ExtraTreeEstimator
|
||||
elif 'kneighbor' in estimator_name:
|
||||
estimator_class = KNeighborsEstimator
|
||||
else:
|
||||
raise ValueError(estimator_name + ' is not a built-in learner. '
|
||||
'Please use AutoML.add_learner() to add a customized learner.')
|
||||
return estimator_class
|
||||
|
||||
|
||||
def sklearn_metric_loss_score(metric_name, y_predict, y_true, labels=None):
|
||||
'''Loss using the specified metric
|
||||
|
||||
Args:
|
||||
metric_name: A string of the metric name, one of
|
||||
'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
|
||||
'f1', 'ap'
|
||||
y_predict: A 1d or 2d numpy array of the predictions which can be
|
||||
used to calculate the metric. E.g., 2d for log_loss and 1d
|
||||
for others.
|
||||
y_true: A 1d numpy array of the true labels
|
||||
labels: A 1d numpy array of the unique labels
|
||||
|
||||
Returns:
|
||||
score: A float number of the loss, the lower the better
|
||||
'''
|
||||
metric_name = metric_name.lower()
|
||||
if 'r2' in metric_name:
|
||||
score = 1.0 - r2_score(y_true, y_predict)
|
||||
elif metric_name == 'rmse':
|
||||
score = np.sqrt(mean_squared_error(y_true, y_predict))
|
||||
elif metric_name == 'mae':
|
||||
score = mean_absolute_error(y_true, y_predict)
|
||||
elif metric_name == 'mse':
|
||||
score = mean_squared_error(y_true, y_predict)
|
||||
elif metric_name == 'accuracy':
|
||||
score = 1.0 - accuracy_score(y_true, y_predict)
|
||||
elif 'roc_auc' in metric_name:
|
||||
score = 1.0 - roc_auc_score(y_true, y_predict)
|
||||
elif 'log_loss' in metric_name:
|
||||
score = log_loss(y_true, y_predict, labels=labels)
|
||||
elif 'f1' in metric_name:
|
||||
score = 1 - f1_score(y_true, y_predict)
|
||||
elif 'ap' in metric_name:
|
||||
score = 1 - average_precision_score(y_true, y_predict)
|
||||
else:
|
||||
raise ValueError(metric_name + ' is not a built-in metric; '
|
||||
'currently built-in metrics are: '
|
||||
'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, ap. '
|
||||
'Please pass a customized metric function to AutoML.fit(metric=func).')
|
||||
return score
|
||||
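# Illustrative usage only: all built-in metrics are expressed as losses
# (lower is better), e.g. 'accuracy' returns 1 - accuracy.
def _example_metric_loss():
    y_true = np.array([0, 1, 1, 0])
    y_pred = np.array([0, 1, 0, 0])
    loss = sklearn_metric_loss_score('accuracy', y_pred, y_true)
    assert abs(loss - 0.25) < 1e-9  # 3 of 4 correct -> loss 0.25
    return loss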
|
||||
|
||||
def get_y_pred(estimator, X, eval_metric, obj):
|
||||
if eval_metric in ['roc_auc', 'ap'] and 'binary' in obj:
|
||||
y_pred_classes = estimator.predict_proba(X)
|
||||
y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
|
||||
elif eval_metric in ['log_loss', 'roc_auc']:
|
||||
y_pred = estimator.predict_proba(X)
|
||||
else:
|
||||
y_pred = estimator.predict(X)
|
||||
return y_pred
|
||||
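# Illustrative usage only: for 'roc_auc'/'ap' on binary tasks the positive-
# class probability is used; a sketch with a plain sklearn classifier.
def _example_get_y_pred():
    from sklearn.linear_model import LogisticRegression
    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])
    clf = LogisticRegression().fit(X, y)
    scores = get_y_pred(clf, X, eval_metric='roc_auc', obj='binary:logistic')
    assert scores.ndim == 1  # probabilities of the positive class only
    return scores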
|
||||
|
||||
def get_test_loss(estimator, X_train, y_train, X_test, y_test, eval_metric, obj,
|
||||
labels=None, budget=None, train_loss=False):
|
||||
start = time.time()
|
||||
train_time = estimator.fit(X_train, y_train, budget)
|
||||
if isinstance(eval_metric, str):
|
||||
test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
|
||||
test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
|
||||
labels)
|
||||
if train_loss is not False:
|
||||
test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
|
||||
train_loss = sklearn_metric_loss_score(eval_metric, test_pred_y,
|
||||
y_train, labels)
|
||||
else: # customized metric function
|
||||
test_loss, train_loss = eval_metric(
|
||||
X_test, y_test, estimator, labels, X_train, y_train)
|
||||
train_time = time.time()-start
|
||||
return test_loss, train_time, train_loss
|
||||
|
||||
|
||||
def train_model(estimator, X_train, y_train, budget):
|
||||
train_time = estimator.fit(X_train, y_train, budget)
|
||||
return train_time
|
||||
|
||||
|
||||
def evaluate_model(estimator, X_train, y_train, X_val, y_val, budget, kf,
|
||||
objective_name, eval_method, eval_metric, best_val_loss, train_loss=False):
|
||||
if 'holdout' in eval_method:
|
||||
val_loss, train_loss, train_time = evaluate_model_holdout(
|
||||
estimator, X_train, y_train, X_val, y_val, budget,
|
||||
objective_name, eval_metric, best_val_loss, train_loss=train_loss)
|
||||
else:
|
||||
val_loss, train_loss, train_time = evaluate_model_CV(
|
||||
estimator, X_train, y_train, budget, kf, objective_name,
|
||||
eval_metric, best_val_loss, train_loss=train_loss)
|
||||
return val_loss, train_loss, train_time
|
||||
|
||||
|
||||
def evaluate_model_holdout(estimator, X_train, y_train, X_val, y_val, budget,
|
||||
objective_name, eval_metric, best_val_loss, train_loss=False):
|
||||
val_loss, train_time, train_loss = get_test_loss(
|
||||
estimator, X_train, y_train, X_val, y_val, eval_metric, objective_name,
|
||||
budget=budget, train_loss=train_loss)
|
||||
return val_loss, train_loss, train_time
|
||||
|
||||
|
||||
def evaluate_model_CV(estimator, X_train_all, y_train_all, budget, kf,
|
||||
objective_name, eval_metric, best_val_loss, train_loss=False):
|
||||
start_time = time.time()
|
||||
total_val_loss = total_train_loss = 0
|
||||
train_time = 0
|
||||
valid_fold_num = 0
|
||||
n = kf.get_n_splits()
|
||||
X_train_split, y_train_split = X_train_all, y_train_all
|
||||
if objective_name == 'regression':
|
||||
labels = None
|
||||
else:
|
||||
labels = np.unique(y_train_all)
|
||||
|
||||
if isinstance(kf, RepeatedStratifiedKFold):
|
||||
kf = kf.split(X_train_split, y_train_split)
|
||||
else:
|
||||
kf = kf.split(X_train_split)
|
||||
rng = np.random.RandomState(2020)
|
||||
val_loss_list = []
|
||||
budget_per_train = budget / (n+1)
|
||||
for train_index, val_index in kf:
|
||||
train_index = rng.permutation(train_index)
|
||||
if isinstance(X_train_all, pd.DataFrame):
|
||||
X_train, X_val = X_train_split.iloc[
|
||||
train_index], X_train_split.iloc[val_index]
|
||||
else:
|
||||
X_train, X_val = X_train_split[
|
||||
train_index], X_train_split[val_index]
|
||||
if isinstance(y_train_all, pd.Series):
|
||||
y_train, y_val = y_train_split.iloc[
|
||||
train_index], y_train_split.iloc[val_index]
|
||||
else:
|
||||
y_train, y_val = y_train_split[
|
||||
train_index], y_train_split[val_index]
|
||||
estimator.cleanup()
|
||||
val_loss_i, train_time_i, train_loss_i = get_test_loss(
|
||||
estimator, X_train, y_train, X_val, y_val, eval_metric,
|
||||
objective_name, labels, budget_per_train, train_loss=train_loss)
|
||||
valid_fold_num += 1
|
||||
total_val_loss += val_loss_i
|
||||
if train_loss is not False:
|
||||
total_train_loss += train_loss_i
|
||||
train_time += train_time_i
|
||||
if valid_fold_num == n:
|
||||
val_loss_list.append(total_val_loss/valid_fold_num)
|
||||
total_val_loss = valid_fold_num = 0
|
||||
elif time.time() - start_time >= budget:
|
||||
val_loss_list.append(total_val_loss/valid_fold_num)
|
||||
break
|
||||
val_loss = np.max(val_loss_list)
|
||||
if train_loss is not False: train_loss = total_train_loss / n
|
||||
budget -= time.time() - start_time
|
||||
if val_loss < best_val_loss and budget > budget_per_train:
|
||||
estimator.cleanup()
|
||||
train_time_full = estimator.fit(X_train_all, y_train_all, budget)
|
||||
train_time += train_time_full
|
||||
return val_loss, train_loss, train_time
|
||||
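# Illustrative usage only: a minimal sketch of cross-validated evaluation of
# a built-in learner; the KFold splitter and budget values are arbitrary.
def _example_evaluate_cv():
    from sklearn.model_selection import KFold
    rng = np.random.RandomState(0)
    X, y = rng.rand(60, 3), rng.randint(0, 2, 60)
    est = get_estimator_class('binary:logistic', 'lgbm')(
        objective_name='binary:logistic')
    val_loss, train_loss, train_time = evaluate_model_CV(
        est, X, y, budget=10, kf=KFold(n_splits=3),
        objective_name='binary:logistic', eval_metric='log_loss',
        best_val_loss=np.inf)
    return val_loss, train_time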
|
||||
|
||||
def compute_estimator(X_train, y_train, X_val, y_val, budget, kf,
|
||||
config_dic, objective_name, estimator_name, eval_method, eval_metric,
|
||||
best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False):
|
||||
start_time = time.time()
|
||||
estimator_class = estimator_class or get_estimator_class(
|
||||
objective_name, estimator_name)
|
||||
estimator = estimator_class(
|
||||
**config_dic, objective_name=objective_name, n_jobs=n_jobs)
|
||||
val_loss, train_loss, train_time = evaluate_model(
|
||||
estimator, X_train, y_train, X_val, y_val, budget, kf, objective_name,
|
||||
eval_method, eval_metric, best_val_loss, train_loss=train_loss)
|
||||
all_time = time.time() - start_time
|
||||
return estimator, val_loss, train_loss, train_time, all_time
|
||||
|
||||
|
||||
def train_estimator(X_train, y_train, config_dic, objective_name,
|
||||
estimator_name, n_jobs=1, estimator_class=None, budget=None):
|
||||
start_time = time.time()
|
||||
estimator_class = estimator_class or get_estimator_class(objective_name,
|
||||
estimator_name)
|
||||
estimator = estimator_class(**config_dic, objective_name=objective_name,
|
||||
n_jobs=n_jobs)
|
||||
if X_train is not None:
|
||||
train_time = train_model(estimator, X_train, y_train, budget)
|
||||
else:
|
||||
estimator = estimator.estimator_class(**estimator.params)
|
||||
train_time = time.time() - start_time
|
||||
return estimator, train_time
|
||||
|
||||
|
||||
def get_classification_objective(num_labels: int) -> str:
|
||||
if num_labels == 2:
|
||||
objective_name = 'binary:logistic'
|
||||
else:
|
||||
objective_name = 'multi:softmax'
|
||||
return objective_name
|
||||
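# Illustrative check only: the objective string is chosen from the number of
# distinct labels.
def _example_objective_name():
    assert get_classification_objective(2) == 'binary:logistic'
    assert get_classification_objective(5) == 'multi:softmax'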
|
||||
|
|
@ -0,0 +1,515 @@
|
|||
'''!
|
||||
* Copyright (c) 2020 Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
from xgboost import XGBClassifier, XGBRegressor
|
||||
import time
|
||||
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from lightgbm import LGBMClassifier, LGBMRegressor
|
||||
import scipy.sparse
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class BaseEstimator:
|
||||
'''The abstract class for all learners
|
||||
|
||||
Typical example:
|
||||
XGBoostEstimator: for regression
|
||||
XGBoostSklearnEstimator: for classification
|
||||
LGBMEstimator, RandomForestEstimator, LRL1Classifier, LRL2Classifier:
|
||||
for both regression and classification
|
||||
'''
|
||||
|
||||
def __init__(self, objective_name='binary:logistic',
|
||||
**params):
|
||||
'''Constructor
|
||||
|
||||
Args:
|
||||
objective_name: A string of the objective name, one of
|
||||
'binary:logistic', 'multi:softmax', 'regression'
|
||||
n_jobs: An integer of the number of parallel threads
|
||||
params: A dictionary of the hyperparameter names and values
|
||||
'''
|
||||
self.params = params
|
||||
self.estimator_class = None
|
||||
self.objective_name = objective_name
|
||||
if '_estimator_type' in params:
|
||||
self._estimator_type = params['_estimator_type']
|
||||
else:
|
||||
self._estimator_type = "regressor" if objective_name=='regression' \
|
||||
else "classifier"
|
||||
|
||||
def get_params(self, deep=False):
|
||||
params = self.params.copy()
|
||||
params["objective_name"] = self.objective_name
|
||||
if hasattr(self, '_estimator_type'):
|
||||
params['_estimator_type'] = self._estimator_type
|
||||
return params
|
||||
|
||||
@property
|
||||
def classes_(self):
|
||||
return self.model.classes_
|
||||
|
||||
def preprocess(self, X):
|
||||
return X
|
||||
|
||||
def _fit(self, X_train, y_train):
|
||||
|
||||
current_time = time.time()
|
||||
X_train = self.preprocess(X_train)
|
||||
model = self.estimator_class(**self.params)
|
||||
model.fit(X_train, y_train)
|
||||
train_time = time.time() - current_time
|
||||
self.model = model
|
||||
return train_time
|
||||
|
||||
def fit(self, X_train, y_train, budget=None):
|
||||
'''Train the model from given training data
|
||||
|
||||
Args:
|
||||
X_train: A numpy array of training data in shape n*m
|
||||
y_train: A numpy array of labels in shape n*1
|
||||
budget: A float of the time budget in seconds
|
||||
|
||||
Returns:
|
||||
train_time: A float of the training time in seconds
|
||||
'''
|
||||
return self._fit(X_train, y_train)
|
||||
|
||||
def predict(self, X_test):
|
||||
'''Predict label from features
|
||||
|
||||
Args:
|
||||
X_test: A numpy array of featurized instances, shape n*m
|
||||
|
||||
Returns:
|
||||
A numpy array of shape n*1.
|
||||
Each element is the label for an instance
|
||||
'''
|
||||
X_test = self.preprocess(X_test)
|
||||
return self.model.predict(X_test)
|
||||
|
||||
def predict_proba(self, X_test):
|
||||
'''Predict the probability of each class from features
|
||||
|
||||
Only works for classification problems
|
||||
|
||||
Args:
|
||||
model: An object of trained model with method predict_proba()
|
||||
X_test: A numpy array of featurized instances, shape n*m
|
||||
|
||||
Returns:
|
||||
A numpy array of shape n*c. c is the # classes
|
||||
Each element at (i,j) is the probability for instance i to be in
|
||||
class j
|
||||
'''
|
||||
if 'regression' in self.objective_name:
|
||||
raise ValueError(
|
||||
'Regression tasks do not support predict_proba')
|
||||
else:
|
||||
X_test = self.preprocess(X_test)
|
||||
return self.model.predict_proba(X_test)
|
||||
|
||||
def cleanup(self): pass
|
||||
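# Illustrative usage only: a minimal sketch of a custom learner built on
# BaseEstimator. The choice of sklearn Ridge and its single hyperparameter
# are made up; the built-in learners below follow the same pattern.
class _ExampleRidgeEstimator(BaseEstimator):

    def __init__(self, objective_name='regression', alpha=1.0, **params):
        super().__init__(objective_name, **params)
        from sklearn.linear_model import Ridge
        self.params = {'alpha': float(alpha)}
        self.estimator_class = Ridge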
|
||||
|
||||
class SKLearnEstimator(BaseEstimator):
|
||||
|
||||
|
||||
def preprocess(self, X):
|
||||
if isinstance(X, pd.DataFrame):
|
||||
X = X.copy()
|
||||
cat_columns = X.select_dtypes(include=['category']).columns
|
||||
X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)
|
||||
return X
|
||||
|
||||
|
||||
class LGBMEstimator(BaseEstimator):
|
||||
|
||||
|
||||
def __init__(self, objective_name='binary:logistic', n_jobs=1,
|
||||
n_estimators=2, max_leaves=2, min_child_weight=1e-3, learning_rate=0.1,
|
||||
subsample=1.0, reg_lambda=1.0, reg_alpha=0.0, colsample_bylevel=1.0,
|
||||
colsample_bytree=1.0, log_max_bin=8, **params):
|
||||
super().__init__(objective_name, **params)
|
||||
# Default: 'regression' for LGBMRegressor,
|
||||
# 'binary' or 'multiclass' for LGBMClassifier
|
||||
if 'regression' in objective_name:
|
||||
final_objective_name = 'regression'
|
||||
elif 'binary' in objective_name:
|
||||
final_objective_name = 'binary'
|
||||
elif 'multi' in objective_name:
|
||||
final_objective_name = 'multiclass'
|
||||
else:
|
||||
final_objective_name = 'regression'
|
||||
self.params = {
|
||||
"n_estimators": int(round(n_estimators)),
|
||||
"num_leaves": params[
|
||||
'num_leaves'] if 'num_leaves' in params else int(
|
||||
round(max_leaves)),
|
||||
'objective': params[
|
||||
"objective"] if "objective" in params else final_objective_name,
|
||||
'n_jobs': n_jobs,
|
||||
'learning_rate': float(learning_rate),
|
||||
'reg_alpha': float(reg_alpha),
|
||||
'reg_lambda': float(reg_lambda),
|
||||
'min_child_weight': float(min_child_weight),
|
||||
'colsample_bytree': float(colsample_bytree),
|
||||
'subsample': float(subsample),
|
||||
}
|
||||
self.params['max_bin'] = params.get(
|
||||
'max_bin', (1 << int(round(log_max_bin))) - 1)
|
||||
if 'regression' in objective_name:
|
||||
self.estimator_class = LGBMRegressor
|
||||
else:
|
||||
self.estimator_class = LGBMClassifier
|
||||
self.time_per_iter = None
|
||||
self.train_size = 0
|
||||
|
||||
def preprocess(self, X):
|
||||
if not isinstance(X, pd.DataFrame) and scipy.sparse.issparse(X) \
|
||||
and np.issubdtype(X.dtype, np.integer):
|
||||
X = X.astype(float)
|
||||
return X
|
||||
|
||||
def fit(self, X_train, y_train, budget=None):
|
||||
start_time = time.time()
|
||||
n_iter = self.params["n_estimators"]
|
||||
if (not self.time_per_iter or
|
||||
abs(self.train_size - X_train.shape[0]) > 4) and budget is not None:
|
||||
self.params["n_estimators"] = 1
|
||||
self.t1 = self._fit(X_train, y_train)
|
||||
if self.t1 >= budget:
|
||||
self.params["n_estimators"] = n_iter
|
||||
return self.t1
|
||||
self.params["n_estimators"] = 4
|
||||
self.t2 = self._fit(X_train, y_train)
|
||||
self.time_per_iter = (self.t2 - self.t1) / (
|
||||
self.params["n_estimators"] - 1) if self.t2 > self.t1 \
|
||||
else self.t1 if self.t1 else 0.001
|
||||
self.train_size = X_train.shape[0]
|
||||
if self.t1+self.t2>=budget or n_iter==self.params["n_estimators"]:
|
||||
self.params["n_estimators"] = n_iter
|
||||
return time.time() - start_time
|
||||
if budget is not None:
|
||||
self.params["n_estimators"] = min(n_iter, int((budget-time.time()+
|
||||
start_time-self.t1)/self.time_per_iter+1))
|
||||
if self.params["n_estimators"] > 0:
|
||||
self._fit(X_train, y_train)
|
||||
self.params["n_estimators"] = n_iter
|
||||
train_time = time.time() - start_time
|
||||
return train_time
|
||||
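# Illustrative usage only: LGBMEstimator.fit() first times 1-tree and 4-tree
# fits to estimate the per-iteration cost, then caps n_estimators so the
# final fit stays within `budget` seconds. A sketch on synthetic data:
def _example_lgbm_budgeted_fit():
    rng = np.random.RandomState(0)
    X, y = rng.rand(200, 4), rng.randint(0, 2, 200)
    est = LGBMEstimator(objective_name='binary:logistic', n_estimators=100)
    train_time = est.fit(X, y, budget=1.0)  # seconds
    return est.predict(X), train_time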
|
||||
|
||||
class XGBoostEstimator(SKLearnEstimator):
|
||||
''' not using sklearn API, used for regression '''
|
||||
|
||||
|
||||
def __init__(self, objective_name='regression', all_thread=False, n_jobs=1,
|
||||
n_estimators=4, max_leaves=4, subsample=1.0, min_child_weight=1,
|
||||
learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0, colsample_bylevel=1.0,
|
||||
colsample_bytree=1.0, tree_method='auto', **params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.n_estimators = int(round(n_estimators))
|
||||
self.max_leaves = int(round(max_leaves))
|
||||
self.grids = []
|
||||
self.params = {
|
||||
'max_leaves': int(round(max_leaves)),
|
||||
'max_depth': 0,
|
||||
'grow_policy': params.get('grow_policy', 'lossguide'),
|
||||
'tree_method': tree_method,
|
||||
'verbosity': 0,
|
||||
'nthread': n_jobs,
|
||||
'learning_rate': float(learning_rate),
|
||||
'subsample': float(subsample),
|
||||
'reg_alpha': float(reg_alpha),
|
||||
'reg_lambda': float(reg_lambda),
|
||||
'min_child_weight': float(min_child_weight),
|
||||
'booster': params.get('booster', 'gbtree'),
|
||||
'colsample_bylevel': float(colsample_bylevel),
|
||||
'colsample_bytree': float(colsample_bytree),
|
||||
}
|
||||
if all_thread:
|
||||
del self.params['nthread']
|
||||
|
||||
def get_params(self, deep=False):
|
||||
params = super().get_params()
|
||||
params["n_jobs"] = params['nthread']
|
||||
return params
|
||||
|
||||
def fit(self, X_train, y_train, budget=None):
|
||||
current_time = time.time()
|
||||
if not scipy.sparse.issparse(X_train):
|
||||
self.params['tree_method'] = 'hist'
|
||||
X_train = self.preprocess(X_train)
|
||||
dtrain = xgb.DMatrix(X_train, label=y_train)
|
||||
if self.max_leaves > 0:
|
||||
xgb_model = xgb.train(self.params, dtrain, self.n_estimators)
|
||||
del dtrain
|
||||
train_time = time.time() - current_time
|
||||
self.model = xgb_model
|
||||
return train_time
|
||||
else:
|
||||
return None
|
||||
|
||||
def predict(self, X_test):
|
||||
if not scipy.sparse.issparse(X_test):
|
||||
X_test = self.preprocess(X_test)
|
||||
dtest = xgb.DMatrix(X_test)
|
||||
return super().predict(dtest)
|
||||
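# Illustrative usage only: XGBoostEstimator wraps the native xgb.train API,
# so fit() and predict() build DMatrix objects internally; a sketch on
# synthetic regression data with arbitrary parameter values.
def _example_xgboost_native():
    rng = np.random.RandomState(0)
    X, y = rng.rand(100, 3), rng.rand(100)
    est = XGBoostEstimator(objective_name='regression', n_estimators=8)
    est.fit(X, y)
    return est.predict(X)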
|
||||
|
||||
class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
''' using sklearn API, used for classification '''
|
||||
|
||||
|
||||
def __init__(self, objective_name='binary:logistic', n_jobs=1,
|
||||
n_estimators=4, max_leaves=4, subsample=1.0,
|
||||
min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0,
|
||||
colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist',
|
||||
**params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.params = {
|
||||
"n_estimators": int(round(n_estimators)),
|
||||
'max_leaves': int(round(max_leaves)),
|
||||
'max_depth': 0,
|
||||
'grow_policy': params.get('grow_policy', 'lossguide'),
|
||||
'tree_method': tree_method,
|
||||
'verbosity': 0,
|
||||
'n_jobs': n_jobs,
|
||||
'learning_rate': float(learning_rate),
|
||||
'subsample': float(subsample),
|
||||
'reg_alpha': float(reg_alpha),
|
||||
'reg_lambda': float(reg_lambda),
|
||||
'min_child_weight': float(min_child_weight),
|
||||
'booster': params.get('booster', 'gbtree'),
|
||||
'colsample_bylevel': float(colsample_bylevel),
|
||||
'colsample_bytree': float(colsample_bytree),
|
||||
}
|
||||
|
||||
if 'regression' in objective_name:
|
||||
self.estimator_class = XGBRegressor
|
||||
else:
|
||||
self.estimator_class = XGBClassifier
|
||||
self.time_per_iter = None
|
||||
self.train_size = 0
|
||||
|
||||
def fit(self, X_train, y_train, budget=None):
|
||||
if scipy.sparse.issparse(X_train):
|
||||
self.params['tree_method'] = 'auto'
|
||||
return super().fit(X_train, y_train, budget)
|
||||
|
||||
|
||||
class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
|
||||
|
||||
def __init__(self, objective_name='binary:logistic', n_jobs=1,
|
||||
n_estimators=4, max_leaves=4, max_features=1.0,
|
||||
min_samples_split=2, min_samples_leaf=1, criterion=1, **params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.params = {
|
||||
"n_estimators": int(round(n_estimators)),
|
||||
"n_jobs": n_jobs,
|
||||
'max_features': float(max_features),
|
||||
}
|
||||
if 'regression' in objective_name:
|
||||
self.estimator_class = RandomForestRegressor
|
||||
else:
|
||||
self.estimator_class = RandomForestClassifier
|
||||
self.params['criterion'] = 'entropy' if criterion > 1.5 else 'gini'
|
||||
self.time_per_iter = None
|
||||
self.train_size = 0
|
||||
|
||||
def get_params(self, deep=False):
|
||||
params = super().get_params()
|
||||
params["criterion"] = 1 if params["criterion"]=='gini' else 2
|
||||
return params
|
||||
|
||||
|
||||
class ExtraTreeEstimator(RandomForestEstimator):
|
||||
|
||||
|
||||
def __init__(self, objective_name='binary:logistic', n_jobs=1,
|
||||
n_estimators=4, max_leaves=4, max_features=1.0,
|
||||
min_samples_split=2, min_samples_leaf=1, criterion=1, **params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.params = {
|
||||
"n_estimators": int(round(n_estimators)),
|
||||
"n_jobs": n_jobs,
|
||||
'max_features': float(max_features),
|
||||
}
|
||||
if 'regression' in objective_name:
|
||||
from sklearn.ensemble import ExtraTreesRegressor
|
||||
self.estimator_class = ExtraTreesRegressor
|
||||
else:
|
||||
from sklearn.ensemble import ExtraTreesClassifier
|
||||
self.estimator_class = ExtraTreesClassifier
|
||||
self.params['criterion'] = 'entropy' if criterion > 1.5 else 'gini'
|
||||
self.time_per_iter = None
|
||||
self.train_size = 0
|
||||
|
||||
|
||||
class LRL1Classifier(SKLearnEstimator):
|
||||
|
||||
|
||||
def __init__(self, tol=0.0001, C=1.0,
|
||||
objective_name='binary:logistic', n_jobs=1, **params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.params = {
|
||||
'penalty': 'l1',
|
||||
'tol': float(tol),
|
||||
'C': float(C),
|
||||
'solver': 'saga',
|
||||
'n_jobs': n_jobs,
|
||||
}
|
||||
if 'regression' in objective_name:
|
||||
self.estimator_class = None
|
||||
print('Does not support regression task')
|
||||
raise NotImplementedError
|
||||
else:
|
||||
self.estimator_class = LogisticRegression
|
||||
|
||||
|
||||
class LRL2Classifier(SKLearnEstimator):
|
||||
|
||||
|
||||
def __init__(self, tol=0.0001, C=1.0,
|
||||
objective_name='binary:logistic', n_jobs=1, **params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.params = {
|
||||
'penalty': 'l2',
|
||||
'tol': float(tol),
|
||||
'C': float(C),
|
||||
'solver': 'lbfgs',
|
||||
'n_jobs': n_jobs,
|
||||
}
|
||||
if 'regression' in objective_name:
|
||||
self.estimator_class = None
|
||||
print('Does not support regression task')
|
||||
raise NotImplementedError
|
||||
else:
|
||||
self.estimator_class = LogisticRegression
|
||||
|
||||
|
||||
class CatBoostEstimator(BaseEstimator):
|
||||
|
||||
|
||||
time_per_iter = None
|
||||
train_size = 0
|
||||
|
||||
def __init__(self, objective_name='binary:logistic', n_jobs=1,
|
||||
n_estimators=8192, exp_max_depth=64, learning_rate=0.1, rounds=4,
|
||||
l2_leaf_reg=3, **params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.params = {
|
||||
"early_stopping_rounds": int(round(rounds)),
|
||||
"n_estimators": n_estimators,
|
||||
'learning_rate': learning_rate,
|
||||
'thread_count': n_jobs,
|
||||
'verbose': False,
|
||||
'random_seed': params.get('random_seed', 10242048),
|
||||
}
|
||||
# print(n_estimators)
|
||||
if 'regression' in objective_name:
|
||||
from catboost import CatBoostRegressor
|
||||
self.estimator_class = CatBoostRegressor
|
||||
else:
|
||||
from catboost import CatBoostClassifier
|
||||
self.estimator_class = CatBoostClassifier
|
||||
|
||||
def get_params(self, deep=False):
|
||||
params = super().get_params()
|
||||
params['n_jobs'] = params['thread_count']
|
||||
params['rounds'] = params['early_stopping_rounds']
|
||||
return params
|
||||
|
||||
def fit(self, X_train, y_train, budget=None):
|
||||
start_time = time.time()
|
||||
n_iter = self.params["n_estimators"]
|
||||
if isinstance(X_train, pd.DataFrame):
|
||||
cat_features = list(X_train.select_dtypes(
|
||||
include='category').columns)
|
||||
else:
|
||||
cat_features = []
|
||||
if (not CatBoostEstimator.time_per_iter or
|
||||
abs(CatBoostEstimator.train_size - len(y_train)) > 4) and budget:
|
||||
# measure the time per iteration
|
||||
self.params["n_estimators"] = 1
|
||||
CatBoostEstimator.model = self.estimator_class(**self.params)
|
||||
CatBoostEstimator.model.fit(X_train, y_train,
|
||||
cat_features=cat_features)
|
||||
CatBoostEstimator.t1 = time.time() - start_time
|
||||
if CatBoostEstimator.t1 >= budget:
|
||||
self.params["n_estimators"] = n_iter
|
||||
self.model = CatBoostEstimator.model
|
||||
return CatBoostEstimator.t1
|
||||
self.params["n_estimators"] = 4
|
||||
CatBoostEstimator.model = self.estimator_class(**self.params)
|
||||
CatBoostEstimator.model.fit(X_train, y_train,
|
||||
cat_features=cat_features)
|
||||
CatBoostEstimator.time_per_iter = (time.time() - start_time -
|
||||
CatBoostEstimator.t1) / (self.params["n_estimators"] - 1)
|
||||
if CatBoostEstimator.time_per_iter <= 0:
|
||||
CatBoostEstimator.time_per_iter = CatBoostEstimator.t1
|
||||
CatBoostEstimator.train_size = len(y_train)
|
||||
if time.time() - start_time >= budget or n_iter == self.params[
|
||||
"n_estimators"]:
|
||||
self.params["n_estimators"] = n_iter
|
||||
self.model = CatBoostEstimator.model
|
||||
return time.time() - start_time
|
||||
if budget:
|
||||
train_times = 1
|
||||
self.params["n_estimators"] = min(n_iter, int((budget-time.time()+
|
||||
start_time-CatBoostEstimator.t1)/train_times/
|
||||
CatBoostEstimator.time_per_iter+1))
|
||||
self.model = CatBoostEstimator.model
|
||||
if self.params["n_estimators"] > 0:
|
||||
n_train = max(int(len(y_train) * 0.9), len(y_train) - 1000)
|
||||
X_tr, y_tr = X_train[:n_train], y_train[:n_train]
|
||||
from catboost import Pool
|
||||
model = self.estimator_class(**self.params)
|
||||
model.fit(X_tr, y_tr, cat_features=cat_features, eval_set=Pool(
|
||||
data=X_train[n_train:], label=y_train[n_train:], cat_features=cat_features))
|
||||
# print(self.params["n_estimators"], model.get_best_iteration())
|
||||
self.model = model
|
||||
self.params["n_estimators"] = n_iter
|
||||
train_time = time.time() - start_time
|
||||
# print(budget, train_time)
|
||||
return train_time
|
||||
|
||||
|
||||
class KNeighborsEstimator(BaseEstimator):
|
||||
|
||||
|
||||
def __init__(self, objective_name='binary:logistic', n_jobs=1,
|
||||
n_neighbors=5, **params):
|
||||
super().__init__(objective_name, **params)
|
||||
self.params = {
|
||||
'n_neighbors': int(round(n_neighbors)),
|
||||
'weights': 'distance',
|
||||
'n_jobs': n_jobs,
|
||||
}
|
||||
if 'regression' in objective_name:
|
||||
from sklearn.neighbors import KNeighborsRegressor
|
||||
self.estimator_class = KNeighborsRegressor
|
||||
else:
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
self.estimator_class = KNeighborsClassifier
|
||||
|
||||
def preprocess(self, X):
|
||||
if isinstance(X, pd.DataFrame):
|
||||
cat_columns = X.select_dtypes(['category']).columns
|
||||
# print(X.dtypes)
|
||||
# print(cat_columns)
|
||||
if X.shape[1] == len(cat_columns):
|
||||
raise ValueError(
|
||||
"kneighbor requires at least one numeric feature")
|
||||
X = X.drop(cat_columns, axis=1)
|
||||
return X
|
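# Illustrative usage only: KNeighborsEstimator drops categorical columns in
# preprocess(), so at least one numeric feature must remain; a sketch.
def _example_kneighbors_preprocess():
    X = pd.DataFrame({'c': pd.Categorical(['a', 'b', 'a']),
                      'x': [0.1, 0.2, 0.3]})
    est = KNeighborsEstimator(objective_name='binary:logistic')
    return est.preprocess(X)  # only the numeric column 'x' remains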
|
@ -0,0 +1,675 @@
|
|||
'''!
|
||||
* Copyright (c) 2020 Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License.
|
||||
'''
|
||||
|
||||
from functools import partial
|
||||
from .ml import train_estimator
|
||||
import time
|
||||
import math
|
||||
import numpy as np
|
||||
from .space import config_space, estimator_size, get_config_values, \
|
||||
generate_config_ini, generate_config_max, generate_config_min
|
||||
from .config import SPLIT_RATIO, MIN_SAMPLE_TRAIN, \
|
||||
HISTORY_SIZE, MEM_THRES, BASE_Const, BASE_LOWER_BOUND
|
||||
from random import gauss
|
||||
|
||||
|
||||
def rand_vector_unit_sphere(dims):
|
||||
vec = [gauss(0, 1) for _ in range(dims)]
|
||||
mag = sum(x**2 for x in vec) ** .5
|
||||
return [x / mag for x in vec]
|
||||
|
||||
|
||||
def rand_vector_gaussian(dims):
|
||||
vec = [gauss(0, 1) for _ in range(dims)]
|
||||
return vec
|
||||
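# Illustrative check only: vectors from rand_vector_unit_sphere() have unit
# length, so `base ** component` moves each coordinate by a bounded factor.
def _example_unit_sphere():
    vec = rand_vector_unit_sphere(5)
    norm = sum(x ** 2 for x in vec) ** .5
    assert abs(norm - 1.0) < 1e-9
    return vec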
|
||||
|
||||
class ParamSearch:
|
||||
'''
|
||||
The class for searching hyperparameters for one learner.
|
||||
'''
|
||||
|
||||
def __init__(self, estimator, data_size,
|
||||
compute_with_config, train_with_config, save_info_helper=None,
|
||||
init_sample_size=MIN_SAMPLE_TRAIN, objective_name='regression',
|
||||
log_type='better', config_space_info=None, size_estimator=None,
|
||||
split_ratio=SPLIT_RATIO, base_change='sqrtK', use_dual_dir=True,
|
||||
move_type='geo'):
|
||||
self.log_type = log_type
|
||||
self.base_change = base_change
|
||||
if init_sample_size > data_size:
|
||||
init_sample_size = data_size
|
||||
self.next_sample_size = {}
|
||||
self.prev_sample_size = {}
|
||||
s = init_sample_size
|
||||
self.prev_sample_size[s] = s
|
||||
self.estimator_configspace = config_space_info or config_space(
|
||||
estimator, data_size, objective_name)
|
||||
self.get_size_for_config = size_estimator or (
|
||||
lambda x: estimator_size(x, estimator))
|
||||
config_min_dic_primary, config_min_dic_more, config_min_dic = \
|
||||
generate_config_min(estimator, self.estimator_configspace, None)
|
||||
self.min_config_primary = np.array(
|
||||
list(config_min_dic_primary.values()))
|
||||
self.min_config_more = np.array(list(config_min_dic_more.values()))
|
||||
self.min_config = np.array(list(config_min_dic.values()))
|
||||
# init configurations for different sample size
|
||||
config_init_dic_primary, config_init_dic_more, _, config_type_dic = \
|
||||
generate_config_ini(estimator, self.estimator_configspace)
|
||||
self.init_config_dic_primary = {s: config_init_dic_primary}
|
||||
self.init_config_dic_more = {s: config_init_dic_more}
|
||||
self.init_config_dic_type_dic = {'primary': {
|
||||
s: config_init_dic_primary}, 'more': {s: config_init_dic_more}}
|
||||
self.init_config_dic = {
|
||||
**self.init_config_dic_type_dic['primary'],
|
||||
**self.init_config_dic_type_dic['more']
|
||||
}
|
||||
self.config_type_dic = config_type_dic
|
||||
# max configurations for different sample size
|
||||
config_max_dic_primary, config_max_dic_more, config_max_dic = \
|
||||
generate_config_max(
|
||||
estimator, self.estimator_configspace, int(s))
|
||||
self.max_config_dic_primary = {s: np.array(
|
||||
list(config_max_dic_primary.values()))}
|
||||
self.max_config_dic_more = {s: np.array(
|
||||
list(config_max_dic_more.values()))}
|
||||
self.max_config_dic = {s: np.array(list(config_max_dic.values()))}
|
||||
self.dims = (len(self.min_config_primary), len(self.min_config_more))
|
||||
# print(self.dims)
|
||||
if self.dims[1] > 0 and self.dims[0] > 0:
|
||||
self.base_upper_bound = {
|
||||
s:
|
||||
max(
|
||||
max(
|
||||
(self.max_config_dic_primary[s][i] / self.min_config_primary[i])
|
||||
** math.sqrt(self.dims[0]) for i in range(self.dims[0])
|
||||
),
|
||||
max(
|
||||
(self.max_config_dic_more[s][i] / self.min_config_more[i])
|
||||
** math.sqrt(self.dims[1]) for i in range(self.dims[1]))
|
||||
)
|
||||
}
|
||||
elif self.dims[0] > 0:
|
||||
self.base_upper_bound = {
|
||||
s:
|
||||
max(
|
||||
(self.max_config_dic_primary[s][i] / self.min_config_primary[i])
|
||||
** (math.sqrt(self.dims[0])) for i in range(self.dims[0])
|
||||
)
|
||||
}
|
||||
else:
|
||||
self.base_upper_bound = {
|
||||
s:
|
||||
max(
|
||||
(self.max_config_dic_more[s][i] / self.min_config_more[i])
|
||||
** (math.sqrt(self.dims[1])) for i in range(self.dims[1])
|
||||
)
|
||||
}
|
||||
|
||||
# create sample size sequence
|
||||
while s < data_size:
|
||||
s2 = self.next_sample_size[s] = s * 2 if s * 2 <= data_size else data_size
|
||||
self.prev_sample_size[s2] = s
|
||||
s = s2
|
||||
|
||||
config_max_dic_primary, config_max_dic_more, config_max_dic = \
|
||||
generate_config_max(
|
||||
estimator, self.estimator_configspace, int(s))
|
||||
self.max_config_dic_primary[s] = np.array(
|
||||
list(config_max_dic_primary.values()))
|
||||
self.max_config_dic_more[s] = np.array(
|
||||
list(config_max_dic_more.values()))
|
||||
self.max_config_dic[s] = np.array(list(config_max_dic.values()))
|
||||
if self.dims[1] > 0 and self.dims[0] > 0:
|
||||
self.base_upper_bound[s] = max(
|
||||
max(
|
||||
(self.max_config_dic_primary[s][i]
|
||||
/ self.min_config_primary[i])
|
||||
** math.sqrt(self.dims[0]) for i in range(self.dims[0])
|
||||
),
|
||||
max(
|
||||
(self.max_config_dic_more[s][i]
|
||||
/ self.min_config_more[i])
|
||||
** math.sqrt(self.dims[1]) for i in range(self.dims[1])
|
||||
)
|
||||
)
|
||||
elif self.dims[0] > 0:
|
||||
self.base_upper_bound[s] = max(
|
||||
(self.max_config_dic_primary[s][i]
|
||||
/ self.min_config_primary[i])
|
||||
** math.sqrt(self.dims[0]) for i in range(self.dims[0])
|
||||
)
|
||||
else:
|
||||
self.base_upper_bound[s] = max(
|
||||
(self.max_config_dic_more[s][i] / self.min_config_more[i])
|
||||
** math.sqrt(self.dims[1]) for i in range(self.dims[1])
|
||||
)
|
||||
|
||||
self.init_sample_size = init_sample_size
|
||||
self.data_size = data_size
|
||||
self.sample_size_full = int(self.data_size / (1.0 - split_ratio))
|
||||
|
||||
self.compute_with_config = compute_with_config
|
||||
self.estimator = estimator
|
||||
|
||||
# for logging
|
||||
self.save_helper = save_info_helper
|
||||
self.estimator_type_list = ['primary', 'more']
|
||||
self.dim = self.dims[0] if self.dims[0] > 0 else self.dims[1]
|
||||
self.b = BASE_Const**(math.sqrt(self.dim))
|
||||
self.base_ini = self.b
|
||||
self.total_dim = sum(self.dims)
|
||||
|
||||
self.epo = 2**(self.dim - 1)
|
||||
# keys are [sample size, config], values are (loss, train_time)
|
||||
self.config_tried = {}
|
||||
self.train_with_config = train_with_config
|
||||
|
||||
self.current_config_loss = None
|
||||
self.use_dual_dir = use_dual_dir
|
||||
self.move_type = move_type
|
||||
|
||||
def evaluate_config(self, config, sample_size, move='_pos'):
|
||||
'''
|
||||
evaluate a configuration, update search state,
|
||||
and return whether the state is changed
|
||||
'''
|
||||
if self.time_from_start >= self.time_budget or (move != '_ini' and
|
||||
self.train_time > self.time_budget - self.time_from_start):
|
||||
return False
|
||||
|
||||
model, val_loss, new_train_time, from_history, train_loss = \
|
||||
self.evaluate_proposed_config(config, sample_size, move)
|
||||
# update current config
|
||||
self.update_current_config(config, val_loss, sample_size)
|
||||
# update best model statistics, including statistics about loss and time
|
||||
improved = self.update_search_state_best(
|
||||
config, sample_size, model, val_loss, new_train_time, from_history)
|
||||
self.time_from_start = time.time() - self.start_time
|
||||
if self.save_helper is not None:
|
||||
if from_history:
|
||||
move = move + '_from_hist'
|
||||
self.save_helper.append(self.model_count,
|
||||
train_loss,
|
||||
new_train_time,
|
||||
self.time_from_start,
|
||||
val_loss,
|
||||
config,
|
||||
self.best_loss,
|
||||
self.best_config[0],
|
||||
self.estimator,
|
||||
sample_size)
|
||||
return improved
|
||||
|
||||
def get_hist_config_sig(self, sample_size, config):
|
||||
config_values = get_config_values(config, self.config_type_dic)
|
||||
config_sig = str(sample_size) + '_' + str(config_values)
|
||||
return config_sig
|
||||
|
||||
def evaluate_proposed_config(self, config, sample_size, move):
|
||||
self.model_count += 1
|
||||
config_sig = self.get_hist_config_sig(sample_size, config)
|
||||
d = self.total_dim
|
||||
history_size_per_d = len(self.config_tried) / float(d)
|
||||
if config_sig in self.config_tried:
|
||||
val_loss, new_train_time = self.config_tried[config_sig]
|
||||
# print(config_sig,'found in history')
|
||||
model = train_loss = None
|
||||
from_history = True
|
||||
else:
|
||||
model, val_loss, train_loss, new_train_time, _ = \
|
||||
self.compute_with_config(self.estimator, config, sample_size)
|
||||
from_history = False
|
||||
if history_size_per_d < HISTORY_SIZE:
|
||||
self.config_tried[config_sig] = (val_loss, new_train_time)
|
||||
|
||||
if self.first_move:
|
||||
self.init_config_dic[sample_size] = config
|
||||
move = '_ini'
|
||||
self.base = self.base_ini
|
||||
self.num_noimprovement = 0
|
||||
move = str(self.estimator) + move
|
||||
return model, val_loss, new_train_time, from_history, train_loss
|
||||
|
||||
def update_current_config(self, config, val_loss, sample_size):
|
||||
if self.first_move or val_loss < self.current_config_loss:
|
||||
self.first_move = False
|
||||
# update current config and corresponding sample_size
|
||||
self.sample_size = sample_size
|
||||
self.config = config
|
||||
self.config_primary = {x: config[x]
|
||||
for x in self.config_primary.keys()}
|
||||
try:
|
||||
self.config_more = {x: config[x]
|
||||
for x in self.config_more.keys()}
|
||||
except KeyError:
|
||||
self.config_more = {}
|
||||
self.current_config_loss = val_loss
|
||||
|
||||
def update_reset_best_config_loss(self, sample_size, config, val_loss):
|
||||
if sample_size == self.data_size:
|
||||
if self.best_config_loss_dic_full_reset[1] is None:
|
||||
self.best_config_loss_dic_full_reset = [
|
||||
config, val_loss, self.model_count]
|
||||
else:
|
||||
full_reset_best_loss = self.best_config_loss_dic_full_reset[1]
|
||||
if val_loss < full_reset_best_loss:
|
||||
self.best_config_loss_dic_full_reset = [
|
||||
config, val_loss, self.model_count]
|
||||
|
||||
def update_search_state_best(self, config, sample_size, model, val_loss,
|
||||
new_train_time, from_history):
|
||||
# update the loss statistics for a particular sample size
|
||||
if sample_size not in self.best_config_loss_samplesize_dic:
|
||||
self.best_config_loss_samplesize_dic[sample_size] = [
|
||||
config, val_loss, self.model_count]
|
||||
else:
|
||||
s_best_loss = self.best_config_loss_samplesize_dic[sample_size][1]
|
||||
if val_loss < s_best_loss:
|
||||
self.best_config_loss_samplesize_dic[sample_size] = [
|
||||
config, val_loss, self.model_count]
|
||||
|
||||
self.update_reset_best_config_loss(sample_size, config, val_loss)
|
||||
|
||||
# update best model statistics, including statistics about loss and time
|
||||
if val_loss < self.new_loss:
|
||||
self.old_loss = self.new_loss if self.new_loss < float(
|
||||
'inf') else 2 * val_loss
|
||||
self.new_loss = val_loss
|
||||
self.old_loss_time = self.new_loss_time
|
||||
self.old_train_time = self.train_time
|
||||
self.new_loss_time = self.train_time = new_train_time
|
||||
if val_loss < self.best_loss:
|
||||
self.best_config = [self.config, self.model_count]
|
||||
if not from_history:
|
||||
self.trained_estimator = model
|
||||
# print(model)
|
||||
else:
|
||||
pass  # debug: print(val_loss, self.best_loss)
|
||||
self.best_loss = val_loss
|
||||
self.time_best_found = self.time_from_start
|
||||
return True
|
||||
else:
|
||||
if not from_history:
|
||||
self.new_loss_time += new_train_time
|
||||
return False
|
||||
|
||||
def get_proposal(self, current_config, rand_vector_func, base, move_type):
|
||||
rand_vector = rand_vector_func(len(current_config))
|
||||
rand_vector_neg = [-i for i in rand_vector]
|
||||
|
||||
move_vector = {}
|
||||
move_vector_neg = {}
|
||||
|
||||
index_ = 0
|
||||
for k, v in current_config.items():
|
||||
if 'geo' in move_type:
|
||||
# get the move vector using the proposed random vector
|
||||
move_vector[k] = v * (base**(rand_vector[index_]))
|
||||
move_vector_neg[k] = v * (base**(rand_vector_neg[index_]))
|
||||
else:
|
||||
move_vector[k] = v + (base * (rand_vector[index_]))
|
||||
move_vector_neg[k] = v + (base * (rand_vector_neg[index_]))
|
||||
index_ += 1
|
||||
|
||||
# as long as one of the proposed moves (+ or -) is within the mem_limit,
|
||||
# we will proceed
|
||||
if not self.use_dual_dir:
|
||||
move_vector_neg = None
|
||||
return move_vector, move_vector_neg
|
||||
|
||||
def get_config_from_move_vector(self, v, estimator_type):
|
||||
if v is not None:
|
||||
if 'all' in estimator_type:
|
||||
pass
|
||||
elif 'primary' in estimator_type:
|
||||
v = {**v, **self.config_more}
|
||||
else:
|
||||
v = {**self.config_primary, **v}
|
||||
|
||||
bounded_v = self.get_v_within_min_max(v)
|
||||
else:
|
||||
bounded_v = None
|
||||
return bounded_v
|
||||
|
||||
def dual_direction_sample(self, base, current_search_config,
|
||||
estimator_type='primary', rand_vector_func=rand_vector_unit_sphere,
|
||||
mem_thres=MEM_THRES, move_type='geo'):
|
||||
current_config = current_search_config
|
||||
if len(current_config) == 0:
|
||||
return None, None
|
||||
bounded_v_list = [None, None]
|
||||
while not bounded_v_list[0] and not bounded_v_list[1] \
|
||||
and self.time_from_start < self.time_budget:
|
||||
move_vector, move_vector_neg = self.get_proposal(
|
||||
current_config, rand_vector_func,
|
||||
base, move_type)
|
||||
bounded_v_list = [move_vector, move_vector_neg]
|
||||
for i, v in enumerate(bounded_v_list):
|
||||
bounded_v = self.get_config_from_move_vector(v, estimator_type)
|
||||
proposed_model_size = self.get_size_for_config(bounded_v)
|
||||
proposed_model_size = 0 if not isinstance(
|
||||
proposed_model_size, float) else proposed_model_size
|
||||
if proposed_model_size > mem_thres:
|
||||
# print(bounded_v, proposed_model_size, mem_thres)
|
||||
bounded_v = None
|
||||
bounded_v_list[i] = bounded_v
|
||||
self.time_from_start = time.time() - self.start_time
|
||||
return bounded_v_list
|
||||
|
||||
def get_v_within_min_max(self, v):
|
||||
index_ = 0
|
||||
bounded_v = {}
|
||||
for key, value in v.items():
|
||||
new_value = min(max(
|
||||
value, self.min_config[index_]), self.max_config_dic[
|
||||
self.sample_size][index_])
|
||||
bounded_v[key] = new_value
|
||||
index_ += 1
|
||||
return bounded_v
|
||||
|
||||
def expected_time_improvement_search(self):
|
||||
return max(self.old_loss_time - self.old_train_time + self.train_time,
|
||||
self.new_loss_time)
|
||||
|
||||
def increase_sample_size(self):
|
||||
'''
|
||||
whether it's time to increase sample size
|
||||
'''
|
||||
expected_time_improvement_sample = 2 * self.train_time
|
||||
self.increase = self.sample_size < self.data_size and (
|
||||
self.estimator_type == 0 or self.dims[0] == 0) and (
|
||||
not self.improved
|
||||
or expected_time_improvement_sample
|
||||
< self.expected_time_improvement_search()
|
||||
)
|
||||
return self.increase
|
||||
|
||||
def search_begin(self, time_budget, start_time=None):
|
||||
self.time_budget = time_budget
|
||||
if not start_time:
|
||||
self.start_time = time.time()
|
||||
else:
|
||||
self.start_time = start_time
|
||||
# the time to train the last selected config
|
||||
self.old_train_time = self.train_time = 0
|
||||
self.time_from_start = 0
|
||||
# search states
|
||||
self.first_move = True
|
||||
self.improved = True
|
||||
self.estimator_type = 0 if self.dims[0] > 0 else 1
|
||||
|
||||
self.old_loss = self.new_loss = self.best_loss = float('+inf')
|
||||
# new_loss_time is the time from the beginning of training self.config to
|
||||
# now,
|
||||
# old_loss_time is the time from the beginning of training the old
|
||||
# self.config to the beginning of training self.config
|
||||
self.old_loss_time = self.new_loss_time = 0
|
||||
|
||||
self.trained_estimator = None
|
||||
self.model_count = 0
|
||||
self.K = 0
|
||||
self.old_modelcount = 0
|
||||
|
||||
# self.config has two parts: config_primary contain the configs
|
||||
# that are related with model complexity, config_more contains the
|
||||
# configs that is not related with model complexity
|
||||
self.config_primary = self.init_config_dic_primary[self.init_sample_size]
|
||||
self.config_more = self.init_config_dic_more[self.init_sample_size]
|
||||
self.config = {**self.config_primary, **self.config_more}
|
||||
self.best_config = [None, None]
|
||||
# key: sample size, value: [best_config, best_loss, model_count] under
|
||||
# sample size in the key
|
||||
self.best_config_loss_samplesize_dic = {
|
||||
self.init_sample_size: [self.config, self.old_loss, self.model_count]}
|
||||
# key: sample size, value: [best_config, best_loss, model_count] under
|
||||
# sample size in the key
|
||||
self.best_config_loss_dic_full_reset = [None, None, None]
|
||||
self.sample_size = self.init_sample_size
|
||||
self.base_change_bound = 1
|
||||
self.base_change_count = 0
|
||||
self.evaluate_config(self.config, self.sample_size, '_ini')
|
||||
self.increase = False
|
||||
|
||||
def train_config(self, config, sample_size):
|
||||
'''
|
||||
train a configuration
|
||||
'''
|
||||
# print('Evaluate Config')
|
||||
if self.time_from_start >= self.time_budget:
|
||||
return False
|
||||
config_sig = self.get_hist_config_sig(sample_size, config)
|
||||
if config_sig not in self.config_tried:
|
||||
_, new_train_time = self.train_with_config(
|
||||
self.estimator, config, sample_size)
|
||||
train_loss, val_loss, move = None, self.new_loss, str(
|
||||
self.estimator) + '_trainAll'
|
||||
self.time_from_start = time.time() - self.start_time
|
||||
if self.save_helper is not None:
|
||||
self.save_helper.append(self.model_count,
|
||||
train_loss,
|
||||
new_train_time,
|
||||
self.time_from_start,
|
||||
val_loss,
|
||||
config,
|
||||
self.best_loss,
|
||||
self.best_config,
|
||||
move,
|
||||
sample_size)
|
||||
self.config_tried[config_sig] = (val_loss, new_train_time)
|
||||
|
||||
def try_increase_sample_size(self):
|
||||
# print( self.estimator, self.sample_size)
|
||||
if self.sample_size in self.next_sample_size:
|
||||
if self.increase_sample_size():
|
||||
self.first_move = True
|
||||
self.improved = True
|
||||
self.estimator_type = 0 if self.dims[0] > 0 else 1
|
||||
self.evaluate_config(
|
||||
self.config, self.next_sample_size[self.sample_size])
|
||||
if not self.old_modelcount and self.sample_size == self.data_size:
|
||||
self.old_modelcount = self.model_count
|
||||
|
||||
def setup_current_search_config(self):
|
||||
estimator_type = self.estimator_type_list[self.estimator_type]
|
||||
if 'all' in estimator_type:
|
||||
current_search_config = self.config
|
||||
elif 'primary' in estimator_type:
|
||||
current_search_config = self.config_primary
|
||||
else:
|
||||
current_search_config = self.config_more
|
||||
# print(self.config_more)
|
||||
return estimator_type, current_search_config
|
||||
|
||||
def search1step(self, global_best_loss=float('+inf'),
|
||||
retrain_full=True, mem_thres=MEM_THRES, reset_type='init_gaussian'):
|
||||
# try to increase sample size
|
||||
self.try_increase_sample_size()
|
||||
# decide current_search_config according to estimator_type
|
||||
estimator_type, current_search_config = \
|
||||
self.setup_current_search_config()
|
||||
time_left = self.time_budget - self.time_from_start
|
||||
if time_left < self.train_time:
|
||||
return False
|
||||
if retrain_full and self.train_time < time_left < 2 * self.train_time \
|
||||
and self.best_loss <= global_best_loss:
|
||||
self.train_config(self.best_config[0], self.sample_size_full)
|
||||
|
||||
move_vector, move_vector_neg = self.dual_direction_sample(
|
||||
self.base, current_search_config, estimator_type,
|
||||
rand_vector_unit_sphere, mem_thres, self.move_type)
|
||||
if move_vector is None:
|
||||
if move_vector_neg is None:
|
||||
self.improved = False
|
||||
else:
|
||||
self.improved = self.evaluate_config(
|
||||
move_vector_neg, self.sample_size, '_neg' + str(
|
||||
estimator_type))
|
||||
else:
|
||||
self.improved = self.evaluate_config(
|
||||
move_vector, self.sample_size, '_pos' + str(estimator_type))
|
||||
if not self.improved:
|
||||
if move_vector_neg is None:
|
||||
pass
|
||||
else:
|
||||
self.improved = self.evaluate_config(
|
||||
move_vector_neg, self.sample_size, '_neg' + str(
|
||||
estimator_type))
|
||||
self.update_noimprovement_stat(
|
||||
global_best_loss, retrain_full, reset_type)
|
||||
return self.improved
|
||||
|
||||
def update_noimprovement_stat(self, global_best_loss, retrain_full,
|
||||
reset_type):
|
||||
if self.improved:
|
||||
self.num_noimprovement = 0
|
||||
else:
|
||||
self.estimator_type = 1 - self.estimator_type
|
||||
if self.dims[self.estimator_type] == 0:
|
||||
self.estimator_type = 1 - self.estimator_type
|
||||
if self.estimator_type == 1 or self.dims[1] == 0:
|
||||
self.noimprovement(global_best_loss, retrain_full, reset_type)
|
||||
|
||||
def noimprovement(self, global_best_loss, retrain_full, reset_type='org'):
|
||||
if self.sample_size == self.data_size:
|
||||
# Do not wait until full sample size to update num_noimprovement?
|
||||
self.num_noimprovement += 1
|
||||
if self.num_noimprovement >= self.epo:
|
||||
self.num_noimprovement = 0
|
||||
# print(self.num_noimprovement, self.epo)
|
||||
if self.base_change == 'squareroot':
|
||||
self.base = math.sqrt(self.base)
|
||||
else:
|
||||
if self.K == 0: # first time
|
||||
oldK = self.best_config_loss_dic_full_reset[2] - \
|
||||
self.old_modelcount
|
||||
else:
|
||||
oldK = self.K
|
||||
self.K = self.model_count + 1 - self.old_modelcount
|
||||
if self.base_change == 'K':
|
||||
self.base **= oldK / self.K
|
||||
else:
|
||||
self.base **= math.sqrt(oldK / self.K)
|
||||
if self.dims[1] > 0 and self.dims[0] > 0:
|
||||
base_lower_bound = min(
|
||||
min(
|
||||
(1.0 + self.estimator_configspace[i].min_change
|
||||
/ self.config_primary[i])
|
||||
** math.sqrt(self.dims[0])
|
||||
for i in self.config_primary.keys()
|
||||
),
|
||||
min(
|
||||
(1.0 + self.estimator_configspace[i].min_change
|
||||
/ self.config_more[i])
|
||||
** math.sqrt(self.dims[1])
|
||||
for i in self.config_more.keys()
|
||||
)
|
||||
)
|
||||
elif self.dims[0] > 0:
|
||||
base_lower_bound = min(
|
||||
(1.0 + self.estimator_configspace[i].min_change
|
||||
/ self.config_primary[i])
|
||||
** math.sqrt(self.dims[0])
|
||||
for i in self.config_primary.keys()
|
||||
)
|
||||
else:
|
||||
base_lower_bound = min(
|
||||
(1.0 + self.estimator_configspace[i].min_change
|
||||
/ self.config_more[i])
|
||||
** math.sqrt(self.dims[1])
|
||||
for i in self.config_more.keys()
|
||||
)
|
||||
if np.isinf(base_lower_bound):
|
||||
base_lower_bound = BASE_LOWER_BOUND
|
||||
self.base_change_count += 1
|
||||
if self.base <= base_lower_bound or \
|
||||
self.base_change_count == self.base_change_bound:
|
||||
if retrain_full and self.sample_size == self.data_size:
|
||||
if self.best_loss <= global_best_loss:
|
||||
# Only train on full data when the current estimator
|
||||
# is the best estimator
|
||||
# print('best estimator and train on full data')
|
||||
self.train_config(
|
||||
self.best_config[0], self.sample_size_full)
|
||||
# remaining time is more than enough for another trial
|
||||
if self.time_budget - self.time_from_start > self.train_time:
|
||||
self.base_change_bound <<= 1
|
||||
self.base_change_count = 0
|
||||
self.K = 0
|
||||
self.old_modelcount = self.model_count
|
||||
self.best_config_loss_dic_full_reset = [None, None,
|
||||
None]
|
||||
self.first_move = True
|
||||
self.improved = True
|
||||
self.base_ini = min(
|
||||
self.base_ini * 2, self.base_upper_bound[
|
||||
self.sample_size])
|
||||
self.estimator_type = 0 if self.dims[0] > 0 else 1
|
||||
reset_config, reset_sample_size = self.get_reset_config(
|
||||
self.init_sample_size, reset_type)
|
||||
self.sample_size = reset_sample_size
|
||||
# print('reset sample size', reset_sample_size)
|
||||
self.evaluate_config(reset_config, self.sample_size,
|
||||
'_ini')
|
||||
|
||||
def get_reset_config(self, sample_size, reset_type):
|
||||
init_config = self.init_config_dic[self.sample_size]
|
||||
reset_sample_size = sample_size
|
||||
if 'org' in reset_type:
|
||||
reset_config = init_config
|
||||
else:
|
||||
if 'init_gaussian' in reset_type:
|
||||
reset_config = init_config
|
||||
reset_sample_size = self.get_reset_sample_size(reset_config)
|
||||
config_values = get_config_values(
|
||||
reset_config, self.config_type_dic)
|
||||
config_sig = str(reset_sample_size) + '_' + str(config_values)
|
||||
count = 0
|
||||
while config_sig in self.config_tried and \
|
||||
self.time_from_start < self.time_budget and count < 1000:
|
||||
# TODO: check exhaustiveness? use time as condition?
|
||||
count += 1
|
||||
move, move_neg = self.dual_direction_sample(
|
||||
base=self.b, current_search_config=init_config,
|
||||
estimator_type='all',
|
||||
rand_vector_func=rand_vector_gaussian,
|
||||
move_type=self.move_type)
|
||||
if move:
|
||||
reset_config = move_neg
|
||||
elif move_neg:
|
||||
reset_config = move_neg
|
||||
else:
|
||||
continue
|
||||
reset_sample_size = self.get_reset_sample_size(
|
||||
reset_config)
|
||||
config_values = get_config_values(
|
||||
reset_config, self.config_type_dic)
|
||||
config_sig = str(reset_sample_size) + \
|
||||
'_' + str(config_values)
|
||||
self.time_from_start = time.time() - self.start_time
|
||||
else:
|
||||
raise NotImplementedError
|
||||
return reset_config, reset_sample_size
|
||||
|
||||
def get_reset_sample_size(self, reset_config):
|
||||
if not reset_config:
|
||||
print('reset_config is none')
|
||||
reset_config_size = self.get_size_for_config(reset_config)
|
||||
|
||||
candidate_sample_size_list = []
|
||||
for sample_size, config_and_bestloss in \
|
||||
self.best_config_loss_samplesize_dic.items():
|
||||
s_best_config = config_and_bestloss[0]
|
||||
if not s_best_config:
|
||||
print('best config is none', sample_size)
|
||||
s_best_config_model_size = self.get_size_for_config(s_best_config)
|
||||
if s_best_config_model_size >= reset_config_size:
|
||||
candidate_sample_size_list.append(sample_size)
|
||||
|
||||
if len(candidate_sample_size_list) != 0:
|
||||
return min(candidate_sample_size_list)
|
||||
else:
|
||||
return self.data_size
|
|
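A minimal standalone sketch (not part of the commit, with invented constants) of the step-size schedule implemented in noimprovement above: under base_change == 'squareroot' the search radius shrinks to its square root after every epo non-improving trials; the 'K' policy raises it to the power oldK / K; and the default policy uses exponent sqrt(oldK / K), where K apparently counts models trained since the last reset. The base keeps contracting until it crosses base_lower_bound or base_change_bound changes accumulate, which triggers the restart branch.

import math

# Illustrative constants, not taken from the commit.
base_sqrt = base_k = base_default = 2.0
oldK, K = 10, 25

for step in range(1, 4):
    base_sqrt = math.sqrt(base_sqrt)        # 'squareroot' policy
    base_k **= oldK / K                     # 'K' policy
    base_default **= math.sqrt(oldK / K)    # default policy
    print(step, round(base_sqrt, 3), round(base_k, 3),
          round(base_default, 3))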
@@ -0,0 +1,249 @@
'''!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
'''


class ConfigSearchInfo:
    '''The class of the search space of a hyperparameter.

    Attributes:
        name: A string of the name of the hyperparameter
        type: data type of the hyperparameter
        lower: A number of the lower bound of the value
        upper: A number of the upper bound of the value
        init: A number of the initial value. For hyperparameters related to
            complexity, the init value needs to correspond to the lowest
            complexity
        change_type: A string of the change type, 'linear' or 'log'
        min_change: A number of the minimal change required. Could be inf if
            no such requirement
    '''

    def __init__(self, name, type, lower, upper, init, change_type='log',
                 complexity_related=True, min_change=None):
        self.name = name
        self.type = type
        self.lower = lower
        self.upper = upper
        self.init = init
        self.change_type = change_type
        self.complexity_related = complexity_related
        # default setting of min_change: 1 if type is int, otherwise +inf
        if min_change is None:
            if self.type == int:
                self.min_change = 1.0  # minimum change required
            else:
                self.min_change = float('+inf')
        else:
            self.min_change = min_change
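For illustration only (not part of the commit), a ConfigSearchInfo can be constructed directly; note how min_change defaults to 1.0 for int hyperparameters and to +inf for float ones:

# Hypothetical entries; the values mirror typical bounds used below.
leaves = ConfigSearchInfo(name='max_leaves', type=int,
                          lower=4, upper=32768, init=4)
lr = ConfigSearchInfo(name='learning_rate', type=float,
                      lower=0.01, upper=1.0, init=0.1)
print(leaves.min_change)  # 1.0: integers must change by at least 1
print(lr.min_change)      # inf: no minimal change required for floats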
def config_space(estimator, data_size, objective_name="regression"):
    CS = {}
    n_estimators_upper = min(32768, int(data_size))
    max_leaves_upper = min(32768, int(data_size))
    # exp_max_depth_upper = min(32768, data_size)
    if 'xgboost' in estimator:
        CS['n_estimators'] = ConfigSearchInfo(
            name='n_estimators', type=int, lower=4, init=4,
            upper=n_estimators_upper, change_type='log')
        CS['max_leaves'] = ConfigSearchInfo(
            name='max_leaves', type=int, lower=4, init=4,
            upper=max_leaves_upper, change_type='log')
        CS['min_child_weight'] = ConfigSearchInfo(
            name='min_child_weight', type=float, lower=0.001, init=20.0,
            upper=20.0, change_type='log')
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=0.01, init=0.1,
            upper=1.0, change_type='log')
        CS['subsample'] = ConfigSearchInfo(
            name='subsample', type=float, lower=0.6, init=1.0, upper=1.0,
            change_type='linear')
        CS['reg_alpha'] = ConfigSearchInfo(
            name='reg_alpha', type=float, lower=1e-10, init=1e-10,
            upper=1.0, change_type='log', complexity_related=True)
        CS['reg_lambda'] = ConfigSearchInfo(
            name='reg_lambda', type=float, lower=1e-10, init=1.0,
            upper=1.0, change_type='log')
        CS['colsample_bylevel'] = ConfigSearchInfo(
            name='colsample_bylevel', type=float, lower=0.6, init=1.0,
            upper=1.0, change_type='linear')
        CS['colsample_bytree'] = ConfigSearchInfo(
            name='colsample_bytree', type=float, lower=0.7, init=1.0,
            upper=1.0, change_type='linear')
    elif estimator in ('rf', 'extra_tree'):
        n_estimators_upper = min(2048, n_estimators_upper)
        # max_leaves_upper = min(2048, max_leaves_upper)
        CS['n_estimators'] = ConfigSearchInfo(
            name='n_estimators', type=int, lower=4, init=4,
            upper=n_estimators_upper, change_type='log')
        if objective_name != 'regression':
            CS['criterion'] = ConfigSearchInfo(
                name='criterion', type=int, lower=1, init=1, upper=2,
                change_type='log')
        # CS['max_leaves'] = ConfigSearchInfo(
        #     name='max_leaves', type=int, lower=4, init=4,
        #     upper=max_leaves_upper, change_type='log',
        #     complexity_related=True)
        CS['max_features'] = ConfigSearchInfo(
            name='max_features', type=float, lower=0.1, init=1.0,
            upper=1.0, change_type='log')
        # CS['min_samples_split'] = ConfigSearchInfo(
        #     name='min_samples_split', type=int, lower=2, init=2, upper=20,
        #     change_type='log', complexity_related=True)
        # CS['min_samples_leaf'] = ConfigSearchInfo(
        #     name='min_samples_leaf', type=int, lower=1, init=1, upper=20,
        #     change_type='log', complexity_related=True)
    elif 'lgbm' in estimator:
        CS['n_estimators'] = ConfigSearchInfo(
            name='n_estimators', type=int, lower=4, init=4,
            upper=n_estimators_upper, change_type='log')
        CS['max_leaves'] = ConfigSearchInfo(
            name='max_leaves', type=int, lower=4, init=4,
            upper=max_leaves_upper, change_type='log')
        CS['min_child_weight'] = ConfigSearchInfo(
            name='min_child_weight', type=float, lower=0.001, init=20,
            upper=20.0, change_type='log')
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=0.01, init=0.1,
            upper=1.0, change_type='log')
        CS['subsample'] = ConfigSearchInfo(
            name='subsample', type=float, lower=0.6, init=1.0, upper=1.0,
            change_type='log', complexity_related=True)
        CS['log_max_bin'] = ConfigSearchInfo(
            name='log_max_bin', type=int, lower=3, init=8, upper=10,
            change_type='log', complexity_related=True)
        CS['reg_alpha'] = ConfigSearchInfo(
            name='reg_alpha', type=float, lower=1e-10, init=1e-10,
            upper=1.0, change_type='log', complexity_related=True)
        CS['reg_lambda'] = ConfigSearchInfo(
            name='reg_lambda', type=float, lower=1e-10, init=1.0,
            upper=1.0, change_type='log')
        CS['colsample_bytree'] = ConfigSearchInfo(
            name='colsample_bytree', type=float, lower=0.7, init=1.0,
            upper=1.0, change_type='log')
    elif 'lr' in estimator:
        CS['C'] = ConfigSearchInfo(
            name='C', type=float, lower=0.03125, init=1.0, upper=32768.0,
            change_type='log', complexity_related=True)
    elif 'catboost' in estimator:
        # CS['n_estimators'] = ConfigSearchInfo(
        #     name='n_estimators', type=int, lower=4, init=64,
        #     upper=n_estimators_upper, change_type='log',
        #     complexity_related=True)
        early_stopping_rounds = max(min(round(1500000 / data_size), 150), 10)
        CS['rounds'] = ConfigSearchInfo(
            name='rounds', type=int, lower=10, init=10,
            upper=early_stopping_rounds, change_type='log')
        # CS['exp_max_depth'] = ConfigSearchInfo(
        #     name='exp_max_depth', type=int, lower=32, init=64, upper=256,
        #     change_type='log', complexity_related=True)
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=0.005, init=0.1,
            upper=0.2, change_type='log')
        # CS['l2_leaf_reg'] = ConfigSearchInfo(
        #     name='l2_leaf_reg', type=float, lower=1, init=3, upper=5,
        #     change_type='log')
    elif 'nn' == estimator:
        CS['learning_rate'] = ConfigSearchInfo(
            name='learning_rate', type=float, lower=1e-4, init=3e-4,
            upper=3e-2, change_type='log')
        CS['weight_decay'] = ConfigSearchInfo(
            name='weight_decay', type=float, lower=1e-12, init=1e-6,
            upper=0.1, change_type='log')
        CS['dropout_prob'] = ConfigSearchInfo(
            name='dropout_prob', type=float, lower=1.0, init=1.1,
            upper=1.5, change_type='log')
    elif 'kneighbor' in estimator:
        n_neighbors_upper = min(512, int(data_size / 2))
        CS['n_neighbors'] = ConfigSearchInfo(
            name='n_neighbors', type=int, lower=1, init=5,
            upper=n_neighbors_upper, change_type='log')
    else:
        raise NotImplementedError

    return CS
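A hedged usage sketch (not in the commit): for a hypothetical dataset with 10,000 rows, config_space returns the per-estimator search space as a dict of ConfigSearchInfo objects keyed by hyperparameter name, with the tree-size upper bounds capped by the data size:

# Illustrative only.
cs = config_space('lgbm', data_size=10000)
print(sorted(cs))
# n_estimators_upper = max_leaves_upper = min(32768, 10000) = 10000
print(cs['n_estimators'].lower, cs['n_estimators'].upper)  # 4 10000
print(cs['learning_rate'].change_type)  # 'log'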
def estimator_size(config, estimator):
    if estimator in ['xgboost', 'lgbm', 'rf', 'extra_tree']:
        try:
            max_leaves = int(round(config['max_leaves']))
            n_estimators = int(round(config['n_estimators']))
            model_size = float((max_leaves * 3 + (max_leaves - 1) * 4 + 1)
                               * n_estimators * 8)
        except KeyError:
            # e.g., the rf/extra_tree search space has no max_leaves entry
            model_size = 0
        return model_size
    elif 'catboost' in estimator:
        n_estimators = int(round(config.get('n_estimators', 8192)))
        max_leaves = int(round(config.get('exp_max_depth', 64)))
        model_size = float((max_leaves * 3 + (max_leaves - 1) * 4 + 1)
                           * n_estimators * 8)
        return model_size
    else:
        # no size estimate for other estimators
        return 1.0
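As a worked example of the size formula above, which reads as a rough byte count for the tree ensemble (three values per leaf, four pointers per internal node, one root record, eight bytes each; that interpretation is an editorial guess): with max_leaves = 4 and n_estimators = 4, (4*3 + 3*4 + 1) * 4 * 8 = 25 * 32 = 800.

# Illustrative check; config keys match those produced by config_space.
print(estimator_size({'max_leaves': 4, 'n_estimators': 4}, 'lgbm'))  # 800.0
print(estimator_size({'n_estimators': 4}, 'rf'))  # 0: no max_leaves key
print(estimator_size({}, 'kneighbor'))  # 1.0 fallback for other learners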
def generate_config_ini(estimator, estimator_configspace):
    config_dic = {}
    config_dic_more = {}
    config_type_dic = {}
    for _, config in estimator_configspace.items():
        name, init = config.name, config.init
        type_, complexity_related = config.type, config.complexity_related
        config_type_dic[name] = type_
        if complexity_related:
            config_dic[name] = init
        else:
            config_dic_more[name] = init
    return config_dic, config_dic_more, {**config_dic, **config_dic_more}, \
        config_type_dic


def generate_config_min(estimator, estimator_configspace, max_config_size):
    config_dic = {}
    config_dic_more = {}
    for _, config in estimator_configspace.items():
        name, lower = config.name, config.lower
        complexity_related = config.complexity_related
        if complexity_related:
            config_dic[name] = lower
        else:
            config_dic_more[name] = lower

    return config_dic, config_dic_more, {**config_dic, **config_dic_more}


def generate_config_max(estimator, estimator_configspace, max_config_size):
    config_dic = {}
    config_dic_more = {}
    for _, config in estimator_configspace.items():
        name, upper = config.name, config.upper
        complexity_related = config.complexity_related
        if complexity_related:
            if name in ('n_estimators', 'max_leaves'):
                config_dic[name] = min(upper, max_config_size)
            else:
                config_dic[name] = upper
        else:
            config_dic_more[name] = upper
    return config_dic, config_dic_more, {**config_dic, **config_dic_more}


def get_config_values(config_dic, config_type_dic):
    value_list = []
    for k in config_dic.keys():
        org_v = config_dic[k]
        if config_type_dic[k] == int:
            v = int(round(org_v))
            value_list.append(v)
        else:
            value_list.append(org_v)
    return value_list
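A hedged sketch (not in the commit) tying the helpers above together: the generators split a space into complexity-related and remaining hyperparameters and emit the initial, minimal, and capped-maximal configurations, while get_config_values rounds int-typed entries into a comparable value signature:

# Illustrative only, reusing config_space from above.
cs = config_space('lgbm', data_size=10000)
ini, ini_more, ini_all, type_dic = generate_config_ini('lgbm', cs)
_, _, min_all = generate_config_min('lgbm', cs, max_config_size=1024)
_, _, max_all = generate_config_max('lgbm', cs, max_config_size=1024)
print(ini_all['n_estimators'], min_all['n_estimators'],
      max_all['n_estimators'])  # 4 4 1024 (upper bound capped at 1024)
print(get_config_values(ini_all, type_dic))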
@@ -0,0 +1,168 @@
'''!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
'''

import json
from typing import IO
from contextlib import contextmanager
import warnings


class TrainingLogRecord(object):

    def __init__(self,
                 record_id: int,
                 iter_per_learner: int,
                 logged_metric: float,
                 trial_time: float,
                 total_search_time: float,
                 validation_loss,
                 config,
                 best_validation_loss,
                 best_config,
                 learner,
                 sample_size):
        self.record_id = record_id
        self.iter_per_learner = iter_per_learner
        self.logged_metric = logged_metric
        self.trial_time = trial_time
        self.total_search_time = total_search_time
        self.validation_loss = validation_loss
        self.config = config
        self.best_validation_loss = best_validation_loss
        self.best_config = best_config
        self.learner = learner
        self.sample_size = sample_size

    def dump(self, fp: IO[str]):
        d = vars(self)
        return json.dump(d, fp)

    @classmethod
    def load(cls, json_str: str):
        d = json.loads(json_str)
        return cls(**d)


class TrainingLogCheckPoint(TrainingLogRecord):

    def __init__(self, curr_best_record_id: int):
        self.curr_best_record_id = curr_best_record_id


class TrainingLogWriter(object):

    def __init__(self, output_filename: str):
        self.output_filename = output_filename
        self.file = None
        self.current_best_loss_record_id = None
        self.current_best_loss = float('+inf')
        self.current_sample_size = None
        self.current_record_id = 0

    def open(self):
        self.file = open(self.output_filename, 'w')

    def append(self,
               it_counter: int,
               train_loss: float,
               trial_time: float,
               total_search_time: float,
               validation_loss,
               config,
               best_validation_loss,
               best_config,
               learner,
               sample_size):
        if self.file is None:
            raise IOError("Call open() to open the output file first.")
        if validation_loss is None:
            raise ValueError("validation_loss is None.")
        record = TrainingLogRecord(self.current_record_id,
                                   it_counter,
                                   train_loss,
                                   trial_time,
                                   total_search_time,
                                   validation_loss,
                                   config,
                                   best_validation_loss,
                                   best_config,
                                   learner,
                                   sample_size)
        if validation_loss < self.current_best_loss or \
                (validation_loss == self.current_best_loss
                 and sample_size > self.current_sample_size):
            self.current_best_loss = validation_loss
            self.current_sample_size = sample_size
            self.current_best_loss_record_id = self.current_record_id
        self.current_record_id += 1
        record.dump(self.file)
        self.file.write('\n')
        self.file.flush()

    def checkpoint(self):
        if self.file is None:
            raise IOError("Call open() to open the output file first.")
        if self.current_best_loss_record_id is None:
            warnings.warn("checkpoint() called before any record is written, "
                          "skipped.")
            return
        record = TrainingLogCheckPoint(self.current_best_loss_record_id)
        record.dump(self.file)
        self.file.write('\n')
        self.file.flush()

    def close(self):
        self.file.close()


class TrainingLogReader(object):

    def __init__(self, filename: str):
        self.filename = filename
        self.file = None

    def open(self):
        self.file = open(self.filename)

    def records(self):
        if self.file is None:
            raise IOError("Call open() before reading log file.")
        for line in self.file:
            data = json.loads(line)
            if len(data) == 1:
                # Skip checkpoints.
                continue
            yield TrainingLogRecord(**data)

    def close(self):
        self.file.close()

    def get_record(self, record_id) -> TrainingLogRecord:
        if self.file is None:
            raise IOError("Call open() before reading log file.")
        for rec in self.records():
            if rec.record_id == record_id:
                return rec
        raise ValueError(f"Cannot find record with id {record_id}.")


@contextmanager
def training_log_writer(filename: str):
    try:
        w = TrainingLogWriter(filename)
        w.open()
        yield w
    finally:
        w.close()


@contextmanager
def training_log_reader(filename: str):
    try:
        r = TrainingLogReader(filename)
        r.open()
        yield r
    finally:
        r.close()
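A hedged end-to-end sketch of the logging API above (the path and field values are invented): append writes one JSON record per line, checkpoint appends a single-field record pointing at the current best, and records() skips those checkpoints when replaying the log:

# Illustrative only; field names follow TrainingLogWriter.append.
with training_log_writer('example_training.log') as w:
    w.append(it_counter=1, train_loss=0.30, trial_time=0.5,
             total_search_time=0.5, validation_loss=0.25,
             config={'learning_rate': 0.1}, best_validation_loss=0.25,
             best_config={'learning_rate': 0.1}, learner='lgbm',
             sample_size=1000)
    w.checkpoint()

with training_log_reader('example_training.log') as r:
    for rec in r.records():
        print(rec.record_id, rec.learner, rec.validation_loss)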
@@ -0,0 +1 @@
__version__ = "0.1.0"
File diff suppressed because one or more lines are too long
@@ -0,0 +1,4 @@
{
    "keep_max_logfiles": 30,
    "logging_level": "info"
}
@@ -0,0 +1,56 @@
import setuptools
import os

here = os.path.abspath(os.path.dirname(__file__))

with open("README.md", "r") as fh:
    long_description = fh.read()


# Get the code version
version = {}
with open(os.path.join(here, "flaml/version.py")) as fp:
    exec(fp.read(), version)
__version__ = version["__version__"]

install_requires = [
    "NumPy>=1.16.2",
    "lightgbm>=2.3.1",
    "xgboost>=0.90",
    "scipy>=1.4.1",
    "catboost>=0.23",
    "scikit-learn>=0.23",
]


setuptools.setup(
    name="FLAML",
    version=__version__,
    author="Microsoft Corporation",
    author_email="hpo@microsoft.com",
    description="A fast and lightweight autoML system",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/microsoft/FLAML",
    packages=["flaml"],
    install_requires=install_requires,
    extras_require={
        "notebook": [
            "openml==0.10.2",
            "jupyter",
            "matplotlib==3.2.0",
            "rgf-python",
        ],
        "test": [
            "flake8>=3.8.4",
            "pytest>=6.1.1",
            "coverage>=5.3",
        ],
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
)
@@ -0,0 +1,235 @@
import unittest

import numpy as np
import scipy.sparse
from sklearn.datasets import load_boston, load_iris

from flaml import AutoML, get_output_from_log


def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
    from sklearn.metrics import log_loss
    y_pred = estimator.predict_proba(X_test)
    test_loss = log_loss(y_test, y_pred, labels=labels)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels)
    alpha = 0.5
    return test_loss * (1 + alpha) - alpha * train_loss, \
        [test_loss, train_loss]
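The function above doubles as a usage example for the custom-metric hook: it receives the fitted estimator together with held-out and training data, and apparently returns the loss to minimize plus a list of metrics to log. A hedged variant with the same signature, gap_penalized_metric (a hypothetical name), which penalizes the train/test gap, might look like:

def gap_penalized_metric(X_test, y_test, estimator, labels,
                         X_train, y_train):
    # Illustrative only; same return contract as custom_metric above.
    from sklearn.metrics import log_loss
    test_loss = log_loss(y_test, estimator.predict_proba(X_test),
                         labels=labels)
    train_loss = log_loss(y_train, estimator.predict_proba(X_train),
                          labels=labels)
    return test_loss + abs(test_loss - train_loss), [test_loss, train_loss]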
class TestAutoML(unittest.TestCase):

    def test_dataframe(self):
        self.test_classification(True)

    def test_custom_metric(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "eval_method": 'holdout',
            "metric": custom_metric,
            "task": 'classification',
            "log_file_name": "test/iris_custom.log",
            "log_training_metric": True,
            "log_type": 'all',
            "model_history": True
        }
        X_train, y_train = load_iris(return_X_y=True)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.classes_)
        print(automl_experiment.predict_proba(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        automl_experiment = AutoML()
        estimator = automl_experiment.get_estimator_from_log(
            automl_settings["log_file_name"], record_id=0,
            objective='multi')
        print(estimator)
        time_history, best_valid_loss_history, valid_loss_history, \
            config_history, train_loss_history = get_output_from_log(
                filename=automl_settings['log_file_name'], time_budget=6)
        print(train_loss_history)

    def test_classification(self, as_frame=False):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 4,
            "metric": 'accuracy',
            "task": 'classification',
            "log_file_name": "test/iris.log",
            "log_training_metric": True,
            "model_history": True
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.classes_)
        print(automl_experiment.predict_proba(X_train)[:5])
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        del automl_settings["metric"]
        del automl_settings["model_history"]
        del automl_settings["log_training_metric"]
        automl_experiment = AutoML()
        duration = automl_experiment.retrain_from_log(
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train, y_train=y_train,
            train_full=True, record_id=0)
        print(duration)
        print(automl_experiment.model)
        print(automl_experiment.predict_proba(X_train)[:5])

    def test_regression(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": 'mse',
            "task": 'regression',
            "log_file_name": "test/boston.log",
            "log_training_metric": True,
            "model_history": True
        }
        X_train, y_train = load_boston(return_X_y=True)
        n = len(y_train)
        automl_experiment.fit(X_train=X_train[:n >> 1],
                              y_train=y_train[:n >> 1],
                              X_val=X_train[n >> 1:],
                              y_val=y_train[n >> 1:],
                              **automl_settings)
        assert automl_experiment.y_val.shape[0] == n - (n >> 1)
        assert automl_experiment.eval_method == 'holdout'
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(get_output_from_log(automl_settings["log_file_name"], 1))

    def test_sparse_matrix_classification(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": 'auto',
            "task": 'classification',
            "log_file_name": "test/sparse_classification.log",
            "split_type": "uniform",
            "model_history": True
        }
        X_train = scipy.sparse.random(1554, 21, dtype=int)
        y_train = np.random.randint(3, size=1554)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.classes_)
        print(automl_experiment.predict_proba(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_sparse_matrix_regression(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": 'mae',
            "task": 'regression',
            "log_file_name": "test/sparse_regression.log",
            "model_history": True
        }
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              X_val=X_val, y_val=y_val,
                              **automl_settings)
        assert automl_experiment.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)

    def test_sparse_matrix_xgboost(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": 'ap',
            "task": 'classification',
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_sparse_matrix_lr(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": 'f1',
            "task": 'classification',
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["lrl1", "lrl2"],
            "log_type": "all",
        }
        X_train = scipy.sparse.random(3000, 900, density=0.1)
        y_train = np.random.randint(2, size=3000)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_sparse_matrix_regression_cv(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "eval_method": 'cv',
            "task": 'regression',
            "log_file_name": "test/sparse_regression.log",
            "model_history": True
        }
        X_train = scipy.sparse.random(100, 100)
        y_train = np.random.uniform(size=100)
        automl_experiment.fit(X_train=X_train, y_train=y_train,
                              **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.model_history)
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,45 @@
import unittest

from sklearn.datasets import fetch_openml
from flaml.automl import AutoML
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


dataset = "Airlines"


def _test(split_type):
    automl = AutoML()

    automl_settings = {
        "time_budget": 2,
        # "metric": 'accuracy',
        "task": 'classification',
        "log_file_name": "test/{}.log".format(dataset),
        "model_history": True,
        "log_training_metric": True,
        "split_type": split_type,
    }

    X, y = fetch_openml(name=dataset, return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

    pred = automl.predict(X_test)
    acc = accuracy_score(y_test, pred)

    print(acc)


def test_stratified():
    _test(split_type="stratified")


def test_uniform():
    _test(split_type="uniform")


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,14 @@
import unittest
import flaml


class TestVersion(unittest.TestCase):

    def test_version(self):
        self.assertTrue(hasattr(flaml, '__version__'))
        self.assertTrue(len(flaml.__version__) > 0)


if __name__ == "__main__":
    unittest.main()