update: use standard multioutputs loaders; update tests

Ali Zaidi 2021-01-17 21:00:40 -08:00
Parent 2be904a6b5
Commit 7d18e348dd
7 changed files: 108 additions and 69 deletions

7
.gitignore vendored
View file

@@ -68,5 +68,8 @@ test-output.xml
 # jupyter
 .ipynb_checkpoints
 # big models
 bigmodels/
+# tmp directories
 tmp/
+# hydra outputs
+outputs/

41
base.py
View file

@@ -8,6 +8,7 @@ import numpy as np
 import pandas as pd
 from typing import Tuple, List, Union
+from natsort import natsorted
 from sklearn.preprocessing import StandardScaler
 from loaders import CsvReader
@@ -17,7 +18,7 @@ console = logging.StreamHandler(sys.stdout)
 console.setLevel(logging.DEBUG)
 formater = logging.Formatter("%(name)-13s: %(levelname)-8s %(message)s")
 console.setFormatter(formater)
-logging.getLogger(__name__).addHandler(console)
+logging.getLogger("datamodeler").addHandler(console)

 # TODO: add weighting to the model
 # TODO: this should go into a metrics function?
@@ -203,10 +204,44 @@ class BaseModel(abc.ABC):
         pickle.dump(self.model, open(filename, "wb"))

-    def load_model(self, filename: str, scale_data: bool = False):
+    def load_model(
+        self, filename: str, scale_data: bool = False, separate_models: bool = False
+    ):
+        self.separate_models = separate_models
         self.scale_data = scale_data
-        self.model = pickle.load(open(filename, "rb"))
+        if scale_data:
+            if not self.separate_models:
+                path_name = str(pathlib.Path(filename).parent)
+            else:
+                path_name = filename
+            self.xscalar = pickle.load(
+                open(os.path.join(path_name, "xscalar.pkl"), "rb")
+            )
+            self.yscalar = pickle.load(
+                open(os.path.join(path_name, "yscalar.pkl"), "rb")
+            )
+
+        if separate_models:
+            self._load_multimodels(filename, scale_data)
+        else:
+            if not any([s in filename for s in [".pkl", ".pickle"]]):
+                filename += ".pkl"
+            self.model = pickle.load(open(filename, "rb"))
+
+    def _load_multimodels(self, filename: str, scale_data: bool):
+        all_models = os.listdir(filename)
+        all_models = natsorted(all_models)
+        if self.scale_data:
+            all_models = all_models[:-2]
+        num_models = len(all_models)
+        models = []
+        for i in range(num_models):
+            models.append(
+                pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
+            )
+        self.models = models

     def evaluate(self, test_data: np.ndarray):

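For context, the new _load_multimodels relies on a directory layout where natsorted places the two scaler pickles after the per-output model pickles, so all_models[:-2] drops exactly xscalar.pkl and yscalar.pkl when scale_data is set (both sort after names like model0.pkl). A minimal sketch of a saver producing that layout; the helper name and file pattern are illustrative, not part of this commit:

    import os
    import pickle

    def save_multimodels(dir_path, models, xscalar=None, yscalar=None):
        # Hypothetical counterpart to BaseModel._load_multimodels: one pickle
        # per output dimension, plus optional scaler pickles. natsorted()
        # orders the scaler files after the model files, so all_models[:-2]
        # strips them again on load.
        os.makedirs(dir_path, exist_ok=True)
        for i, model in enumerate(models):
            with open(os.path.join(dir_path, f"model{i}.pkl"), "wb") as f:
                pickle.dump(model, f)
        for name, scaler in (("xscalar.pkl", xscalar), ("yscalar.pkl", yscalar)):
            if scaler is not None:
                with open(os.path.join(dir_path, name), "wb") as f:
                    pickle.dump(scaler, f)
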
View file

@@ -1,4 +1,4 @@
 defaults:
-  - data: quanser-log.yaml
+  - data: cartpole_st1_at.yaml
   - model: SVR.yaml
-  - simulator: quanser-log.yaml
+  - simulator: gboost_cartpole.yaml
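
For reference, these defaults are Hydra config groups (the outputs/ directory newly ignored in .gitignore above is Hydra's default run output). A minimal sketch of the entry point such a config implies; the conf directory, config file name, and printed keys are assumptions, not shown in this diff:

    import hydra
    from omegaconf import DictConfig, OmegaConf

    @hydra.main(config_path="conf", config_name="config")
    def main(cfg: DictConfig) -> None:
        # cfg.data composes from data/cartpole_st1_at.yaml, cfg.model from
        # model/SVR.yaml, cfg.simulator from simulator/gboost_cartpole.yaml.
        print(OmegaConf.to_yaml(cfg))

    if __name__ == "__main__":
        main()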

View file

@@ -12,10 +12,9 @@ model:
   - lambda: 1
   - max_bin: 256
 saver:
-  - filename: models/boost/SVR_model
+  - filename: models/SVR_model
 sweep:
   - run: False
   - search_algorithm: bayesian
   - num_trials: 3
   - scoring_func: r2
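
A hedged reading of the sweep block: when run is flipped to True, these keys presumably select the search strategy, trial budget, and scoring metric for hyperparameter tuning. The commented-out TuneSearchCV code in the sklearn module below hints at tune-sklearn as a backend; purely as illustration, the mapping could look like:

    # Illustrative only: how the sweep keys above could map onto tune-sklearn's
    # TuneSearchCV; the repo's actual sweep() implementation is not in this diff.
    from tune_sklearn import TuneSearchCV
    from sklearn.svm import SVR

    search = TuneSearchCV(
        SVR(),
        param_distributions={"C": (0.01, 100.0)},  # example search space
        search_optimization="bayesian",  # sweep.search_algorithm
        n_trials=3,                      # sweep.num_trials
        scoring="r2",                    # sweep.scoring_func
    )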

View file

@@ -122,40 +122,40 @@ class GBoostModel(BaseModel):
         parent_dir.mkdir(parents=True, exist_ok=True)
         pickle.dump(self.model, open(filename, "wb"))

-    def load_model(
-        self, filename: str, scale_data: bool = False, separate_models: bool = False
-    ):
+    # def load_model(
+    #     self, filename: str, scale_data: bool = False, separate_models: bool = False
+    # ):
-        self.scale_data = scale_data
-        self.separate_models = separate_models
-        if self.separate_models:
-            all_models = os.listdir(filename)
-            all_models = natsorted(all_models)
-            if self.scale_data:
-                all_models = all_models[:-2]
-            num_models = len(all_models)
-            models = []
-            for i in range(num_models):
-                models.append(
-                    pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
-                )
-            self.models = models
-        else:
-            if not any([s in filename for s in [".pkl", ".pickle"]]):
-                filename += ".pkl"
-            self.model = pickle.load(open(filename, "rb"))
+    # self.scale_data = scale_data
+    # self.separate_models = separate_models
+    # if self.separate_models:
+    #     all_models = os.listdir(filename)
+    #     all_models = natsorted(all_models)
+    #     if self.scale_data:
+    #         all_models = all_models[:-2]
+    #     num_models = len(all_models)
+    #     models = []
+    #     for i in range(num_models):
+    #         models.append(
+    #             pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
+    #         )
+    #     self.models = models
+    # else:
+    #     if not any([s in filename for s in [".pkl", ".pickle"]]):
+    #         filename += ".pkl"
+    #     self.model = pickle.load(open(filename, "rb"))

-        if scale_data:
-            if not separate_models:
-                path_name = str(pathlib.Path(filename).parent)
-            else:
-                path_name = filename
-            self.xscalar = pickle.load(
-                open(os.path.join(path_name, "xscalar.pkl"), "rb")
-            )
-            self.yscalar = pickle.load(
-                open(os.path.join(path_name, "yscalar.pkl"), "rb")
-            )
+    # if scale_data:
+    #     if not separate_models:
+    #         path_name = str(pathlib.Path(filename).parent)
+    #     else:
+    #         path_name = filename
+    #     self.xscalar = pickle.load(
+    #         open(os.path.join(path_name, "xscalar.pkl"), "rb")
+    #     )
+    #     self.yscalar = pickle.load(
+    #         open(os.path.join(path_name, "yscalar.pkl"), "rb")
+    #     )

     def sweep(self, params: Dict, X, y):
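
With its private copy commented out, GBoostModel now falls through to the consolidated BaseModel.load_model. A usage sketch; the module path and file locations are assumptions:

    # Sketch: both loading paths now route through BaseModel.load_model
    # (module name and paths below are assumptions).
    from gboost_models import GBoostModel

    gbm = GBoostModel()

    # Single multioutput pickle; ".pkl" is appended when no extension is given.
    gbm.load_model(filename="models/SVR_model", scale_data=True)

    # Directory of per-output pickles; scaler pickles sit alongside the models.
    gbm.load_model(filename="models/gboost_dir", scale_data=True, separate_models=True)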

View file

@@ -54,13 +54,15 @@ class SKModel(BaseModel):
         if self.model_type == "GradientBoostingRegressor" and fit_separate == False:
             fit_separate = True
-            print(
+            logger.info(
                 "Note: fit_separate should be True for GradientBoostingRegressor. Changing to True .."
             )

         if self.model_type == "SVR" and fit_separate == False:
             fit_separate = True
-            print("Note: fit_separate should be True for SVR. Changing to True ..")
+            logger.info(
+                "Note: fit_separate should be True for SVR. Changing to True .."
+            )

         self.separate_models = fit_separate
@@ -73,7 +75,7 @@ class SKModel(BaseModel):
         try:
             self.model.fit(X, y)
         except ValueError:
-            print(
+            logger.info(
                 f"fit separate should be True for model type of {self.model_type}"
             )
@@ -114,25 +116,25 @@ class SKModel(BaseModel):
         else:
             pickle.dump(self.model, open(filename, "wb"))

-    def load_model(
-        self, dir_path: str, scale_data: bool = False, separate_models: bool = False
-    ):
+    # def load_model(
+    #     self, filename: str, scale_data: bool = False, separate_models: bool = False
+    # ):
-        self.separate_models = separate_models
-        if self.separate_models:
-            all_models = os.listdir(dir_path)
-            all_models = natsorted(all_models)
-            num_models = len(all_models)
-            models = []
-            for i in range(num_models):
-                models.append(
-                    pickle.load(open(os.path.join(dir_path, all_models[i]), "rb"))
-                )
-            self.models = models
-        else:
-            self.model = pickle.load(open(dir_path, "rb"))
+    # self.separate_models = separate_models
+    # if self.separate_models:
+    #     all_models = os.listdir(filename)
+    #     all_models = natsorted(all_models)
+    #     num_models = len(all_models)
+    #     models = []
+    #     for i in range(num_models):
+    #         models.append(
+    #             pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
+    #         )
+    #     self.models = models
+    # else:
+    #     self.model = pickle.load(open(filename, "rb"))

-        self.scale_data = scale_data
+        # self.scale_data = scale_data

     def sweep(self, X, y, params: Dict = None):
         if not params:
@@ -169,7 +171,7 @@ if __name__ == "__main__":
     skm.build_model(model_type="linear_model")
     skm.fit(X, y, fit_separate=False)
-    print(X)
+    logger.info(X)

     yhat = skm.predict(X)
     skm.save_model(dir_path="models/linear_pole_multi.pkl")
@@ -183,14 +185,14 @@ if __name__ == "__main__":
     skm.build_model(model_type="SVR")
     skm.fit(X, y, fit_separate=False)
-    print(X)
+    logger.info(X)

     yhat = skm.predict(X)
     skm.save_model(dir_path="models/lsvc_pole_multi.pkl")

     skm.build_model(model_type="GradientBoostingRegressor")
     skm.fit(X, y, fit_separate=False)
-    print(X)
+    logger.info(X)

     yhat = skm.predict(X)
     skm.save_model(dir_path="models/gbr_pole_multi.pkl")
@@ -231,9 +233,9 @@ if __name__ == "__main__":
     # random = TuneSearchCV(pipe, param_grid, search_optimization="random")
     # X, y = load_digits(return_X_y=True)
     # random.fit(X, y)
-    # print(random.cv_results_)
+    # logger.info(random.cv_results_)

     # grid = TuneGridSearchCV(pipe, param_grid=param_grid)
     # grid.fit(X, y)
-    # print(grid.cv_results_)
+    # logger.info(grid.cv_results_)
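
The print-to-logger.info swaps above presuppose a module-level logger; a minimal setup consistent with the handler wiring shown in base.py (the child logger name is an assumption):

    import logging
    import sys

    # Mirrors the handler wiring in base.py: a stdout handler on the shared
    # "datamodeler" logger; module-level loggers beneath it propagate here.
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.DEBUG)
    console.setFormatter(logging.Formatter("%(name)-13s: %(levelname)-8s %(message)s"))
    logging.getLogger("datamodeler").addHandler(console)

    logger = logging.getLogger("datamodeler.skmodels")  # child name is an assumption
    logger.setLevel(logging.DEBUG)
    logger.info("Note: fit_separate should be True for SVR. Changing to True ..")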

View file

@@ -27,7 +27,7 @@ def test_svm_train():
     lsvm.save_model(filename="tmp/lsvm_pole")

     lsvm2 = SKModel()
-    lsvm2.load_model(dir_path="tmp/lsvm_pole", separate_models=True)
+    lsvm2.load_model(filename="tmp/lsvm_pole", separate_models=True)

     yhat0 = lsvm.predict(X)
     yhat = lsvm2.predict(X)
@@ -45,7 +45,7 @@ def test_linear_train():
     linear.save_model(filename="tmp/linear_pole.pkl")

     linear2 = SKModel()
-    linear2.load_model(dir_path="tmp/linear_pole.pkl")
+    linear2.load_model(filename="tmp/linear_pole.pkl")

     yhat0 = linear.predict(X)
     yhat = linear2.predict(X)
@@ -64,7 +64,7 @@ def test_gbr_train():
     gbr.save_model(filename="tmp/gbr_pole.pkl")

     gbr2 = SKModel()
-    gbr2.load_model(dir_path="tmp/gbr_pole.pkl", separate_models=True)
+    gbr2.load_model(filename="tmp/gbr_pole.pkl", separate_models=True)

     yhat0 = gbr.predict(X)
     yhat = gbr2.predict(X)
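
All three tests exercise the same save/reload round trip through the renamed filename keyword. A self-contained sketch of the pattern; the import path and synthetic data are placeholders:

    import numpy as np
    from skmodels import SKModel  # import path assumed

    def test_linear_roundtrip():
        rng = np.random.default_rng(0)
        X, y = rng.normal(size=(32, 4)), rng.normal(size=(32, 2))

        linear = SKModel()
        linear.build_model(model_type="linear_model")
        linear.fit(X, y, fit_separate=False)
        linear.save_model(filename="tmp/linear_pole.pkl")

        # Reload through the consolidated loader and compare predictions.
        linear2 = SKModel()
        linear2.load_model(filename="tmp/linear_pole.pkl")
        np.testing.assert_allclose(linear.predict(X), linear2.predict(X))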