update: use standard multioutputs loaders; update tests

Ali Zaidi 2021-01-17 21:00:40 -08:00
Parent 2be904a6b5
Commit 7d18e348dd
7 changed files: 108 additions and 69 deletions

7
.gitignore vendored
View file

@@ -68,5 +68,8 @@ test-output.xml
 # jupyter
 .ipynb_checkpoints
 # big models
 bigmodels/
+# tmp directories
 tmp/
+# hydra outputs
+outputs/

41
base.py
View file

@@ -8,6 +8,7 @@ import numpy as np
 import pandas as pd
 from typing import Tuple, List, Union
+from natsort import natsorted
 from sklearn.preprocessing import StandardScaler
 from loaders import CsvReader
@@ -17,7 +18,7 @@ console = logging.StreamHandler(sys.stdout)
 console.setLevel(logging.DEBUG)
 formater = logging.Formatter("%(name)-13s: %(levelname)-8s %(message)s")
 console.setFormatter(formater)
-logging.getLogger(__name__).addHandler(console)
+logging.getLogger("datamodeler").addHandler(console)

 # TODO: add weighting to the model
 # TODO: this should go into a metrics function?
@@ -203,10 +204,44 @@ class BaseModel(abc.ABC):
         pickle.dump(self.model, open(filename, "wb"))

-    def load_model(self, filename: str, scale_data: bool = False):
+    def load_model(
+        self, filename: str, scale_data: bool = False, separate_models: bool = False
+    ):
+        self.separate_models = separate_models
         self.scale_data = scale_data
-        self.model = pickle.load(open(filename, "rb"))
+        if scale_data:
+            if not self.separate_models:
+                path_name = str(pathlib.Path(filename).parent)
+            else:
+                path_name = filename
+            self.xscalar = pickle.load(
+                open(os.path.join(path_name, "xscalar.pkl"), "rb")
+            )
+            self.yscalar = pickle.load(
+                open(os.path.join(path_name, "yscalar.pkl"), "rb")
+            )
+
+        if separate_models:
+            self._load_multimodels(filename, scale_data)
+        else:
+            if not any([s in filename for s in [".pkl", ".pickle"]]):
+                filename += ".pkl"
+            self.model = pickle.load(open(filename, "rb"))
+
+    def _load_multimodels(self, filename: str, scale_data: bool):
+        all_models = os.listdir(filename)
+        all_models = natsorted(all_models)
+        if self.scale_data:
+            all_models = all_models[:-2]
+        num_models = len(all_models)
+        models = []
+        for i in range(num_models):
+            models.append(
+                pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
+            )
+        self.models = models

     def evaluate(self, test_data: np.ndarray):

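For context, the new _load_multimodels relies on a directory layout where natsorted places the two scaler pickles after the per-output model pickles, so all_models[:-2] drops exactly xscalar.pkl and yscalar.pkl when scale_data is set (both sort after names like model0.pkl). A minimal sketch of a saver producing that layout; the helper name and file pattern are illustrative, not part of this commit:

    import os
    import pickle

    def save_multimodels(dir_path, models, xscalar=None, yscalar=None):
        # Hypothetical counterpart to BaseModel._load_multimodels: one pickle
        # per output dimension, plus optional scaler pickles. natsorted()
        # orders the scaler files after the model files, so all_models[:-2]
        # strips them again on load.
        os.makedirs(dir_path, exist_ok=True)
        for i, model in enumerate(models):
            with open(os.path.join(dir_path, f"model{i}.pkl"), "wb") as f:
                pickle.dump(model, f)
        for name, scaler in (("xscalar.pkl", xscalar), ("yscalar.pkl", yscalar)):
            if scaler is not None:
                with open(os.path.join(dir_path, name), "wb") as f:
                    pickle.dump(scaler, f)
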
View file

@@ -1,4 +1,4 @@
 defaults:
-  - data: quanser-log.yaml
+  - data: cartpole_st1_at.yaml
   - model: SVR.yaml
-  - simulator: quanser-log.yaml
+  - simulator: gboost_cartpole.yaml
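
For reference, these defaults are Hydra config groups (the outputs/ directory newly ignored in .gitignore above is Hydra's default run output). A minimal sketch of the entry point such a config implies; the conf directory, config file name, and printed keys are assumptions, not shown in this diff:

    import hydra
    from omegaconf import DictConfig, OmegaConf

    @hydra.main(config_path="conf", config_name="config")
    def main(cfg: DictConfig) -> None:
        # cfg.data composes from data/cartpole_st1_at.yaml, cfg.model from
        # model/SVR.yaml, cfg.simulator from simulator/gboost_cartpole.yaml.
        print(OmegaConf.to_yaml(cfg))

    if __name__ == "__main__":
        main()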

View file

@@ -12,10 +12,9 @@ model:
   - lambda: 1
   - max_bin: 256
 saver:
-  - filename: models/boost/SVR_model
+  - filename: models/SVR_model
 sweep:
   - run: False
   - search_algorithm: bayesian
   - num_trials: 3
   - scoring_func: r2
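
A hedged reading of the sweep block: when run is flipped to True, these keys presumably select the search strategy, trial budget, and scoring metric for hyperparameter tuning. The commented-out TuneSearchCV code in the sklearn module below hints at tune-sklearn as a backend; purely as illustration, the mapping could look like:

    # Illustrative only: how the sweep keys above could map onto tune-sklearn's
    # TuneSearchCV; the repo's actual sweep() implementation is not in this diff.
    from tune_sklearn import TuneSearchCV
    from sklearn.svm import SVR

    search = TuneSearchCV(
        SVR(),
        param_distributions={"C": (0.01, 100.0)},  # example search space
        search_optimization="bayesian",  # sweep.search_algorithm
        n_trials=3,                      # sweep.num_trials
        scoring="r2",                    # sweep.scoring_func
    )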

View file

@@ -122,40 +122,40 @@ class GBoostModel(BaseModel):
         parent_dir.mkdir(parents=True, exist_ok=True)
         pickle.dump(self.model, open(filename, "wb"))

-    def load_model(
-        self, filename: str, scale_data: bool = False, separate_models: bool = False
-    ):
+    # def load_model(
+    #     self, filename: str, scale_data: bool = False, separate_models: bool = False
+    # ):
-        self.scale_data = scale_data
-        self.separate_models = separate_models
-        if self.separate_models:
-            all_models = os.listdir(filename)
-            all_models = natsorted(all_models)
-            if self.scale_data:
-                all_models = all_models[:-2]
-            num_models = len(all_models)
-            models = []
-            for i in range(num_models):
-                models.append(
-                    pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
-                )
-            self.models = models
-        else:
-            if not any([s in filename for s in [".pkl", ".pickle"]]):
-                filename += ".pkl"
-            self.model = pickle.load(open(filename, "rb"))
+    # self.scale_data = scale_data
+    # self.separate_models = separate_models
+    # if self.separate_models:
+    #     all_models = os.listdir(filename)
+    #     all_models = natsorted(all_models)
+    #     if self.scale_data:
+    #         all_models = all_models[:-2]
+    #     num_models = len(all_models)
+    #     models = []
+    #     for i in range(num_models):
+    #         models.append(
+    #             pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
+    #         )
+    #     self.models = models
+    # else:
+    #     if not any([s in filename for s in [".pkl", ".pickle"]]):
+    #         filename += ".pkl"
+    #     self.model = pickle.load(open(filename, "rb"))

-        if scale_data:
-            if not separate_models:
-                path_name = str(pathlib.Path(filename).parent)
-            else:
-                path_name = filename
-            self.xscalar = pickle.load(
-                open(os.path.join(path_name, "xscalar.pkl"), "rb")
-            )
-            self.yscalar = pickle.load(
-                open(os.path.join(path_name, "yscalar.pkl"), "rb")
-            )
+    # if scale_data:
+    #     if not separate_models:
+    #         path_name = str(pathlib.Path(filename).parent)
+    #     else:
+    #         path_name = filename
+    #     self.xscalar = pickle.load(
+    #         open(os.path.join(path_name, "xscalar.pkl"), "rb")
+    #     )
+    #     self.yscalar = pickle.load(
+    #         open(os.path.join(path_name, "yscalar.pkl"), "rb")
+    #     )

     def sweep(self, params: Dict, X, y):
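
With its private copy commented out, GBoostModel now falls through to the consolidated BaseModel.load_model. A usage sketch; the module path and file locations are assumptions:

    # Sketch: both loading paths now route through BaseModel.load_model
    # (module name and paths below are assumptions).
    from gboost_models import GBoostModel

    gbm = GBoostModel()

    # Single multioutput pickle; ".pkl" is appended when no extension is given.
    gbm.load_model(filename="models/SVR_model", scale_data=True)

    # Directory of per-output pickles; scaler pickles sit alongside the models.
    gbm.load_model(filename="models/gboost_dir", scale_data=True, separate_models=True)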

View file

@@ -54,13 +54,15 @@ class SKModel(BaseModel):
         if self.model_type == "GradientBoostingRegressor" and fit_separate == False:
             fit_separate = True
-            print(
+            logger.info(
                 "Note: fit_separate should be True for GradientBoostingRegressor. Changing to True .."
             )

         if self.model_type == "SVR" and fit_separate == False:
             fit_separate = True
-            print("Note: fit_separate should be True for SVR. Changing to True ..")
+            logger.info(
+                "Note: fit_separate should be True for SVR. Changing to True .."
+            )

         self.separate_models = fit_separate
@@ -73,7 +75,7 @@ class SKModel(BaseModel):
         try:
             self.model.fit(X, y)
         except ValueError:
-            print(
+            logger.info(
                 f"fit separate should be True for model type of {self.model_type}"
             )
@@ -114,25 +116,25 @@ class SKModel(BaseModel):
         else:
             pickle.dump(self.model, open(filename, "wb"))

-    def load_model(
-        self, dir_path: str, scale_data: bool = False, separate_models: bool = False
-    ):
+    # def load_model(
+    #     self, filename: str, scale_data: bool = False, separate_models: bool = False
+    # ):
-        self.separate_models = separate_models
-        if self.separate_models:
-            all_models = os.listdir(dir_path)
-            all_models = natsorted(all_models)
-            num_models = len(all_models)
-            models = []
-            for i in range(num_models):
-                models.append(
-                    pickle.load(open(os.path.join(dir_path, all_models[i]), "rb"))
-                )
-            self.models = models
-        else:
-            self.model = pickle.load(open(dir_path, "rb"))
+    # self.separate_models = separate_models
+    # if self.separate_models:
+    #     all_models = os.listdir(filename)
+    #     all_models = natsorted(all_models)
+    #     num_models = len(all_models)
+    #     models = []
+    #     for i in range(num_models):
+    #         models.append(
+    #             pickle.load(open(os.path.join(filename, all_models[i]), "rb"))
+    #         )
+    #     self.models = models
+    # else:
+    #     self.model = pickle.load(open(filename, "rb"))

-        self.scale_data = scale_data
+        # self.scale_data = scale_data

     def sweep(self, X, y, params: Dict = None):
         if not params:
@@ -169,7 +171,7 @@ if __name__ == "__main__":
     skm.build_model(model_type="linear_model")
     skm.fit(X, y, fit_separate=False)
-    print(X)
+    logger.info(X)

     yhat = skm.predict(X)
     skm.save_model(dir_path="models/linear_pole_multi.pkl")
@@ -183,14 +185,14 @@ if __name__ == "__main__":
     skm.build_model(model_type="SVR")
     skm.fit(X, y, fit_separate=False)
-    print(X)
+    logger.info(X)

     yhat = skm.predict(X)
     skm.save_model(dir_path="models/lsvc_pole_multi.pkl")

     skm.build_model(model_type="GradientBoostingRegressor")
     skm.fit(X, y, fit_separate=False)
-    print(X)
+    logger.info(X)

     yhat = skm.predict(X)
     skm.save_model(dir_path="models/gbr_pole_multi.pkl")
@@ -231,9 +233,9 @@ if __name__ == "__main__":
     # random = TuneSearchCV(pipe, param_grid, search_optimization="random")
     # X, y = load_digits(return_X_y=True)
     # random.fit(X, y)
-    # print(random.cv_results_)
+    # logger.info(random.cv_results_)

     # grid = TuneGridSearchCV(pipe, param_grid=param_grid)
     # grid.fit(X, y)
-    # print(grid.cv_results_)
+    # logger.info(grid.cv_results_)
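
The print-to-logger.info swaps above presuppose a module-level logger; a minimal setup consistent with the handler wiring shown in base.py (the child logger name is an assumption):

    import logging
    import sys

    # Mirrors the handler wiring in base.py: a stdout handler on the shared
    # "datamodeler" logger; module-level loggers beneath it propagate here.
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.DEBUG)
    console.setFormatter(logging.Formatter("%(name)-13s: %(levelname)-8s %(message)s"))
    logging.getLogger("datamodeler").addHandler(console)

    logger = logging.getLogger("datamodeler.skmodels")  # child name is an assumption
    logger.setLevel(logging.DEBUG)
    logger.info("Note: fit_separate should be True for SVR. Changing to True ..")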

View file

@@ -27,7 +27,7 @@ def test_svm_train():
     lsvm.save_model(filename="tmp/lsvm_pole")

     lsvm2 = SKModel()
-    lsvm2.load_model(dir_path="tmp/lsvm_pole", separate_models=True)
+    lsvm2.load_model(filename="tmp/lsvm_pole", separate_models=True)

     yhat0 = lsvm.predict(X)
     yhat = lsvm2.predict(X)
@@ -45,7 +45,7 @@ def test_linear_train():
     linear.save_model(filename="tmp/linear_pole.pkl")

     linear2 = SKModel()
-    linear2.load_model(dir_path="tmp/linear_pole.pkl")
+    linear2.load_model(filename="tmp/linear_pole.pkl")

     yhat0 = linear.predict(X)
     yhat = linear2.predict(X)
@@ -64,7 +64,7 @@ def test_gbr_train():
     gbr.save_model(filename="tmp/gbr_pole.pkl")

     gbr2 = SKModel()
-    gbr2.load_model(dir_path="tmp/gbr_pole.pkl", separate_models=True)
+    gbr2.load_model(filename="tmp/gbr_pole.pkl", separate_models=True)

     yhat0 = gbr.predict(X)
     yhat = gbr2.predict(X)
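
All three tests exercise the same save/reload round trip through the renamed filename keyword. A self-contained sketch of the pattern; the import path and synthetic data are placeholders:

    import numpy as np
    from skmodels import SKModel  # import path assumed

    def test_linear_roundtrip():
        rng = np.random.default_rng(0)
        X, y = rng.normal(size=(32, 4)), rng.normal(size=(32, 2))

        linear = SKModel()
        linear.build_model(model_type="linear_model")
        linear.fit(X, y, fit_separate=False)
        linear.save_model(filename="tmp/linear_pole.pkl")

        # Reload through the consolidated loader and compare predictions.
        linear2 = SKModel()
        linear2.load_model(filename="tmp/linear_pole.pkl")
        np.testing.assert_allclose(linear.predict(X), linear2.predict(X))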