joblib
This commit is contained in:
Родитель
b9c17cbb1f
Коммит
74946cffa2
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -2,6 +2,13 @@ import os
|
|||
import numpy as np
|
||||
from azureml.core import Model
|
||||
import joblib
|
||||
#import argparse
|
||||
|
||||
#parser = argparse.ArgumentParser()
|
||||
#parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Freature')
|
||||
#args = parser.parse_args()
|
||||
#id_feat = str(args.id_feature)
|
||||
#print('id feature', id_feat)
|
||||
|
||||
|
||||
def init():
|
||||
|
@ -13,13 +20,8 @@ def init():
|
|||
model = joblib.load(model_path)
|
||||
|
||||
def run(mini_batch):
|
||||
# # This runs for each batch
|
||||
# resultList = model.predict(mini_batch)
|
||||
# ind = mini_batch.index
|
||||
# return resultList.tolist()
|
||||
mini_batch.set_index('Van_Stock_Proposal_Detail_Id', inplace=True)
|
||||
index_list = list(mini_batch.index)
|
||||
y_pred = model.predict(mini_batch).tolist()
|
||||
score = model.score_samples(mini_batch).tolist()
|
||||
|
||||
|
||||
return(list(zip(index_list, y_pred, score)))
|
|
@ -3,7 +3,7 @@ from azureml.core import Model, Run
|
|||
import argparse
|
||||
import numpy as np
|
||||
import iJungle
|
||||
import pickle
|
||||
import joblib
|
||||
|
||||
run = Run.get_context()
|
||||
|
||||
|
@ -14,6 +14,7 @@ parser = argparse.ArgumentParser()
|
|||
|
||||
# Input Data
|
||||
parser.add_argument("--input-data", type=str, dest='input_data', help='Overhead dataset')
|
||||
parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Freature')
|
||||
|
||||
# Hyper parameters
|
||||
parser.add_argument('--trees', type=int, dest='trees', default=100, help='Number of trees')
|
||||
|
@ -21,6 +22,8 @@ parser.add_argument('--subsample-size', type=int, dest='subsample_size', default
|
|||
|
||||
# Add arguments to args collection
|
||||
args = parser.parse_args()
|
||||
id_feat = str(args.id_feature)
|
||||
print('id feature', id_feat)
|
||||
|
||||
# Log Hyperparameter values
|
||||
trees = np.int(args.trees)
|
||||
|
@ -33,6 +36,7 @@ run.log('subsample_size', subsample_size)
|
|||
# Load training data
|
||||
print("Loading Data...")
|
||||
W = run.input_datasets['overhead_data'].to_pandas_dataframe() # Get the training data from the estimator input
|
||||
W.set_index(id_feat, inplace=True)
|
||||
|
||||
# Load iFor_list pickle
|
||||
print("Loading pickle...")
|
||||
|
@ -40,9 +44,7 @@ model_name = 'iJungle_light_' + str(trees) + '_' + str(subsample_size)
|
|||
print(model_name)
|
||||
model_path = Model.get_model_path(model_name)
|
||||
print(model_path)
|
||||
with open(model_path, 'rb') as infile:
|
||||
iFor_list = pickle.load(infile)
|
||||
|
||||
iFor_list = joblib.load(model_path)
|
||||
|
||||
# Evaluation
|
||||
print("Starting evaluation ...")
|
||||
|
@ -50,8 +52,7 @@ os.makedirs(iJungle._MODEL_DIR, exist_ok=True)
|
|||
results = iJungle.model_eval_fun(W, iFor_list)
|
||||
results_filename = os.path.join(iJungle._MODEL_DIR, model_name + '_results.pkl')
|
||||
print("Writing results:", results_filename)
|
||||
with open(results_filename, 'wb') as outfile:
|
||||
pickle.dump(results, outfile)
|
||||
joblib.dump(value=results, filename=results_filename)
|
||||
|
||||
# Log dummy metric
|
||||
run.log('Dummy', np.float(0))
|
||||
|
|
|
@ -12,6 +12,7 @@ parser = argparse.ArgumentParser()
|
|||
|
||||
# Input Data
|
||||
parser.add_argument("--input-data", type=str, dest='input_data', help='training dataset')
|
||||
parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Freature')
|
||||
parser.add_argument("--max-subsample-size", type=int, dest='max_sss', help='Max subsample size')
|
||||
parser.add_argument("--train-size", type=float, dest='train_size', help='Train size')
|
||||
|
||||
|
@ -21,6 +22,8 @@ parser.add_argument('--subsample-size', type=int, dest='subsample_size', default
|
|||
|
||||
# Add arguments to args collection
|
||||
args = parser.parse_args()
|
||||
id_feat = str(args.id_feature)
|
||||
print('id feature', id_feat)
|
||||
|
||||
# Log Hyperparameter values
|
||||
trees = np.int(args.trees)
|
||||
|
@ -41,7 +44,7 @@ run.log('train_size', train_size)
|
|||
# Load training data
|
||||
print("Loading Data...")
|
||||
df = run.input_datasets['training_data'].to_pandas_dataframe() # Get the training data from the estimator input
|
||||
|
||||
df.set_index(id_feat, inplace=True)
|
||||
|
||||
print("Starting training ...")
|
||||
model_filename = iJungle.model_train_fun(df, trees, subsample_size, train_size, max_sss)
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
__version__ = '0.1.66'
|
||||
__version__ = '0.1.73'
|
||||
_MODEL_DIR = 'outputs'
|
||||
|
|
|
@ -3,7 +3,7 @@ from iJungle.config import _MODEL_DIR
|
|||
|
||||
import random
|
||||
from sklearn.ensemble import IsolationForest
|
||||
import pickle
|
||||
import joblib
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
@ -67,9 +67,8 @@ def model_train_fun(df, trees=100, subsample_size=8192, train_size = 0.2, max_ss
|
|||
print("{}/{}".format(counter, int(df_len/max_sss+1)))
|
||||
|
||||
filename = 'iJungle_light_' + str(trees) + '_' + str(subsample_size) + '.pkl'
|
||||
with open(os.path.join(_MODEL_DIR, filename), 'wb') as outfile:
|
||||
pickle.dump(iFor_list, outfile)
|
||||
|
||||
joblib.dump(value=iFor_list, filename=os.path.join(_MODEL_DIR, filename))
|
||||
|
||||
return(filename)
|
||||
except Exception as err:
|
||||
# TODO: Implement logger
|
||||
|
@ -135,7 +134,7 @@ def grid_eval(df, subsample_list = [4096, 2048, 1024, 512],
|
|||
W = df.iloc[my_indexes[:df_len]]
|
||||
|
||||
results_dic = {}
|
||||
## Evaluation with stored models as external files(pickle format)
|
||||
## Evaluation with stored models as external files(joblib format)
|
||||
for i, subsample_size in enumerate(subsample_list):
|
||||
results_dic_t = {}
|
||||
for j, trees in enumerate(trees_list):
|
||||
|
@ -143,16 +142,14 @@ def grid_eval(df, subsample_list = [4096, 2048, 1024, 512],
|
|||
# TODO: Implement logger
|
||||
if verbose:
|
||||
print('Reading ' + filename)
|
||||
with open(os.path.join(_MODEL_DIR, filename), 'rb') as infile:
|
||||
iFor_list = pickle.load(infile)
|
||||
iFor_list = joblib.load(os.path.join(_MODEL_DIR, filename))
|
||||
results_dic_t[str(trees)] = model_eval_fun(W, iFor_list, verbose)
|
||||
results_dic[str(subsample_size)] = results_dic_t
|
||||
|
||||
filename_results = 'iJungle_light_results_overhead.pkl'
|
||||
|
||||
results = pd.DataFrame(results_dic)
|
||||
with open(os.path.join(_MODEL_DIR, filename_results), 'wb') as outfile:
|
||||
pickle.dump(results, outfile)
|
||||
joblib.dump(value=results, filename=os.path.join(_MODEL_DIR, filename_results))
|
||||
return(results)
|
||||
except Exception as err:
|
||||
# TODO: Implement logger
|
||||
|
@ -165,8 +162,7 @@ def get_grid_eval_results(verbose = True):
|
|||
if os.path.exists(picklename):
|
||||
if verbose:
|
||||
print("Reading ", picklename)
|
||||
with open(picklename, 'rb') as pickle_in:
|
||||
results = pickle.load(pickle_in)
|
||||
results = joblib.load(picklename)
|
||||
return(results)
|
||||
else:
|
||||
raise Exception("grid_eval has not have been executed")
|
||||
|
@ -211,9 +207,7 @@ def best_iforest(results, verbose=True):
|
|||
picklename = os.path.join(_MODEL_DIR,'iJungle_light_' + str(trees) + '_' + str(subsample_size) + '.pkl')
|
||||
if verbose:
|
||||
print('Reading ' + picklename)
|
||||
|
||||
with open(picklename,"rb") as pickle_in:
|
||||
iFor_list = pickle.load(pickle_in)
|
||||
iFor_list = joblib.load(picklename)
|
||||
|
||||
model = iFor_list[best_iF_k]
|
||||
if verbose:
|
||||
|
|
Загрузка…
Ссылка в новой задаче