joblib

Parent: b9c17cbb1f
Commit: 74946cffa2

One file's diff is not shown because it is too large.
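The change, applied uniformly across the batch scoring script, the overhead evaluation script, the training script, the iJungle config, and the iJungle core module, replaces pickle file handles with direct joblib calls. A minimal sketch of the pattern being applied (the model and path below are illustrative, not from this repo):

    import os
    import joblib
    from sklearn.ensemble import IsolationForest

    os.makedirs('outputs', exist_ok=True)
    model = IsolationForest(n_estimators=100).fit([[0.0], [0.1], [9.9]])

    # Before: explicit file handle plus pickle.dump/pickle.load
    # with open('outputs/model.pkl', 'wb') as outfile:
    #     pickle.dump(model, outfile)

    # After: joblib takes the path directly (and handles numpy arrays efficiently)
    joblib.dump(value=model, filename='outputs/model.pkl')
    restored = joblib.load('outputs/model.pkl')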
@@ -2,6 +2,13 @@ import os
 import numpy as np
 from azureml.core import Model
 import joblib
+#import argparse
+#parser = argparse.ArgumentParser()
+#parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Freature')
+#args = parser.parse_args()
+#id_feat = str(args.id_feature)
+#print('id feature', id_feat)
+

 def init():
@@ -13,13 +20,8 @@ def init():
     model = joblib.load(model_path)

 def run(mini_batch):
-    # # This runs for each batch
-    # resultList = model.predict(mini_batch)
-    # ind = mini_batch.index
-    # return resultList.tolist()
+    mini_batch.set_index('Van_Stock_Proposal_Detail_Id', inplace=True)
     index_list = list(mini_batch.index)
     y_pred = model.predict(mini_batch).tolist()
     score = model.score_samples(mini_batch).tolist()

     return(list(zip(index_list, y_pred, score)))
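Beyond the joblib swap, run() above now indexes each mini-batch by Van_Stock_Proposal_Detail_Id and returns (id, prediction, score) tuples instead of a bare prediction list. A rough local harness for that contract, assuming the registered model is an IsolationForest-style estimator with predict/score_samples and a single numeric feature (both are illustrative assumptions; the real schema is not shown in this diff):

    import pandas as pd
    from sklearn.ensemble import IsolationForest

    # Hypothetical stand-ins for the registered model and one scoring mini-batch
    model = IsolationForest(n_estimators=100).fit(
        pd.DataFrame({'feat': [0.10, 0.20, 0.15, 9.00]}))
    mini_batch = pd.DataFrame({
        'Van_Stock_Proposal_Detail_Id': [101, 102],
        'feat': [0.12, 8.50],
    })

    # Mirrors the body of run() after this commit
    mini_batch.set_index('Van_Stock_Proposal_Detail_Id', inplace=True)
    index_list = list(mini_batch.index)
    y_pred = model.predict(mini_batch).tolist()       # -1 = anomaly, 1 = inlier
    score = model.score_samples(mini_batch).tolist()  # lower = more anomalous
    print(list(zip(index_list, y_pred, score)))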
@@ -3,7 +3,7 @@ from azureml.core import Model, Run
 import argparse
 import numpy as np
 import iJungle
-import pickle
+import joblib

 run = Run.get_context()

@@ -14,6 +14,7 @@ parser = argparse.ArgumentParser()

 # Input Data
 parser.add_argument("--input-data", type=str, dest='input_data', help='Overhead dataset')
+parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Freature')

 # Hyper parameters
 parser.add_argument('--trees', type=int, dest='trees', default=100, help='Number of trees')
@@ -21,6 +22,8 @@ parser.add_argument('--subsample-size', type=int, dest='subsample_size', default

 # Add arguments to args collection
 args = parser.parse_args()
+id_feat = str(args.id_feature)
+print('id feature', id_feat)

 # Log Hyperparameter values
 trees = np.int(args.trees)
@@ -33,6 +36,7 @@ run.log('subsample_size', subsample_size)
 # Load training data
 print("Loading Data...")
 W = run.input_datasets['overhead_data'].to_pandas_dataframe() # Get the training data from the estimator input
+W.set_index(id_feat, inplace=True)

 # Load iFor_list pickle
 print("Loading pickle...")
@@ -40,9 +44,7 @@ model_name = 'iJungle_light_' + str(trees) + '_' + str(subsample_size)
 print(model_name)
 model_path = Model.get_model_path(model_name)
 print(model_path)
-with open(model_path, 'rb') as infile:
-    iFor_list = pickle.load(infile)
+iFor_list = joblib.load(model_path)

 # Evaluation
 print("Starting evaluation ...")
@@ -50,8 +52,7 @@ os.makedirs(iJungle._MODEL_DIR, exist_ok=True)
 results = iJungle.model_eval_fun(W, iFor_list)
 results_filename = os.path.join(iJungle._MODEL_DIR, model_name + '_results.pkl')
 print("Writing results:", results_filename)
-with open(results_filename, 'wb') as outfile:
-    pickle.dump(results, outfile)
+joblib.dump(value=results, filename=results_filename)

 # Log dummy metric
 run.log('Dummy', np.float(0))
@@ -12,6 +12,7 @@ parser = argparse.ArgumentParser()

 # Input Data
 parser.add_argument("--input-data", type=str, dest='input_data', help='training dataset')
+parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Freature')
 parser.add_argument("--max-subsample-size", type=int, dest='max_sss', help='Max subsample size')
 parser.add_argument("--train-size", type=float, dest='train_size', help='Train size')

@@ -21,6 +22,8 @@ parser.add_argument('--subsample-size', type=int, dest='subsample_size', default

 # Add arguments to args collection
 args = parser.parse_args()
+id_feat = str(args.id_feature)
+print('id feature', id_feat)

 # Log Hyperparameter values
 trees = np.int(args.trees)
@@ -41,7 +44,7 @@ run.log('train_size', train_size)
 # Load training data
 print("Loading Data...")
 df = run.input_datasets['training_data'].to_pandas_dataframe() # Get the training data from the estimator input
-
+df.set_index(id_feat, inplace=True)

 print("Starting training ...")
 model_filename = iJungle.model_train_fun(df, trees, subsample_size, train_size, max_sss)
@@ -1,2 +1,2 @@
-__version__ = '0.1.66'
+__version__ = '0.1.73'
 _MODEL_DIR = 'outputs'
@@ -3,7 +3,7 @@ from iJungle.config import _MODEL_DIR

 import random
 from sklearn.ensemble import IsolationForest
-import pickle
+import joblib
 import os
 import numpy as np
 import pandas as pd
@@ -67,9 +67,8 @@ def model_train_fun(df, trees=100, subsample_size=8192, train_size = 0.2, max_ss
             print("{}/{}".format(counter, int(df_len/max_sss+1)))

         filename = 'iJungle_light_' + str(trees) + '_' + str(subsample_size) + '.pkl'
-        with open(os.path.join(_MODEL_DIR, filename), 'wb') as outfile:
-            pickle.dump(iFor_list, outfile)
+        joblib.dump(value=iFor_list, filename=os.path.join(_MODEL_DIR, filename))

         return(filename)
     except Exception as err:
         # TODO: Implement logger
@@ -135,7 +134,7 @@ def grid_eval(df, subsample_list = [4096, 2048, 1024, 512],
         W = df.iloc[my_indexes[:df_len]]

         results_dic = {}
-        ## Evaluation with stored models as external files(pickle format)
+        ## Evaluation with stored models as external files(joblib format)
         for i, subsample_size in enumerate(subsample_list):
             results_dic_t = {}
             for j, trees in enumerate(trees_list):
@@ -143,16 +142,14 @@ def grid_eval(df, subsample_list = [4096, 2048, 1024, 512],
                 # TODO: Implement logger
                 if verbose:
                     print('Reading ' + filename)
-                with open(os.path.join(_MODEL_DIR, filename), 'rb') as infile:
-                    iFor_list = pickle.load(infile)
+                iFor_list = joblib.load(os.path.join(_MODEL_DIR, filename))
                 results_dic_t[str(trees)] = model_eval_fun(W, iFor_list, verbose)
             results_dic[str(subsample_size)] = results_dic_t

         filename_results = 'iJungle_light_results_overhead.pkl'

         results = pd.DataFrame(results_dic)
-        with open(os.path.join(_MODEL_DIR, filename_results), 'wb') as outfile:
-            pickle.dump(results, outfile)
+        joblib.dump(value=results, filename=os.path.join(_MODEL_DIR, filename_results))
         return(results)
     except Exception as err:
         # TODO: Implement logger
@@ -165,8 +162,7 @@ def get_grid_eval_results(verbose = True):
     if os.path.exists(picklename):
         if verbose:
             print("Reading ", picklename)
-        with open(picklename, 'rb') as pickle_in:
-            results = pickle.load(pickle_in)
+        results = joblib.load(picklename)
         return(results)
     else:
         raise Exception("grid_eval has not have been executed")
@@ -211,9 +207,7 @@ def best_iforest(results, verbose=True):
     picklename = os.path.join(_MODEL_DIR,'iJungle_light_' + str(trees) + '_' + str(subsample_size) + '.pkl')
     if verbose:
         print('Reading ' + picklename)
-    with open(picklename,"rb") as pickle_in:
-        iFor_list = pickle.load(pickle_in)
-
+    iFor_list = joblib.load(picklename)

     model = iFor_list[best_iF_k]
     if verbose:
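Since every artifact above is now written with joblib.dump, the readers were switched to joblib.load to match. A small sanity check, assuming grid_eval has already populated the outputs directory (_MODEL_DIR is 'outputs' per the config change above):

    import os
    import joblib

    # Filename matches the one hard-coded in grid_eval
    results = joblib.load(os.path.join('outputs', 'iJungle_light_results_overhead.pkl'))
    print(results)  # pandas DataFrame: index = tree counts, columns = subsample sizes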