Gustavo Pabon 2022-05-23 22:15:52 +00:00
Parent b9c17cbb1f
Commit 74946cffa2
6 changed files with 57 additions and 1313 deletions

File diff not shown because it is too large.

View File

@@ -2,6 +2,13 @@ import os
import numpy as np
from azureml.core import Model
import joblib
#import argparse
#parser = argparse.ArgumentParser()
#parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Feature')
#args = parser.parse_args()
#id_feat = str(args.id_feature)
#print('id feature', id_feat)
def init():
@@ -13,13 +20,8 @@ def init():
model = joblib.load(model_path)
def run(mini_batch):
# # This runs for each batch
# resultList = model.predict(mini_batch)
# ind = mini_batch.index
# return resultList.tolist()
mini_batch.set_index('Van_Stock_Proposal_Detail_Id', inplace=True)
index_list = list(mini_batch.index)
y_pred = model.predict(mini_batch).tolist()
score = model.score_samples(mini_batch).tolist()
return(list(zip(index_list, y_pred, score)))
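
For reference, a minimal standalone sketch of the scoring pattern above, using a toy DataFrame and a small scikit-learn IsolationForest in place of the registered model (the feature columns and values are made up; only the ID column name comes from the diff):

# Illustrative only: exercises the same set_index / predict /
# score_samples / zip pattern as run(), outside the batch pipeline.
import pandas as pd
from sklearn.ensemble import IsolationForest

mini_batch = pd.DataFrame({
    'Van_Stock_Proposal_Detail_Id': [101, 102, 103],  # toy IDs
    'feature_a': [0.1, 0.2, 9.9],
    'feature_b': [1.0, 1.1, -8.0],
})
mini_batch.set_index('Van_Stock_Proposal_Detail_Id', inplace=True)

model = IsolationForest(n_estimators=10, random_state=0).fit(mini_batch)  # stand-in for the joblib-loaded model

index_list = list(mini_batch.index)
y_pred = model.predict(mini_batch).tolist()       # 1 = inlier, -1 = outlier
score = model.score_samples(mini_batch).tolist()  # lower = more anomalous
print(list(zip(index_list, y_pred, score)))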

View File

@@ -3,7 +3,7 @@ from azureml.core import Model, Run
import argparse
import numpy as np
import iJungle
import pickle
import joblib
run = Run.get_context()
@@ -14,6 +14,7 @@ parser = argparse.ArgumentParser()
# Input Data
parser.add_argument("--input-data", type=str, dest='input_data', help='Overhead dataset')
parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Feature')
# Hyper parameters
parser.add_argument('--trees', type=int, dest='trees', default=100, help='Number of trees')
@@ -21,6 +22,8 @@ parser.add_argument('--subsample-size', type=int, dest='subsample_size', default
# Add arguments to args collection
args = parser.parse_args()
id_feat = str(args.id_feature)
print('id feature', id_feat)
# Log Hyperparameter values
trees = np.int(args.trees)
@@ -33,6 +36,7 @@ run.log('subsample_size', subsample_size)
# Load training data
print("Loading Data...")
W = run.input_datasets['overhead_data'].to_pandas_dataframe() # Get the training data from the estimator input
W.set_index(id_feat, inplace=True)
# Load iFor_list pickle
print("Loading pickle...")
@@ -40,9 +44,7 @@ model_name = 'iJungle_light_' + str(trees) + '_' + str(subsample_size)
print(model_name)
model_path = Model.get_model_path(model_name)
print(model_path)
with open(model_path, 'rb') as infile:
iFor_list = pickle.load(infile)
iFor_list = joblib.load(model_path)
# Evaluation
print("Starting evaluation ...")
@@ -50,8 +52,7 @@ os.makedirs(iJungle._MODEL_DIR, exist_ok=True)
results = iJungle.model_eval_fun(W, iFor_list)
results_filename = os.path.join(iJungle._MODEL_DIR, model_name + '_results.pkl')
print("Writing results:", results_filename)
with open(results_filename, 'wb') as outfile:
pickle.dump(results, outfile)
joblib.dump(value=results, filename=results_filename)
# Log dummy metric
run.log('Dummy', np.float(0))
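
The change above replaces pickle with joblib for both reading the registered model and writing the results. A self-contained sketch of that round trip (the file name and data are illustrative, not from the pipeline):

# Illustrative only: persist and reload a list of isolation forests with
# joblib, as the evaluation script now does for iFor_list and results.
import joblib
import numpy as np
from sklearn.ensemble import IsolationForest

X = np.random.RandomState(0).rand(64, 3)
iFor_list = [IsolationForest(n_estimators=5, random_state=i).fit(X) for i in range(3)]

joblib.dump(value=iFor_list, filename='iJungle_light_demo.pkl')  # was pickle.dump(...)
restored = joblib.load('iJungle_light_demo.pkl')                 # was pickle.load(...)
assert len(restored) == len(iFor_list)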

View File

@@ -12,6 +12,7 @@ parser = argparse.ArgumentParser()
# Input Data
parser.add_argument("--input-data", type=str, dest='input_data', help='training dataset')
parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Feature')
parser.add_argument("--max-subsample-size", type=int, dest='max_sss', help='Max subsample size')
parser.add_argument("--train-size", type=float, dest='train_size', help='Train size')
@@ -21,6 +22,8 @@ parser.add_argument('--subsample-size', type=int, dest='subsample_size', default
# Add arguments to args collection
args = parser.parse_args()
id_feat = str(args.id_feature)
print('id feature', id_feat)
# Log Hyperparameter values
trees = np.int(args.trees)
@@ -41,7 +44,7 @@ run.log('train_size', train_size)
# Load training data
print("Loading Data...")
df = run.input_datasets['training_data'].to_pandas_dataframe() # Get the training data from the estimator input
df.set_index(id_feat, inplace=True)
print("Starting training ...")
model_filename = iJungle.model_train_fun(df, trees, subsample_size, train_size, max_sss)
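
Both the training and evaluation scripts gain the same --id-feature argument; a hedged sketch of how it is consumed (the column name is the one hard-coded in the batch scoring script, the data is made up):

# Illustrative only: parse --id-feature and use it as the DataFrame index,
# mirroring the id_feat / set_index lines added above.
import argparse
import pandas as pd

parser = argparse.ArgumentParser()
parser.add_argument("--id-feature", type=str, dest='id_feature', help='ID Feature')
args = parser.parse_args(['--id-feature', 'Van_Stock_Proposal_Detail_Id'])  # example value

df = pd.DataFrame({'Van_Stock_Proposal_Detail_Id': [10, 11], 'x': [0.5, 0.7]})
id_feat = str(args.id_feature)
df.set_index(id_feat, inplace=True)
print(df.index.name)  # Van_Stock_Proposal_Detail_Id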

View File

@@ -1,2 +1,2 @@
__version__ = '0.1.66'
__version__ = '0.1.73'
_MODEL_DIR = 'outputs'
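
_MODEL_DIR = 'outputs' is also where the grid pickles handled in the next file are written and read. A minimal sketch (file name and key convention from the diff, scores made up) of the joblib round trip that replaces pickle there:

# Illustrative only: write and re-read the grid results DataFrame under
# _MODEL_DIR with joblib, mirroring grid_eval / get_grid_eval_results below.
import os
import joblib
import pandas as pd

_MODEL_DIR = 'outputs'
os.makedirs(_MODEL_DIR, exist_ok=True)

# Columns keyed by str(subsample_size), rows by str(trees); values are dummies.
results = pd.DataFrame({'4096': {'100': -0.42}, '2048': {'100': -0.40}})
path = os.path.join(_MODEL_DIR, 'iJungle_light_results_overhead.pkl')

joblib.dump(value=results, filename=path)
restored = joblib.load(path)
assert restored.equals(results)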

View File

@@ -3,7 +3,7 @@ from iJungle.config import _MODEL_DIR
import random
from sklearn.ensemble import IsolationForest
import pickle
import joblib
import os
import numpy as np
import pandas as pd
@@ -67,9 +67,8 @@ def model_train_fun(df, trees=100, subsample_size=8192, train_size = 0.2, max_ss
print("{}/{}".format(counter, int(df_len/max_sss+1)))
filename = 'iJungle_light_' + str(trees) + '_' + str(subsample_size) + '.pkl'
with open(os.path.join(_MODEL_DIR, filename), 'wb') as outfile:
pickle.dump(iFor_list, outfile)
joblib.dump(value=iFor_list, filename=os.path.join(_MODEL_DIR, filename))
return(filename)
except Exception as err:
# TODO: Implement logger
@@ -135,7 +134,7 @@ def grid_eval(df, subsample_list = [4096, 2048, 1024, 512],
W = df.iloc[my_indexes[:df_len]]
results_dic = {}
## Evaluation with stored models as external files(pickle format)
## Evaluation with stored models as external files(joblib format)
for i, subsample_size in enumerate(subsample_list):
results_dic_t = {}
for j, trees in enumerate(trees_list):
@@ -143,16 +142,14 @@
# TODO: Implement logger
if verbose:
print('Reading ' + filename)
with open(os.path.join(_MODEL_DIR, filename), 'rb') as infile:
iFor_list = pickle.load(infile)
iFor_list = joblib.load(os.path.join(_MODEL_DIR, filename))
results_dic_t[str(trees)] = model_eval_fun(W, iFor_list, verbose)
results_dic[str(subsample_size)] = results_dic_t
filename_results = 'iJungle_light_results_overhead.pkl'
results = pd.DataFrame(results_dic)
with open(os.path.join(_MODEL_DIR, filename_results), 'wb') as outfile:
pickle.dump(results, outfile)
joblib.dump(value=results, filename=os.path.join(_MODEL_DIR, filename_results))
return(results)
except Exception as err:
# TODO: Implement logger
@@ -165,8 +162,7 @@ def get_grid_eval_results(verbose = True):
if os.path.exists(picklename):
if verbose:
print("Reading ", picklename)
with open(picklename, 'rb') as pickle_in:
results = pickle.load(pickle_in)
results = joblib.load(picklename)
return(results)
else:
raise Exception("grid_eval has not been executed")
@@ -211,9 +207,7 @@
picklename = os.path.join(_MODEL_DIR,'iJungle_light_' + str(trees) + '_' + str(subsample_size) + '.pkl')
if verbose:
print('Reading ' + picklename)
with open(picklename,"rb") as pickle_in:
iFor_list = pickle.load(pickle_in)
iFor_list = joblib.load(picklename)
model = iFor_list[best_iF_k]
if verbose: