EdgeML/tools/SeeDot/fixSeeDotInput.py

129 строки
3.9 KiB
Python

#! /usr/bin/env python
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
# This file is a fixer that corrects the inaccuracy introduced
# by multiple training instances of the same model.
import numpy as np
import os, sys
import argparse
import re
def parse(argv=None):
    """Parse the command-line options of the SeeDot input fixer.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse reads the arguments from ``sys.argv[1:]``, which is the
            behaviour of a plain ``parse()`` call.

    Returns:
        An ``argparse.Namespace`` with the attributes ``seedot_file``,
        ``model_dir``, ``dataset_dir``, ``numOutputs`` (int, default 1) and
        ``normalise_data`` (bool, default False).
    """
    parser = argparse.ArgumentParser(description='Modify SeeDot input file')
    parser.add_argument('--seedot_file', type=str, metavar='',
                        help='path .sd file (including file name)')
    parser.add_argument('--model_dir', type=str, metavar='',
                        help='path to model files directory')
    parser.add_argument('--dataset_dir', type=str, metavar='',
                        help='path to data files directory (the directory with train.npy and test.npy)')
    parser.add_argument("-n", "--numOutputs", type=int, metavar='',
                        help='The number of outputs that the model under consideration produces', default=1)
    parser.add_argument('--normalise_data', action='store_true',
                        help='Normalise the input train and test files.')
    return parser.parse_args(argv)
def readModelWeights(model_dir, dataset_dir, numOutputs, normalise_data):
    """Collect the value range of every model parameter and of the input data.

    Every file in ``model_dir`` whose name ends in ``.npy`` is loaded and
    flattened. A file holding exactly one value is mapped to
    ``[sigmoid(value)]``; any other file is mapped to ``[min, max]`` of its
    entries. The dictionary key is the file name without the ``.npy`` suffix.

    ``train.npy`` and ``test.npy`` are loaded from ``dataset_dir``. Only the
    columns from index ``numOutputs`` onward are examined; the leading
    ``numOutputs`` columns are never modified. The key ``'X'`` is mapped to
    ``[min, max]`` taken over those columns of both arrays together.

    Args:
        model_dir: Directory containing the model's ``.npy`` files.
        dataset_dir: Directory containing ``train.npy`` and ``test.npy``.
        numOutputs: Number of leading columns to skip in both data arrays.
        normalise_data: When true, the examined columns are standardised with
            the per-column mean and standard deviation of the training data
            (standard deviations below 1e-6 are replaced by 1), and
            ``train.npy`` / ``test.npy`` are overwritten with the result.

    Returns:
        dict mapping each name to a one-element list (single-value files) or
        a ``[min, max]`` list.
    """
    weight_min_max_dict = {}
    # Paths are joined explicitly instead of changing the working directory,
    # so a failed load cannot leave the process stranded in another directory.
    for filename in os.listdir(model_dir):
        if not filename.endswith('.npy'):
            continue
        values = np.load(os.path.join(model_dir, filename)).flatten()
        name = filename[:-4]
        if len(values) == 1:
            # A lone value is stored after passing through the sigmoid.
            weight_min_max_dict[name] = [1.0 / (1.0 + np.exp(-1 * values[0]))]
        else:
            weight_min_max_dict[name] = [np.min(values), np.max(values)]

    train_path = os.path.join(dataset_dir, "train.npy")
    test_path = os.path.join(dataset_dir, "test.npy")
    train = np.load(train_path)
    test = np.load(test_path)
    Xtrain = train[:, numOutputs:]
    Xtest = test[:, numOutputs:]

    if normalise_data:
        mean = np.mean(Xtrain, 0)
        std = np.std(Xtrain, 0)
        # Avoid dividing by (nearly) zero for constant columns.
        std[std < 0.000001] = 1
        Xtrain = (Xtrain - mean) / std
        Xtest = (Xtest - mean) / std

    # The range must cover both splits: smallest minimum, largest maximum.
    lowest = min(np.min(Xtrain), np.min(Xtest))
    highest = max(np.max(Xtrain), np.max(Xtest))
    weight_min_max_dict['X'] = [lowest, highest]

    if normalise_data:
        # Persist the normalised columns back into the original files.
        train[:, numOutputs:] = Xtrain
        test[:, numOutputs:] = Xtest
        np.save(train_path, train)
        np.save(test_path, test)

    return weight_min_max_dict
def getVar(line, weights_dict):
    """Build the replacement for one line of a SeeDot file, if it needs one.

    A line is a candidate only when it contains exactly one ``=``. The
    variable name is the last space- or tab-separated token to the left of
    the ``=``. If that name is a key of ``weights_dict`` a new declaration is
    produced:

    * one stored value  -> ``let <var> = <value> in``
    * two stored values -> ``let <var> = <shape> in [<min>, <max>] in``

    where ``<shape>`` is the text of ``line`` from its first ``(`` through its
    first ``)``, and all numbers are printed with 20 decimal places.

    Args:
        line: One line of the SeeDot source, without the trailing newline.
        weights_dict: Mapping from variable name to a list of one or two
            numbers.

    Returns:
        Tuple ``(replace, new_line)``: ``(True, <new text>)`` when the line
        must be replaced, otherwise ``(False, None)``.
    """
    # Only lines with exactly one '=' can be simple declarations.
    if line.count('=') != 1:
        return False, None

    left = line.split('=')[0].strip()
    var = left.split(' ')[-1].split('\t')[-1]
    if var not in weights_dict:
        return False, None

    weights = weights_dict[var]
    if len(weights) == 1:
        new_line = "let " + var + " = " + "%.20f" % (weights[0]) + " in"
    else:
        # Reuse the shape text exactly as written in the original line.
        shape = line[line.find('('):line.find(')') + 1]
        new_line = ("let " + var + " = " + shape + " in [" +
                    "%.20f" % (weights[0]) + ", " + "%.20f" % (weights[1]) + "] in")
    return True, new_line
def writeToInputDotSD(file, dir):
    """Write the given lines to ``input.sd`` inside the directory ``dir``.

    Each element of ``file`` is written followed by a newline character. An
    existing ``input.sd`` is overwritten. The process working directory is
    left unchanged.

    Args:
        file: Sequence of strings, one per output line (without newlines).
        dir: Directory in which ``input.sd`` is created.
    """
    # The context manager guarantees the file is closed even if a write fails.
    with open(os.path.join(dir, "input.sd"), "w") as f:
        f.writelines(line + "\n" for line in file)
def run(args):
    """Rewrite a SeeDot file using ranges read from the model and dataset.

    The file at ``args.seedot_file`` is read and split on newlines, every
    line for which ``getVar`` reports a replacement is swapped for the new
    text, and the result is written to ``input.sd`` in ``args.model_dir``.

    Args:
        args: Object with the attributes ``seedot_file``, ``model_dir``,
            ``dataset_dir``, ``numOutputs`` and ``normalise_data``.
    """
    # Read through a context manager so the handle is closed promptly.
    with open(args.seedot_file) as sd_file:
        input_file = sd_file.read().split("\n")

    model_weights_dict = readModelWeights(
        args.model_dir, args.dataset_dir, args.numOutputs, args.normalise_data)

    for i, line in enumerate(input_file):
        replace, new_line = getVar(line, model_weights_dict)
        if replace:
            input_file[i] = new_line

    writeToInputDotSD(input_file, args.model_dir)
if __name__ == '__main__':
    # Script entry point: parse the command line, then rewrite the SeeDot file.
    run(parse())