#! /usr/bin/env python
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

# This is a fixer script: it rewrites the variable ranges in a SeeDot input
# (.sd) file to match the weights produced by a particular training run,
# correcting the inaccuracy that arises when the same model is trained
# multiple times.

import argparse
import os

import numpy as np


def parse():
    parser = argparse.ArgumentParser(description='Modify SeeDot input file')
    parser.add_argument('--seedot_file', type=str, metavar='',
                        help='path to the .sd file (including file name)')
    parser.add_argument('--model_dir', type=str, metavar='',
                        help='path to the model files directory')
    parser.add_argument('--dataset_dir', type=str, metavar='',
                        help='path to the data files directory (the directory '
                             'with train.npy and test.npy)')
    parser.add_argument('-n', '--numOutputs', type=int, metavar='', default=1,
                        help='number of outputs that the model under '
                             'consideration produces')
    parser.add_argument('--normalise_data', action='store_true',
                        help='normalise the input train and test files')
    return parser.parse_args()


def readModelWeights(model_dir, dataset_dir, numOutputs, normalise_data):
    """Collect the [min, max] range of every .npy weight file and of the data."""
    cur_dir = os.getcwd()
    filelist = os.listdir(os.path.join(cur_dir, model_dir))
    filelist = [x for x in filelist if x.endswith('.npy')]

    os.chdir(model_dir)
    weight_min_max_dict = {}
    for filename in filelist:
        f = np.load(filename).flatten()
        if len(f) == 1:
            # A scalar parameter is passed through a sigmoid and recorded as a
            # single value rather than a [min, max] range.
            weight_min_max_dict[filename[:-4]] = [1.0 / (1.0 + np.exp(-f[0]))]
        else:
            weight_min_max_dict[filename[:-4]] = [np.min(f), np.max(f)]
    os.chdir(cur_dir)

    # Compute the range of the input features X over the train and test sets,
    # optionally normalising both with the train mean and standard deviation.
    os.chdir(dataset_dir)
    train = np.load("train.npy")
    Xtrain = train[:, numOutputs:]
    test = np.load("test.npy")
    Xtest = test[:, numOutputs:]
    if normalise_data:
        mean = np.mean(Xtrain, 0)
        std = np.std(Xtrain, 0)
        std[std < 0.000001] = 1
        Xtrain = (Xtrain - mean) / std
        Xtest = (Xtest - mean) / std

    m1 = min(np.min(Xtrain), np.min(Xtest))
    m2 = max(np.max(Xtrain), np.max(Xtest))
    weight_min_max_dict['X'] = [m1, m2]

    if normalise_data:
        train[:, numOutputs:] = Xtrain
        test[:, numOutputs:] = Xtest
        np.save("train.npy", train)
        np.save("test.npy", test)
    os.chdir(cur_dir)
    return weight_min_max_dict


def getVar(line, weights_dict):
    """If the line declares a variable whose weights are known, rebuild it."""
    replace = False
    new_line = None
    if line.count('=') == 1:
        left, _ = line.split('=')
        # The variable name is the last token on the left-hand side.
        var = left.strip().split(' ')[-1].split('\t')[-1]
        if var in weights_dict:
            replace = True
            weights = weights_dict[var]
            if len(weights) == 1:
                new_line = "let " + var + " = " + "%.20f" % weights[0] + " in"
            else:
                # Keep the original "(rows, cols)" shape annotation; replace
                # only the [min, max] range.
                shape = line[line.find('('):line.find(')') + 1]
                new_line = ("let " + var + " = " + shape + " in [" +
                            "%.20f" % weights[0] + ", " +
                            "%.20f" % weights[1] + "] in")
    return replace, new_line


def writeToInputDotSD(lines, out_dir):
    """Write the modified lines to <out_dir>/input.sd."""
    os.chdir(out_dir)
    with open("input.sd", "w") as f:
        for line in lines:
            f.write(line + "\n")


def run(args):
    with open(args.seedot_file) as f:
        input_file = f.read().split("\n")
    model_weights_dict = readModelWeights(args.model_dir, args.dataset_dir,
                                          args.numOutputs, args.normalise_data)
    for i in range(len(input_file)):
        replace, new_line = getVar(input_file[i], model_weights_dict)
        if replace:
            input_file[i] = new_line
    writeToInputDotSD(input_file, args.model_dir)


if __name__ == '__main__':
    args = parse()
    run(args)
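
# Usage sketch (the script and directory names below are illustrative, not
# prescribed by this file): assuming the trained weights live in models/ as
# <var>.npy files and the dataset directory contains train.npy and test.npy,
# an invocation could look like
#
#   python fix_seedot_input.py \
#       --seedot_file models/input_template.sd \
#       --model_dir models \
#       --dataset_dir data \
#       --numOutputs 1 \
#       --normalise_data
#
# Every "let <var> = ..." line of the .sd file whose variable name matches a
# <var>.npy weight file (or the input variable X) is rewritten with that
# variable's value range, and the result is written to <model_dir>/input.sd.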