Revert an accidental commit

Revert "Fixing merge conflicts"

This reverts commit f580ae9fc0, reversing
changes made to 44f2e732f3.
This commit is contained in:
Richin Jain 2019-03-28 11:27:01 -04:00
Родитель f580ae9fc0
Коммит 52a7ce139f
3 изменённых файлов: 59 добавлений и 113 удалений

Просмотреть файл

@ -23,8 +23,10 @@ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import pickle
import json
import numpy
from sklearn.ensemble import RandomForestClassifier
from azureml.core.model import Model

Просмотреть файл

@ -23,6 +23,8 @@ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import pickle
from azureml.core import Workspace
from azureml.core.run import Run
import os
import argparse
@ -32,41 +34,69 @@ from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
import numpy as np
import json
import subprocess
from typing import Tuple, List
# Command-line interface of the training script: two optional string options.
parser = argparse.ArgumentParser("train")
# --config_suffix: suffix (described as a datetime) for the json config files.
parser.add_argument(
"--config_suffix", type=str, help="Datetime suffix for json config files"
)
# --json_config: directory that receives the intermediate json config files.
parser.add_argument(
"--json_config",
type=str,
help="Directory to write all the intermediate json configs",
)
args = parser.parse_args()
# Echo the parsed values; an option that was not supplied prints as "None".
print("Argument 1: %s" % args.config_suffix)
print("Argument 2: %s" % args.json_config)
# Create the output directory when --json_config was given; exist_ok=True
# makes this a no-op if the directory is already there.
# NOTE(review): indentation is not visible in this listing; presumably the
# makedirs call (and possibly the print after it) forms the `if` body - confirm.
if not (args.json_config is None):
os.makedirs(args.json_config, exist_ok=True)
print("%s created" % args.json_config)
# Run handle for this execution, plus the experiment and workspace reached
# through it.
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace
# using diabetes dataset from scikit-learn
X, y = load_diabetes(return_X_y=True)
# NOTE(review): `columns` is not referenced by any other line visible here;
# presumably these are the feature names of the dataset - confirm.
columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]
# Hold out 20% of the samples for testing; random_state=0 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Nested dict: data[<"train"|"test">][<"X"|"y">].
data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}}
print("Running train.py")
# Fit a Ridge regressor with a randomly chosen alpha and log alpha and the
# test-split MSE to the run.
# `data_split` is indexed as data_split[<"train"|"test">]["y"] below.
# NOTE(review): only the opening part of this function is covered here; the
# listing has lost its indentation, so where the body ends cannot be told
# from these lines - confirm against the real file.
def experiment_code(data_split):
# NOTE(review): other lines in this listing obtain the run with
# Run.get_context(); confirm which accessor is intended here.
run = Run.get_submitted_run()
# Randomly pick alpha from the candidates 0.0, 0.05, ..., 0.95
alphas = np.arange(0.0, 1.0, 0.05)
# choice(..., 1) returns a one-element index array; [0] unwraps the scalar.
alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0]
print(alpha)
# Log alpha metric
run.log("alpha", alpha)
# train the model with selected value of alpha and log mse
reg = Ridge(alpha=alpha)
# NOTE(review): the features are read from the module-level `data` while the
# targets are read from the `data_split` parameter; this only lines up when
# the caller passes `data` itself - confirm whether `data_split` was meant
# for both.
reg.fit(data["train"]["X"], data_split["train"]["y"])
preds = reg.predict(data["test"]["X"])
run.log("mse", mean_squared_error(preds, data_split["test"]["y"]))
# Randomly pick alpha from the candidates 0.0, 0.05, ..., 0.95
alphas = np.arange(0.0, 1.0, 0.05)
# choice(..., 1) returns a one-element index array; [0] unwraps the scalar.
alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0]
print(alpha)
# Record the chosen alpha as a run metric.
run.log("alpha", alpha)
# Fit Ridge on the training split, predict the test split, and log the MSE.
reg = Ridge(alpha=alpha)
reg.fit(data["train"]["X"], data["train"]["y"])
preds = reg.predict(data["test"]["X"])
run.log("mse", mean_squared_error(preds, data["test"]["y"]))
# File name the fitted model is serialized to (relative to the working dir).
model_name = "sklearn_regression_model.pkl"
# NOTE(review): the file is opened for binary writing but joblib.dump is given
# the file *name*, not the open handle, so the handle itself is never used.
with open(model_name, "wb"):
joblib.dump(value=reg, filename=model_name)
# upload the model file explicitly into artifacts
run.upload_file(name="./outputs/" + model_name, path_or_stream=model_name)
print("Uploaded the model {} to experiment {}".format(model_name, run.experiment.name))
# Print the current working directory.
dirpath = os.getcwd()
print(dirpath)
# Save model as part of the run history
model_name = "sklearn_regression_model.pkl"
# model_name = "."
print("Following files are uploaded ")
print(run.get_file_names())
run.complete()
# NOTE(review): the save/upload sequence below repeats the one above, and it
# comes after run.complete(); presumably the two copies are the old and new
# sides of a diff rather than one script - confirm before relying on the order.
# NOTE(review): `file` is bound but not used; joblib.dump receives the name.
with open(model_name, "wb") as file:
joblib.dump(value=reg, filename=model_name)
# upload the model file explicitly into artifacts
run.upload_file(name="./outputs/" + model_name, path_or_stream=model_name)
print("Uploaded the model {} to experiment {}".format(model_name, run.experiment.name))
dirpath = os.getcwd()
print(dirpath)
print("Following files are uploaded ")
print(run.get_file_names())
# register the model
# run.log_model(file_name = model_name)
# print('Registered the model {} to run history {}'.format(model_name, run.history.name))
# Collect the id of this run under the key "run_id".
run_id = {}
run_id["run_id"] = run.id
@ -76,6 +106,4 @@ output_path = os.path.join(args.json_config, filename)
# Serialize the run_id dict as JSON to output_path (output_path is built on a
# line that is not part of this span).
with open(output_path, "w") as outfile:
json.dump(run_id, outfile)
# Script entry point: announce the script and run the experiment on `data`.
# NOTE(review): indentation is not visible in this listing, so it cannot be
# told from here which of the following lines sit inside the guard - confirm.
if __name__ == "__main__":
print("Running train.py")
experiment_code(data)
# Mark the run as completed.
run.complete()

Просмотреть файл

@ -1,84 +0,0 @@
"""
Copyright (C) Microsoft Corporation. All rights reserved.
Microsoft Corporation (Microsoft) grants you a nonexclusive, perpetual,
royalty-free right to use, copy, and modify the software code provided by us
("Software Code"). You may not sublicense the Software Code or any use of it
(except to your affiliates and to vendors to perform work on your behalf)
through distribution, network access, service agreement, lease, rental, or
otherwise. This license does not purport to express any claim of ownership over
data you may have shared with Microsoft in the creation of the Software Code.
Unless applicable law gives you more rights, Microsoft reserves all other
rights not expressly granted herein, whether by implication, estoppel or
otherwise.
THE SOFTWARE CODE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
# test the training script
import os
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
# using diabetes dataset from scikit-learn
X, y = load_diabetes(return_X_y=True)
# Hold out 20% of the samples for testing; random_state=0 fixes the split.
# NOTE(review): train_test_split is not among the imports visible above this
# line - confirm it is imported, otherwise this module fails on import.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Nested dict: data[<"train"|"test">][<"X"|"y">].
data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}}
def test_check_schema():
    """Check that ``diabetes.csv`` exists and has the expected column count.

    The last column of the file is left out of the count before it is
    compared with ``expected_columns`` (defined outside this function).
    """
    csv_path = get_absPath("diabetes.csv")
    # The data file must be present before it can be inspected.
    assert os.path.exists(csv_path)

    frame = pd.read_csv(csv_path)
    leading_names = frame.columns[:-1]
    n_leading = frame[leading_names].shape[1]
    # Every column except the last one counts towards the schema.
    assert n_leading == expected_columns
def test_check_bad_schema():
    """Check that ``diabetes_bad_schema.csv`` deviates from the expected schema.

    The last column of the file is left out of the count; the remaining
    number of columns must differ from ``expected_columns`` (defined outside
    this function).
    """
    csv_path = get_absPath("diabetes_bad_schema.csv")
    # The data file must be present before it can be inspected.
    assert os.path.exists(csv_path)

    frame = pd.read_csv(csv_path)
    leading_names = frame.columns[:-1]
    n_leading = frame[leading_names].shape[1]
    # A malformed file is recognised by a column count that does not match.
    assert n_leading != expected_columns
def test_check_missing_values():
    """Check that ``diabetes_missing_values.csv`` contains at least one NaN."""
    csv_path = get_absPath("diabetes_missing_values.csv")
    # The data file must be present before it can be inspected.
    assert os.path.exists(csv_path)

    cell_values = pd.read_csv(csv_path).values
    nan_mask = np.isnan(cell_values)
    nan_total = nan_mask.sum()
    assert nan_total > 0
def test_check_distribution():
    """Check that ``diabetes_bad_dist.csv`` is flagged as distribution-shifted.

    A column counts as shifted when its statistic differs from the historical
    value by more than ``shift_tolerance`` times the magnitude of that
    historical value. The test passes when at least one column mean or at
    least one column standard deviation is shifted.

    ``get_absPath``, ``historical_mean``, ``historical_std`` and
    ``shift_tolerance`` are defined outside this function.
    """
    datafile = get_absPath("diabetes_bad_dist.csv")
    # The data file must be present before it can be inspected.
    assert os.path.exists(datafile)

    values = pd.read_csv(datafile).values
    mean = np.mean(values, axis=0)
    # Fix: this used np.mean, so the std clause below compared column means
    # against historical_std instead of comparing standard deviations.
    # NOTE(review): np.std defaults to ddof=0 - confirm that this matches how
    # historical_std was computed.
    std = np.std(values, axis=0)

    mean_shifted = np.any(
        abs(mean - historical_mean) > shift_tolerance * abs(historical_mean)
    )
    std_shifted = np.any(
        abs(std - historical_std) > shift_tolerance * abs(historical_std)
    )
    # Same truth value as the original `np.sum(...) or np.sum(...) > 0`, with
    # the two clauses written symmetrically.
    assert mean_shifted or std_shifted