Reverting back an accidental commit
Revert "Fixing merge conflicts". This reverts commit f580ae9fc0,
reversing changes made to 44f2e732f3.
This commit is contained in:
Родитель
f580ae9fc0
Коммит
52a7ce139f
|
@ -23,8 +23,10 @@ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|||
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
import pickle
|
||||
import json
|
||||
import numpy
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from azureml.core.model import Model
|
||||
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|||
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
import pickle
|
||||
from azureml.core import Workspace
|
||||
from azureml.core.run import Run
|
||||
import os
|
||||
import argparse
|
||||
|
@ -32,41 +34,69 @@ from sklearn.metrics import mean_squared_error
|
|||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.externals import joblib
|
||||
import numpy as np
|
||||
import json
|
||||
import subprocess
|
||||
from typing import Tuple, List
|
||||
|
||||
|
||||
# Command-line arguments: a datetime suffix used to name this run's JSON
# config file, and the directory those intermediate configs are written to.
parser = argparse.ArgumentParser("train")
parser.add_argument(
    "--config_suffix", type=str, help="Datetime suffix for json config files"
)
parser.add_argument(
    "--json_config",
    type=str,
    help="Directory to write all the intermediate json configs",
)
args = parser.parse_args()

print("Argument 1: %s" % args.config_suffix)
print("Argument 2: %s" % args.json_config)

# Ensure the output directory for the JSON config exists before training.
if args.json_config is not None:
    os.makedirs(args.json_config, exist_ok=True)
    print("%s created" % args.json_config)

# Attach to the current Azure ML run; `ws` is the workspace owning the
# experiment this run belongs to.
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace

# using diabetes dataset from scikit-learn
X, y = load_diabetes(return_X_y=True)
columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]
# Hold out 20% for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}}

print("Running train.py")
|
||||
|
||||
def experiment_code(data_split):
    """Train a Ridge model on *data_split*, log metrics, and upload the model.

    Parameters
    ----------
    data_split : dict
        ``{"train": {"X", "y"}, "test": {"X", "y"}}`` arrays
        (same shape as the module-level ``data``).

    Side effects: logs ``alpha`` and ``mse`` to the Azure ML run and uploads
    the pickled model into the run's ``outputs`` artifacts.
    """
    # Run.get_submitted_run() is deprecated; get_context() is the supported
    # way to attach to the current run.
    run = Run.get_context()

    # Randomly pick alpha (the unresolved merge had duplicated this whole
    # pick/fit/log sequence twice; it now runs once).
    alphas = np.arange(0.0, 1.0, 0.05)
    alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0]
    print(alpha)
    # Log alpha metric
    run.log("alpha", alpha)

    # Train the model with the selected value of alpha and log mse.
    # BUG FIX: the merged code mixed the module-level `data` with the
    # `data_split` parameter; use the parameter consistently.
    reg = Ridge(alpha=alpha)
    reg.fit(data_split["train"]["X"], data_split["train"]["y"])
    preds = reg.predict(data_split["test"]["X"])
    run.log("mse", mean_squared_error(preds, data_split["test"]["y"]))

    # Serialize the trained model. joblib.dump opens the file itself, so the
    # former surrounding `with open(model_name, "wb")` was redundant.
    model_name = "sklearn_regression_model.pkl"
    joblib.dump(value=reg, filename=model_name)

    # upload the model file explicitly into artifacts
    run.upload_file(name="./outputs/" + model_name, path_or_stream=model_name)
    print("Uploaded the model {} to experiment {}".format(model_name, run.experiment.name))
    dirpath = os.getcwd()
    print(dirpath)

    print("Following files are uploaded ")
    print(run.get_file_names())
    # NOTE: run.complete() is deliberately NOT called here — the __main__
    # block completes the run; completing it twice raised errors.
|
||||
|
||||
# register the model
|
||||
# run.log_model(file_name = model_name)
|
||||
# print('Registered the model {} to run history {}'.format(model_name, run.history.name))
|
||||
|
||||
# Persist this run's id so downstream pipeline steps can locate the run.
run_id = {"run_id": run.id}

# `filename` is built earlier from args.config_suffix — not shown in this
# view; confirm it is defined before this point.
output_path = os.path.join(args.json_config, filename)
with open(output_path, "w") as outfile:
    json.dump(run_id, outfile)


if __name__ == "__main__":
    print("Running train.py")
    experiment_code(data)
    run.complete()
|
||||
|
|
|
@ -1,84 +0,0 @@
|
|||
"""
|
||||
Copyright (C) Microsoft Corporation. All rights reserved.
|
||||
|
||||
Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
|
||||
royalty-free right to use, copy, and modify the software code provided by us
|
||||
("Software Code"). You may not sublicense the Software Code or any use of it
|
||||
(except to your affiliates and to vendors to perform work on your behalf)
|
||||
through distribution, network access, service agreement, lease, rental, or
|
||||
otherwise. This license does not purport to express any claim of ownership over
|
||||
data you may have shared with Microsoft in the creation of the Software Code.
|
||||
Unless applicable law gives you more rights, Microsoft reserves all other
|
||||
rights not expressly granted herein, whether by implication, estoppel or
|
||||
otherwise.
|
||||
|
||||
THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
# test the training script
|
||||
|
||||
import os

import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
# using diabetes dataset from scikit-learn
X, y = load_diabetes(return_X_y=True)
# Hold out 20% as a test split; the fixed seed keeps it reproducible.
# BUG FIX: train_test_split was called without being imported anywhere in
# this file (NameError at import time); the import is added at the top.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}}
|
||||
|
||||
|
||||
|
||||
def test_check_schema():
    """The reference diabetes CSV exists and has the expected column count."""
    # get_absPath / expected_columns are defined elsewhere in the test module.
    datafile = get_absPath("diabetes.csv")
    # check that file exists
    assert os.path.exists(datafile)
    dataset = pd.read_csv(datafile)
    # Drop the label column; only the feature header is checked.
    features = dataset[dataset.columns[:-1]]
    # check header has expected number of columns
    assert features.shape[1] == expected_columns
|
||||
|
||||
|
||||
def test_check_bad_schema():
    """A CSV with a wrong schema must NOT match the expected column count."""
    datafile = get_absPath("diabetes_bad_schema.csv")
    # check that file exists
    assert os.path.exists(datafile)
    dataset = pd.read_csv(datafile)
    # Feature columns only (last column is the label).
    n_feature_cols = dataset[dataset.columns[:-1]].shape[1]
    # column count must differ from the expected schema
    assert n_feature_cols != expected_columns
|
||||
|
||||
|
||||
def test_check_missing_values():
    """The missing-values fixture must actually contain NaN entries."""
    datafile = get_absPath("diabetes_missing_values.csv")
    # check that file exists
    assert os.path.exists(datafile)
    values = pd.read_csv(datafile).values
    # At least one NaN anywhere in the table.
    assert np.isnan(values).sum() > 0
|
||||
|
||||
|
||||
def test_check_distribution():
    """Detect a dataset whose feature distribution drifted from the baseline.

    A column counts as shifted when its mean or standard deviation moves by
    more than ``shift_tolerance`` times the historical magnitude.
    ``historical_mean`` / ``historical_std`` / ``shift_tolerance`` are defined
    elsewhere in the test module.
    """
    datafile = get_absPath("diabetes_bad_dist.csv")
    # check that file exists
    assert os.path.exists(datafile)
    dataset = pd.read_csv(datafile)
    mean = np.mean(dataset.values, axis=0)
    # BUG FIX: std was computed with np.mean (copy-paste); use np.std.
    std = np.std(dataset.values, axis=0)
    mean_shifted = np.sum(abs(mean - historical_mean) > shift_tolerance * abs(historical_mean))
    std_shifted = np.sum(abs(std - historical_std) > shift_tolerance * abs(historical_std))
    # BUG FIX: the original `sumA or sumB > 0` applied `> 0` only to the
    # second sum; compare each count explicitly.
    assert mean_shifted > 0 or std_shifted > 0
|
Загрузка…
Ссылка в новой задаче