This commit is contained in:
krisbock 2022-11-29 11:49:53 +10:00
Родитель baf1735df3
Коммит 0f971ba26a
10 изменённых файлов: 186 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,23 @@
# Manually triggered workflow that creates the diabetes-endpoint online
# endpoint and its mlflow-deployment in the mlops-prod workspace.
name: Deploy in prod
on:
  workflow_dispatch:
jobs:
  deploy_endpoint:
    runs-on: ubuntu-latest
    # The job is bound to the "prod" GitHub environment.
    environment:
      name: prod
    steps:
      - name: Check out repo
        # Pinned to a release tag: `@main` follows a moving branch, so the
        # code executed by this job could change without any commit here.
        uses: actions/checkout@v4
      - name: Install az ml extension
        run: az extension add -n ml -y
      - name: Azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZURE_PROD_CREDENTIALS}}
      - name: deploy prod
        # Create the endpoint first, then the deployment that receives all
        # of its traffic.
        run: |
          az ml online-endpoint create --name diabetes-endpoint -f level-4/src/create-endpoint.yaml --resource-group mlops-prod --workspace-name mlops-prod --wait
          az ml online-deployment create --name mlflow-deployment --endpoint diabetes-endpoint -f level-4/src/mlflow-deployment.yaml --all-traffic --resource-group mlops-prod --workspace-name mlops-prod

Просмотреть файл

@ -0,0 +1,3 @@
# Managed online endpoint definition (validated against the
# managedOnlineEndpoint schema referenced below).
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
# Endpoint name.
name: diabetes-endpoint
# Authentication mode for the endpoint is set to "key".
auth_mode: key

Просмотреть файл

@ -0,0 +1,6 @@
# Managed online deployment definition (validated against the
# managedOnlineDeployment schema referenced below).
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: mlflow-deployment
# Endpoint this deployment is attached to.
endpoint_name: diabetes-endpoint
# Registered model reference using the "@latest" label rather than a fixed
# version number.
model: azureml:diabetes-mlflow-model@latest
# Compute used for serving: one instance of the given VM size.
instance_type: Standard_DS2_v2
instance_count: 1

Просмотреть файл

@ -0,0 +1,78 @@
# Import libraries
import argparse
import glob
import os
import mlflow
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# define functions
def main(args):
    """Run the training pipeline: load the CSV data, split it, fit the model.

    Args:
        args: Parsed command-line namespace; ``training_data`` and
            ``reg_rate`` are read from it.
    """
    # Switch on MLflow autologging before any training happens.
    mlflow.autolog()

    # Load the CSV files from the data folder, then split the rows.
    frame = get_csvs_df(args.training_data)
    splits = split_data(frame)

    # The four splits are passed on positionally, right after reg_rate.
    train_model(args.reg_rate, *splits)
def get_csvs_df(path):
    """Load every ``*.csv`` file directly inside *path* into one DataFrame.

    Files are read in sorted path order so that the row order of the result
    (and any seeded split derived from it) is reproducible; ``glob`` on its
    own returns matches in arbitrary order.

    Args:
        path: Directory that contains the CSV files.

    Returns:
        A pandas DataFrame holding the rows of all CSV files. Each file's
        own row index is kept, so index labels may repeat across files.

    Raises:
        RuntimeError: If *path* does not exist, or exists but contains no
            ``*.csv`` files.
    """
    if not os.path.exists(path):
        raise RuntimeError(f"Cannot use non-existent path provided: {path}")

    csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))
    if not csv_files:
        raise RuntimeError(f"No CSV files found in provided data path: {path}")

    return pd.concat((pd.read_csv(f) for f in csv_files), sort=False)
def split_data(df):
    """Separate the ``Diabetic`` column from the rest and split both 80/20.

    Args:
        df: DataFrame that includes a ``Diabetic`` column.

    Returns:
        The result of ``train_test_split`` applied to the remaining columns
        and the ``Diabetic`` column, with ``test_size=0.2``.
    """
    labels = df["Diabetic"]
    features = df.drop("Diabetic", axis=1)
    # A fixed random_state keeps the split the same from run to run.
    return train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
def train_model(reg_rate, X_train, X_test, y_train, y_test):
    """Fit a logistic regression classifier on the training split.

    Args:
        reg_rate: Regularization rate. Must be greater than zero because
            the estimator's ``C`` parameter is set to ``1 / reg_rate``.
        X_train: Training features.
        X_test: Held-out features; accepted for interface compatibility
            and not used by this function.
        y_train: Training labels.
        y_test: Held-out labels; accepted for interface compatibility and
            not used by this function.

    Returns:
        The fitted ``LogisticRegression`` estimator, so callers can
        evaluate or persist it instead of losing the trained object.

    Raises:
        ValueError: If *reg_rate* is not greater than zero.
    """
    # Fail with a clear message instead of a ZeroDivisionError below.
    if reg_rate <= 0:
        raise ValueError(f"reg_rate must be greater than 0, got {reg_rate}")

    model = LogisticRegression(C=1 / reg_rate, solver="liblinear")
    return model.fit(X_train, y_train)
def parse_args(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) the arguments are taken from ``sys.argv``, which is
            what a plain ``parse_args()`` call did before.

    Returns:
        An ``argparse.Namespace`` with ``training_data`` (str) and
        ``reg_rate`` (float, default ``0.01``).

    Raises:
        SystemExit: Raised by argparse when ``--training_data`` is missing
            or an option value cannot be converted.
    """
    parser = argparse.ArgumentParser()

    # Without a data folder nothing can be trained, so reject the call here
    # rather than passing None on to the loader.
    parser.add_argument(
        "--training_data", dest="training_data", type=str, required=True
    )
    parser.add_argument(
        "--reg_rate", dest="reg_rate", type=float, default=0.01
    )

    return parser.parse_args(argv)
# Script entry point.
if __name__ == "__main__":
    banner = "*" * 60

    # Blank lines plus a banner make this run easy to spot in the logs.
    print("\n\n")
    print(banner)

    # Read the command-line options and hand them to the pipeline.
    args = parse_args()
    main(args)

    # Closing banner and trailing blank lines.
    print(banner)
    print("\n\n")

Просмотреть файл

@ -0,0 +1,27 @@
[flake8]
# Codes and code prefixes that flake8 must not report.
# NOTE(review): the bare prefixes W and C already cover W504 and C901, so
# those two entries look redundant -- confirm before removing.
ignore =
    W504,
    C901,
    E41,
    E722,
    W,
    D,
    F,
    N,
    C,
    I
# Maximum accepted line length.
max-line-length = 79
# Paths and patterns that are not linted.
exclude =
    .tox,
    .git,
    __pycache__,
    *.pyc,
    *.egg-info,
    .cache,
    .eggs,
    develop
# Extra ignores for the listed package __init__ files only.
per-file-ignores =
    src/__init__.py:D104
    src/*/__init__.py:D104
# NOTE(review): complexity findings are reported as C901, which is listed in
# `ignore` above, so this threshold presumably has no visible effect -- verify.
max-complexity = 10
import-order-style = pep8

Просмотреть файл

Просмотреть файл

@ -0,0 +1,11 @@
index,first,last
0,Glenn,Hernandez
1,Sarah,Pedersen
2,Jill,Tracy
3,Melissa,Nelson
4,Hugh,Soto
5,Frank,Dees
6,Vita,Singleton
7,James,Papenfuss
8,Mary,Smithson
9,Bonnie,Begor
1 index first last
2 0 Glenn Hernandez
3 1 Sarah Pedersen
4 2 Jill Tracy
5 3 Melissa Nelson
6 4 Hugh Soto
7 5 Frank Dees
8 6 Vita Singleton
9 7 James Papenfuss
10 8 Mary Smithson
11 9 Bonnie Begor

Просмотреть файл

@ -0,0 +1,5 @@
import os
# Resolve this file's absolute path, then keep only its folder and print it.
this_file = os.path.abspath(__file__)
current_directory = os.path.dirname(this_file)
print(current_directory)

Просмотреть файл

@ -0,0 +1,11 @@
index,first,last
0,Tina,Holloway
1,Katherine,Logan
2,Juan,Duncan
3,Doyle,Clyne
4,Jacob,Kazin
5,Kimberly,Tomes
6,Lisa,Cochrane
7,Troy,Hall
8,Erin,Johnson
9,Joan,Laborde
1 index first last
2 0 Tina Holloway
3 1 Katherine Logan
4 2 Juan Duncan
5 3 Doyle Clyne
6 4 Jacob Kazin
7 5 Kimberly Tomes
8 6 Lisa Cochrane
9 7 Troy Hall
10 8 Erin Johnson
11 9 Joan Laborde

Просмотреть файл

@ -0,0 +1,22 @@
from model.train import get_csvs_df
import os
import pytest
def test_csvs_no_files(tmp_path):
    """get_csvs_df raises RuntimeError for an existing folder with no CSVs.

    The folder comes from pytest's ``tmp_path`` fixture (a fresh, empty
    directory) instead of ``"./"``, so the result does not depend on the
    directory pytest is started from or on stray CSV files found there.
    """
    with pytest.raises(RuntimeError) as error:
        get_csvs_df(str(tmp_path))
    assert error.match("No CSV files found in provided data")
def test_csvs_no_files_invalid_path():
    """get_csvs_df raises RuntimeError when the folder does not exist."""
    expected = "Cannot use non-existent path provided"
    # `match` applies the same regex search to the raised error's message.
    with pytest.raises(RuntimeError, match=expected):
        get_csvs_df("/invalid/path/does/not/exist/")
def test_csvs_creates_dataframe():
    """get_csvs_df loads the CSVs in the ``datasets`` folder next to this file."""
    here = os.path.dirname(os.path.abspath(__file__))
    frame = get_csvs_df(os.path.join(here, 'datasets'))
    # Expected combined row count of the fixture CSVs.
    assert len(frame) == 20