deploy

Parent: baf1735df3
Commit: 0f971ba26a
@@ -0,0 +1,23 @@
name: Deploy in prod
on:
  workflow_dispatch:

jobs:
  deploy_endpoint:
    runs-on: ubuntu-latest
    environment:
      name: prod
    steps:
    - name: Check out repo
      uses: actions/checkout@main
    - name: Install az ml extension
      run: az extension add -n ml -y
    - name: Azure login
      uses: azure/login@v1
      with:
        creds: ${{secrets.AZURE_PROD_CREDENTIALS}}
    - name: deploy prod
      run: |
        az ml online-endpoint create --name diabetes-endpoint -f level-4/src/create-endpoint.yaml --resource-group mlops-prod --workspace-name mlops-prod --wait
        az ml online-deployment create --name mlflow-deployment --endpoint diabetes-endpoint -f level-4/src/mlflow-deployment.yaml --all-traffic --resource-group mlops-prod --workspace-name mlops-prod
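
Note (not part of the commit): with only a workflow_dispatch trigger, this deployment has to be started by hand, from the Actions tab, with gh workflow run, or through the GitHub REST API. A minimal Python sketch of the REST call; OWNER, REPO, the workflow file name, and the token are placeholders, since the commit does not show where the workflow file is saved:

import requests

# Create a workflow_dispatch event for a workflow file.
# OWNER, REPO, and deploy-prod.yml are hypothetical names, not from the commit.
resp = requests.post(
    "https://api.github.com/repos/OWNER/REPO/actions/workflows/deploy-prod.yml/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": "Bearer <personal-access-token>",
    },
    json={"ref": "main"},  # the branch the workflow should run on
)
resp.raise_for_status()  # GitHub returns 204 No Content on success
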
@@ -0,0 +1,3 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: diabetes-endpoint
auth_mode: key
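
Note (not part of the commit): the endpoint is created with auth_mode: key, so once the workflow has run, its state and keys can be read back, for example with the azure-ai-ml v2 Python SDK. A sketch assuming that SDK is installed and that the attribute names below match the installed version; the subscription ID is a placeholder:

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# mlops-prod matches the --resource-group / --workspace-name used in the workflow
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<subscription-id>",  # placeholder, not in the commit
    resource_group_name="mlops-prod",
    workspace_name="mlops-prod",
)

endpoint = ml_client.online_endpoints.get(name="diabetes-endpoint")
print(endpoint.provisioning_state, endpoint.scoring_uri)

keys = ml_client.online_endpoints.get_keys(name="diabetes-endpoint")  # auth_mode: key
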
@@ -0,0 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: mlflow-deployment
endpoint_name: diabetes-endpoint
model: azureml:diabetes-mlflow-model@latest
instance_type: Standard_DS2_v2
instance_count: 1
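
Note (not part of the commit): after mlflow-deployment takes all traffic, the endpoint can be scored over plain HTTPS. MLflow no-code deployments on managed endpoints accept a pandas split-style payload under input_data; the scoring URI and key come from az ml online-endpoint show / get-credentials, and the feature columns below are placeholders because the diabetes schema is not part of this commit:

import requests

payload = {
    "input_data": {
        "columns": ["PlasmaGlucose", "BMI"],  # placeholder feature names
        "index": [0],
        "data": [[120.0, 28.5]],              # one placeholder row
    }
}
resp = requests.post(
    "https://diabetes-endpoint.<region>.inference.ml.azure.com/score",  # placeholder URI
    headers={
        "Authorization": "Bearer <endpoint-key>",  # from get-credentials
        "Content-Type": "application/json",
    },
    json=payload,
)
print(resp.json())  # predicted Diabetic labels
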
@@ -0,0 +1,78 @@
# Import libraries

import argparse
import glob
import os
import mlflow
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# define functions
def main(args):
    # enable autologging
    mlflow.autolog()

    # read data
    df = get_csvs_df(args.training_data)

    # split data
    X_train, X_test, y_train, y_test = split_data(df)

    # train model
    train_model(args.reg_rate, X_train, X_test, y_train, y_test)


def get_csvs_df(path):
    if not os.path.exists(path):
        raise RuntimeError(f"Cannot use non-existent path provided: {path}")
    csv_files = glob.glob(f"{path}/*.csv")
    if not csv_files:
        raise RuntimeError(f"No CSV files found in provided data path: {path}")
    return pd.concat((pd.read_csv(f) for f in csv_files), sort=False)


# split features and label, holding out 20% of rows for evaluation
def split_data(df):
    X = df.drop("Diabetic", axis=1)
    y = df["Diabetic"]
    return train_test_split(X, y, test_size=0.2, random_state=42)

def train_model(reg_rate, X_train, X_test, y_train, y_test):

    # train model; autologging (enabled in main) records it with MLflow
    return LogisticRegression(C=1/reg_rate, solver="liblinear").fit(X_train, y_train)


def parse_args():
    # setup arg parser
    parser = argparse.ArgumentParser()

    # add arguments
    parser.add_argument("--training_data", dest='training_data',
                        type=str)
    parser.add_argument("--reg_rate", dest='reg_rate',
                        type=float, default=0.01)

    # parse args
    args = parser.parse_args()

    # return args
    return args

# run script
if __name__ == "__main__":
    # add space in logs
    print("\n\n")
    print("*" * 60)

    # parse args
    args = parse_args()

    # run main function
    main(args)

    # add space in logs
    print("*" * 60)
    print("\n\n")
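
Note (not part of the commit): the script can be exercised locally by pointing --training_data at any folder of CSVs with a Diabetic label column. A sketch with synthetic data, assuming the file lives at model/train.py (the test file later in this commit imports it that way):

import pandas as pd
from pathlib import Path
from model.train import get_csvs_df, split_data, train_model

# write one tiny synthetic CSV; column names are made up for the demo
data_dir = Path("tmp_data")
data_dir.mkdir(exist_ok=True)
pd.DataFrame({
    "PlasmaGlucose": [90, 160, 100, 150, 85, 170, 95, 155, 88, 165],
    "Diabetic": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
}).to_csv(data_dir / "sample.csv", index=False)

df = get_csvs_df(str(data_dir))
X_train, X_test, y_train, y_test = split_data(df)
model = train_model(0.01, X_train, X_test, y_train, y_test)
print(model.score(X_test, y_test))  # accuracy on the held-out 20%
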
@@ -0,0 +1,27 @@
[flake8]
ignore =
    W504,
    C901,
    E41,
    E722,
    W,
    D,
    F,
    N,
    C,
    I
max-line-length = 79
exclude =
    .tox,
    .git,
    __pycache__,
    *.pyc,
    *.egg-info,
    .cache,
    .eggs,
    develop
per-file-ignores =
    src/__init__.py:D104
    src/*/__init__.py:D104
max-complexity = 10
import-order-style = pep8
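
Note (not part of the commit): flake8 picks this configuration up automatically when invoked from the directory that contains it; the src/ paths in per-file-ignores hint at the package layout. A trivial sketch of calling the linter from a Python helper, with src/ as an assumed target:

import subprocess

# run flake8 from the repo root so the [flake8] section above applies
result = subprocess.run(["flake8", "src/"], capture_output=True, text=True)
print(result.stdout or "no lint findings")
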
@@ -0,0 +1,11 @@
index,first,last
0,Glenn,Hernandez
1,Sarah,Pedersen
2,Jill,Tracy
3,Melissa,Nelson
4,Hugh,Soto
5,Frank,Dees
6,Vita,Singleton
7,James,Papenfuss
8,Mary,Smithson
9,Bonnie,Begor
@@ -0,0 +1,5 @@
import os

current_directory = os.path.dirname(os.path.abspath(__file__))

print(current_directory)
@@ -0,0 +1,11 @@
index,first,last
0,Tina,Holloway
1,Katherine,Logan
2,Juan,Duncan
3,Doyle,Clyne
4,Jacob,Kazin
5,Kimberly,Tomes
6,Lisa,Cochrane
7,Troy,Hall
8,Erin,Johnson
9,Joan,Laborde
@@ -0,0 +1,22 @@
from model.train import get_csvs_df
import os
import pytest


def test_csvs_no_files():
    with pytest.raises(RuntimeError) as error:
        get_csvs_df("./")
    assert error.match("No CSV files found in provided data")


def test_csvs_no_files_invalid_path():
    with pytest.raises(RuntimeError) as error:
        get_csvs_df("/invalid/path/does/not/exist/")
    assert error.match("Cannot use non-existent path provided")


def test_csvs_creates_dataframe():
    current_directory = os.path.dirname(os.path.abspath(__file__))
    datasets_directory = os.path.join(current_directory, 'datasets')
    result = get_csvs_df(datasets_directory)
    assert len(result) == 20
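
Note (not part of the commit): test_csvs_creates_dataframe asserting 20 rows lines up with the two 10-row CSV fixtures above, so the datasets folder evidently sits next to this test file. One more test in the same style, sketched with pytest's tmp_path fixture:

import pandas as pd
from model.train import get_csvs_df


def test_csvs_concatenates_multiple_files(tmp_path):
    # two small CSVs; get_csvs_df should stack them into one DataFrame
    pd.DataFrame({"a": [1, 2]}).to_csv(tmp_path / "one.csv", index=False)
    pd.DataFrame({"a": [3]}).to_csv(tmp_path / "two.csv", index=False)
    assert len(get_csvs_df(str(tmp_path))) == 3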