deploy

2022-11-29 11:49:53 +10:00 · 2022-11-29 11:49:53 +10:00 · 0f971ba26a
--- a/mlops/mlops-maturity/level-4/_.github/inprogress/01-deploy-prod-trigger-job.yml
+++ b/mlops/mlops-maturity/level-4/_.github/inprogress/01-deploy-prod-trigger-job.yml
@ -0,0 +1,23 @@
+name: Deploy in prod
+on:
+  workflow_dispatch:  # manual trigger only
+
+jobs:
+  deploy_endpoint:
+    runs-on: ubuntu-latest
+    environment:
+        name: prod
+    steps:
+    - name: Check out repo
+      uses: actions/checkout@v4  # pinned release tag instead of the moving main branch
+    - name: Install az ml extension
+      run: az extension add -n ml -y
+    - name: Azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZURE_PROD_CREDENTIALS}}
+    - name: deploy prod
+      run: |
+        az ml online-endpoint create --name diabetes-endpoint -f level-4/src/create-endpoint.yaml --resource-group mlops-prod --workspace-name mlops-prod --wait
+        az ml online-deployment create --name mlflow-deployment --endpoint diabetes-endpoint -f level-4/src/mlflow-deployment.yml --all-traffic --resource-group mlops-prod --workspace-name mlops-prod
+
--- a/mlops/mlops-maturity/level-4/src/create-endpoint.yaml
+++ b/mlops/mlops-maturity/level-4/src/create-endpoint.yaml
@ -0,0 +1,3 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
+name: diabetes-endpoint
+auth_mode: key
--- a/mlops/mlops-maturity/level-4/src/mlflow-deployment.yml
+++ b/mlops/mlops-maturity/level-4/src/mlflow-deployment.yml
@ -0,0 +1,6 @@
+$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
+name: mlflow-deployment
+endpoint_name: diabetes-endpoint
+model: azureml:diabetes-mlflow-model@latest
+instance_type: Standard_DS2_v2
+instance_count: 1
--- a/mlops/mlops-maturity/level-4/src/model/train.py
+++ b/mlops/mlops-maturity/level-4/src/model/train.py
@ -0,0 +1,78 @@
+# Import libraries
+
+import argparse
+import glob
+import os
+import mlflow
+import pandas as pd
+
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+
+# define functions
+def main(args):
+    # enable MLflow autologging before any training happens
+    mlflow.autolog()
+
+    # read data
+    df = get_csvs_df(args.training_data)
+
+    # split data
+    X_train, X_test, y_train, y_test = split_data(df)
+
+    # train model
+    train_model(args.reg_rate, X_train, X_test, y_train, y_test)
+
+
+def get_csvs_df(path):
+    if not os.path.exists(path):
+        raise RuntimeError(f"Cannot use non-existent path provided: {path}")
+    csv_files = glob.glob(f"{path}/*.csv")
+    if not csv_files:
+        raise RuntimeError(f"No CSV files found in provided data path: {path}")
+    return pd.concat((pd.read_csv(f) for f in csv_files), sort=False)
+
+
+# split the dataframe into features/label and train/test sets
+def split_data(df):
+    X = df.drop("Diabetic", axis=1)
+    y = df["Diabetic"]
+    return train_test_split(X, y, test_size=0.2, random_state=42)
+
+def train_model(reg_rate, X_train, X_test, y_train, y_test):
+
+    # train model
+    LogisticRegression(C=1/reg_rate, solver="liblinear").fit(X_train, y_train)
+
+
+def parse_args():
+    # setup arg parser
+    parser = argparse.ArgumentParser()
+
+    # add arguments
+    parser.add_argument("--training_data", dest='training_data',
+                        type=str)
+    parser.add_argument("--reg_rate", dest='reg_rate',
+                        type=float, default=0.01)
+
+    # parse args
+    args = parser.parse_args()
+
+    # return args
+    return args
+
+# run script
+if __name__ == "__main__":
+    # add space in logs
+    print("\n\n")
+    print("*" * 60)
+
+    # parse args
+    args = parse_args()
+
+    # run main function
+    main(args)
+
+    # add space in logs
+    print("*" * 60)
+    print("\n\n")
--- a/mlops/mlops-maturity/level-4/src/tests/.flake8
+++ b/mlops/mlops-maturity/level-4/src/tests/.flake8
@ -0,0 +1,27 @@
+[flake8]
+ignore = 
+    W504,
+    C901,
+    E41,
+    E722,
+    W,
+    D,
+    F,
+    N,
+    C,
+    I
+max-line-length = 79
+exclude = 
+    .tox,
+    .git,
+    __pycache__,
+    *.pyc,
+    *.egg-info,
+    .cache,
+    .eggs,
+    develop
+per-file-ignores =
+    src/__init__.py:D104
+    src/*/__init__.py:D104
+max-complexity = 10
+import-order-style = pep8
--- a/mlops/mlops-maturity/level-4/src/tests/init.py
+++ b/mlops/mlops-maturity/level-4/src/tests/init.py
--- a/mlops/mlops-maturity/level-4/src/tests/datasets/first.csv
+++ b/mlops/mlops-maturity/level-4/src/tests/datasets/first.csv
@ -0,0 +1,11 @@
+index,first,last
+0,Glenn,Hernandez
+1,Sarah,Pedersen
+2,Jill,Tracy
+3,Melissa,Nelson
+4,Hugh,Soto
+5,Frank,Dees
+6,Vita,Singleton
+7,James,Papenfuss
+8,Mary,Smithson
+9,Bonnie,Begor
--- a/mlops/mlops-maturity/level-4/src/tests/datasets/foo.py
+++ b/mlops/mlops-maturity/level-4/src/tests/datasets/foo.py
@ -0,0 +1,5 @@
+import os
+
+current_directory = os.path.dirname(os.path.abspath(__file__))
+
+print(current_directory)
--- a/mlops/mlops-maturity/level-4/src/tests/datasets/second.csv
+++ b/mlops/mlops-maturity/level-4/src/tests/datasets/second.csv
@ -0,0 +1,11 @@
+index,first,last
+0,Tina,Holloway
+1,Katherine,Logan
+2,Juan,Duncan
+3,Doyle,Clyne
+4,Jacob,Kazin
+5,Kimberly,Tomes
+6,Lisa,Cochrane
+7,Troy,Hall
+8,Erin,Johnson
+9,Joan,Laborde
--- a/mlops/mlops-maturity/level-4/src/tests/test_train.py
+++ b/mlops/mlops-maturity/level-4/src/tests/test_train.py
@ -0,0 +1,22 @@
+from model.train import get_csvs_df
+import os
+import pytest
+
+
+def test_csvs_no_files():
+    with pytest.raises(RuntimeError) as error:
+        get_csvs_df("./")
+    assert error.match("No CSV files found in provided data")
+
+
+def test_csvs_no_files_invalid_path():
+    with pytest.raises(RuntimeError) as error:
+        get_csvs_df("/invalid/path/does/not/exist/")
+    assert error.match("Cannot use non-existent path provided")
+
+
+def test_csvs_creates_dataframe():
+    current_directory = os.path.dirname(os.path.abspath(__file__))
+    datasets_directory = os.path.join(current_directory, 'datasets')
+    result = get_csvs_df(datasets_directory)
+    assert len(result) == 20