From 775d2066088493481e3419bb8261f51cab0038aa Mon Sep 17 00:00:00 2001 From: Maggie Mhanna Date: Tue, 20 Dec 2022 17:28:55 +0000 Subject: [PATCH] remove yml components, defining components using command --- data-science/components/evaluate.yml | 24 ----- data-science/components/prep.yml | 30 ------- data-science/components/register.yml | 24 ----- data-science/components/train.yml | 18 ---- mlops/azureml/azureml-sdkv2.ipynb | 126 +++++++++++++++++++++------ 5 files changed, 98 insertions(+), 124 deletions(-) delete mode 100644 data-science/components/evaluate.yml delete mode 100644 data-science/components/prep.yml delete mode 100644 data-science/components/register.yml delete mode 100644 data-science/components/train.yml diff --git a/data-science/components/evaluate.yml b/data-science/components/evaluate.yml deleted file mode 100644 index f02ba4c..0000000 --- a/data-science/components/evaluate.yml +++ /dev/null @@ -1,24 +0,0 @@ -# -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: evaluate_model -display_name: evaluate-model -type: command -inputs: - model_name: - type: string - model_input: - type: uri_folder - test_data: - type: uri_folder -outputs: - evaluation_output: - type: uri_folder -code: ../src/evaluate -environment: azureml:taxi-train-env@latest -command: >- - python evaluate.py - --model_name ${{inputs.model_name}} - --model_input ${{inputs.model_input}} - --test_data ${{inputs.test_data}} - --evaluation_output ${{outputs.evaluation_output}} -# \ No newline at end of file diff --git a/data-science/components/prep.yml b/data-science/components/prep.yml deleted file mode 100644 index e4bdc29..0000000 --- a/data-science/components/prep.yml +++ /dev/null @@ -1,30 +0,0 @@ -# -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: prep_data -display_name: prep-data -type: command -inputs: - raw_data: - type: uri_file - enable_monitoring: - type: string - table_name: - type: string -outputs: - train_data: - type: uri_folder - val_data: - type: uri_folder - test_data: - type: uri_folder -code: ../src/prep -environment: azureml:taxi-train-env@latest -command: >- - python prep.py - --raw_data ${{inputs.raw_data}} - --train_data ${{outputs.train_data}} - --val_data ${{outputs.val_data}} - --test_data ${{outputs.test_data}} - --enable_monitoring ${{inputs.enable_monitoring}} - --table_name ${{inputs.table_name}} -# \ No newline at end of file diff --git a/data-science/components/register.yml b/data-science/components/register.yml deleted file mode 100644 index a1aead2..0000000 --- a/data-science/components/register.yml +++ /dev/null @@ -1,24 +0,0 @@ -# -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: register_model -display_name: register-model -type: command -inputs: - model_name: - type: string - model_path: - type: uri_folder - evaluation_output: - type: uri_folder -outputs: - model_info_output_path: - type: uri_folder -code: ../src/register -environment: azureml:taxi-train-env@latest -command: >- - python register.py - --model_name ${{inputs.model_name}} - --model_path ${{inputs.model_path}} - --evaluation_output ${{inputs.evaluation_output}} - --model_info_output_path ${{outputs.model_info_output_path}} -# \ No newline at end of file diff --git a/data-science/components/train.yml b/data-science/components/train.yml deleted file mode 100644 index 8bbc273..0000000 --- a/data-science/components/train.yml +++ /dev/null @@ -1,18 +0,0 @@ -# -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: train_model -display_name: train-model -type: command -inputs: - train_data: - type: uri_folder -outputs: - model_output: - type: uri_folder -code: ../src/train -environment: azureml:taxi-train-env@latest -command: >- - python train.py - --train_data ${{inputs.train_data}} - --model_output ${{outputs.model_output}} -# \ No newline at end of file diff --git a/mlops/azureml/azureml-sdkv2.ipynb b/mlops/azureml/azureml-sdkv2.ipynb index e723c01..c3a9797 100644 --- a/mlops/azureml/azureml-sdkv2.ipynb +++ b/mlops/azureml/azureml-sdkv2.ipynb @@ -88,7 +88,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1670200031039 + "logged": 1671554468232 }, "jupyter": { "outputs_hidden": false, @@ -166,7 +166,7 @@ "from azure.ai.ml.entities import AmlCompute\n", "\n", "my_cluster = AmlCompute(\n", - " name=\"cpu-cluster-CA\",\n", + " name=\"cpu-cluster\",\n", " type=\"amlcompute\", \n", " size=\"STANDARD_DS3_V2\", \n", " min_instances=0, \n", @@ -181,7 +181,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1670200031797 + "logged": 1671554474840 }, "jupyter": { "outputs_hidden": false, @@ -264,7 +264,7 @@ "\n", "my_data = Data(\n", " path=\"../../data/taxi-data.csv\",\n", - " type=AssetTypes.URI_FILE,\n", + " type=\"uri_file\",\n", " description=\"Taxi dataset\",\n", " name=\"taxi-data\"\n", ")\n", @@ -275,7 +275,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1670200033270 + "logged": 1671554479153 }, "jupyter": { "outputs_hidden": false, @@ -333,7 +333,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1670200035753 + "logged": 1671554483854 }, "jupyter": { "outputs_hidden": false, @@ -397,36 +397,106 @@ "cell_type": "code", "source": [ "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml import Input, Output, load_component\n", + "from azure.ai.ml import Input, Output, command\n", "from azure.ai.ml.constants import AssetTypes, InputOutputModes\n", "\n", - "# Create pipeline job\n", - "parent_dir = \"../../data-science/components\"\n", "\n", - "# 1. Load components\n", - "prepare_data = load_component(source=parent_dir + \"/prep.yml\")\n", - "train_model = load_component(source=parent_dir + \"/train.yml\")\n", - "evaluate_model = load_component(source=parent_dir + \"/evaluate.yml\")\n", - "register_model = load_component(source=parent_dir + \"/register.yml\")\n", + "# 1. Define components\n", + "\n", + "prep_data = command( \n", + " name=\"prep_data\",\n", + " display_name=\"prep-data\",\n", + " code=\"../../data-science/src/prep\",\n", + " command=\"python prep.py \\\n", + " --raw_data ${{inputs.raw_data}} \\\n", + " --train_data ${{outputs.train_data}} \\\n", + " --val_data ${{outputs.val_data}} \\\n", + " --test_data ${{outputs.test_data}} \\\n", + " --enable_monitoring ${{inputs.enable_monitoring}} \\\n", + " --table_name ${{inputs.table_name}}\",\n", + " environment=\"taxi-train-env@latest\",\n", + " inputs={\n", + " \"raw_data\": Input(type=\"uri_file\"),\n", + " \"enable_monitoring\": Input(type=\"string\"),\n", + " \"table_name\": Input(type=\"string\")\n", + " },\n", + " outputs={\n", + " \"train_data\": Output(type=\"uri_folder\"),\n", + " \"val_data\": Output(type=\"uri_folder\"),\n", + " \"test_data\": Output(type=\"uri_folder\"),\n", + " }\n", + ")\n", + "\n", + "train_model = command( \n", + " name=\"train_model\",\n", + " display_name=\"train-model\",\n", + " code=\"../../data-science/src/train\",\n", + " command=\"python train.py \\\n", + " --train_data ${{inputs.train_data}} \\\n", + " --model_output ${{outputs.model_output}}\",\n", + " environment=\"taxi-train-env@latest\",\n", + " inputs={\"train_data\": Input(type=\"uri_folder\")},\n", + " outputs={\"model_output\": Output(type=\"uri_folder\")}\n", + ")\n", + "\n", + "evaluate_model = command(\n", + " name=\"evaluate_model\",\n", + " display_name=\"evaluate-model\",\n", + " code=\"../../data-science/src/evaluate\",\n", + " command=\"python evaluate.py \\\n", + " --model_name ${{inputs.model_name}} \\\n", + " --model_input ${{inputs.model_input}} \\\n", + " --test_data ${{inputs.test_data}} \\\n", + " --evaluation_output ${{outputs.evaluation_output}}\",\n", + " environment=\"taxi-train-env@latest\",\n", + " inputs={\n", + " \"model_name\": Input(type=\"string\"),\n", + " \"model_input\": Input(type=\"uri_folder\"),\n", + " \"test_data\": Input(type=\"uri_folder\")\n", + " },\n", + " outputs={\n", + " \"evaluation_output\": Output(type=\"uri_folder\")\n", + " }\n", + ")\n", + "\n", + "register_model = command(\n", + " name=\"register_model\",\n", + " display_name=\"register-model\",\n", + " code=\"../../data-science/src/register\",\n", + " command=\"python register.py \\\n", + " --model_name ${{inputs.model_name}} \\\n", + " --model_path ${{inputs.model_path}} \\\n", + " --evaluation_output ${{inputs.evaluation_output}} \\\n", + " --model_info_output_path ${{outputs.model_info_output_path}}\",\n", + " environment=\"taxi-train-env@latest\",\n", + " inputs={\n", + " \"model_name\": Input(type=\"string\"),\n", + " \"model_path\": Input(type=\"uri_folder\"),\n", + " \"evaluation_output\": Input(type=\"uri_folder\")\n", + " },\n", + " outputs={\n", + " \"model_info_output_path\": Output(type=\"uri_folder\")\n", + " }\n", + ")\n", "\n", "# 2. Construct pipeline\n", "@pipeline()\n", "def taxi_training_pipeline(raw_data, enable_monitoring, table_name):\n", " \n", - " prepare = prepare_data(\n", + " prep = prep_data(\n", " raw_data=raw_data,\n", " enable_monitoring=enable_monitoring, \n", " table_name=table_name\n", " )\n", "\n", " train = train_model(\n", - " train_data=prepare.outputs.train_data\n", + " train_data=prep.outputs.train_data\n", " )\n", "\n", " evaluate = evaluate_model(\n", " model_name=\"taxi-model\",\n", " model_input=train.outputs.model_output,\n", - " test_data=prepare.outputs.test_data\n", + " test_data=prep.outputs.test_data\n", " )\n", "\n", "\n", @@ -437,15 +507,15 @@ " )\n", "\n", " return {\n", - " \"pipeline_job_train_data\": prepare.outputs.train_data,\n", - " \"pipeline_job_test_data\": prepare.outputs.test_data,\n", + " \"pipeline_job_train_data\": prep.outputs.train_data,\n", + " \"pipeline_job_test_data\": prep.outputs.test_data,\n", " \"pipeline_job_trained_model\": train.outputs.model_output,\n", " \"pipeline_job_score_report\": evaluate.outputs.evaluation_output,\n", " }\n", "\n", "\n", "pipeline_job = taxi_training_pipeline(\n", - " Input(type=AssetTypes.URI_FILE, path=\"taxi-data@latest\"), \"false\", \"taximonitoring\"\n", + " Input(type=\"uri_file\", path=\"taxi-data@latest\"), \"false\", \"taximonitoring\"\n", ")\n", "\n", "# set pipeline level compute\n", @@ -458,7 +528,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1670200036044 + "logged": 1671554553702 }, "jupyter": { "outputs_hidden": false, @@ -475,7 +545,7 @@ "cell_type": "code", "source": [ "pipeline_job = ml_client.jobs.create_or_update(\n", - " pipeline_job, experiment_name=\"pipeline_samples\"\n", + " pipeline_job, experiment_name=\"taxi-training\"\n", ")\n", "pipeline_job" ], @@ -484,7 +554,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1670200062228 + "logged": 1671554563966 }, "jupyter": { "outputs_hidden": false, @@ -564,7 +634,7 @@ "\n", "# create an online endpoint\n", "online_endpoint = ManagedOnlineEndpoint(\n", - " name=\"taxi-online-endpoint-3\", \n", + " name=\"taxi-online-ep\", \n", " description=\"Taxi online endpoint\",\n", " auth_mode=\"aml_token\",\n", ")\n", @@ -577,7 +647,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1669584576485 + "logged": 1671555421278 }, "jupyter": { "outputs_hidden": false, @@ -624,7 +694,7 @@ "\n", "blue_deployment = ManagedOnlineDeployment(\n", " name=\"blue\",\n", - " endpoint_name=\"taxi-online-endpoint-3\",\n", + " endpoint_name=\"taxi-online-ep\",\n", " model=model,\n", " instance_type=\"Standard_DS2_v2\",\n", " instance_count=1,\n", @@ -639,7 +709,7 @@ "metadata": { "collapsed": false, "gather": { - "logged": 1669584886619 + "logged": 1671557131286 }, "jupyter": { "outputs_hidden": false @@ -922,7 +992,7 @@ }, "language_info": { "name": "python", - "version": "3.10.4", + "version": "3.10.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython",