Remove YAML component definitions; define components in the notebook using command()

This commit is contained in:
Maggie Mhanna 2022-12-20 17:28:55 +00:00
Родитель 391d24d12b
Коммит 775d206608
5 изменённых файлов: 98 добавлений и 124 удалений

Просмотреть файл

@ -1,24 +0,0 @@
# <component>
# Command component that runs evaluate.py with a model name, a model folder
# and a test-data folder, and writes its results to an output folder.
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
name: evaluate_model
display_name: evaluate-model
type: command
inputs:
  # Forwarded to evaluate.py as --model_name.
  model_name:
    type: string
  # Folder forwarded to evaluate.py as --model_input.
  model_input:
    type: uri_folder
  # Folder forwarded to evaluate.py as --test_data.
  test_data:
    type: uri_folder
outputs:
  # Folder forwarded to evaluate.py as --evaluation_output.
  evaluation_output:
    type: uri_folder
# Folder containing evaluate.py.
code: ../src/evaluate
# Registered environment "taxi-train-env", resolved to its latest version.
environment: azureml:taxi-train-env@latest
# Folded scalar (>-): the lines below are joined with single spaces into one
# command line, and the trailing newline is stripped.
command: >-
  python evaluate.py
  --model_name ${{inputs.model_name}}
  --model_input ${{inputs.model_input}}
  --test_data ${{inputs.test_data}}
  --evaluation_output ${{outputs.evaluation_output}}
# </component>

Просмотреть файл

@ -1,30 +0,0 @@
# <component>
# Command component that runs prep.py on a single raw data file and writes
# three output folders (train, validation, test).
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
name: prep_data
display_name: prep-data
type: command
inputs:
  # Single file forwarded to prep.py as --raw_data.
  raw_data:
    type: uri_file
  # Declared as a string (not a boolean); passed verbatim as --enable_monitoring.
  enable_monitoring:
    type: string
  # Passed verbatim to prep.py as --table_name.
  table_name:
    type: string
outputs:
  # Folder forwarded to prep.py as --train_data.
  train_data:
    type: uri_folder
  # Folder forwarded to prep.py as --val_data.
  val_data:
    type: uri_folder
  # Folder forwarded to prep.py as --test_data.
  test_data:
    type: uri_folder
# Folder containing prep.py.
code: ../src/prep
# Registered environment "taxi-train-env", resolved to its latest version.
environment: azureml:taxi-train-env@latest
# Folded scalar (>-): the lines below are joined with single spaces into one
# command line, and the trailing newline is stripped.
command: >-
  python prep.py
  --raw_data ${{inputs.raw_data}}
  --train_data ${{outputs.train_data}}
  --val_data ${{outputs.val_data}}
  --test_data ${{outputs.test_data}}
  --enable_monitoring ${{inputs.enable_monitoring}}
  --table_name ${{inputs.table_name}}
# </component>

Просмотреть файл

@ -1,24 +0,0 @@
# <component>
# Command component that runs register.py with a model name, a model folder
# and an evaluation-output folder, and writes model info to an output folder.
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
name: register_model
display_name: register-model
type: command
inputs:
  # Forwarded to register.py as --model_name.
  model_name:
    type: string
  # Folder forwarded to register.py as --model_path.
  model_path:
    type: uri_folder
  # Folder forwarded to register.py as --evaluation_output; note this is an
  # input here, not an output.
  evaluation_output:
    type: uri_folder
outputs:
  # Folder forwarded to register.py as --model_info_output_path.
  model_info_output_path:
    type: uri_folder
# Folder containing register.py.
code: ../src/register
# Registered environment "taxi-train-env", resolved to its latest version.
environment: azureml:taxi-train-env@latest
# Folded scalar (>-): the lines below are joined with single spaces into one
# command line, and the trailing newline is stripped.
command: >-
  python register.py
  --model_name ${{inputs.model_name}}
  --model_path ${{inputs.model_path}}
  --evaluation_output ${{inputs.evaluation_output}}
  --model_info_output_path ${{outputs.model_info_output_path}}
# </component>

Просмотреть файл

@ -1,18 +0,0 @@
# <component>
# Command component that runs train.py on a training-data folder and writes
# its result to a model output folder.
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
name: train_model
display_name: train-model
type: command
inputs:
  # Folder forwarded to train.py as --train_data.
  train_data:
    type: uri_folder
outputs:
  # Folder forwarded to train.py as --model_output.
  model_output:
    type: uri_folder
# Folder containing train.py.
code: ../src/train
# Registered environment "taxi-train-env", resolved to its latest version.
environment: azureml:taxi-train-env@latest
# Folded scalar (>-): the lines below are joined with single spaces into one
# command line, and the trailing newline is stripped.
command: >-
  python train.py
  --train_data ${{inputs.train_data}}
  --model_output ${{outputs.model_output}}
# </component>

Просмотреть файл

@ -88,7 +88,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1670200031039
"logged": 1671554468232
},
"jupyter": {
"outputs_hidden": false,
@ -166,7 +166,7 @@
"from azure.ai.ml.entities import AmlCompute\n",
"\n",
"my_cluster = AmlCompute(\n",
" name=\"cpu-cluster-CA\",\n",
" name=\"cpu-cluster\",\n",
" type=\"amlcompute\", \n",
" size=\"STANDARD_DS3_V2\", \n",
" min_instances=0, \n",
@ -181,7 +181,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1670200031797
"logged": 1671554474840
},
"jupyter": {
"outputs_hidden": false,
@ -264,7 +264,7 @@
"\n",
"my_data = Data(\n",
" path=\"../../data/taxi-data.csv\",\n",
" type=AssetTypes.URI_FILE,\n",
" type=\"uri_file\",\n",
" description=\"Taxi dataset\",\n",
" name=\"taxi-data\"\n",
")\n",
@ -275,7 +275,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1670200033270
"logged": 1671554479153
},
"jupyter": {
"outputs_hidden": false,
@ -333,7 +333,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1670200035753
"logged": 1671554483854
},
"jupyter": {
"outputs_hidden": false,
@ -397,36 +397,106 @@
"cell_type": "code",
"source": [
"from azure.ai.ml.dsl import pipeline\n",
"from azure.ai.ml import Input, Output, load_component\n",
"from azure.ai.ml import Input, Output, command\n",
"from azure.ai.ml.constants import AssetTypes, InputOutputModes\n",
"\n",
"# Create pipeline job\n",
"parent_dir = \"../../data-science/components\"\n",
"\n",
"# 1. Load components\n",
"prepare_data = load_component(source=parent_dir + \"/prep.yml\")\n",
"train_model = load_component(source=parent_dir + \"/train.yml\")\n",
"evaluate_model = load_component(source=parent_dir + \"/evaluate.yml\")\n",
"register_model = load_component(source=parent_dir + \"/register.yml\")\n",
"# 1. Define components\n",
"\n",
"prep_data = command( \n",
" name=\"prep_data\",\n",
" display_name=\"prep-data\",\n",
" code=\"../../data-science/src/prep\",\n",
" command=\"python prep.py \\\n",
" --raw_data ${{inputs.raw_data}} \\\n",
" --train_data ${{outputs.train_data}} \\\n",
" --val_data ${{outputs.val_data}} \\\n",
" --test_data ${{outputs.test_data}} \\\n",
" --enable_monitoring ${{inputs.enable_monitoring}} \\\n",
" --table_name ${{inputs.table_name}}\",\n",
" environment=\"taxi-train-env@latest\",\n",
" inputs={\n",
" \"raw_data\": Input(type=\"uri_file\"),\n",
" \"enable_monitoring\": Input(type=\"string\"),\n",
" \"table_name\": Input(type=\"string\")\n",
" },\n",
" outputs={\n",
" \"train_data\": Output(type=\"uri_folder\"),\n",
" \"val_data\": Output(type=\"uri_folder\"),\n",
" \"test_data\": Output(type=\"uri_folder\"),\n",
" }\n",
")\n",
"\n",
"train_model = command( \n",
" name=\"train_model\",\n",
" display_name=\"train-model\",\n",
" code=\"../../data-science/src/train\",\n",
" command=\"python train.py \\\n",
" --train_data ${{inputs.train_data}} \\\n",
" --model_output ${{outputs.model_output}}\",\n",
" environment=\"taxi-train-env@latest\",\n",
" inputs={\"train_data\": Input(type=\"uri_folder\")},\n",
" outputs={\"model_output\": Output(type=\"uri_folder\")}\n",
")\n",
"\n",
"evaluate_model = command(\n",
" name=\"evaluate_model\",\n",
" display_name=\"evaluate-model\",\n",
" code=\"../../data-science/src/evaluate\",\n",
" command=\"python evaluate.py \\\n",
" --model_name ${{inputs.model_name}} \\\n",
" --model_input ${{inputs.model_input}} \\\n",
" --test_data ${{inputs.test_data}} \\\n",
" --evaluation_output ${{outputs.evaluation_output}}\",\n",
" environment=\"taxi-train-env@latest\",\n",
" inputs={\n",
" \"model_name\": Input(type=\"string\"),\n",
" \"model_input\": Input(type=\"uri_folder\"),\n",
" \"test_data\": Input(type=\"uri_folder\")\n",
" },\n",
" outputs={\n",
" \"evaluation_output\": Output(type=\"uri_folder\")\n",
" }\n",
")\n",
"\n",
"register_model = command(\n",
" name=\"register_model\",\n",
" display_name=\"register-model\",\n",
" code=\"../../data-science/src/register\",\n",
" command=\"python register.py \\\n",
" --model_name ${{inputs.model_name}} \\\n",
" --model_path ${{inputs.model_path}} \\\n",
" --evaluation_output ${{inputs.evaluation_output}} \\\n",
" --model_info_output_path ${{outputs.model_info_output_path}}\",\n",
" environment=\"taxi-train-env@latest\",\n",
" inputs={\n",
" \"model_name\": Input(type=\"string\"),\n",
" \"model_path\": Input(type=\"uri_folder\"),\n",
" \"evaluation_output\": Input(type=\"uri_folder\")\n",
" },\n",
" outputs={\n",
" \"model_info_output_path\": Output(type=\"uri_folder\")\n",
" }\n",
")\n",
"\n",
"# 2. Construct pipeline\n",
"@pipeline()\n",
"def taxi_training_pipeline(raw_data, enable_monitoring, table_name):\n",
" \n",
" prepare = prepare_data(\n",
" prep = prep_data(\n",
" raw_data=raw_data,\n",
" enable_monitoring=enable_monitoring, \n",
" table_name=table_name\n",
" )\n",
"\n",
" train = train_model(\n",
" train_data=prepare.outputs.train_data\n",
" train_data=prep.outputs.train_data\n",
" )\n",
"\n",
" evaluate = evaluate_model(\n",
" model_name=\"taxi-model\",\n",
" model_input=train.outputs.model_output,\n",
" test_data=prepare.outputs.test_data\n",
" test_data=prep.outputs.test_data\n",
" )\n",
"\n",
"\n",
@ -437,15 +507,15 @@
" )\n",
"\n",
" return {\n",
" \"pipeline_job_train_data\": prepare.outputs.train_data,\n",
" \"pipeline_job_test_data\": prepare.outputs.test_data,\n",
" \"pipeline_job_train_data\": prep.outputs.train_data,\n",
" \"pipeline_job_test_data\": prep.outputs.test_data,\n",
" \"pipeline_job_trained_model\": train.outputs.model_output,\n",
" \"pipeline_job_score_report\": evaluate.outputs.evaluation_output,\n",
" }\n",
"\n",
"\n",
"pipeline_job = taxi_training_pipeline(\n",
" Input(type=AssetTypes.URI_FILE, path=\"taxi-data@latest\"), \"false\", \"taximonitoring\"\n",
" Input(type=\"uri_file\", path=\"taxi-data@latest\"), \"false\", \"taximonitoring\"\n",
")\n",
"\n",
"# set pipeline level compute\n",
@ -458,7 +528,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1670200036044
"logged": 1671554553702
},
"jupyter": {
"outputs_hidden": false,
@ -475,7 +545,7 @@
"cell_type": "code",
"source": [
"pipeline_job = ml_client.jobs.create_or_update(\n",
" pipeline_job, experiment_name=\"pipeline_samples\"\n",
" pipeline_job, experiment_name=\"taxi-training\"\n",
")\n",
"pipeline_job"
],
@ -484,7 +554,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1670200062228
"logged": 1671554563966
},
"jupyter": {
"outputs_hidden": false,
@ -564,7 +634,7 @@
"\n",
"# create an online endpoint\n",
"online_endpoint = ManagedOnlineEndpoint(\n",
" name=\"taxi-online-endpoint-3\", \n",
" name=\"taxi-online-ep\", \n",
" description=\"Taxi online endpoint\",\n",
" auth_mode=\"aml_token\",\n",
")\n",
@ -577,7 +647,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1669584576485
"logged": 1671555421278
},
"jupyter": {
"outputs_hidden": false,
@ -624,7 +694,7 @@
"\n",
"blue_deployment = ManagedOnlineDeployment(\n",
" name=\"blue\",\n",
" endpoint_name=\"taxi-online-endpoint-3\",\n",
" endpoint_name=\"taxi-online-ep\",\n",
" model=model,\n",
" instance_type=\"Standard_DS2_v2\",\n",
" instance_count=1,\n",
@ -639,7 +709,7 @@
"metadata": {
"collapsed": false,
"gather": {
"logged": 1669584886619
"logged": 1671557131286
},
"jupyter": {
"outputs_hidden": false
@ -922,7 +992,7 @@
},
"language_info": {
"name": "python",
"version": "3.10.4",
"version": "3.10.6",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",