sample: flow in pipeline (#2620)
* doc: flow in pipeline for cli * doc: flow in pipeline sdk experience * doc: remind to grant permission to compute cluster * Update README.md * Update README.md * Update flow_in_pipeline.ipynb * update readme for cli sample * move bs4 inside tool function to reduce dependency for compile * doc: add information for office usage * doc: update component spec glob * feat: add requirements * azure-ai-ml is released * fix: smoke * fix: resolve comments * ci: add ci for flow in pipeline * doc: update readme * fix: smoke * create connection with wrong settings * fix: smoke * fix: highlight schema requirement * feat: use a sample without connection * fix: include flow.tools.json * fix: update text * fix: remove connection creation * fix: update input setting * fix: remove connection reference in python tool * fix: flow run setting * fix: further clean connection reference * feat: use the same pipeline in cli and sdk * fix: black * fix: make 2 example align
This commit is contained in:
Родитель
cb0cbee496
Коммит
5bb29978fd
51
.github/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline.yml
поставляемый
Normal file
51
.github/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline.yml
поставляемый
Normal file
|
@ -0,0 +1,51 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "4 6/12 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component/**
|
||||
- infra/bootstrapping/**
|
||||
- .github/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline.yml
|
||||
- cli/run-pipeline-jobs.sh
|
||||
- cli/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra/bootstrapping
|
||||
continue-on-error: false
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run job
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
|
||||
bash -x ../../../run-job.sh pipeline.yml
|
||||
working-directory: cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component
|
75
.github/workflows/sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline.yml
поставляемый
Normal file
75
.github/workflows/sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline.yml
поставляемый
Normal file
|
@ -0,0 +1,75 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline
|
||||
# This file is created by sdk/python/readme.py.
|
||||
# Please do not edit directly.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "41 6/12 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- sdk/python/jobs/pipelines/1l_flow_in_pipeline/**
|
||||
- .github/workflows/sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline.yml
|
||||
- sdk/python/dev-requirements.txt
|
||||
- infra/bootstrapping/**
|
||||
- sdk/python/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
- name: pip install notebook reqs
|
||||
run: pip install -r sdk/python/dev-requirements.txt
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra/bootstrapping
|
||||
continue-on-error: false
|
||||
- name: setup SDK
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: sdk/python
|
||||
continue-on-error: true
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run jobs/pipelines/1l_flow_in_pipeline/flow_in_pipeline.ipynb
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
|
||||
bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
|
||||
bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "flow_in_pipeline.ipynb";
|
||||
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
|
||||
papermill -k python flow_in_pipeline.ipynb flow_in_pipeline.output.ipynb
|
||||
working-directory: sdk/python/jobs/pipelines/1l_flow_in_pipeline
|
||||
- name: upload notebook's working folder as an artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: flow_in_pipeline
|
||||
path: sdk/python/jobs/pipelines/1l_flow_in_pipeline
|
|
@ -183,6 +183,7 @@ path|status|description
|
|||
[jobs/pipelines-with-components/image_classification_with_densenet/pipeline.yml](jobs/pipelines-with-components/image_classification_with_densenet/pipeline.yml)|[![jobs/pipelines-with-components/image_classification_with_densenet/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-image_classification_with_densenet-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-image_classification_with_densenet-pipeline.yml)|Train densenet for image classification
|
||||
[jobs/pipelines-with-components/nyc_taxi_data_regression/pipeline.yml](jobs/pipelines-with-components/nyc_taxi_data_regression/pipeline.yml)|[![jobs/pipelines-with-components/nyc_taxi_data_regression/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-pipeline.yml)|Train regression model based on nyc taxi dataset
|
||||
[jobs/pipelines-with-components/nyc_taxi_data_regression/single-job-pipeline.yml](jobs/pipelines-with-components/nyc_taxi_data_regression/single-job-pipeline.yml)|[![jobs/pipelines-with-components/nyc_taxi_data_regression/single-job-pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-single-job-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-single-job-pipeline.yml)|Single job pipeline to train regression model based on nyc taxi dataset
|
||||
[jobs/pipelines-with-components/pipeline_job_with_flow_as_component/pipeline.yml](jobs/pipelines-with-components/pipeline_job_with_flow_as_component/pipeline.yml)|[![jobs/pipelines-with-components/pipeline_job_with_flow_as_component/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline.yml)|The hello world pipeline job with flow as component
|
||||
[jobs/pipelines-with-components/pipeline_with_hyperparameter_sweep/pipeline.yml](jobs/pipelines-with-components/pipeline_with_hyperparameter_sweep/pipeline.yml)|[![jobs/pipelines-with-components/pipeline_with_hyperparameter_sweep/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_with_hyperparameter_sweep-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_with_hyperparameter_sweep-pipeline.yml)|Tune hyperparameters using TF component
|
||||
[jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/data_pipeline/data_pipeline.yml](jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/data_pipeline/data_pipeline.yml)|[![jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/data_pipeline/data_pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-data_pipeline-data_pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-data_pipeline-data_pipeline.yml)|pipeline component with data prep and transformation
|
||||
[jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/pipeline.yml](jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/pipeline.yml)|[![jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-pipeline.yml)|Train regression model based on nyc taxi dataset
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
This is a dummy pipeline job with anonymous reference of a flow as a component. Flow directory is copied from [sample in promptflow repository](https://github.com/microsoft/promptflow/tree/main/examples/flows/standard/basic) and remove connection dependency to avoid using promptflow connection in azure ml example repository.
|
||||
|
||||
Prerequisites:
|
||||
1. `.promptflow/flow.tools.json` in the flow directory is required to use a flow as a component. Usually you can use `pf flow validate` or `pf run validate` to generate it.
|
||||
2. You should either update connection name in `flow.dag.yaml` or update `connection.yaml` with your own api information and use `pf connection create --file connection.yaml` to create a workspace connection.
|
||||
3. You need to either edit the compute cluster in `pipeline.yml` or create a compute cluster named `cpu-cluster` in your workspace.
|
||||
4. Please ensure that there is a `$schema` line in your `flow.dag.yaml` and `run.yaml`:
|
||||
1. `flow.dag.yaml`: `$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json`
|
||||
2. `run.yaml`: `$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json`
|
||||
|
||||
After that, you can run `az ml job create --file pipeline.yml` to submit the pipeline job.
|
||||
|
||||
References:
|
||||
- [microsoft/promptflow: Build high-quality LLM apps](https://github.com/microsoft/promptflow)
|
||||
- [Reference - Prompt flow documentation](https://microsoft.github.io/promptflow/reference/index.html)
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"package": {},
|
||||
"code": {
|
||||
"hello.jinja2": {
|
||||
"type": "prompt",
|
||||
"inputs": {
|
||||
"text": {
|
||||
"type": [
|
||||
"string"
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "Please replace the template with your own prompt.",
|
||||
"source": "hello.jinja2"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
# Basic standard flow
|
||||
A basic standard flow using custom python tool that calls Azure OpenAI with connection info stored in environment variables.
|
||||
|
||||
Tools used in this flow:
|
||||
- `prompt` tool
|
||||
- custom `python` Tool
|
||||
|
||||
Connections used in this flow:
|
||||
- None
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Install promptflow sdk and other dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Run flow
|
||||
|
||||
- Prepare your Azure Open AI resource follow this [instruction](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal) and get your `api_key` if you don't have one.
|
||||
|
||||
- Setup environment variables
|
||||
|
||||
Ensure you have put your azure open ai endpoint key in [.env](.env) file. You can create one refer to this [example file](.env.example).
|
||||
|
||||
```bash
|
||||
cat .env
|
||||
```
|
||||
|
||||
- Test flow/node
|
||||
```bash
|
||||
# test with default input value in flow.dag.yaml
|
||||
pf flow test --flow .
|
||||
|
||||
# test with flow inputs
|
||||
pf flow test --flow . --inputs text="Java Hello World!"
|
||||
|
||||
# test node with inputs
|
||||
pf flow test --flow . --node llm --inputs prompt="Write a simple Hello World program that displays the greeting message when executed."
|
||||
```
|
||||
|
||||
- Create run with multiple lines data
|
||||
```bash
|
||||
# using environment from .env file (loaded in user code: hello.py)
|
||||
pf run create --flow . --data ./data.jsonl --stream
|
||||
```
|
||||
|
||||
- List and show run meta
|
||||
```bash
|
||||
# list created run
|
||||
pf run list
|
||||
|
||||
# get a sample run name
|
||||
name=$(pf run list -r 10 | jq '.[] | select(.name | contains("basic_variant_0")) | .name'| head -n 1 | tr -d '"')
|
||||
|
||||
# show specific run detail
|
||||
pf run show --name $name
|
||||
|
||||
# show output
|
||||
pf run show-details --name $name
|
||||
|
||||
# visualize run in browser
|
||||
pf run visualize --name $name
|
||||
```
|
||||
|
||||
## Run flow with connection
|
||||
Storing connection info in .env as plaintext is not safe. We recommend using `pf connection` to guard secrets like `api_key` from leaking.
|
||||
|
||||
- Show or create `open_ai_connection`
|
||||
```bash
|
||||
# create connection from `azure_openai.yml` file
|
||||
# Override keys with --set to avoid yaml file changes
|
||||
pf connection create --file ../../../connections/azure_openai.yml --set api_key=<your_api_key> api_base=<your_api_base>
|
||||
|
||||
# check if connection exists
|
||||
pf connection show -n open_ai_connection
|
||||
```
|
||||
|
||||
- Test using connection secret specified in environment variables
|
||||
**Note**: we used `'` to wrap the value since it supports raw values without escaping in PowerShell & bash. For the Windows command prompt, you may remove the `'` to avoid it becoming part of the value.
|
||||
|
||||
```bash
|
||||
# test with default input value in flow.dag.yaml
|
||||
pf flow test --flow . --environment-variables AZURE_OPENAI_API_KEY='${open_ai_connection.api_key}' AZURE_OPENAI_API_BASE='${open_ai_connection.api_base}'
|
||||
```
|
||||
|
||||
- Create run using connection secret binding specified in environment variables, see [run.yml](run.yml)
|
||||
```bash
|
||||
# create run
|
||||
pf run create --flow . --data ./data.jsonl --stream --environment-variables AZURE_OPENAI_API_KEY='${open_ai_connection.api_key}' AZURE_OPENAI_API_BASE='${open_ai_connection.api_base}'
|
||||
# create run using yaml file
|
||||
pf run create --file run.yml --stream
|
||||
|
||||
# show outputs
|
||||
name=$(pf run list -r 10 | jq '.[] | select(.name | contains("basic_variant_0")) | .name'| head -n 1 | tr -d '"')
|
||||
pf run show-details --name $name
|
||||
```
|
||||
|
||||
## Run flow in cloud with connection
|
||||
- Assume we already have a connection named `open_ai_connection` in workspace.
|
||||
```bash
|
||||
# set default workspace
|
||||
az account set -s <your_subscription_id>
|
||||
az configure --defaults group=<your_resource_group_name> workspace=<your_workspace_name>
|
||||
```
|
||||
|
||||
- Create run
|
||||
```bash
|
||||
# run with environment variable reference connection in azureml workspace
|
||||
pfazure run create --flow . --data ./data.jsonl --environment-variables AZURE_OPENAI_API_KEY='${open_ai_connection.api_key}' AZURE_OPENAI_API_BASE='${open_ai_connection.api_base}' --stream --runtime demo-mir
|
||||
# run using yaml file
|
||||
pfazure run create --file run.yml --stream --runtime demo-mir
|
||||
```
|
||||
|
||||
- List and show run meta
|
||||
```bash
|
||||
# list created run
|
||||
pfazure run list -r 3
|
||||
|
||||
# get a sample run name
|
||||
name=$(pfazure run list -r 100 | jq '.[] | select(.name | contains("basic_variant_0")) | .name'| head -n 1 | tr -d '"')
|
||||
|
||||
# show specific run detail
|
||||
pfazure run show --name $name
|
||||
|
||||
# show output
|
||||
pfazure run show-details --name $name
|
||||
|
||||
# visualize run in browser
|
||||
pfazure run visualize --name $name
|
||||
```
|
|
@ -0,0 +1,28 @@
|
|||
# Prompt flow DAG for the "basic" standard flow: a prompt node renders a
# jinja2 template from the flow input, then a custom python node (hello.py)
# consumes the rendered prompt.
$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
inputs:
  text:
    type: string
    default: Hello World!
outputs:
  output:
    type: string
    # Expose the python node's return value as the flow's single output.
    reference: ${llm.output}
nodes:
  - name: hello_prompt
    type: prompt
    source:
      type: code
      path: hello.jinja2
    inputs:
      text: ${inputs.text}
  - name: llm
    type: python
    source:
      type: code
      path: hello.py
    inputs:
      prompt: ${hello_prompt.output}
      deployment_name: text-davinci-003
      # NOTE(review): quoted, so this is the string "120" while hello.py
      # declares max_tokens: int — presumably coerced by promptflow; confirm.
      max_tokens: "120"
environment:
  python_requirements_txt: requirements.txt
|
|
@ -0,0 +1,2 @@
|
|||
{# Please replace the template with your own prompt. #}
|
||||
Write a simple {{text}} program that displays the greeting message when executed.
|
|
@ -0,0 +1,51 @@
|
|||
import os
|
||||
import openai
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from promptflow import tool
|
||||
|
||||
# The inputs section will change based on the arguments of the tool function, after you save the code
|
||||
# Adding type to arguments and return value will help the system show the types properly
|
||||
# Please update the function name/signature per need
|
||||
|
||||
|
||||
def to_bool(value) -> bool:
    """Return True only when the string form of ``value`` is "true" (any case)."""
    normalized = str(value).lower()
    return normalized == "true"
|
||||
|
||||
|
||||
@tool
def my_python_tool(
    prompt: str,
    # for AOAI, deployment name is customized by user, not model name.
    deployment_name: str,
    suffix: str = None,
    max_tokens: int = 120,
    temperature: float = 1.0,
    top_p: float = 1.0,
    n: int = 1,
    logprobs: int = None,
    echo: bool = False,
    stop: list = None,
    presence_penalty: float = 0,
    frequency_penalty: float = 0,
    best_of: int = 1,
    logit_bias: dict = None,  # was a shared mutable default ({}); None is safe
    user: str = "",
    **kwargs,
) -> str:
    """Stand-in for an Azure OpenAI completion call.

    Validates that AZURE_OPENAI_API_KEY is available (falling back to loading
    a .env file) and then returns a canned answer instead of calling the
    service, so this example repository has no promptflow connection
    dependency. The OpenAI-style keyword parameters (temperature, top_p, ...)
    are accepted for signature compatibility but intentionally unused.

    :param prompt: rendered prompt text from the upstream prompt node.
    :param deployment_name: AOAI deployment to target (unused in this stub).
    :return: a deterministic fake answer derived from ``prompt``.
    :raises Exception: if AZURE_OPENAI_API_KEY is still unset after load_dotenv().
    """
    if "AZURE_OPENAI_API_KEY" not in os.environ:
        # load environment variables from .env file
        load_dotenv()

    if "AZURE_OPENAI_API_KEY" not in os.environ:
        raise Exception("Please specify environment variables: AZURE_OPENAI_API_KEY")

    # The original sample built an unused connection dict here, which raised
    # KeyError whenever AZURE_OPENAI_API_BASE was unset even though only
    # AZURE_OPENAI_API_KEY is validated above. That dead code is removed.

    # return directly to avoid using promptflow connection in azure ml example repository
    return f"fake answer based on {prompt}"
|
|
@ -0,0 +1,3 @@
|
|||
promptflow[azure]
|
||||
promptflow-tools
|
||||
python-dotenv
|
|
@ -0,0 +1,3 @@
|
|||
{"text": "Python Hello World!"}
|
||||
{"text": "C Hello World!"}
|
||||
{"text": "C# Hello World!"}
|
|
@ -0,0 +1,30 @@
|
|||
# Azure ML pipeline job that runs a prompt flow as an anonymous parallel
# component, referenced two ways: directly from the flow DAG, and via a
# prompt flow run specification (run.yml).
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
display_name: pipeline_job_with_flow
description: The hello world pipeline job with flow as component

# Requires a compute cluster named cpu-cluster in the target workspace
# (or edit this to point at your own cluster).
compute: "azureml:cpu-cluster"

inputs:
  flow_input:
    type: uri_file
    path: "./data/data.jsonl"

jobs:
  # Flow referenced directly from its DAG file.
  flow_node_from_dag:
    type: parallel
    component: ./basic/flow.dag.yaml
    inputs:
      data: ${{parent.inputs.flow_input}}
      # Maps the "text" column of each input line to the flow's text input.
      text: "${data.text}"
    environment_variables:
      # Placeholders — replace before submitting. The sample flow returns a
      # fake answer without calling Azure OpenAI, so real values are optional.
      AZURE_OPENAI_API_KEY: <your-api-key>
      AZURE_OPENAI_API_BASE: <your-api-base>
      AZURE_OPENAI_API_TYPE: azure

  # The same flow referenced through a prompt flow run specification.
  flow_node_from_run:
    type: parallel
    component: ./run.yml
    inputs:
      data: ${{parent.inputs.flow_input}}
      text: "${data.text}"
|
@ -0,0 +1,6 @@
|
|||
# Prompt flow run specification pointing at the "basic" flow directory;
# referenced as a component by pipeline.yml (flow_node_from_run).
$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json
flow: ./basic
environment_variables:
  # Placeholders — replace with real values. The sample flow returns a fake
  # answer without calling Azure OpenAI, so real values are optional here.
  AZURE_OPENAI_API_KEY: <your-api-key>
  AZURE_OPENAI_API_BASE: <your-api-base>
  AZURE_OPENAI_API_TYPE: azure
|
|
@ -187,6 +187,7 @@ Test Status is for branch - **_main_**
|
|||
|jobs|pipelines|[pipeline_with_train_eval_pipeline_component](jobs/pipelines/1j_pipeline_with_pipeline_component/pipeline_with_train_eval_pipeline_component/pipeline_with_train_eval_pipeline_component.ipynb)|Create pipeline with CommandComponents from local YAML file|[![pipeline_with_train_eval_pipeline_component](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1j_pipeline_with_pipeline_component-pipeline_with_train_eval_pipeline_component-pipeline_with_train_eval_pipeline_component.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1j_pipeline_with_pipeline_component-pipeline_with_train_eval_pipeline_component-pipeline_with_train_eval_pipeline_component.yml)|
|
||||
|jobs|pipelines|[automl-forecasting-demand-hierarchical-timeseries-in-pipeline](jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-hierarchical-timeseries-in-pipeline/automl-forecasting-demand-hierarchical-timeseries-in-pipeline.ipynb)|*no description*|[![automl-forecasting-demand-hierarchical-timeseries-in-pipeline](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1k_demand_forecasting_with_pipeline_components-automl-forecasting-demand-hierarchical-timeseries-in-pipeline-automl-forecasting-demand-hierarchical-timeseries-in-pipeline.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1k_demand_forecasting_with_pipeline_components-automl-forecasting-demand-hierarchical-timeseries-in-pipeline-automl-forecasting-demand-hierarchical-timeseries-in-pipeline.yml)|
|
||||
|jobs|pipelines|[automl-forecasting-demand-many-models-in-pipeline](jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-many-models-in-pipeline/automl-forecasting-demand-many-models-in-pipeline.ipynb)|*no description*|[![automl-forecasting-demand-many-models-in-pipeline](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1k_demand_forecasting_with_pipeline_components-automl-forecasting-demand-many-models-in-pipeline-automl-forecasting-demand-many-models-in-pipeline.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1k_demand_forecasting_with_pipeline_components-automl-forecasting-demand-many-models-in-pipeline-automl-forecasting-demand-many-models-in-pipeline.yml)|
|
||||
|jobs|pipelines|[flow_in_pipeline](jobs/pipelines/1l_flow_in_pipeline/flow_in_pipeline.ipynb)|Run a flow as a component in a pipeline job|[![flow_in_pipeline](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline.yml)|
|
||||
|jobs|pipelines|[train_mnist_with_tensorflow](jobs/pipelines/2a_train_mnist_with_tensorflow/train_mnist_with_tensorflow.ipynb)|Create pipeline using components to run a distributed job with tensorflow|[![train_mnist_with_tensorflow](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-2a_train_mnist_with_tensorflow-train_mnist_with_tensorflow.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-2a_train_mnist_with_tensorflow-train_mnist_with_tensorflow.yml)|
|
||||
|jobs|pipelines|[train_cifar_10_with_pytorch](jobs/pipelines/2b_train_cifar_10_with_pytorch/train_cifar_10_with_pytorch.ipynb)|Get data, train and evaluate a model in pipeline with Components|[![train_cifar_10_with_pytorch](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-2b_train_cifar_10_with_pytorch-train_cifar_10_with_pytorch.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-2b_train_cifar_10_with_pytorch-train_cifar_10_with_pytorch.yml)|
|
||||
|jobs|pipelines|[nyc_taxi_data_regression](jobs/pipelines/2c_nyc_taxi_data_regression/nyc_taxi_data_regression.ipynb)|Build pipeline with components for 5 jobs - prep data, transform data, train model, predict results and evaluate model performance|[![nyc_taxi_data_regression](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-2c_nyc_taxi_data_regression-nyc_taxi_data_regression.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/sdk-jobs-pipelines-2c_nyc_taxi_data_regression-nyc_taxi_data_regression.yml)|
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
This is a dummy pipeline job with anonymous reference of a flow as a component. This example has reused the flow in corresponding CLI example, which is copied from [sample in promptflow repository](https://github.com/microsoft/promptflow/tree/main/examples/flows/standard/basic) and remove connection dependency to avoid using promptflow connection in azure ml example repository. Please check [this path](../../../../../cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component/) for dependent resources.
|
||||
|
||||
References:
|
||||
- [microsoft/promptflow: Build high-quality LLM apps](https://github.com/microsoft/promptflow)
|
||||
- [Reference - Prompt flow documentation](https://microsoft.github.io/promptflow/reference/index.html)
|
|
@ -0,0 +1,235 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use Flow as Component in Pipeline Job\n",
|
||||
"\n",
|
||||
"**Requirements** - In order to benefit from this tutorial, you will need:\n",
|
||||
"- A basic understanding of Machine Learning\n",
|
||||
"- An Azure account with an active subscription - [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F)\n",
|
||||
"- An Azure ML workspace with a compute cluster - [Configure workspace](../../configuration.ipynb)\n",
|
||||
"- A python environment\n",
|
||||
"- Installed Azure Machine Learning Python SDK v2 - [install instructions](../../../README.md) - check the getting started section\n",
|
||||
"\n",
|
||||
"**Learning Objectives** - By the end of this tutorial, you should be able to:\n",
|
||||
"- Connect to your AML workspace from the Python SDK\n",
|
||||
"- Create a `Pipeline` that loads a flow as a component from YAML\n",
|
||||
"\n",
|
||||
"**Motivations** - This notebook explains how to use a flow as a component in a pipeline job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 1. Connect to Azure Machine Learning Workspace\n",
|
||||
"\n",
|
||||
"The [workspace](https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace) is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning. In this section we will connect to the workspace in which the job will be run.\n",
|
||||
"\n",
|
||||
"## 1.1 Import the required libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import required libraries\n",
|
||||
"from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
|
||||
"\n",
|
||||
"from azure.ai.ml import MLClient, load_component, Input\n",
|
||||
"from azure.ai.ml.constants import AssetTypes\n",
|
||||
"from azure.ai.ml.dsl import pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1.2 Configure credential\n",
|
||||
"\n",
|
||||
"We are using `DefaultAzureCredential` to get access to workspace. \n",
|
||||
"`DefaultAzureCredential` should be capable of handling most Azure SDK authentication scenarios. \n",
|
||||
"\n",
|
||||
"Reference for more available credentials if it does not work for you: [configure credential example](../../configuration.ipynb), [azure-identity reference doc](https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity?view=azure-python)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" credential = DefaultAzureCredential()\n",
|
||||
" # Check if given credential can get token successfully.\n",
|
||||
" credential.get_token(\"https://management.azure.com/.default\")\n",
|
||||
"except Exception as ex:\n",
|
||||
" # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work\n",
|
||||
" credential = InteractiveBrowserCredential()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1.3 Get a handle to the workspace\n",
|
||||
"\n",
|
||||
"We use a config file to connect to a workspace. The Azure ML workspace should be configured with a compute cluster. [Check this notebook to configure a workspace](../../configuration.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get a handle to workspace\n",
|
||||
"ml_client = MLClient.from_config(credential=credential)\n",
|
||||
"\n",
|
||||
"# Retrieve an already attached Azure Machine Learning Compute.\n",
|
||||
"cluster_name = \"cpu-cluster\"\n",
|
||||
"print(ml_client.compute.get(cluster_name))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 2. Load flow as component\n",
|
||||
"\n",
|
||||
"We suppose that there has already been a flow authored with Promptflow SDK/CLI/portal and `.promptflow/flow.tools.json` is already generated in the flow directory:\n",
|
||||
"\n",
|
||||
"- For more information about how to author a flow, please check [official doc site](https://microsoft.github.io/promptflow/).\n",
|
||||
"- If `.promptflow/flow.tools.json` is not generated yet or is not up-to-date, you may use `pf flow validate` or `pf run validate` to generate it - [reference doc](https://microsoft.github.io/promptflow/reference/pf-command-reference.html).\n",
|
||||
"- Please ensure that there is a `$schema` entry in your `flow.dag.yaml` and `run.yaml`; we depend on that to identify whether this is a flow\n",
|
||||
" - `flow.dag.yaml`: `$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json`\n",
|
||||
" - `run.yaml`: `$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Then we can load its flow dag yaml as a component like regular component specs. Here we reused [the flow definition yaml in CLI examples](../../../../../cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component/web_classification/flow.dag.yaml)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"flow_from_dag = load_component(\n",
|
||||
" \"../../../../../cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component/basic/flow.dag.yaml\"\n",
|
||||
")\n",
|
||||
"flow_from_run = load_component(\n",
|
||||
" \"../../../../../cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component/run.yml\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 3. Pipeline job\n",
|
||||
"## 3.1 Build pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data_input = Input(\n",
|
||||
" path=\"../../../../../cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component/data/data.jsonl\",\n",
|
||||
" type=AssetTypes.URI_FILE,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@pipeline()\n",
|
||||
"def pipeline_func_with_flow(data):\n",
|
||||
" flow_node_from_dag = flow_from_dag(\n",
|
||||
" data=data,\n",
|
||||
" text=\"${data.text}\",\n",
|
||||
" )\n",
|
||||
" flow_node_from_dag.environment_variables = {\n",
|
||||
" \"AZURE_OPENAI_API_KEY\": \"<your-api-key>\",\n",
|
||||
" \"AZURE_OPENAI_API_BASE\": \"<your-api-base>\",\n",
|
||||
" \"AZURE_OPENAI_API_TYPE\": \"azure\",\n",
|
||||
" }\n",
|
||||
" flow_node_from_run = flow_from_run(\n",
|
||||
" data=data,\n",
|
||||
" text=\"${data.text}\",\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# create pipeline instance\n",
|
||||
"pipeline_job = pipeline_func_with_flow(data=data_input)\n",
|
||||
"pipeline_job.settings.default_compute = \"cpu-cluster\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3.2 Submit pipeline job"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# submit job to workspace\n",
|
||||
"pipeline_job = ml_client.jobs.create_or_update(\n",
|
||||
" pipeline_job, experiment_name=\"pipeline_samples\"\n",
|
||||
")\n",
|
||||
"pipeline_job"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Wait until the job completes\n",
|
||||
"ml_client.jobs.stream(pipeline_job.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Next Steps\n",
|
||||
"You can see further examples of running a pipeline job [here](../README.md)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"description": {
|
||||
"description": "Create pipeline using a flow as a component"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.17"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Загрузка…
Ссылка в новой задаче