add alert for sdk sample workflow (#1296)

* rename k8s/amlarc to kubernetes

* add sdk IcM alert

Co-authored-by: Xue Wei <xuewe@microsoft.com>
This commit is contained in:
snowei 2022-05-12 19:35:01 +08:00 коммит произвёл GitHub
Родитель a3cb19dc05
Коммит 53f91eacdc
8 изменённых файлов: 64 добавлений и 16 удалений

Просмотреть файл

@ -1,4 +1,4 @@
name: cli-scripts-deploy-safe-rollout-k8s-online-endpoints
name: cli-scripts-deploy-safe-rollout-kubernetes-online-endpoints
on:
workflow_dispatch:
schedule:
@ -8,8 +8,8 @@ on:
- main
paths:
- cli/endpoints/online/**
- cli/deploy-safe-rollout-k8s-online-endpoints.sh
- .github/workflows/cli-scripts-deploy-safe-rollout-k8s-online-endpoints.yml
- cli/deploy-safe-rollout-kubernetes-online-endpoints.sh
- .github/workflows/cli-scripts-deploy-safe-rollout-kubernetes-online-endpoints.yml
- cli/setup.sh
- .github/kubernetes-compute/tool.sh
jobs:
@ -17,10 +17,12 @@ jobs:
runs-on: ubuntu-latest
env:
KEY_VAULT_NAME: amlarcgithubworkflowkv
SEVERITY: 3
OWNERS: amlarc@microsoft.com
# Human-readable alert title; presumably consumed by the "file IcM when fails" step — confirm against tool.sh.
TITLE: "[Github Workflow] Failed to run kubernetes-online-endpoints CLI samples"
GITHUB_REPO: https://github.com/Azure/azureml-examples
WORKFLOW_URL: https://github.com/Azure/azureml-examples/actions/workflows/cli-scripts-deploy-safe-rollout-k8s-online-endpoints.yml
TSG_ID: https://microsoft.sharepoint.com/teams/Vienna/SiteAssets/Vienna%20Notebook/AMLArcTSG.one#Troubleshooting%20github%20workflow&section-id=69e8bdb1-5734-4b07-967a-5a50a91cf040&page-id=53a1a232-f9f0-4192-b7d2-0474848ebb18&end
WORKFLOW_URL: https://github.com/Azure/azureml-examples/actions/workflows/cli-scripts-deploy-safe-rollout-kubernetes-online-endpoints.yml
TSG_ID: https://microsoft.sharepoint.com/teams/Vienna/_layouts/OneNote.aspx?id=%2Fteams%2FVienna%2FSiteAssets%2FVienna%20Notebook&wd=target%28AMLArcTSG.one%7C69E8BDB1-5734-4B07-967A-5A50A91CF040%2FTroubleshooting%20github%20workflow%7C53A1A232-F9F0-4192-B7D2-0474848EBB18%2F%29
steps:
- name: check out repo
uses: actions/checkout@v2
@ -32,14 +34,10 @@ jobs:
run: bash setup.sh
working-directory: cli
continue-on-error: true
- name: scripts installs
run: |
set -x
sudo apt-get update -y
sudo apt-get install uuid-runtime jq -y
sudo apt-get install xmlstarlet
- name: test script script
run: set -e; bash -x deploy-safe-rollout-k8s-online-endpoints.sh
# Refresh the package index first (runner images can carry a stale one) and
# pass -y so apt-get never waits for confirmation in this non-interactive job.
- name: package installs
run: sudo apt-get update -y && sudo apt-get install -y xmlstarlet
- name: test script
run: set -e; bash -x deploy-safe-rollout-kubernetes-online-endpoints.sh
working-directory: cli
- name: file IcM when fails
if: ${{ failure() && github.event_name == 'schedule' }}

Просмотреть файл

@ -11,9 +11,18 @@ on:
- sdk/endpoints/online/**
- .github/workflows/sdk-endpoints-online-kubernetes-online-endpoints-safe-rollout.yml
- notebooks/dev-requirements.txt
- .github/kubernetes-compute/tool.sh
jobs:
build:
runs-on: ubuntu-latest
env:
KEY_VAULT_NAME: amlarcgithubworkflowkv
SEVERITY: 3
OWNERS: amlarc@microsoft.com
# Human-readable alert title; presumably read by tool.sh file_icm in the "file IcM when fails" step — confirm.
TITLE: "[Github Workflow] Failed to run kubernetes-online-endpoints-safe-rollout notebook"
GITHUB_REPO: https://github.com/Azure/azureml-examples
WORKFLOW_URL: https://github.com/Azure/azureml-examples/actions/workflows/sdk-endpoints-online-kubernetes-online-endpoints-safe-rollout.yml
TSG_ID: https://microsoft.sharepoint.com/teams/Vienna/_layouts/OneNote.aspx?id=%2Fteams%2FVienna%2FSiteAssets%2FVienna%20Notebook&wd=target%28AMLArcTSG.one%7C69E8BDB1-5734-4B07-967A-5A50A91CF040%2FTroubleshooting%20github%20workflow%7C53A1A232-F9F0-4192-B7D2-0474848EBB18%2F%29
steps:
- name: check out repo
uses: actions/checkout@v2
@ -33,6 +42,8 @@ jobs:
run: bash setup.sh
working-directory: sdk
continue-on-error: true
# Refresh the package index first (runner images can carry a stale one) and
# pass -y so apt-get never waits for confirmation in this non-interactive job.
- name: package installs
run: sudo apt-get update -y && sudo apt-get install -y xmlstarlet
- name: setup CLI
run: bash setup.sh
working-directory: cli
@ -48,6 +59,20 @@ jobs:
papermill -k python kubernetes-online-endpoints-safe-rollout.ipynb kubernetes-online-endpoints-safe-rollout.output.ipynb
working-directory: sdk/endpoints/online/kubernetes
- name: file IcM when fails
if: ${{ failure() && github.event_name == 'schedule' }}
run: |
# download certificates
export ICM_HOST_NAME=ICM-HOST-PROD
export ICM_CONNECTOR_ID_NAME=ICM-CONNECTOR-ID-PROD
export ICM_ROUTING_ID_NAME=ICM-ROUTING-ID-PROD
set -e; bash -x .github/kubernetes-compute/tool.sh download_icm_cert
export ICM_HOST=$(cat icm_host)
export CONNECTOR_ID=$(cat icm_connector_id)
export ROUTING_ID=$(cat icm_routing_id)
export SUMMARY=$(set -e; bash -x .github/kubernetes-compute/tool.sh gen_summary_for_github_test)
set -e; bash -x .github/kubernetes-compute/tool.sh file_icm
timeout-minutes: 30
- name: upload notebook's working folder as an artifact
if: ${{ always() }}
uses: actions/upload-artifact@v2

Просмотреть файл

@ -11,9 +11,18 @@ on:
- sdk/endpoints/online/**
- .github/workflows/sdk-endpoints-online-kubernetes-online-endpoints-simple-deployment.yml
- notebooks/dev-requirements.txt
- .github/kubernetes-compute/tool.sh
jobs:
build:
runs-on: ubuntu-latest
env:
KEY_VAULT_NAME: amlarcgithubworkflowkv
SEVERITY: 3
OWNERS: amlarc@microsoft.com
# Human-readable alert title; presumably read by tool.sh file_icm in the "file IcM when fails" step — confirm.
TITLE: "[Github Workflow] Failed to run kubernetes-online-endpoints-simple-deployment notebook"
GITHUB_REPO: https://github.com/Azure/azureml-examples
WORKFLOW_URL: https://github.com/Azure/azureml-examples/actions/workflows/sdk-endpoints-online-kubernetes-online-endpoints-simple-deployment.yml
TSG_ID: https://microsoft.sharepoint.com/teams/Vienna/_layouts/OneNote.aspx?id=%2Fteams%2FVienna%2FSiteAssets%2FVienna%20Notebook&wd=target%28AMLArcTSG.one%7C69E8BDB1-5734-4B07-967A-5A50A91CF040%2FTroubleshooting%20github%20workflow%7C53A1A232-F9F0-4192-B7D2-0474848EBB18%2F%29
steps:
- name: check out repo
uses: actions/checkout@v2
@ -33,6 +42,8 @@ jobs:
run: bash setup.sh
working-directory: sdk
continue-on-error: true
# Refresh the package index first (runner images can carry a stale one) and
# pass -y so apt-get never waits for confirmation in this non-interactive job.
- name: package installs
run: sudo apt-get update -y && sudo apt-get install -y xmlstarlet
- name: setup CLI
run: bash setup.sh
working-directory: cli
@ -48,6 +59,20 @@ jobs:
papermill -k python kubernetes-online-endpoints-simple-deployment.ipynb kubernetes-online-endpoints-simple-deployment.output.ipynb
working-directory: sdk/endpoints/online/kubernetes
- name: file IcM when fails
if: ${{ failure() && github.event_name == 'schedule' }}
run: |
# download certificates
export ICM_HOST_NAME=ICM-HOST-PROD
export ICM_CONNECTOR_ID_NAME=ICM-CONNECTOR-ID-PROD
export ICM_ROUTING_ID_NAME=ICM-ROUTING-ID-PROD
set -e; bash -x .github/kubernetes-compute/tool.sh download_icm_cert
export ICM_HOST=$(cat icm_host)
export CONNECTOR_ID=$(cat icm_connector_id)
export ROUTING_ID=$(cat icm_routing_id)
export SUMMARY=$(set -e; bash -x .github/kubernetes-compute/tool.sh gen_summary_for_github_test)
set -e; bash -x .github/kubernetes-compute/tool.sh file_icm
timeout-minutes: 30
- name: upload notebook's working folder as an artifact
if: ${{ always() }}
uses: actions/upload-artifact@v2

Просмотреть файл

@ -8,11 +8,11 @@ export ENDPOINT_NAME="<YOUR_ENDPOINT_NAME>"
export ENDPOINT_NAME=endpt-k8s-`echo $RANDOM`
# <create_endpoint>
az ml online-endpoint create --name $ENDPOINT_NAME -f endpoints/online/amlarc/endpoint.yml
az ml online-endpoint create --name $ENDPOINT_NAME -f endpoints/online/kubernetes/kubernetes-endpoint.yml
# </create_endpoint>
# <create_blue>
az ml online-deployment create --name blue --endpoint $ENDPOINT_NAME -f endpoints/online/amlarc/blue-deployment.yml --all-traffic
az ml online-deployment create --name blue --endpoint $ENDPOINT_NAME -f endpoints/online/kubernetes/kubernetes-blue-deployment.yml --all-traffic
# </create_blue>
# <test_blue>
@ -25,7 +25,7 @@ az ml online-deployment update --name blue --endpoint $ENDPOINT_NAME --set insta
# </scale_blue>
# <create_green>
az ml online-deployment create --name green --endpoint $ENDPOINT_NAME -f endpoints/online/amlarc/green-deployment.yml
az ml online-deployment create --name green --endpoint $ENDPOINT_NAME -f endpoints/online/kubernetes/kubernetes-green-deployment.yml
# </create_green>
# <get_traffic>