Automation test for spark CLI samples (#2377)
* Enable test for submit_spark_standalone_jobs
* Generate workflow yaml
* Update spark job files for automation test
* Add workflow for serverless spark job with user identity
* Add scripts to upload input data
* Update workflow to reference the upload script
* Update source file path
* Update workflow with correct file path
* Update working directory
* Update workflow
* Update the path
* Update the script to upload data
* Update the overwrite mode
* Update destination blob name
* Use blob upload batch
* Add spark pipeline tests
* Update spark component extension
* Add script to attach UAI
* Update property name in workflow
* Update script parameters
* Update assign-UAI script
* Format the script
* Update setup identities script
* Update path to infra bootstrapping
* Enable automation test for attached spark job
* Update resource path
* Update setup attached resources script
* Update script of setup resources
* Update setup attached resources script (2)
* Add logic to assign identity role
* Format the empty check
* Check if identity is empty
* Update to get compute properties
* Update readme
* Reformat the script
* Update schema location and revert SDK notebook changes
* Attach pool first
* Rename resources and merge main
* Update format in yml
* Add role assignment to UAI
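All of the workflow files added below are autogenerated, as their headers note. A minimal sketch of regenerating them after editing a sample (assuming, as the glob patterns in readme.py suggest, that the script is run from the repo's cli directory):

    # Hypothetical invocation: regenerate the sample workflows, including the new spark ones
    cd cli
    python3 readme.py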
This commit is contained in:
Parent: 2cee822a7a
Commit: 0b829b9277
.github/workflows/cli-jobs-spark-attached-spark-pipeline-default-identity.yml (new file, 61 lines)
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-pipeline-default-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "30 9/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-default-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-pipeline-default-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-attached-spark-pipeline-managed-identity.yml (new file, 66 lines)
@@ -0,0 +1,66 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-pipeline-managed-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "43 7/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-managed-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup identities
        run: |
          bash -x setup-identities.sh
        working-directory: cli/jobs/spark
        continue-on-error: true
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-pipeline-managed-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-attached-spark-pipeline-user-identity.yml (new file, 61 lines)
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-pipeline-user-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "15 4/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-user-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-pipeline-user-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-attached-spark-standalone-default-identity.yml (new file, 61 lines)
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-standalone-default-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "15 0/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-standalone-default-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-standalone-default-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-attached-spark-standalone-managed-identity.yml (new file, 66 lines)
@@ -0,0 +1,66 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-standalone-managed-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "16 1/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-standalone-managed-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup identities
        run: |
          bash -x setup-identities.sh
        working-directory: cli/jobs/spark
        continue-on-error: true
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-standalone-managed-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-attached-spark-standalone-user-identity.yml (new file, 61 lines)
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-standalone-user-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "7 1/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-standalone-user-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-standalone-user-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-serverless-spark-pipeline-default-identity.yml (new file, 55 lines)
@@ -0,0 +1,55 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-serverless-spark-pipeline-default-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "33 10/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-serverless-spark-pipeline-default-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh serverless-spark-pipeline-default-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-serverless-spark-pipeline-managed-identity.yml (new file, 61 lines)
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-serverless-spark-pipeline-managed-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "57 5/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-serverless-spark-pipeline-managed-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup identities
        run: |
          bash -x setup-identities.sh
        working-directory: cli/jobs/spark
        continue-on-error: true
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh serverless-spark-pipeline-managed-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-serverless-spark-pipeline-user-identity.yml (new file, 55 lines)
@@ -0,0 +1,55 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-serverless-spark-pipeline-user-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "56 7/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-serverless-spark-pipeline-user-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh serverless-spark-pipeline-user-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-serverless-spark-standalone-default-identity.yml (new file, 55 lines)
@@ -0,0 +1,55 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-serverless-spark-standalone-default-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "19 11/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-serverless-spark-standalone-default-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh serverless-spark-standalone-default-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-serverless-spark-standalone-managed-identity.yml (new file, 61 lines)
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-serverless-spark-standalone-managed-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "46 0/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-serverless-spark-standalone-managed-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      - name: setup identities
        run: |
          bash -x setup-identities.sh
        working-directory: cli/jobs/spark
        continue-on-error: true
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh serverless-spark-standalone-managed-identity.yml
        working-directory: cli/jobs/spark
.github/workflows/cli-jobs-spark-serverless-spark-standalone-user-identity.yml (new file, 55 lines)
@@ -0,0 +1,55 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-serverless-spark-standalone-user-identity
on:
  workflow_dispatch:
  schedule:
    - cron: "27 1/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-serverless-spark-standalone-user-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh serverless-spark-standalone-user-identity.yml
        working-directory: cli/jobs/spark
cli/jobs/spark/attached-spark-pipeline-default-identity.yml (modified)
@@ -1,5 +1,5 @@
 # attached-spark-pipeline-default-identity.yaml
-$schema: http://azureml/sdk-2-0/PipelineJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
 type: pipeline
 display_name: Titanic-Spark-CLI-Pipeline-3
 description: Spark component for Titanic data in Pipeline
@@ -7,7 +7,7 @@ description: Spark component for Titanic data in Pipeline
 jobs:
   spark_job:
     type: spark
-    component: ./spark-job-component.yaml
+    component: ./spark-job-component.yml
     inputs:
       titanic_data:
         type: uri_file
cli/jobs/spark/attached-spark-pipeline-managed-identity.yml (modified)
@@ -1,5 +1,5 @@
 # attached-spark-pipeline-managed-identity.yaml
-$schema: http://azureml/sdk-2-0/PipelineJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
 type: pipeline
 display_name: Titanic-Spark-CLI-Pipeline-1
 description: Spark component for Titanic data in Pipeline
@@ -7,7 +7,7 @@ description: Spark component for Titanic data in Pipeline
 jobs:
   spark_job:
     type: spark
-    component: ./spark-job-component.yaml
+    component: ./spark-job-component.yml
     inputs:
       titanic_data:
         type: uri_file
cli/jobs/spark/attached-spark-pipeline-user-identity.yml (modified)
@@ -1,5 +1,5 @@
 # attached-spark-pipeline-user-identity.yaml
-$schema: http://azureml/sdk-2-0/PipelineJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
 type: pipeline
 display_name: Titanic-Spark-CLI-Pipeline-2
 description: Spark component for Titanic data in Pipeline
@@ -7,7 +7,7 @@ description: Spark component for Titanic data in Pipeline
 jobs:
   spark_job:
     type: spark
-    component: ./spark-job-component.yaml
+    component: ./spark-job-component.yml
     inputs:
       titanic_data:
         type: uri_file
cli/jobs/spark/attached-spark-standalone-default-identity.yml (modified)
@@ -1,5 +1,5 @@
 # attached-spark-standalone-default-identity.yaml
-$schema: http://azureml/sdk-2-0/SparkJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/sparkJob.schema.json
 type: spark
 
 code: ./src
@@ -29,4 +29,4 @@ args: >-
   --titanic_data ${{inputs.titanic_data}}
   --wrangled_data ${{outputs.wrangled_data}}
 
-compute: yuachengcompute
+compute: mysparkcompute
cli/jobs/spark/attached-spark-standalone-managed-identity.yml (modified)
@@ -1,5 +1,5 @@
 # attached-spark-standalone-managed-identity.yaml
-$schema: http://azureml/sdk-2-0/SparkJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/sparkJob.schema.json
 type: spark
 
 code: ./src
cli/jobs/spark/attached-spark-standalone-user-identity.yml (modified)
@@ -1,5 +1,5 @@
 # attached-spark-standalone-user-identity.yaml
-$schema: http://azureml/sdk-2-0/SparkJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/sparkJob.schema.json
 type: spark
 
 code: ./src
cli/jobs/spark/serverless-spark-pipeline-default-identity.yml (modified)
@@ -1,5 +1,5 @@
 # serverless-spark-pipeline-default-identity.yaml
-$schema: http://azureml/sdk-2-0/PipelineJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
 type: pipeline
 display_name: Titanic-Spark-CLI-Pipeline-6
 description: Spark component for Titanic data in Pipeline
@@ -7,7 +7,7 @@ description: Spark component for Titanic data in Pipeline
 jobs:
   spark_job:
     type: spark
-    component: ./spark-job-component.yaml
+    component: ./spark-job-component.yml
     inputs:
       titanic_data:
         type: uri_file
cli/jobs/spark/serverless-spark-pipeline-managed-identity.yml (modified)
@@ -1,5 +1,5 @@
 # serverless-spark-pipeline-managed-identity.yaml
-$schema: http://azureml/sdk-2-0/PipelineJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
 type: pipeline
 display_name: Titanic-Spark-CLI-Pipeline-4
 description: Spark component for Titanic data in Pipeline
@@ -7,7 +7,7 @@ description: Spark component for Titanic data in Pipeline
 jobs:
   spark_job:
     type: spark
-    component: ./spark-job-component.yaml
+    component: ./spark-job-component.yml
     inputs:
       titanic_data:
         type: uri_file
cli/jobs/spark/serverless-spark-pipeline-user-identity.yml (modified)
@@ -1,5 +1,5 @@
 # serverless-spark-pipeline-user-identity.yaml
-$schema: http://azureml/sdk-2-0/PipelineJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
 type: pipeline
 display_name: Titanic-Spark-CLI-Pipeline-5
 description: Spark component for Titanic data in Pipeline
@@ -7,7 +7,7 @@ description: Spark component for Titanic data in Pipeline
 jobs:
   spark_job:
     type: spark
-    component: ./spark-job-component.yaml
+    component: ./spark-job-component.yml
     inputs:
       titanic_data:
         type: uri_file
cli/jobs/spark/serverless-spark-standalone-default-identity.yml (modified)
@@ -1,5 +1,5 @@
 # serverless-spark-standalone-default-identity.yaml
-$schema: http://azureml/sdk-2-0/SparkJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/sparkJob.schema.json
 type: spark
 
 code: ./src
cli/jobs/spark/serverless-spark-standalone-managed-identity.yml (modified)
@@ -1,5 +1,5 @@
 # serverless-spark-standalone-managed-identity.yaml
-$schema: http://azureml/sdk-2-0/SparkJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/sparkJob.schema.json
 type: spark
 
 code: ./src
cli/jobs/spark/serverless-spark-standalone-user-identity.yml (modified)
@@ -1,5 +1,5 @@
 # serverless-spark-standalone-user-identity.yaml
-$schema: http://azureml/sdk-2-0/SparkJob.json
+$schema: https://azuremlschemas.azureedge.net/latest/sparkJob.schema.json
 type: spark
 
 code: ./src
cli/jobs/spark/setup-attached-resources.sh (new file, 51 lines)
@@ -0,0 +1,51 @@
# <create_variables>
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
LOCATION=$(az ml workspace show --query location -o tsv)
RESOURCE_GROUP=$(az group show --query name -o tsv)
AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
API_VERSION="2022-05-01"
TOKEN=$(az account get-access-token --query accessToken -o tsv)

GEN2_STORAGE_NAME=${RESOURCE_GROUP}gen2
GEN2_FILE_SYSTEM=${RESOURCE_GROUP}file
SYNAPSE_WORKSPACE_NAME=${AML_WORKSPACE_NAME}-syws
SQL_ADMIN_LOGIN_USER="automation"
SQL_ADMIN_LOGIN_PASSWORD="auto123!"
SPARK_POOL_NAME="automationpool"
SPARK_POOL_ADMIN_ROLE_ID="6e4bf58a-b8e1-4cc3-bbf9-d73143322b78"
ATTACHED_COMPUTE_NAME="mysparkcompute"
#</create_variables>

#<create_uai>
AML_USER_MANAGED_ID=${RESOURCE_GROUP}-uai
az identity create --name $AML_USER_MANAGED_ID --resource-group $RESOURCE_GROUP --location $LOCATION
AML_USER_MANAGED_ID_OID=$(az identity show --resource-group $RESOURCE_GROUP -n $AML_USER_MANAGED_ID --query principalId -o tsv)
#</create_uai>

#<create_attached_resources>
az storage account create --name $GEN2_STORAGE_NAME --resource-group $RESOURCE_GROUP --location $LOCATION --sku Standard_LRS --kind StorageV2 --enable-hierarchical-namespace true
az storage fs create -n $GEN2_FILE_SYSTEM --account-name $GEN2_STORAGE_NAME
az synapse workspace create --name $SYNAPSE_WORKSPACE_NAME --resource-group $RESOURCE_GROUP --storage-account $GEN2_STORAGE_NAME --file-system $GEN2_FILE_SYSTEM --sql-admin-login-user $SQL_ADMIN_LOGIN_USER --sql-admin-login-password $SQL_ADMIN_LOGIN_PASSWORD --location $LOCATION
az role assignment create --role "Storage Blob Data Owner" --assignee $AML_USER_MANAGED_ID_OID --scope /subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.Storage/storageAccounts/$GEN2_STORAGE_NAME/blobServices/default/containers/$GEN2_FILE_SYSTEM
az synapse spark pool create --name $SPARK_POOL_NAME --workspace-name $SYNAPSE_WORKSPACE_NAME --resource-group $RESOURCE_GROUP --spark-version 3.2 --node-count 3 --node-size Medium --min-node-count 3 --max-node-count 10 --enable-auto-scale true
az synapse workspace firewall-rule create --name allowAll --workspace-name $SYNAPSE_WORKSPACE_NAME --resource-group $RESOURCE_GROUP --start-ip-address 0.0.0.0 --end-ip-address 255.255.255.255

TEMP_COMPUTE_FILE="temp-compute-setup.yml"
cp $1 $TEMP_COMPUTE_FILE
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
    s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
    s/<SYNAPSE_WORKSPACE_NAME>/$SYNAPSE_WORKSPACE_NAME/g;
    s/<SPARK_POOL_NAME>/$SPARK_POOL_NAME/g;
    s/<AML_USER_MANAGED_ID>/$AML_USER_MANAGED_ID/g;" $TEMP_COMPUTE_FILE

az ml compute attach --file $TEMP_COMPUTE_FILE --subscription $SUBSCRIPTION_ID --resource-group $RESOURCE_GROUP --workspace-name $AML_WORKSPACE_NAME
az synapse role assignment create --workspace-name $SYNAPSE_WORKSPACE_NAME --role $SPARK_POOL_ADMIN_ROLE_ID --assignee $AML_USER_MANAGED_ID_OID

COMPUTE_MANAGED_IDENTITY=$(az ml compute show --name $ATTACHED_COMPUTE_NAME --resource-group $RESOURCE_GROUP --workspace-name $AML_WORKSPACE_NAME --query identity.principal_id --out tsv)

if [[ ! -z "$COMPUTE_MANAGED_IDENTITY" ]]
then
    az synapse role assignment create --workspace-name $SYNAPSE_WORKSPACE_NAME --role $SPARK_POOL_ADMIN_ROLE_ID --assignee $COMPUTE_MANAGED_IDENTITY
fi

#</create_attached_resources>
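The generated workflows invoke this script from the cli directory, passing the compute YAML to attach as its first argument:

    # e.g. attach a Synapse Spark pool using the default-identity template
    bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml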
cli/jobs/spark/setup-identities.sh (new file, 24 lines)
@@ -0,0 +1,24 @@
# <create_variables>
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
LOCATION=$(az ml workspace show --query location -o tsv)
RESOURCE_GROUP=$(az group show --query name -o tsv)
AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
API_VERSION="2022-05-01"
TOKEN=$(az account get-access-token --query accessToken -o tsv)

AML_USER_MANAGED_ID=${RESOURCE_GROUP}-uai
#</create_variables>

#<create_uai>
az identity create --name $AML_USER_MANAGED_ID --resource-group $RESOURCE_GROUP --location $LOCATION
#</create_uai>

TEMP_UAI_FILE="temp-user-assigned-identity.yml"
cp user-assigned-identity.yml $TEMP_UAI_FILE
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
    s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
    s/<AML_USER_MANAGED_ID>/$AML_USER_MANAGED_ID/g;" $TEMP_UAI_FILE

#<assign_uai_to_workspace>
az ml workspace update --subscription $SUBSCRIPTION_ID --resource-group $RESOURCE_GROUP --name $AML_WORKSPACE_NAME --file $TEMP_UAI_FILE
#</assign_uai_to_workspace>
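The managed-identity workflows run this script from the cli/jobs/spark directory, where the user-assigned-identity.yml template it copies lives:

    # Create the UAI and assign it to the workspace
    bash -x setup-identities.sh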
cli/jobs/spark/spark-job-component.yml (modified)
@@ -1,5 +1,5 @@
 # spark-job-component.yaml
-$schema: http://azureml/sdk-2-0/SparkComponent.json
+$schema: https://azuremlschemas.azureedge.net/latest/sparkComponent.schema.json
 name: titanic_spark_component
 type: spark
 version: 1
cli/jobs/spark/user-assigned-identity.yml (old version, deleted)
@@ -1,7 +0,0 @@
# user-assigned-identity.yaml
identity:
  type: "system_assigned,user_assigned"
  user_assigned_identities:
    - resource_id: /subscriptions/<SUBSCRIPTION_ID/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.ManagedIdentity/userAssignedIdentities/<AML_USER_MANAGED_ID>
      tenant_id: 00x000xx-00x0-00xx-00xx-0x0xx000xx00

cli/jobs/spark/user-assigned-identity.yml (new version)
@@ -0,0 +1,6 @@
# user-assigned-identity.yaml
identity:
  type: "system_assigned,user_assigned"
  user_assigned_identities:
    "/subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.ManagedIdentity/userAssignedIdentities/<AML_USER_MANAGED_ID>" : {}
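setup-identities.sh above consumes this template: it copies it, sed-substitutes the placeholders, and applies the result. Assuming the placeholders have already been filled in, the final step amounts to roughly:

    # Attach the user-assigned identity to the workspace (sketch; variables as in setup-identities.sh)
    az ml workspace update --resource-group $RESOURCE_GROUP --name $AML_WORKSPACE_NAME --file temp-user-assigned-identity.yml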
cli/readme.py (modified)
@@ -9,7 +9,7 @@ import string
 import yaml
 
 # define constants
-EXCLUDED_JOBS = ["java", "spark"]
+EXCLUDED_JOBS = ["java", "spark-job-component", "storage_pe", "user-assigned-identity"]
 # TODO: Re-include these below endpoints and deployments when the workflow generation code supports substituting vars in .yaml files.
 EXCLUDED_ENDPOINTS = [
     "1-uai-create-endpoint",
@@ -77,6 +77,7 @@ def main(args):
     jobs += sorted(glob.glob("jobs/basics/*.yml", recursive=False))
     jobs += sorted(glob.glob("jobs/*/basics/**/*job*.yml", recursive=True))
     jobs += sorted(glob.glob("jobs/pipelines/**/*pipeline*.yml", recursive=True))
+    jobs += sorted(glob.glob("jobs/spark/*.yml", recursive=False))
     jobs += sorted(
         glob.glob("jobs/automl-standalone-jobs/**/cli-automl-*.yml", recursive=True)
     )
@@ -420,6 +421,7 @@ def write_job_workflow(job):
     filename, project_dir, hyphenated = parse_path(job)
     posix_project_dir = project_dir.replace(os.sep, "/")
     is_pipeline_sample = "jobs/pipelines" in job
+    is_spark_sample = "jobs/spark" in job
     creds = CREDENTIALS
     schedule_hour, schedule_minute = get_schedule_time(filename)
     # Duplicate name in working directory during checkout
@@ -439,6 +441,8 @@ on:
       - .github/workflows/cli-{hyphenated}.yml\n"""
     if is_pipeline_sample:
         workflow_yaml += "      - cli/run-pipeline-jobs.sh\n" ""
+    if is_spark_sample:
+        workflow_yaml += "      - cli/jobs/spark/data/titanic.csv\n" ""
     workflow_yaml += f"""      - cli/setup.sh
 concurrency:
   group: {GITHUB_CONCURRENCY_GROUP}
@@ -465,8 +469,10 @@ jobs:
           source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
           bash setup.sh
         working-directory: cli
-        continue-on-error: true
-      - name: run job
+        continue-on-error: true\n"""
+    if is_spark_sample:
+        workflow_yaml += get_spark_setup_workflow(job)
+    workflow_yaml += f"""      - name: run job
         run: |
           source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
           source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";\n"""
@@ -701,7 +707,7 @@ jobs:
           creds: {creds}
       - name: bootstrap resources
         run: |
-          bash bootstrap.sh
+          bash bootstrapping/bootstrap.sh
         working-directory: infra
         continue-on-error: false
       - name: setup-cli
@@ -856,6 +862,42 @@ def get_endpoint_name(filename, hyphenated):
     return endpoint_name
 
 
+def get_spark_setup_workflow(job):
+    is_attached = "attached-spark" in job
+    is_user_identity = "user-identity" in job
+    is_managed_identity = "managed-identity" in job
+    is_default_identity = "default-identity" in job
+    workflow = f"""      - name: upload data
+        run: |
+          bash -x upload-data-to-blob.sh jobs/spark/
+        working-directory: cli
+        continue-on-error: true\n"""
+    if is_managed_identity:
+        workflow += f"""      - name: setup identities
+        run: |
+          bash -x setup-identities.sh
+        working-directory: cli/jobs/spark
+        continue-on-error: true\n"""
+    if is_attached:
+        workflow += f"""      - name: setup attached spark
+        working-directory: cli
+        continue-on-error: true"""
+    if is_attached and is_user_identity:
+        workflow += f"""
+        run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml\n"""
+    if is_attached and is_managed_identity:
+        workflow += f"""
+        run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml\n"""
+    if is_attached and is_default_identity:
+        workflow += f"""
+        run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml\n"""
+
+    return workflow
+
+
 # run functions
 if __name__ == "__main__":
     # setup argparse
cli/resources/compute/attached-spark-system-identity.yml (old version, deleted)
@@ -1,9 +0,0 @@
# attached-spark-system-identity.yaml
name: my-spark-pool

type: synapsespark

resource_id: /subscriptions/<SUBSCRIPTION_ID/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Synapse/workspaces/<SYNAPSE_WORKSPACE_NAME>/bigDataPools/<SPARK_POOL_NAME>

identity:
  type: system_assigned

cli/resources/compute/attached-spark-system-identity.yml (new version)
@@ -0,0 +1,9 @@
# attached-spark-system-identity.yaml
name: mysparkcompute

type: synapsespark

resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Synapse/workspaces/<SYNAPSE_WORKSPACE_NAME>/bigDataPools/<SPARK_POOL_NAME>

identity:
  type: system_assigned
cli/resources/compute/attached-spark-user-identity.yml (old version, deleted)
@@ -1,11 +0,0 @@
# attached-spark-user-identity.yaml
name: my-spark-pool

type: synapsespark

resource_id: /subscriptions/<SUBSCRIPTION_ID/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Synapse/workspaces/<SYNAPSE_WORKSPACE_NAME>/bigDataPools/<SPARK_POOL_NAME>

identity:
  type: user_assigned
  user_assigned_identities:
    - resource_id: /subscriptions/<SUBSCRIPTION_ID/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.ManagedIdentity/userAssignedIdentities/<AML_USER_MANAGED_ID>

cli/resources/compute/attached-spark-user-identity.yml (new version)
@@ -0,0 +1,11 @@
# attached-spark-user-identity.yml
name: mysparkcompute

type: synapsespark

resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Synapse/workspaces/<SYNAPSE_WORKSPACE_NAME>/bigDataPools/<SPARK_POOL_NAME>

identity:
  type: user_assigned
  user_assigned_identities:
    - resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.ManagedIdentity/userAssignedIdentities/<AML_USER_MANAGED_ID>
cli/resources/compute/attached-spark.yml (old version, deleted)
@@ -1,6 +0,0 @@
# attached-spark.yaml
name: my-spark-pool

type: synapsespark

resource_id: /subscriptions/<SUBSCRIPTION_ID/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Synapse/workspaces/<SYNAPSE_WORKSPACE_NAME>/bigDataPools/<SPARK_POOL_NAME>

cli/resources/compute/attached-spark.yml (new version)
@@ -0,0 +1,6 @@
# attached-spark.yaml
name: mysparkcompute

type: synapsespark

resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Synapse/workspaces/<SYNAPSE_WORKSPACE_NAME>/bigDataPools/<SPARK_POOL_NAME>
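These compute templates are filled in by setup-attached-resources.sh before use; the equivalent manual step, assuming the placeholders are already substituted, would be roughly:

    # Attach the Synapse Spark pool as an AzureML compute named mysparkcompute (sketch)
    az ml compute attach --file resources/compute/attached-spark.yml --resource-group $RESOURCE_GROUP --workspace-name $AML_WORKSPACE_NAME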
cli/upload-data-to-blob.sh (new file, 19 lines)
@@ -0,0 +1,19 @@
# <create_variables>
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
LOCATION=$(az ml workspace show --query location -o tsv)
RESOURCE_GROUP=$(az group show --query name -o tsv)
WORKSPACE=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
API_VERSION="2022-05-01"
TOKEN=$(az account get-access-token --query accessToken -o tsv)
#</create_variables>

# <get_storage_details>
response=$(curl --location --request GET "https://management.azure.com/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.MachineLearningServices/workspaces/$WORKSPACE/datastores?api-version=$API_VERSION&isDefault=true" \
--header "Authorization: Bearer $TOKEN")
AZUREML_DEFAULT_CONTAINER=$(echo $response | jq -r '.value[0].properties.containerName')
export AZURE_STORAGE_ACCOUNT=$(echo $response | jq -r '.value[0].properties.accountName')
# </get_storage_details>

# <upload_data>
az storage blob upload-batch -s $1 --pattern *.csv -d $AZUREML_DEFAULT_CONTAINER --account-name $AZURE_STORAGE_ACCOUNT --overwrite true
# </upload_data>
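The workflows call this script from the cli directory with the source folder as its only argument; it looks up the workspace's default datastore and bulk-uploads any CSV files it finds:

    # Upload cli/jobs/spark/data/titanic.csv to the workspace's default blob container
    bash -x upload-data-to-blob.sh jobs/spark/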