# SynapseML/pipeline.yaml

# 756 lines
# 23 KiB
# YAML

# Repository wiring and build triggers for this pipeline.
resources:
- repo: self

# CI trigger: builds for the master branch, with the listed paths excluded.
trigger:
  branches:
    include: [master]
  paths:
    exclude:
    - README.md
    - CONTRIBUTORS.md
    - SECURITY.md
    - 'docs/*'
    - CODEOWNERS
    - .github

# PR trigger: pull requests targeting master, with the listed paths excluded.
# NOTE(review): unlike the CI trigger, this list does not exclude
# CONTRIBUTORS.md or SECURITY.md - confirm whether that is intentional.
pr:
  branches:
    include: [master]
  paths:
    exclude:
    - README.md
    - 'docs/*'
    - CODEOWNERS
    - .github

# Scheduled trigger: cron "0 0 * * *" (once a day at 00:00) for master.
schedules:
- cron: '0 0 * * *'
  displayName: Daily midnight build
  branches:
    include: [master]
# Parameters that can be chosen when a run is queued.
parameters:
# Boolean switch, on by default. In the visible part of this file it is only
# referenced by a commented-out matrix entry in the E2E job.
- name: runSynapseExtensionE2ETests
  type: boolean
  default: true
  displayName: Run Synapse Extension E2E Tests
# Environment name for the Synapse Extension E2E tests; restricted to the
# values listed below.
- name: SYNAPSE_ENVIRONMENT
  type: string
  default: weekly
  values: [dev, daily, weekly]
  displayName: Synapse Extension E2E Test Environment
# Pipeline-wide variables shared by every job below.
variables:
  # Job and step conditions compare this value against the string 'True'.
  runTests: 'True'
  # Directory that the Release job chowns and caches for the conda environment.
  CONDA_CACHE_DIR: /usr/share/miniconda/envs
  ComponentDetection.Timeout: 900
jobs:
# Style: lint-only job. Runs scalastyle through sbt, then checks Python
# formatting with black. Skipped unless runTests is 'True'.
- job: Style
  condition: eq(variables.runTests, 'True')
  cancelTimeoutInMinutes: 0
  pool:
    vmImage: ubuntu-20.04
  steps:
  - task: AzureCLI@2
    displayName: Scala Style Check
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: sbt scalastyle test:scalastyle
  # The conda template runs before the script that activates the
  # "synapseml" environment.
  - template: templates/conda.yml
  # Prints the diff black would apply, then runs the quiet --check pass.
  - bash: |
      set -e
      source activate synapseml
      black --diff --color . && black --check -q .
    displayName: Python Style Check
# Publish: packages the Python bindings and runs the sbt publish tasks
# (blob, docs, R, Python, signed artifacts), then uploads target/Build.md as
# the run summary. Badges are published only for builds of refs/heads/master.
- job: Publish
  cancelTimeoutInMinutes: 0
  pool:
    vmImage: ubuntu-20.04
  steps:
  #- template: templates/ivy_cache.yml
  - template: templates/update_cli.yml
  - template: templates/conda.yml
  - template: templates/kv.yml
  - bash: |
      set -e
      sudo apt-get install graphviz doxygen -y
      source activate synapseml
      sbt packagePython
      sbt publishBlob publishDocs publishR publishPython
      sbt publishSigned
      sbt genBuildInfo
      echo "##vso[task.uploadsummary]$(pwd)/target/Build.md"
    displayName: Publish Artifacts
    # Pipeline variables handed to the script as environment variables.
    env:
      SYNAPSEML_ENABLE_PUBLISH: true
      STORAGE-KEY: $(storage-key)
      NEXUS-UN: $(nexus-un)
      NEXUS-PW: $(nexus-pw)
      PGP-PRIVATE: $(pgp-private)
      PGP-PUBLIC: $(pgp-public)
      PGP-PW: $(pgp-pw)
  - bash: |
      set -e
      sbt publishBadges
    displayName: Publish Badges
    # Only when the earlier steps succeeded and the source branch is master.
    condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master'))
    env:
      SYNAPSEML_ENABLE_PUBLISH: true
      STORAGE-KEY: $(storage-key)
      NEXUS-UN: $(nexus-un)
      NEXUS-PW: $(nexus-pw)
      PGP-PRIVATE: $(pgp-private)
      PGP-PUBLIC: $(pgp-public)
      PGP-PW: $(pgp-pw)
# E2E: notebook end-to-end tests. Each matrix entry supplies a TEST-CLASS
# that is passed to "sbt testOnly" after the blob artifacts are published.
- job: E2E
  timeoutInMinutes: 120
  cancelTimeoutInMinutes: 0
  pool:
    vmImage: ubuntu-20.04
  strategy:
    matrix:
      databricks-cpu:
        TEST-CLASS: 'com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests'
      databricks-gpu:
        TEST-CLASS: 'com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests'
      synapse:
        TEST-CLASS: 'com.microsoft.azure.synapse.ml.nbtest.SynapseTests'
      # ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}:
      #   synapse-internal:
      #     TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests"
  steps:
  #- template: templates/ivy_cache.yml
  - template: templates/update_cli.yml
  - template: templates/conda.yml
  - template: templates/kv.yml
  - bash: |
      set -e
      source activate synapseml
      sbt packagePython
      sbt publishBlob
    displayName: Publish Blob Artifacts
    env:
      SYNAPSEML_ENABLE_PUBLISH: true
      STORAGE-KEY: $(storage-key)
      NEXUS-UN: $(nexus-un)
      NEXUS-PW: $(nexus-pw)
      PGP-PRIVATE: $(pgp-private)
      PGP-PUBLIC: $(pgp-public)
      PGP-PW: $(pgp-pw)
  # Runs only when the previous steps succeeded and runTests is 'True'.
  - task: AzureCLI@2
    displayName: E2E
    condition: and(succeeded(), eq(variables.runTests, 'True'))
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        set -e
        source activate synapseml
        sbt "testOnly $(TEST-CLASS)"
  # Reports are published even after a failed test step (succeededOrFailed),
  # as long as runTests is 'True'.
  - task: PublishTestResults@2
    displayName: Publish Test Results
    condition: and(eq(variables.runTests, 'True'), succeededOrFailed())
    inputs:
      testResultsFiles: '**/test-reports/TEST-*.xml'
      failTaskOnFailedTests: true
# PublishDocker: builds and pushes the demo and minimal images tagged with
# the sbt-reported version. The release image is built and pushed only when
# the git tag found on HEAD starts with 'v'.
- job: PublishDocker
  displayName: PublishDocker
  pool:
    vmImage: ubuntu-20.04
  steps:
  # Stores two pipeline variables for the Docker steps below:
  #   version - last line of `sbt core/version`, second space-separated
  #             field, with ANSI colour escape sequences removed by sed
  #   gittag  - output of `git tag -l --points-at HEAD`
  - task: AzureCLI@2
    displayName: Get Docker Tag + Version
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g')
        echo '##vso[task.setvariable variable=version]'$VERSION
        echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD)
  - task: Docker@2
    displayName: Demo Image Build
    inputs:
      command: build
      containerRegistry: SynapseML MCR
      repository: public/mmlspark/build-demo
      Dockerfile: tools/docker/demo/Dockerfile
      buildContext: '.'
      tags: $(version)
      arguments: --build-arg SYNAPSEML_VERSION=$(version)
  - task: Docker@2
    displayName: Demo Image Push
    inputs:
      command: push
      containerRegistry: SynapseML MCR
      repository: public/mmlspark/build-demo
      tags: $(version)
  - task: Docker@2
    displayName: Minimal Image Build
    inputs:
      command: build
      containerRegistry: SynapseML MCR
      repository: public/mmlspark/build-minimal
      Dockerfile: tools/docker/minimal/Dockerfile
      buildContext: '.'
      tags: $(version)
      arguments: --build-arg SYNAPSEML_VERSION=$(version)
  - task: Docker@2
    displayName: Minimal Image Push
    inputs:
      command: push
      containerRegistry: SynapseML MCR
      repository: public/mmlspark/build-minimal
      tags: $(version)
  # The release image uses the demo Dockerfile and is tagged with both the
  # version and "latest".
  - task: Docker@2
    displayName: Release Image Build
    condition: startsWith(variables['gittag'], 'v')
    inputs:
      command: build
      containerRegistry: SynapseML MCR
      repository: public/mmlspark/release
      Dockerfile: tools/docker/demo/Dockerfile
      buildContext: '.'
      tags: |
        $(version)
        latest
      arguments: --build-arg SYNAPSEML_VERSION=$(version)
  - task: Docker@2
    displayName: Release Image Push
    condition: startsWith(variables['gittag'], 'v')
    inputs:
      command: push
      containerRegistry: SynapseML MCR
      repository: public/mmlspark/release
      tags: |
        $(version)
        latest
  - task: ComponentGovernanceComponentDetection@0
# Release: every step after "Get Git Tag" is gated on the tag variable
# starting with 'v'. For such builds the job generates a changelog, creates
# a draft GitHub release, prepares the conda environment, and runs the sbt
# tasks publishPypi, publishSigned and sonatypeBundleRelease.
- job: Release
  cancelTimeoutInMinutes: 0
  pool:
    vmImage: ubuntu-20.04
  steps:
  - template: templates/update_cli.yml
  # Stores the output of `git tag -l --points-at HEAD` in the pipeline
  # variable "tag"; the changelog script below reads it as $TAG.
  - bash: |
      echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD)
    displayName: Get Git Tag
  # Downloads git-chglog 0.8.0 and writes CHANGELOG.md for the tag.
  - bash: |
      set -e
      wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64
      chmod +x git-chglog_linux_amd64
      ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG
    condition: startsWith(variables['tag'], 'v')
  - task: GitHubRelease@0
    condition: startsWith(variables['tag'], 'v')
    inputs:
      action: create
      gitHubConnection: MMLSpark Github
      repositoryName: $(Build.Repository.Name)
      target: $(Build.SourceVersion)
      tagSource: auto
      releaseNotesFile: CHANGELOG.md
      isDraft: true
  - bash: echo "##vso[task.prependpath]$CONDA/bin"
    displayName: Add conda to PATH
    condition: startsWith(variables['tag'], 'v')
  - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR)
    displayName: Fix directory permissions
    condition: startsWith(variables['tag'], 'v')
  # NOTE(review): the restoreKeys start with "python" while the key starts
  # with "conda" - confirm this mismatch is intended.
  - task: Cache@2
    displayName: Use cached Anaconda environment
    condition: startsWith(variables['tag'], 'v')
    inputs:
      path: $(CONDA_CACHE_DIR)
      cacheHitVar: CONDA_CACHE_RESTORED
      key: 'conda | "$(Agent.OS)" | environment.yml'
      restoreKeys: |
        python | "$(Agent.OS)"
        python
  # Creates the environment only when CONDA_CACHE_RESTORED equals 'false'.
  - bash: |
      conda env create -f environment.yml -v -v -v
    displayName: Create Anaconda environment
    condition: and(startsWith(variables['tag'], 'v'), eq(variables.CONDA_CACHE_RESTORED, 'false'))
  - task: AzureKeyVault@1
    condition: startsWith(variables['tag'], 'v')
    inputs:
      azureSubscription: SynapseML Build
      keyVaultName: mmlspark-keys
  - bash: |
      set -e
      source activate synapseml
      sbt publishPypi
    displayName: publish python package to pypi
    condition: startsWith(variables['tag'], 'v')
    env:
      SYNAPSEML_ENABLE_PUBLISH: true
      STORAGE-KEY: $(storage-key)
      NEXUS-UN: $(nexus-un)
      NEXUS-PW: $(nexus-pw)
      PGP-PRIVATE: $(pgp-private)
      PGP-PUBLIC: $(pgp-public)
      PGP-PW: $(pgp-pw)
      PYPI-API-TOKEN: $(pypi-api-token)
  - bash: |
      set -e
      source activate synapseml
      sbt publishSigned
      sbt sonatypeBundleRelease
    displayName: publish jar package to maven central
    condition: startsWith(variables['tag'], 'v')
    env:
      SYNAPSEML_ENABLE_PUBLISH: true
      STORAGE-KEY: $(storage-key)
      NEXUS-UN: $(nexus-un)
      NEXUS-PW: $(nexus-pw)
      PGP-PRIVATE: $(pgp-private)
      PGP-PUBLIC: $(pgp-public)
      PGP-PW: $(pgp-pw)
# PythonTests: runs `sbt testPython` for each package in the matrix, then
# publishes the test results and a coverage report. Skipped unless runTests
# is 'True'.
- job: PythonTests
  timeoutInMinutes: 120
  cancelTimeoutInMinutes: 0
  condition: eq(variables.runTests, 'True')
  pool:
    vmImage: ubuntu-22.04
  strategy:
    # PACKAGE is the sbt project name handed to `sbt "project ..."`.
    matrix:
      core:
        PACKAGE: "core"
      deep-learning:
        PACKAGE: "deepLearning"
      lightgbm:
        PACKAGE: "lightgbm"
      opencv:
        PACKAGE: "opencv"
      vw:
        PACKAGE: "vw"
      cognitive:
        PACKAGE: "cognitive"
  steps:
  #- template: templates/ivy_cache.yml
  - template: templates/update_cli.yml
  - template: templates/conda.yml
  - template: templates/kv.yml
  - task: AzureCLI@2
    displayName: 'Install and package deps'
    timeoutInMinutes: 40
    inputs:
      azureSubscription: 'SynapseML Build'
      scriptLocation: inlineScript
      scriptType: bash
      # set -e: stop at the first failing command so that a failed
      # dataset/pip install is not masked by a later successful publishM2.
      inlineScript: |
        set -e
        source activate synapseml
        sbt coverage getDatasets installPipPackage
        sbt publishM2
  - task: AzureCLI@2
    displayName: 'Test Python Code'
    timeoutInMinutes: 40
    inputs:
      azureSubscription: 'SynapseML Build'
      scriptLocation: inlineScript
      scriptType: bash
      # The test command is attempted up to three times; the step fails only
      # if all three attempts fail.
      inlineScript: |
        set -e
        source activate synapseml
        export SBT_OPTS="-XX:+UseG1GC"
        echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS"
        echo "SBT_OPTS=$SBT_OPTS"
        (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython)
  # Results and coverage are published even when the test step failed.
  - task: PublishTestResults@2
    displayName: 'Publish Test Results'
    inputs:
      testResultsFiles: '**/python-test-*.xml'
      failTaskOnFailedTests: true
    condition: succeededOrFailed()
  - task: AzureCLI@2
    displayName: 'Generate Codecov report'
    inputs:
      azureSubscription: 'SynapseML Build'
      scriptLocation: inlineScript
      scriptType: bash
      inlineScript: 'sbt coverageReport'
    condition: succeededOrFailed()
  - template: templates/codecov.yml
# DotnetTests: publishes the .NET test base and bindings, then runs
# `sbt testDotnet` for each package in the matrix. Skipped unless runTests
# is 'True'.
- job: DotnetTests
  condition: eq(variables.runTests, 'True')
  timeoutInMinutes: 120
  cancelTimeoutInMinutes: 0
  pool:
    vmImage: ubuntu-20.04
  strategy:
    matrix:
      core:
        PACKAGE: 'core'
      deep-learning:
        PACKAGE: 'deepLearning'
      lightgbm:
        PACKAGE: 'lightgbm'
      opencv:
        PACKAGE: 'opencv'
      vw:
        PACKAGE: 'vw'
      cognitive:
        PACKAGE: 'cognitive'
  steps:
  - task: ShellScript@2
    inputs:
      scriptPath: tools/dotnet/dotnetSetup.sh
  # NOTE(review): this condition reads variables['tag'], but no step in this
  # job sets "tag" (the Release job sets it for itself) - confirm whether
  # this step is meant to run here.
  - task: AzureKeyVault@1
    condition: startsWith(variables['tag'], 'v')
    inputs:
      azureSubscription: SynapseML Build
      keyVaultName: mmlspark-keys
  # `sbt setup` and `sbt testDotnet` are each attempted up to three times.
  - task: AzureCLI@1
    displayName: Test Dotnet Code
    timeoutInMinutes: 30
    env:
      SYNAPSEML_ENABLE_PUBLISH: true
    inputs:
      azureSubscription: SynapseML Build
      scriptLocation: inlineScript
      publishTestResults: true
      inlineScript: |
        set -e
        echo "SPARK_HOME=$SPARK_HOME"
        echo "DOTNET_WORKER_DIR=$DOTNET_WORKER_DIR"
        sbt coverage publishDotnetTestBase
        sbt publishLocal
        sbt "project $(PACKAGE)" coverage publishDotnet
        export SBT_OPTS="-XX:+UseG1GC"
        echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS"
        (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
        (sbt "project $(PACKAGE)" coverage testDotnet) || (sbt "project $(PACKAGE)" coverage testDotnet) || (sbt "project $(PACKAGE)" coverage testDotnet)
  # Results (.trx, VSTest runner) and coverage are published even when the
  # test step failed.
  - task: PublishTestResults@2
    displayName: Publish Test Results
    condition: succeededOrFailed()
    inputs:
      testRunner: VSTest
      testResultsFiles: '**/dotnet_test_results_*.trx'
      failTaskOnFailedTests: true
  - task: AzureCLI@1
    displayName: Generate Codecov report
    condition: succeededOrFailed()
    inputs:
      azureSubscription: SynapseML Build
      scriptLocation: inlineScript
      inlineScript: sbt coverageReport
  - template: templates/codecov.yml
# RTests: runs `sbt testR` for each package in the matrix, then publishes
# the test results and a coverage report. Skipped unless runTests is 'True'.
- job: RTests
  timeoutInMinutes: 60
  cancelTimeoutInMinutes: 0
  condition: eq(variables.runTests, 'True')
  pool:
    vmImage: ubuntu-20.04
  strategy:
    # PACKAGE is the sbt project name handed to `sbt "project ..."`.
    matrix:
      core:
        PACKAGE: "core"
      deep-learning:
        PACKAGE: "deepLearning"
      lightgbm:
        PACKAGE: "lightgbm"
      opencv:
        PACKAGE: "opencv"
      vw:
        PACKAGE: "vw"
      cognitive:
        PACKAGE: "cognitive"
  steps:
  #- template: templates/ivy_cache_2.yml
  - template: templates/update_cli.yml
  - template: templates/conda.yml
  - template: templates/kv.yml
  - task: AzureCLI@2
    displayName: 'Test R Code'
    timeoutInMinutes: 60
    inputs:
      azureSubscription: 'SynapseML Build'
      scriptLocation: inlineScript
      scriptType: bash
      # The package check uses a quoted string comparison (==). Inside [[ ]]
      # the -eq operator is arithmetic, and would treat the package names as
      # numbers rather than comparing them as text.
      # setup, rCodegen and testR are each attempted up to three times.
      inlineScript: |
        set -e
        export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT"
        source activate synapseml
        (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
        # tests in opencv and deep-learning source R wrappers from core, so core wrappers must exist
        if [[ "$(PACKAGE)" == "opencv" || "$(PACKAGE)" == "deepLearning" ]]; then
          (sbt "project core" rCodegen) || (echo "retrying" && sbt "project core" rCodegen) || (echo "retrying" && sbt "project core" rCodegen)
        fi
        sbt publishM2
        (timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR)
  # Results and coverage are published even when the test step failed.
  - task: PublishTestResults@2
    displayName: 'Publish Test Results'
    inputs:
      testResultsFiles: '**/r-test-*.xml'
      failTaskOnFailedTests: true
    condition: succeededOrFailed()
  - task: AzureCLI@2
    displayName: 'Generate Codecov report'
    inputs:
      azureSubscription: 'SynapseML Build'
      scriptLocation: inlineScript
      scriptType: bash
      inlineScript: 'sbt coverageReport'
    condition: succeededOrFailed()
  - template: templates/codecov.yml
# WebsiteSamplesTests: packages and publishes the Python artifacts, then
# runs `sbt coverage testWebsiteDocs`. Skipped unless runTests is 'True'.
- job: WebsiteSamplesTests
  condition: eq(variables.runTests, 'True')
  cancelTimeoutInMinutes: 0
  pool:
    vmImage: ubuntu-20.04
  steps:
  #- template: templates/ivy_cache.yml
  - template: templates/update_cli.yml
  - template: templates/conda.yml
  - template: templates/kv.yml
  # `sbt setup` is attempted up to three times, each capped at five minutes.
  - task: AzureCLI@2
    displayName: Test Website Samples
    timeoutInMinutes: 30
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        set -e
        source activate synapseml
        sbt packagePython
        sbt publishBlob
        (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
        (sbt coverage testWebsiteDocs)
  # Results and coverage are published even when the test step failed.
  - task: PublishTestResults@2
    displayName: Publish Test Results
    condition: succeededOrFailed()
    inputs:
      testResultsFiles: '**/website-test-result.xml'
      failTaskOnFailedTests: true
  - task: AzureCLI@2
    displayName: Generate Codecov report
    condition: succeededOrFailed()
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: sbt coverageReport
  - template: templates/codecov.yml
# WebsiteAutoDeployment: converts the notebooks with sbt, builds the site
# under website/ with yarn, and runs `yarn deploy` only for builds of
# refs/heads/master.
- job: WebsiteAutoDeployment
  cancelTimeoutInMinutes: 0
  pool:
    vmImage: ubuntu-20.04
  steps:
  - checkout: self
    persistCredentials: true
  - template: templates/update_cli.yml
  - template: templates/conda.yml
  - template: templates/kv.yml
  - task: NodeTool@0
    displayName: Install Node.js
    inputs:
      versionSpec: '16.x'
  - task: AzureCLI@2
    displayName: Convert notebooks to markdowns
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        source activate synapseml
        sbt convertNotebooks
  # Installs dependencies at the repository root, then installs and builds
  # inside website/.
  - bash: |
      set -e
      yarn install
      cd website
      yarn
      yarn build
    displayName: yarn install and build
  # Sets the git identity from GH_NAME / GH_EMAIL, creates a local "main"
  # branch, writes github.com credentials to ~/.netrc, then deploys from
  # website/.
  - bash: |
      set -e
      git config --global user.name "${GH_NAME}"
      git config --global user.email "${GH_EMAIL}"
      git checkout -b main
      echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc
      cd website
      GIT_USER="${GH_NAME}" yarn deploy
    displayName: yarn deploy
    condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master'))
    env:
      GH_NAME: $(gh-name)
      GH_EMAIL: $(gh-email)
      GH_TOKEN: $(gh-token)
# UnitTests: runs `sbt coverage testOnly` once per matrix entry over
# com.microsoft.azure.synapse.ml.<PACKAGE>.**. Skipped unless runTests is
# 'True'.
#
# Matrix variables read by the "Unit Test" script:
#   PACKAGE - package suffix inserted into the testOnly pattern
#   FLAKY   - when 'true', a failed run is retried up to two more times
#   FFMPEG  - when 'true', ffmpeg and gstreamer packages are installed first
#
# NOTE(review): the job timeout (80 min) is shorter than the "Unit Test"
# step timeout (90 min) - confirm which limit is intended to apply.
- job: UnitTests
  condition: eq(variables.runTests, 'True')
  timeoutInMinutes: 80
  cancelTimeoutInMinutes: 1
  pool:
    vmImage: ubuntu-20.04
  strategy:
    matrix:
      automl:
        PACKAGE: 'automl'
      causal:
        PACKAGE: 'causal'
      geospatial:
        PACKAGE: 'geospatial'
      onnx:
        PACKAGE: 'onnx'
      anomaly:
        PACKAGE: 'cognitive.anomaly'
        FLAKY: 'true'
      bing:
        PACKAGE: 'cognitive.bing'
        FLAKY: 'true'
      face:
        PACKAGE: 'cognitive.face'
        FLAKY: 'true'
      form:
        PACKAGE: 'cognitive.form'
        FLAKY: 'true'
      language:
        PACKAGE: 'cognitive.language'
        FLAKY: 'true'
      openai:
        PACKAGE: 'cognitive.openai'
        FLAKY: 'true'
      search:
        PACKAGE: 'cognitive.search'
        FFMPEG: 'true'
        FLAKY: 'true'
      speech:
        PACKAGE: 'cognitive.speech'
        FFMPEG: 'true'
        FLAKY: 'true'
      text:
        PACKAGE: 'cognitive.text'
        FLAKY: 'true'
      translate:
        PACKAGE: 'cognitive.translate'
        FLAKY: 'true'
      vision:
        PACKAGE: 'cognitive.vision'
        FLAKY: 'true'
      core:
        PACKAGE: 'core'
      explainers1:
        PACKAGE: 'explainers.split1'
      explainers2:
        PACKAGE: 'explainers.split2'
      explainers3:
        PACKAGE: 'explainers.split3'
      exploratory:
        PACKAGE: 'exploratory'
      featurize:
        PACKAGE: 'featurize'
      image:
        PACKAGE: 'image'
      io1:
        PACKAGE: 'io.split1'
        FLAKY: 'true'
      io2:
        PACKAGE: 'io.split2'
        FLAKY: 'true'
      isolationforest:
        PACKAGE: 'isolationforest'
      flaky:
        # TODO fix flaky test so isolation is not needed
        PACKAGE: 'flaky'
        FLAKY: 'true'
      lightgbm1:
        # TODO speed up LGBM Tests and remove split
        PACKAGE: 'lightgbm.split1'
        FLAKY: 'true'
      lightgbm2:
        PACKAGE: 'lightgbm.split2'
        FLAKY: 'true'
      lightgbm3:
        PACKAGE: 'lightgbm.split3'
        FLAKY: 'true'
      lightgbm4:
        PACKAGE: 'lightgbm.split4'
        FLAKY: 'true'
      lightgbm5:
        PACKAGE: 'lightgbm.split5'
        FLAKY: 'true'
      lightgbm6:
        PACKAGE: 'lightgbm.split6'
        FLAKY: 'true'
      opencv:
        PACKAGE: 'opencv'
      recommendation:
        PACKAGE: 'recommendation'
      stages:
        PACKAGE: 'stages'
      nn:
        PACKAGE: 'nn'
      train:
        PACKAGE: 'train'
      vw:
        PACKAGE: 'vw'
  steps:
  #- template: templates/ivy_cache.yml
  - template: templates/update_cli.yml
  # pip install is attempted up to twice and `sbt setup` up to three times.
  # There is no `set -e`, so only the final command chain decides the
  # step's exit status.
  - task: AzureCLI@2
    displayName: Setup repo
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        (timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests)
        (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
  # ${FFMPEG:-false} and ${FLAKY:-false} expand to the commands `true` or
  # `false`, so an unset variable short-circuits the install and the retries.
  - task: AzureCLI@2
    displayName: Unit Test
    timeoutInMinutes: 90
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        ulimit -c unlimited
        (${FFMPEG:-false} && sudo apt-get update && \
        sudo apt-get install ffmpeg libgstreamer1.0-0 \
        gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly -y)
        export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M -Duser.timezone=GMT"
        (timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") ||
        (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") ||
        (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**")
  # Results and coverage are published even when the test step failed.
  - task: PublishTestResults@2
    displayName: Publish Test Results
    condition: succeededOrFailed()
    inputs:
      testResultsFiles: '**/test-reports/TEST-*.xml'
      failTaskOnFailedTests: true
  - task: AzureCLI@2
    displayName: Generate Codecov report
    condition: succeededOrFailed()
    inputs:
      azureSubscription: SynapseML Build
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: sbt coverageReport
  # In this job the key-vault template runs after the tests, directly
  # before the codecov template.
  - template: templates/kv.yml
  - template: templates/codecov.yml