зеркало из https://github.com/microsoft/nni.git
Fix VM image builder on 1ES (#5060)
This commit is contained in:
Родитель
ac892fc764
Коммит
81c9b9389d
10
README.md
10
README.md
|
@ -245,16 +245,18 @@ We appreciate all contributions from community to make NNI thrive.
|
|||
| Type | Status |
|
||||
| :---: | :---: |
|
||||
| Fast test | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/fast%20test?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=54&branchName=master) |
|
||||
| Full linux | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/full%20test%20-%20linux?repoName=microsoft%2Fnni&branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=62&repoName=microsoft%2Fnni&branchName=master) |
|
||||
| Full windows | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/full%20test%20-%20windows?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=63&branchName=master) |
|
||||
| Full test - HPO | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/full%20test%20-%20HPO?repoName=microsoft%2Fnni&branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=90&repoName=microsoft%2Fnni&branchName=master) |
|
||||
| Full test - NAS | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/full%20test%20-%20NAS?repoName=microsoft%2Fnni&branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=89&repoName=microsoft%2Fnni&branchName=master) |
|
||||
| Full test - compression | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/full%20test%20-%20compression?repoName=microsoft%2Fnni&branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=91&repoName=microsoft%2Fnni&branchName=master) |
|
||||
|
||||
### Training services
|
||||
|
||||
| Type | Status |
|
||||
| :---: | :---: |
|
||||
| Local - linux | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20local%20-%20linux?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=92&branchName=master) |
|
||||
| Local - windows | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20local%20-%20windows?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=98&branchName=master) |
|
||||
| Remote - linux to linux | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20remote%20-%20linux%20to%20linux?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=64&branchName=master) |
|
||||
| Remote - linux to windows | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20remote%20-%20linux%20to%20windows?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=67&branchName=master) |
|
||||
| Remote - windows to linux | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20remote%20-%20windows%20to%20linux?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=68&branchName=master) |
|
||||
| Remote - windows to windows | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20remote%20-%20windows%20to%20windows?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=99&branchName=master) |
|
||||
| OpenPAI | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20openpai%20-%20linux?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=65&branchName=master) |
|
||||
| Frameworkcontroller | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20frameworkcontroller?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=70&branchName=master) |
|
||||
| Kubeflow | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20kubeflow?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=69&branchName=master) |
|
||||
|
|
|
@ -4,8 +4,8 @@ trigger: none
|
|||
pr: none
|
||||
|
||||
variables:
|
||||
resource_group: nni
|
||||
gallery_name: nniImageGallery
|
||||
resource_group: nni-image-builder
|
||||
gallery_name: nniImageGalleryV2
|
||||
network_security_group: nni-image-builder-nsg
|
||||
managed_image_name: nni-linux-image
|
||||
image_definition_name: nniLinuxImage
|
||||
|
|
|
@ -4,11 +4,11 @@ trigger: none
|
|||
pr: none
|
||||
|
||||
variables:
|
||||
resource_group: nni
|
||||
gallery_name: nniImageGallery
|
||||
resource_group: nni-image-builder
|
||||
gallery_name: nniImageGalleryV2
|
||||
network_security_group: nni-image-builder-nsg
|
||||
managed_image_name: nni-windows-image
|
||||
image_name: nniWindowsImage
|
||||
image_definition_name: nniWindowsImage
|
||||
packer_config: config_windows
|
||||
|
||||
jobs:
|
||||
|
|
|
@ -46,3 +46,11 @@ jobs:
|
|||
- template: templates/cache-dependencies-template.yml
|
||||
parameters:
|
||||
platform: ubuntu-latest-gpu
|
||||
|
||||
- job: windows_gpu
|
||||
pool:
|
||||
vmImage: windows-latest
|
||||
steps:
|
||||
- template: templates/cache-dependencies-template.yml
|
||||
parameters:
|
||||
platform: windows-gpu
|
||||
|
|
|
@ -35,9 +35,7 @@ stages:
|
|||
timeoutInMinutes: 60
|
||||
|
||||
steps:
|
||||
- template: templates/fix-apt-1es.yml
|
||||
parameters:
|
||||
check_gpu: true
|
||||
- template: templates/check-gpu-status.yml
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
|
|
|
@ -35,9 +35,7 @@ stages:
|
|||
timeoutInMinutes: 60
|
||||
|
||||
steps:
|
||||
- template: templates/fix-apt-1es.yml
|
||||
parameters:
|
||||
check_gpu: true
|
||||
- template: templates/check-gpu-status.yml
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
|
|
|
@ -32,12 +32,10 @@ stages:
|
|||
jobs:
|
||||
- job: linux
|
||||
pool: nni-it-1es-11
|
||||
timeoutInMinutes: 60
|
||||
timeoutInMinutes: 90
|
||||
|
||||
steps:
|
||||
- template: templates/fix-apt-1es.yml
|
||||
parameters:
|
||||
check_gpu: true
|
||||
- template: templates/check-gpu-status.yml
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
|
@ -55,24 +53,25 @@ stages:
|
|||
|
||||
- job: windows
|
||||
pool: nni-it-1es-windows
|
||||
timeoutInMinutes: 60
|
||||
timeoutInMinutes: 90
|
||||
|
||||
steps:
|
||||
# FIXME: Windows should use GPU,
|
||||
# but it's not used now since driver is not installed in the image.
|
||||
- template: templates/check-gpu-status.yml
|
||||
parameters:
|
||||
platform: windows
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
platform: windows
|
||||
platform: windows-gpu
|
||||
python_env: noop
|
||||
|
||||
- template: templates/install-nni.yml
|
||||
parameters:
|
||||
user: false
|
||||
|
||||
# NOTE: Data needs to be downloaded if Windows has GPU.
|
||||
# Also, the download template needs to be updated with powershell syntax.
|
||||
# - template: templates/download-test-data.yml
|
||||
- template: templates/download-test-data.yml
|
||||
parameters:
|
||||
platform: windows
|
||||
|
||||
- powershell: |
|
||||
python test/vso_tools/ssl_patch.py
|
||||
|
|
|
@ -11,8 +11,7 @@ jobs:
|
|||
timeoutInMinutes: 90
|
||||
|
||||
steps:
|
||||
# FIXME: should use GPU here
|
||||
- template: templates/fix-apt-1es.yml
|
||||
# TODO: consider adding GPU tests here
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
|
|
|
@ -11,9 +11,7 @@ jobs:
|
|||
timeoutInMinutes: 60
|
||||
|
||||
steps:
|
||||
- template: templates/fix-apt-1es.yml
|
||||
parameters:
|
||||
check_gpu: true
|
||||
- template: templates/check-gpu-status.yml
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
|
|
|
@ -11,6 +11,9 @@ jobs:
|
|||
timeoutInMinutes: 120
|
||||
|
||||
steps:
|
||||
- template: templates/check-gpu-status.yml
|
||||
parameters:
|
||||
platform: windows
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
|
@ -38,8 +41,7 @@ jobs:
|
|||
# We can't install it on-the-fly because we can't elevate the permission here.
|
||||
- powershell: |
|
||||
cd test
|
||||
python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts local `
|
||||
--exclude mnist-pytorch-local-gpu
|
||||
python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts local
|
||||
displayName: Integration test
|
||||
|
||||
- template: templates/save-crashed-info.yml
|
||||
|
|
|
@ -16,9 +16,8 @@ jobs:
|
|||
timeoutInMinutes: 120
|
||||
|
||||
steps:
|
||||
- template: templates/fix-apt-1es.yml
|
||||
# FIXME: GPU is not supported yet.
|
||||
# Change to ubuntu-latest-gpu when it's done.
|
||||
# TODO: We don't currently have a test for GPU.
|
||||
# And nvidia-docker is not installed yet.
|
||||
|
||||
- template: templates/install-dependencies.yml
|
||||
parameters:
|
||||
|
|
|
@ -1,16 +1,51 @@
|
|||
# BEFORE READING:
|
||||
#
|
||||
# 1. We are now running agents on 1ES, all the notes about VMSS can be safely ignored.
|
||||
# 2. Many actions can be done on both cloud shell and web portal. Choose whichever you prefer.
|
||||
|
||||
steps:
|
||||
- script: |
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
displayName: Install azcli
|
||||
|
||||
# Please follow the tutorial of [image builder](https://docs.microsoft.com/en-us/azure/virtual-machines/image-builder-overview)
|
||||
# to set up a managed identity, and,
|
||||
# 1. Assign the role following the instruction.
|
||||
# 2. Assign contributor role of the resource group to the identity.
|
||||
# 3. Add the identity to VMSS.
|
||||
#
|
||||
# Update 2022/7 (running on Microsoft-hosted agents).
|
||||
# Update 2022/7 (running on Microsoft-hosted agents / 1ES agents).
|
||||
# Use a service principal. This service principal must be assigned contributor access to the resource group.
|
||||
#
|
||||
# Alternative option: managed identity.
|
||||
# Follow tutorial of [image builder](https://docs.microsoft.com/en-us/azure/virtual-machines/image-builder-overview).
|
||||
#
|
||||
# Either way, the identity / service principal must be assigned contributor access to the resource group.
|
||||
# We also added the following role (but I'm not sure whether it's necessary):
|
||||
#
|
||||
# {
|
||||
# "properties": {
|
||||
# "roleName": "ImageBuilderRole",
|
||||
# "description": "Image Builder access to create resources for the image build, you should delete or split out as appropriate",
|
||||
# "assignableScopes": [
|
||||
# "/subscriptions/<subscription_id>/resourceGroups/<resource_group>"
|
||||
# ],
|
||||
# "permissions": [
|
||||
# {
|
||||
# "actions": [
|
||||
# "Microsoft.Compute/galleries/read",
|
||||
# "Microsoft.Compute/galleries/images/read",
|
||||
# "Microsoft.Compute/galleries/images/versions/read",
|
||||
# "Microsoft.Compute/galleries/images/versions/write",
|
||||
# "Microsoft.Compute/images/write",
|
||||
# "Microsoft.Compute/images/read",
|
||||
# "Microsoft.Compute/images/delete",
|
||||
# "Microsoft.VirtualMachineImages/imageTemplates/write",
|
||||
# "Microsoft.VirtualMachineImages/imageTemplates/read",
|
||||
# "Microsoft.VirtualMachineImages/imageTemplates/delete"
|
||||
# ],
|
||||
# "notActions": [],
|
||||
# "dataActions": [],
|
||||
# "notDataActions": []
|
||||
# }
|
||||
# ]
|
||||
# }
|
||||
# }
|
||||
#
|
||||
- script: |
|
||||
az login --service-principal -u $(client_id) -p $(client_secret) --tenant $(tenant_id)
|
||||
displayName: Login to Azure
|
||||
|
@ -28,10 +63,12 @@ steps:
|
|||
az provider show -n Microsoft.Network -o json
|
||||
displayName: Register features
|
||||
|
||||
# Need to create an image gallerybefore this.
|
||||
# Only need to create once.
|
||||
# Need to create an image gallery before this.
|
||||
# Only need to create once (can be done on web portal).
|
||||
# az sig create --resource-group <resource_group> --gallery-name <sig_name>
|
||||
|
||||
#
|
||||
# NOTE: Remember to add READER access to the image gallery for "1ES Resource Management".
|
||||
#
|
||||
# Add an image definition (also only once).
|
||||
# az sig image-definition create -g <resource_group> \
|
||||
# --gallery-name <sig_name> \
|
||||
|
@ -45,6 +82,8 @@ steps:
|
|||
# --sku 20_04-nni \
|
||||
# --os-type Linux \
|
||||
# --hyper-v-generation V2
|
||||
#
|
||||
# This can be done on the web portal; remember to choose V2 for Hyper-V generation.
|
||||
|
||||
- script: |
|
||||
set -e
|
||||
|
@ -96,7 +135,7 @@ steps:
|
|||
# The workaround here is to use a monitor to detect the machine ready signal and change its WinRM port.
|
||||
- script: |
|
||||
cd test/vso_tools/build_vm
|
||||
python3 packer_build_windows.py
|
||||
python3 packer_build_windows.py $(packer_config).json $(resource_group)
|
||||
displayName: (Windows) Packer build
|
||||
condition: and(succeeded(), contains(variables['packer_config'], 'windows'))
|
||||
|
||||
|
@ -109,14 +148,14 @@ steps:
|
|||
# TODO: Should delete the managed image after build is done.
|
||||
# Image gallery alone is enough. Keeping it for now for debugging purposes.
|
||||
|
||||
# No further actions are needed here. VM images are already set to latest. They should be auto-updated.
|
||||
# In case you want to do it on your own:
|
||||
#
|
||||
# To deploy the image on VMSS, run this in Cloud Shell:
|
||||
# az vmss update --resource-group nni --name nni-windows-it \
|
||||
# --set virtualMachineProfile.storageProfile.imageReference.id=/subscriptions/{subscriptionId}/resourceGroups/nni/providers/Microsoft.Compute/galleries/nniImageGallery/images/nniWindowsImage/versions/Latest
|
||||
#
|
||||
# To deploy the image on 1ES, similar actions need to be performed on the web portal of 1ES managed images.
|
||||
#
|
||||
# Probably need to enlarge the disk size, in case it's too small:
|
||||
# az vmss update -n nni-it -g nni --set virtualMachineProfile.storageProfile.osDisk.diskSizeGb=50
|
||||
#
|
||||
# No need to update the image every time, because it's already set to latest.
|
||||
#
|
||||
# NOTE: After using 1ES pool, the pool image has to be updated manually to the latest version.
|
||||
# However, no successful build has been performed yet, because of resource shortage in Southeast Asia.
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
parameters:
|
||||
- name: platform
|
||||
type: string
|
||||
default: linux
|
||||
|
||||
steps:
|
||||
|
||||
# Install GPU driver on Windows.
|
||||
# Installer has already been downloaded and saved in the image.
|
||||
- powershell: |
|
||||
Start-Process -Verb RunAs -FilePath "$env:ProgramData\driver_installer.exe" -ArgumentList "/s /n" -Wait
|
||||
displayName: (Windows) Install GPU driver
|
||||
condition: and(succeeded(), contains('${{ parameters.platform }}', 'windows'))
|
||||
|
||||
# Make sure GPU isn't broken.
|
||||
- script: |
|
||||
nvidia-smi
|
||||
displayName: Check GPU status
|
|
@ -7,13 +7,26 @@
|
|||
# because it's not easy to setup auto-download for some datasets.
|
||||
# See cache-dependencies-template.yml on how to generate credentials to upload new test data.
|
||||
|
||||
parameters:
|
||||
- name: platform
|
||||
type: string
|
||||
default: linux
|
||||
|
||||
steps:
|
||||
|
||||
- script: |
|
||||
set -e
|
||||
mkdir -p test/data
|
||||
cd test
|
||||
azcopy copy 'https://nni.blob.core.windows.net/testdata/*' data
|
||||
python vso_tools/unpack_testdata.py
|
||||
ls -al data
|
||||
- ${{ if contains(parameters.platform, 'windows') }}:
|
||||
powershell: |
|
||||
New-Item -Path test/data -ItemType directory -Force
|
||||
cd test
|
||||
azcopy copy 'https://nni.blob.core.windows.net/testdata/*' data
|
||||
python vso_tools/unpack_testdata.py
|
||||
Get-ChildItem data
|
||||
${{ else }}:
|
||||
script: |
|
||||
set -e
|
||||
mkdir -p test/data
|
||||
cd test
|
||||
azcopy copy 'https://nni.blob.core.windows.net/testdata/*' data
|
||||
python vso_tools/unpack_testdata.py
|
||||
ls -al data
|
||||
displayName: Download test data
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
# Fix apt-related issues on 1ES linux pipeline.
|
||||
|
||||
# 1ES has an auto-upgrade (apt-get) running periodically in the background.
|
||||
# This leads to bad consequences:
|
||||
# 1) apt is locked when install is actually needed
|
||||
# 2) unattended upgrade could possibly break the GPU driver version, and crash nvidia-smi.
|
||||
#
|
||||
# The ultimate solution should be to upgrade the VM image correctly,
|
||||
# but it's currently infeasible because of a resource group limitation.
|
||||
# We introduce a workaround here: force-disable the auto-upgrade and
|
||||
# fix the broken dependencies if upgrade has already been accidentally run.
|
||||
#
|
||||
# This file can be removed after image is updated to latest.
|
||||
|
||||
parameters:
|
||||
- name: check_gpu
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
steps:
|
||||
|
||||
# Don't set -e
|
||||
# Always make sure the lock is released.
|
||||
- script: |
|
||||
set -x
|
||||
sudo bash test/vso_tools/build_vm/disable_apt_daily.sh
|
||||
sudo apt-get -o DPkg::Lock::Timeout=120 --fix-broken -y install
|
||||
displayName: (1ES) Disable apt upgrade
|
||||
|
||||
# Make sure GPU isn't broken.
|
||||
# Sometimes we can't save the GPU because upgrade runs too early.
|
||||
# We have to rerun the pipeline if unlucky. But it doesn't matter if we don't intend to use GPU at all.
|
||||
- script: |
|
||||
echo "There can be unlucky cases when we can't save the GPU. If nvidia-smi fails, try to rerun the failed jobs."
|
||||
nvidia-smi
|
||||
displayName: (1ES) Check GPU status
|
||||
condition: and(succeeded(), ${{ parameters.check_gpu }})
|
|
@ -261,6 +261,7 @@ _yarn_env['PATH'] = str(Path().resolve() / 'nni_node') + path_env_seperator + os
|
|||
_yarn_path = Path().resolve() / 'toolchain/yarn/bin' / yarn_executable
|
||||
|
||||
def _yarn(path, *args):
|
||||
_print('yarn ' + ' '.join(args) + f' (path: {path})')
|
||||
if os.environ.get('GLOBAL_TOOLCHAIN'):
|
||||
subprocess.run(['yarn', *args], cwd=path, check=True)
|
||||
else:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import logging
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
@ -196,6 +197,18 @@ def test_hub_oneshot(space_type, strategy_type):
|
|||
if strategy_type in ['darts', 'gumbel'] and space_type == 'mobilenetv3':
|
||||
pytest.skip('Skip as it consumes too much memory.')
|
||||
|
||||
WINDOWS_SPACES = [
|
||||
# Only these spaces are tested on Windows; the others are skipped because the Windows platform is slow.
|
||||
'nasbench201',
|
||||
'mobilenetv3',
|
||||
'proxylessnas',
|
||||
'shufflenet',
|
||||
'autoformer',
|
||||
'darts',
|
||||
]
|
||||
if sys.platform == 'win32' and space_type not in WINDOWS_SPACES:
|
||||
pytest.skip('Skip as Windows is too slow.')
|
||||
|
||||
model_space = _hub_factory(space_type)
|
||||
|
||||
dataset_type = 'cifar10'
|
||||
|
|
|
@ -22,11 +22,11 @@
|
|||
"gallery_name": "<gallery_name>",
|
||||
"image_name": "<image_name>",
|
||||
"image_version": "<image_version>",
|
||||
"replication_regions": ["southeastasia", "westus2", "eastus"],
|
||||
"replication_regions": ["southeastasia", "westus3", "eastus"],
|
||||
"storage_account_type": "Standard_LRS"
|
||||
},
|
||||
|
||||
"build_resource_group_name": "nni",
|
||||
"build_resource_group_name": "<resource_group>",
|
||||
"vm_size": "Standard_DS2_v2"
|
||||
}],
|
||||
"provisioners": [{
|
||||
|
|
|
@ -20,11 +20,11 @@
|
|||
"gallery_name": "<gallery_name>",
|
||||
"image_name": "<image_name>",
|
||||
"image_version": "<image_version>",
|
||||
"replication_regions": ["southeastasia", "westus2", "eastus"],
|
||||
"replication_regions": ["southeastasia", "westus3", "eastus"],
|
||||
"storage_account_type": "Standard_LRS"
|
||||
},
|
||||
|
||||
"build_resource_group_name": "nni",
|
||||
"build_resource_group_name": "<resource_group>",
|
||||
"vm_size": "Standard_D2s_v4",
|
||||
|
||||
"allowed_inbound_ip_addresses": ["<ip_address>"],
|
||||
|
|
|
@ -15,8 +15,8 @@ import subprocess
|
|||
import sys
|
||||
import time
|
||||
|
||||
BUILD_COMMAND = 'PACKER_LOG=1 packer build packer_windows.json'
|
||||
RESOURCE_GROUP = 'nni'
|
||||
BUILD_COMMAND = 'PACKER_LOG=1 packer build ' + sys.argv[1]
|
||||
RESOURCE_GROUP = sys.argv[2]
|
||||
|
||||
|
||||
def monitor_print(*args):
|
||||
|
@ -24,6 +24,9 @@ def monitor_print(*args):
|
|||
|
||||
|
||||
def main():
|
||||
monitor_print('Build command:', BUILD_COMMAND)
|
||||
monitor_print('Resource group:', RESOURCE_GROUP)
|
||||
|
||||
process = subprocess.Popen(BUILD_COMMAND, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
while True:
|
||||
retcode = process.poll()
|
||||
|
|
|
@ -60,7 +60,17 @@ sudo apt-get install -y cuda-drivers
|
|||
# Reference: https://dev.to/akaszynski/create-an-azure-self-hosted-agent-without-going-insane-173g
|
||||
# We need Python 3.7, 3.9, 3.10 and 3.11 for now.
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
sudo apt-get install -y python3.7-dev python3.7-venv python3.9-dev python3.9-venv
|
||||
sudo apt-get install -y python3.7-dev python3.7-venv python3.9-dev python3.9-venv python3.10-dev python3.10-venv python3.11-dev python3.11-venv
|
||||
|
||||
# Disable the periodic apt-get upgrade.
|
||||
# Sometimes, unattended upgrade blocks apt-get install
|
||||
sudo sed -i -e "s/Update-Package-Lists \"1\"/Update-Package-Lists \"0\"/g" /etc/apt/apt.conf.d/10periodic
|
||||
sudo sed -i -e "s/Update-Package-Lists \"1\"/Update-Package-Lists \"0\"/g" /etc/apt/apt.conf.d/20auto-upgrades
|
||||
sudo sed -i -e "s/Unattended-Upgrade \"1\"/Unattended-Upgrade \"0\"/g" /etc/apt/apt.conf.d/20auto-upgrades
|
||||
sudo systemctl disable apt-daily.timer
|
||||
sudo systemctl disable apt-daily.service
|
||||
sudo systemctl disable apt-daily-upgrade.timer
|
||||
sudo systemctl disable apt-daily-upgrade.service
|
||||
|
||||
# Deprovision
|
||||
sudo /usr/sbin/waagent -force -deprovision
|
||||
|
|
|
@ -66,9 +66,9 @@ choco install -y --no-progress vcredist2012 vcredist2013 vcredist2015 vcredist20
|
|||
# Install CUDA.
|
||||
Write-Host "Installing CUDA..."
|
||||
$CudaUrl = "https://developer.download.nvidia.com/compute/cuda/11.7.0/network_installers/cuda_11.7.0_windows_network.exe"
|
||||
Invoke-WebRequest $CudaUrl -OutFile "cuda_installer.exe"
|
||||
Start-Process -FilePath "cuda_installer.exe" -ArgumentList "/s /n" -Wait
|
||||
Remove-Item "cuda_installer.exe"
|
||||
Invoke-WebRequest $CudaUrl -OutFile "$env:ProgramData\cuda_installer.exe"
|
||||
Start-Process -FilePath "$env:ProgramData\cuda_installer.exe" -ArgumentList "/s /n" -Wait
|
||||
# Remove-Item "cuda_installer.exe"
|
||||
# Verify CUDA.
|
||||
Write-Host "Verify CUDA installation..."
|
||||
$CudaDir = "$env:ProgramFiles\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin"
|
||||
|
@ -77,6 +77,9 @@ $CudaDir = "$env:ProgramFiles\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin"
|
|||
Get-ChildItem $CudaDir
|
||||
$env:path = "$env:path;$CudaDir"
|
||||
|
||||
# Download GPU driver.
|
||||
Invoke-WebRequest "https://us.download.nvidia.com/tesla/516.94/516.94-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe" -OutFile "$env:ProgramData\driver_installer.exe"
|
||||
|
||||
Write-Host "Installing utilities..."
|
||||
|
||||
# Install azcopy for cache download.
|
||||
|
|
Загрузка…
Ссылка в новой задаче