Split full test into HPO, NAS and Compression (#4846)

This commit is contained in:
Yuge Zhang 2022-05-10 08:51:41 +08:00 committed by GitHub
Parent 65fad189a8
Commit 1246a58264
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
15 changed files: 419 additions and 84 deletions

Просмотреть файл

@ -106,6 +106,7 @@ linkcheck_ignore = [
r'https://www\.cs\.toronto\.edu/', # CIFAR-10
r'https://help\.aliyun\.com/document_detail/\d+\.html', # Aliyun
r'http://www\.image-net\.org/', # ImageNet
r'https://www\.msra\.cn/', # MSRA
]
# Ignore all links located in release.rst

Просмотреть файл

@ -38,3 +38,11 @@ jobs:
- template: templates/cache-dependencies-template.yml
parameters:
platform: macos
- job: ubuntu_gpu
pool:
vmImage: ubuntu-latest
steps:
- template: templates/cache-dependencies-template.yml
parameters:
platform: ubuntu-latest-gpu

Просмотреть файл

@ -124,10 +124,7 @@ stages:
parameters:
platform: ubuntu-latest
- script: |
python setup.py develop
echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}"
displayName: Install NNI
- template: templates/install-nni.yml
- script: |
set -e
@ -177,10 +174,7 @@ stages:
parameters:
platform: ubuntu-legacy
- script: |
python setup.py develop
echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}"
displayName: Install NNI
- template: templates/install-nni.yml
- script: |
cd test
@ -208,9 +202,9 @@ stages:
parameters:
platform: windows
- script: |
python setup.py develop --no-user
displayName: Install NNI
- template: templates/install-nni.yml
parameters:
user: false
- script: |
cd test
@ -235,10 +229,7 @@ stages:
parameters:
platform: macos
- script: |
python setup.py develop
echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}"
displayName: Install NNI
- template: templates/install-nni.yml
- script: |
CI=true yarn --cwd ts/nni_manager test --exclude test/core/nnimanager.test.ts

Просмотреть файл

@ -0,0 +1,31 @@
# Model compression integration test, split out of the former full-test
# pipeline. Runs on a self-hosted GPU agent pool.
# Manual runs only for now: CI/PR triggers are disabled and the nightly
# schedule below is commented out.
trigger: none
pr: none
# schedules:
# - cron: 0 16 * * *
# branches:
# include: [ master ]
# NOTE(review): confirm whether the schedule is intentionally disabled or
# should be restored once the split pipelines are stable.
jobs:
- job: linux
pool: nni-it
timeoutInMinutes: 120
steps:
# Self-hosted VMSS agents start from a clean OS image; this template installs
# build tools, azcopy, CUDA drivers and the Python toolchains.
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-nni.yml
# Compression tests need the pre-packed datasets from the NNI test-data blob.
- template: templates/download-test-data.yml
- script: |
cd test
source scripts/model_compression.sh
displayName: Model compression test

Просмотреть файл

@ -0,0 +1,48 @@
# HPO (hyper-parameter optimization) integration test, split out of the former
# full-test pipeline. Runs on a self-hosted GPU agent pool.
# Manual runs only for now: CI/PR triggers are disabled and the nightly
# schedule below is commented out.
trigger: none
pr: none
# schedules:
# - cron: 0 16 * * *
# branches:
# include: [ master ]
jobs:
- job: linux
pool: nni-it
timeoutInMinutes: 120
steps:
# Provision the clean VMSS agent (build tools, azcopy, drivers, Pythons).
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-nni.yml
# Registering the example tuner exercises the `nnictl algo register` path.
- script: |
set -e
cd examples/tuners/customized_tuner
python setup.py develop
nnictl algo register --meta meta_file.yml
displayName: Install customized tuner
- script: |
set -e
(cd test && python -m pytest ut)
export PATH=$PATH:$PWD/toolchain/yarn/bin
export CI=true
(cd ts/nni_manager && yarn test)
displayName: SDK unit test
# TODO: this should be replaced with a dedicated HPO algo test suite
- script: |
cd test
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local
displayName: Integration test
# TODO: should add a test on platforms other than linux

Просмотреть файл

@ -1,67 +0,0 @@
trigger: none
pr: none
schedules:
- cron: 0 16 * * *
branches:
include: [ master ]
jobs:
- job: linux
pool: nni-ci-gpu-local
timeoutInMinutes: 120
steps:
- script: |
echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
echo "##vso[task.setvariable variable=NNI_RELEASE]999.$(date -u +%Y%m%d%H%M%S)"
python3 -m pip install -U -r dependencies/setup.txt
python3 -m pip install -r dependencies/develop.txt
displayName: Prepare
- script: |
set -e
python3 setup.py build_ts
python3 setup.py bdist_wheel -p manylinux1_x86_64
python3 -m pip install dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl[SMAC,BOHB,PPOTuner]
displayName: Install NNI
- script: |
set -e
sudo apt-get install swig -y
python3 -m pip install -r dependencies/recommended_gpu.txt
python3 -m pip install -e .[SMAC,BOHB,PPOTuner,DNGO]
displayName: Install extra dependencies
- script: |
set -e
cd examples/tuners/customized_tuner
python3 setup.py develop --user
nnictl algo register --meta meta_file.yml
displayName: Install customized tuner
- script: |
set -e
(cd test && python3 -m pytest ut)
export PATH=$PATH:$PWD/toolchain/yarn/bin
export CI=true
(cd ts/nni_manager && yarn test)
displayName: Unit test
continueOnError: true
- script: |
cd test
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local
displayName: Integration test
continueOnError: true
- script: |
cd test
source scripts/nas.sh
displayName: NAS test
continueOnError: true
- script: |
cd test
source scripts/model_compression.sh
displayName: Model compression test

Просмотреть файл

@ -0,0 +1,29 @@
# NAS (neural architecture search) integration test, split out of the former
# full-test pipeline. Runs nightly (16:00 UTC) on master, on a self-hosted
# GPU agent pool; no CI/PR trigger.
trigger: none
pr: none
schedules:
- cron: 0 16 * * *
branches:
include: [ master ]
jobs:
- job: linux
pool: nni-it
timeoutInMinutes: 120
steps:
# Provision the clean VMSS agent (build tools, azcopy, drivers, Pythons).
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-nni.yml
- script: |
cd test
source scripts/nas.sh
displayName: NAS test

Просмотреть файл

@ -6,7 +6,36 @@ schedules:
include: [ master ]
jobs:
- job: local_windows
- job: linux
pool: nni-it
timeoutInMinutes: 120
steps:
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-nni.yml
- script: |
set -e
cd examples/tuners/customized_tuner
python setup.py develop
nnictl algo register --meta meta_file.yml
displayName: Install customized tuner
- script: |
cd test
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local
displayName: Integration test
- job: windows
pool: NNI CI WINDOWS FULL TEST
timeoutInMinutes: 120
@ -31,6 +60,8 @@ jobs:
nnictl algo register --meta meta_file.yml
displayName: Install example customized tuner
# TODO: remove unit test here
- script: |
cd test
python -m pytest ut

Просмотреть файл

@ -27,6 +27,11 @@ steps:
displayName: (legacy) Activate legacy dependencies
condition: and(succeeded(), contains('${{ parameters.platform }}', 'legacy'))
- script: |
mv dependencies/recommended_gpu.txt dependencies/recommended.txt
displayName: (GPU) Activate CUDA dependencies
condition: and(succeeded(), contains('${{ parameters.platform }}', 'gpu'))
- script: |
set -e
echo '===== develop ====='

Просмотреть файл

@ -0,0 +1,19 @@
# The test data should be uploaded to:
# https://nni.blob.core.windows.net/testdata/mnist.zip
# https://nni.blob.core.windows.net/testdata/cifar10.zip
# ...
#
# We can't upload the data regularly on pipelines,
# because it's not easy to setup auto-download for some datasets.
# See cache-dependencies-template.yml on how to generate credentials to upload new test data.
# Archives are produced by test/vso_tools/pack_testdata.py and extracted by
# test/vso_tools/unpack_testdata.py (invoked below).
steps:
- script: |
set -e
mkdir -p test/data
cd test
# Downloads every archive in the container (trailing wildcard). Presumably
# the container allows anonymous reads, since no SAS token is passed here.
azcopy copy 'https://nni.blob.core.windows.net/testdata/*' data
python vso_tools/unpack_testdata.py
ls -al data
displayName: Download test data

Просмотреть файл

@ -10,7 +10,7 @@ steps:
- script: |
set -e
sudo apt-get install -y swig3.0
sudo rm /usr/bin/swig
sudo rm -f /usr/bin/swig
sudo ln -s swig3.0 /usr/bin/swig
displayName: (Ubuntu) Downgrade swig
condition: and(succeeded(), contains('${{ parameters.platform }}', 'ubuntu'))
@ -43,6 +43,11 @@ steps:
displayName: (legacy) Activate legacy dependencies
condition: and(succeeded(), contains('${{ parameters.platform }}', 'legacy'))
- script: |
mv dependencies/recommended_gpu.txt dependencies/recommended.txt
displayName: (GPU) Activate CUDA dependencies
condition: and(succeeded(), contains('${{ parameters.platform }}', 'gpu'))
- script: |
echo '===== develop ====='
python -m pip install -r dependencies/develop.txt
@ -54,6 +59,12 @@ steps:
python -m pip install -r dependencies/recommended.txt
displayName: Install Python dependencies
- script: |
ls -al ${HOME}/.local/bin
echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}"
displayName: Add .local/bin to PATH
condition: and(succeeded(), not(contains('${{ parameters.platform }}', 'windows')))
# TODO: Delete this after upgrading to PyTorch 1.11.
- script: |
python test/vso_tools/interim_patch.py

Просмотреть файл

@ -0,0 +1,14 @@
# Template: install NNI from source with `setup.py develop`.
# The `user` parameter selects a user-site install (default, used on POSIX
# agents where ~/.local/bin is added to PATH elsewhere) versus `--no-user`
# (used on Windows agents).
parameters:
- name: user
type: boolean
default: true
steps:
# NOTE(review): with `${{ if }}:` conditional insertion the inserted step
# normally has to be a list item (`- script:`), and `${{ else }}:` requires a
# recent Azure DevOps. Indentation was lost in this view — verify the nesting
# against the repository file.
- ${{ if eq(parameters.user, true) }}:
script: |
python setup.py develop
${{ else }}:
script: |
python setup.py develop --no-user
displayName: Install NNI

Просмотреть файл

@ -0,0 +1,85 @@
# Template: one-time provisioning of a clean self-hosted (VMSS) Linux agent —
# build tools, azcopy, NVIDIA/CUDA drivers, and Python 3.7/3.9 toolchains laid
# out so the UsePythonVersion task can find them.
steps:
# OS from VMSS is very clean... Need to install basic utilities.
# Build essentials are required.
- script: |
set -e
sudo apt update
sudo apt install -y build-essential cmake
displayName: Install build essential
# Install azcopy for cache download.
# https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10#use-azcopy-in-a-script
- script: |
set -e
mkdir -p tmp
cd tmp
wget -O azcopy_v10.tar.gz https://aka.ms/downloadazcopy-v10-linux && tar -xf azcopy_v10.tar.gz --strip-components=1
sudo cp ./azcopy /usr/bin/
sudo chmod +x /usr/bin/azcopy
displayName: Setup azcopy
# VM with GPU needs to install drivers. Reference:
# https://docs.microsoft.com/en-us/azure/virtual-machines/linux/n-series-driver-setup
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html
# https://linuxhint.com/install-cuda-ubuntu/
- script: |
lspci | grep -i NVIDIA
displayName: GPU status verification
- script: |
set -e
sudo apt install linux-headers-$(uname -r) -y
sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
sudo apt update
sudo apt install -y cuda-drivers
displayName: Install CUDA
# Technically we need a reboot here, but looks like it also works without reboot.
- script: |
nvidia-smi
displayName: nvidia-smi verification
# UsePythonVersion task only works when the specific Python version is already installed.
# The following is for linux.
# Reference: https://dev.to/akaszynski/create-an-azure-self-hosted-agent-without-going-insane-173g
# We only need Python 3.7 and 3.9 for now.
# --system-site-packages is required to make packages installed with --user visible to virtualenv.
- script: |
set -e
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install -y python3.7-dev python3.7-venv python3.9-dev python3.9-venv
mkdir $(Agent.ToolsDirectory)/Python
displayName: Download Python
# Layout expected by UsePythonVersion: Python/<full-version>/x64 plus an
# `x64.complete` marker file, with a `3.7` symlink to the full version dir.
- script: |
set -e
cd $(Agent.ToolsDirectory)/Python
PY37_VER=$(python3.7 -c "import sys; print('.'.join([f'{val}' for val in sys.version_info[:3]]))")
mkdir $PY37_VER
ln -s $PY37_VER 3.7
cd $PY37_VER
python3.7 -m venv x64 --system-site-packages
touch x64.complete
displayName: Configure Python 3.7
# Same layout for Python 3.9.
- script: |
set -e
cd $(Agent.ToolsDirectory)/Python
PY39_VER=$(python3.9 -c "import sys; print('.'.join([f'{val}' for val in sys.version_info[:3]]))")
mkdir $PY39_VER
ln -s $PY39_VER 3.9
cd $PY39_VER
python3.9 -m venv x64 --system-site-packages
touch x64.complete
displayName: Configure Python 3.9

Просмотреть файл

@ -0,0 +1,105 @@
"""
Hosting some commonly used datasets in tests on NNI blob.
If --sas-token is set, the script will attempt to upload archives.
See YAMLs in pipelines/ folder for instructions on how to generate an SAS token.
"""
import argparse
import glob
import os
import random
import shutil
import subprocess
import warnings
from collections import defaultdict
from pathlib import Path
from torchvision.datasets import CIFAR10, MNIST, ImageNet
# Default location of a pre-downloaded ImageNet, overridable via environment.
IMAGENET_DIR = Path(os.environ.get('IMAGENET_DIR', '/mnt/data/imagenet'))
# Should contain meta.bin, val, and (optionally) train
# val should contain 1000 folders: n01440764, n01443537, ...
# NOTE(review): IMAGENET_DIR appears unused below — main() hard-codes the same
# default for --imagenet-dir instead of referencing it; consider unifying.
def prepare_cifar10(data_dir: Path):
    """Download CIFAR-10 into ``data_dir/cifar10`` and drop the ``.gz`` archives.

    Only the extracted data files are kept, so the later archiving step does
    not ship the redundant compressed originals.
    """
    print('Preparing CIFAR10...')
    target = data_dir / 'cifar10'
    CIFAR10(str(target), download=True)
    # Remove the downloaded compressed archives; the unpacked files remain.
    for leftover in target.rglob('*.gz'):
        leftover.unlink()
def prepare_mnist(data_dir: Path):
    """Download MNIST into ``data_dir/mnist`` and drop the ``.gz`` archives."""
    print('Preparing MNIST...')
    target = data_dir / 'mnist'
    MNIST(str(target), download=True)
    # Keep only the extracted files; the compressed downloads are redundant.
    for leftover in target.rglob('*.gz'):
        leftover.unlink()
def prepare_imagenet_subset(data_dir: Path, imagenet_dir: Path):
    """Build a ~10%-per-class subset of the ImageNet validation split.

    Reads a pre-downloaded ImageNet from *imagenet_dir* and copies a random
    tenth of each category's validation images into ``data_dir/imagenet``,
    preserving the per-class folder layout (n01440764, ...).
    """
    print('Preparing ImageNet subset...')
    # Fixed seed keeps the generated subset reproducible across runs.
    rng = random.Random(42)
    # Use a predownloaded imagenet. Only validation set is used.
    imagenet = ImageNet(imagenet_dir, split='val')
    # Group validation image paths by category id.
    by_category = defaultdict(list)
    for path, category in imagenet.imgs:
        by_category[category].append(path)
    # Rebuild the target directory from scratch.
    subset_dir = data_dir / 'imagenet'
    shutil.rmtree(subset_dir, ignore_errors=True)
    total = 0
    for category, paths in by_category.items():
        rng.shuffle(paths)
        # Keep the first 10% of each shuffled category.
        for source in paths[:len(paths) // 10]:
            source_path = Path(source)
            destination_dir = subset_dir / source_path.parent.name
            destination_dir.mkdir(exist_ok=True, parents=True)
            shutil.copyfile(source, destination_dir / source_path.name)
            total += 1
    print(f'Generated a subset of {total} images.')
def zip_datasets(data_dir: Path):
    """Create ``<name>.zip`` beside each dataset directory under *data_dir*.

    Each archive is rooted at *data_dir*, so unpacking it recreates a
    directory of the same name (the layout ``unpack_testdata.py`` expects).
    """
    for dataset_dir in data_dir.iterdir():
        if not dataset_dir.is_dir():
            continue
        print(f'Creating archive for {dataset_dir}...')
        shutil.make_archive(str(data_dir / dataset_dir.name), 'zip', data_dir, dataset_dir.name)
def upload_datasets(sas_token, data_dir='data'):
    """Upload all ``*.zip`` archives from *data_dir* to the NNI test-data blob.

    Parameters
    ----------
    sas_token
        Azure SAS token granting write access to the ``testdata`` container.
        When falsy, the upload is skipped with a warning (best-effort, so the
        generation-only workflow still succeeds).
    data_dir
        Directory containing the archives. Defaults to ``'data'``, matching
        the original hard-coded path; callers that honor ``--data-dir``
        should pass it explicitly so upload stays consistent with generation.
    """
    if not sas_token:
        warnings.warn('sas_token is not set. Upload is skipped.')
        return
    # azcopy expands the wildcard itself; check=True surfaces upload failures.
    subprocess.run([
        'azcopy',
        'copy',
        os.path.join(str(data_dir), '*.zip'),
        'https://nni.blob.core.windows.net/testdata/?' + sas_token,
    ], check=True)
def main():
    """CLI entry point: optionally (re)generate the datasets, then upload.

    ``--no-generate`` skips dataset preparation and archiving (assumes the
    archives already exist); ``--sas-token`` enables the upload step.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--no-generate', default=False, action='store_true')
    parser.add_argument('--sas-token', default=None, type=str)
    parser.add_argument('--data-dir', default='./data', type=str)
    parser.add_argument('--imagenet-dir', default='/mnt/data/imagenet', type=str)
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    if args.no_generate:
        print('Generation is skipped.')
    else:
        # Prepare every dataset, then archive them in place.
        prepare_cifar10(data_dir)
        prepare_mnist(data_dir)
        prepare_imagenet_subset(data_dir, Path(args.imagenet_dir))
        zip_datasets(data_dir)
    # Upload is attempted regardless; it warns and returns without a token.
    upload_datasets(args.sas_token)


if __name__ == '__main__':
    main()

Просмотреть файл

@ -0,0 +1,24 @@
"""
Unpacked the downloaded zipped datasets.
Opposite to ``pack_testdata.py``.
"""
import argparse
import glob
import os
import shutil
def main():
    """Extract every ``*.zip`` archive found in ``--data-dir``, in place.

    Each archive was created rooted at the data directory (see
    ``pack_testdata.py``), so it unpacks into a sibling directory named
    after the dataset.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-dir', default='./data', type=str)
    args = parser.parse_args()
    # `archive` avoids shadowing the stdlib `zipfile` module name.
    for archive in glob.glob(os.path.join(args.data_dir, '*.zip')):
        print(f'Unpacking {archive}')
        shutil.unpack_archive(archive, extract_dir=args.data_dir)


if __name__ == '__main__':
    main()