## Describe your changes

Fix ort nightly pipeline

- Upgrade to CUDA 12.6 + TensorRT 10.5.
- Fix Python 3.8 example test.
- Fix & skip some tests.
- Fix yaml format.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`.
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
This commit is contained in:
Xiaoyu 2025-01-16 16:24:40 -08:00 коммит произвёл GitHub
Родитель 3d6e7317a7
Коммит b0896bb953
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
17 изменённых файлов: 206 добавлений и 158 удалений

Просмотреть файл

@ -2,7 +2,8 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
FROM ubuntu:22.04
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
ARG PYTHON_VERSION

Просмотреть файл

@ -2,10 +2,11 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
ARG PYTHON_VERSION
ARG TENSORRT_VERSION=10.0.1.6-1+cuda12.4
ARG TENSORRT_VERSION
RUN apt-get update && \
apt-get install -y \

Просмотреть файл

@ -4,11 +4,18 @@ parameters:
dockerfile: ''
python_version: ''
docker_image: ''
base_image: ''
trt_version: ''
steps:
- script: |
docker login -u $(docker-username) -p $(docker-password)
docker build --build-arg PYTHON_VERSION=${{ parameters.python_version }} -t ${{ parameters.docker_image }} -f $(Build.SourcesDirectory)/${{ parameters.dockerfile }} .
docker build \
--build-arg BASE_IMAGE=${{ parameters.base_image }} \
--build-arg TENSORRT_VERSION=${{ parameters.trt_version }} \
--build-arg PYTHON_VERSION=${{ parameters.python_version }} \
-t ${{ parameters.docker_image }} \
-f $(Build.SourcesDirectory)/${{ parameters.dockerfile }} .
displayName: Build Docker Image
- script: |

Просмотреть файл

@ -8,42 +8,42 @@ parameters:
doc_version: 'latest'
jobs:
- job: ${{parameters.job_name}}
displayName: ${{parameters.display_name}}
pool: ${{ parameters.pool}}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
- job: ${{parameters.job_name}}
displayName: ${{parameters.display_name}}
pool: ${{ parameters.pool}}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
steps:
- template: olive-setup-template.yaml
parameters:
python_version: '3.9'
onnxruntime: 'onnxruntime'
torch: 'torch'
steps:
- template: olive-setup-template.yaml
parameters:
python_version: '3.9'
onnxruntime: 'onnxruntime'
torch: 'torch'
# checkout release branch if doc_version provided
- script: |
git config --global user.email "olivedevteam@microsoft.com"
git config --global user.name "olivedevteam"
git fetch origin rel-${{parameters.doc_version}}
git checkout rel-${{parameters.doc_version}}
displayName: Checkout release branch
condition: ne('${{parameters.doc_version}}', 'latest')
# checkout release branch if doc_version provided
- script: |
git config --global user.email "olivedevteam@microsoft.com"
git config --global user.name "olivedevteam"
git fetch origin rel-${{parameters.doc_version}}
git checkout rel-${{parameters.doc_version}}
displayName: Checkout release branch
condition: ne('${{parameters.doc_version}}', 'latest')
- script: |
# set -e, otherwise make html fails but the build continues
set -e
cd docs
python -m pip install -r requirements.txt
make html
make linkcheck
make schema
displayName: Make Docs
- script: |
# set -e, otherwise make html fails but the build continues
set -e
cd docs
python -m pip install -r requirements.txt
make html
make linkcheck
make schema
displayName: Make Docs
- task: PublishPipelineArtifact@1
inputs:
path: $(Build.SourcesDirectory)/docs/build/html
artifactName: olive_doc_src
artifactType: pipeline
displayName: Publish Docs to Pipeline Artifact
condition: and(succeeded(), eq('${{ parameters.publish_docs }}', 'true'))
- task: PublishPipelineArtifact@1
inputs:
path: $(Build.SourcesDirectory)/docs/build/html
artifactName: olive_doc_src
artifactType: pipeline
displayName: Publish Docs to Pipeline Artifact
condition: and(succeeded(), eq('${{ parameters.publish_docs }}', 'true'))

Просмотреть файл

@ -7,6 +7,8 @@ parameters:
device: 'cpu'
dockerfile: '.azure_pipelines/dockerfiles/linux-cpu.dockerfile'
docker_image: 'olive-pipeline:latest'
base_image: 'ubuntu:22.04'
trt_version: ''
onnxruntime: 'onnxruntime'
subfolder: 'local'
torch: 'torch'
@ -14,65 +16,72 @@ parameters:
onnxruntime_nightly: false
jobs:
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir
steps:
- template: build-docker-image-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
dockerfile: ${{ parameters.dockerfile }}
docker_image: ${{ parameters.docker_image }}
steps:
- script: docker system df && docker system prune -a -f && docker system df
displayName: Clean docker images
continueOnError: true
# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')
- template: build-docker-image-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
dockerfile: ${{ parameters.dockerfile }}
docker_image: ${{ parameters.docker_image }}
base_image: ${{ parameters.base_image }}
trt_version: ${{ parameters.trt_version }}
- script: |
GPU_OPTION=""
if [ "${{ parameters.device }}" = "gpu" ]; then
GPU_OPTION="--gpus=all"
fi
docker run \
$GPU_OPTION \
-v $(Build.SourcesDirectory)/logs:/logs \
-e WORKSPACE_SUBSCRIPTION_ID=$(workspace-subscription-id) \
-e WORKSPACE_RESOURCE_GROUP=$(workspace-resource-group) \
-e WORKSPACE_NAME=$(workspace-name) \
-e MANAGED_IDENTITY_CLIENT_ID=$(olive-1es-identity-client-id) \
-e PIPELINE_TEST_ACCOUNT_NAME=$(pipeline-test-account-name) \
-e PIPELINE_TEST_CONTAINER_NAME=$(pipeline-test-container-name) \
-e KEYVAULT_NAME=$(keyvault-name) \
-e HF_TOKEN=$(hf_token) \
${{ parameters.docker_image }} \
bash .azure_pipelines/scripts/${{ parameters.test_script }} \
${{ parameters.torch }} \
${{ parameters.onnxruntime }} \
${{ parameters.onnxruntime_nightly }} \
examples/$(exampleFolder)/$(exampleRequirements) \
examples/test/${{ parameters.subfolder }}/test_$(exampleName).py
displayName: Run Tests in Docker
# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')
# Step 3: Publish test results
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/logs/test_examples-TestOlive.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Publish Test Results
- script: |
GPU_OPTION=""
if [ "${{ parameters.device }}" = "gpu" ]; then
GPU_OPTION="--gpus=all"
fi
docker run \
--shm-size=4g \
$GPU_OPTION \
-v $(Build.SourcesDirectory)/logs:/logs \
-e WORKSPACE_SUBSCRIPTION_ID=$(workspace-subscription-id) \
-e WORKSPACE_RESOURCE_GROUP=$(workspace-resource-group) \
-e WORKSPACE_NAME=$(workspace-name) \
-e MANAGED_IDENTITY_CLIENT_ID=$(olive-1es-identity-client-id) \
-e PIPELINE_TEST_ACCOUNT_NAME=$(pipeline-test-account-name) \
-e PIPELINE_TEST_CONTAINER_NAME=$(pipeline-test-container-name) \
-e KEYVAULT_NAME=$(keyvault-name) \
-e HF_TOKEN=$(hf_token) \
${{ parameters.docker_image }} \
bash .azure_pipelines/scripts/${{ parameters.test_script }} \
${{ parameters.torch }} \
${{ parameters.onnxruntime }} \
${{ parameters.onnxruntime_nightly }} \
examples/$(exampleFolder)/$(exampleRequirements) \
examples/test/${{ parameters.subfolder }}/test_$(exampleName).py
displayName: Run Tests in Docker
- script: sudo git clean -dfX
condition: always()
displayName: Clean remaining artifacts
# Step 3: Publish test results
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/logs/test_examples-TestOlive.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Publish Test Results
- script: sudo git clean -dfX
condition: always()
displayName: Clean remaining artifacts

Просмотреть файл

@ -7,60 +7,62 @@ parameters:
device: 'cpu'
python_version: '3.10'
onnxruntime: 'onnxruntime'
onnxruntime_nightly: false
subfolder: 'local'
torch: 'torch'
jobs:
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir
PYTEST_BASETEMP: $(Pipeline.Workspace)/.pytest_basetemp
- job: ${{ parameters.name }}_Test_Examples
timeoutInMinutes: 300
pool:
name: ${{ parameters.pool }}
strategy:
matrix:
${{ insert }}: ${{ parameters.examples }}
variables:
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
OLIVE_TEMPDIR: $(Pipeline.Workspace)/.olive_tempdir
PYTEST_BASETEMP: $(Pipeline.Workspace)/.pytest_basetemp
steps:
- template: olive-setup-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
onnxruntime: ${{ parameters.onnxruntime }}
torch: ${{ parameters.torch }}
steps:
- template: olive-setup-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
onnxruntime: ${{ parameters.onnxruntime }}
onnxruntime_nightly: ${{ parameters.onnxruntime_nightly }}
torch: ${{ parameters.torch }}
# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')
# set exampleRequirements to requirements.txt if user does not specify
- script:
echo "##vso[task.setvariable variable=exampleRequirements]requirements.txt"
displayName: Set exampleRequirements
condition: eq(variables['exampleRequirements'], '')
- script: |
python -m pip install pytest
python -m pip install azure-identity azure-storage-blob tabulate
python -m pip install -r $(Build.SourcesDirectory)/examples/$(exampleFolder)/$(exampleRequirements)
python -m pytest -v -s --log-cli-level=WARNING --junitxml=$(Build.SourcesDirectory)/logs/test_examples-TestOlive.xml $(Build.SourcesDirectory)/examples/test/${{ parameters.subfolder }}/test_$(exampleName).py --basetemp $(PYTEST_BASETEMP)
displayName: Test Examples
env:
WORKSPACE_SUBSCRIPTION_ID: $(workspace-subscription-id)
WORKSPACE_RESOURCE_GROUP: $(workspace-resource-group)
WORKSPACE_NAME: $(workspace-name)
MANAGED_IDENTITY_CLIENT_ID: $(olive-1es-identity-client-id)
PIPELINE_TEST_ACCOUNT_NAME: $(pipeline-test-account-name)
PIPELINE_TEST_CONTAINER_NAME: $(pipeline-test-container-name)
KEYVAULT_NAME: $(keyvault-name)
HF_TOKEN: $(hf_token)
- script: |
python -m pip install pytest
python -m pip install azure-identity azure-storage-blob tabulate
python -m pip install -r $(Build.SourcesDirectory)/examples/$(exampleFolder)/$(exampleRequirements)
python -m pytest -v -s --log-cli-level=WARNING --junitxml=$(Build.SourcesDirectory)/logs/test_examples-TestOlive.xml $(Build.SourcesDirectory)/examples/test/${{ parameters.subfolder }}/test_$(exampleName).py --basetemp $(PYTEST_BASETEMP)
displayName: Test Examples
env:
WORKSPACE_SUBSCRIPTION_ID: $(workspace-subscription-id)
WORKSPACE_RESOURCE_GROUP: $(workspace-resource-group)
WORKSPACE_NAME: $(workspace-name)
MANAGED_IDENTITY_CLIENT_ID: $(olive-1es-identity-client-id)
PIPELINE_TEST_ACCOUNT_NAME: $(pipeline-test-account-name)
PIPELINE_TEST_CONTAINER_NAME: $(pipeline-test-container-name)
KEYVAULT_NAME: $(keyvault-name)
HF_TOKEN: $(hf_token)
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/*TestOlive*.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Upload pipeline run test results
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/*TestOlive*.xml'
testRunTitle: '$(Build.BuildNumber)[$(Agent.JobName)]'
failTaskOnFailedTests: true
displayName: Upload pipeline run test results
- script: git clean -dfX
condition: always()
displayName: Clean remaining artifacts
- script: git clean -dfX
condition: always()
displayName: Clean remaining artifacts

Просмотреть файл

@ -1,6 +1,7 @@
parameters:
python_version: '3.10'
onnxruntime: 'onnxruntime'
onnxruntime_nightly: false
torch: torch
steps:
@ -15,11 +16,10 @@ steps:
- script: python -m pip install .
displayName: Install Olive
- ${{ if startsWith(parameters.onnxruntime, 'ort-nightly') }}:
- ${{ if eq(parameters.onnxruntime_nightly, true) }}:
- script: |
pip install onnxruntime
pip uninstall -y onnxruntime
pip install ${{ parameters.onnxruntime }} --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt
pip install ${{ parameters.onnxruntime }} --pre --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --disable-pip-version-check
displayName: Install ${{ parameters.onnxruntime }}
- ${{ else }}:
- script: |

Просмотреть файл

@ -5,6 +5,7 @@ parameters:
windows: False
python_version: '3.10'
onnxruntime: 'onnxruntime'
onnxruntime_nightly: false
torch: 'torch'
requirements_file: 'requirements-test.txt'
@ -31,6 +32,7 @@ jobs:
parameters:
python_version: ${{ parameters.python_version }}
onnxruntime: ${{ parameters.onnxruntime }}
onnxruntime_nightly: ${{ parameters.onnxruntime_nightly }}
torch: ${{ parameters.torch }}
- ${{ if and(eq(variables.WINDOWS, 'True'), eq(variables.testType, 'multiple_ep')) }}:

Просмотреть файл

@ -7,6 +7,8 @@ parameters:
device: 'cpu'
dockerfile: '.azure_pipelines/dockerfiles/linux-gpu.dockerfile'
docker_image: 'olive-pipeline:latest'
base_image: 'nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04'
trt_version: '10.5.0.18-1+cuda12.6'
python_version: '3.10'
onnxruntime: 'onnxruntime'
torch: 'torch'
@ -20,21 +22,25 @@ jobs:
pool:
name: ${{ parameters.pool}}
variables:
testType: ${{ parameters.test_type }}
python_version: ${{ parameters.python_version }}
requirements_file: ${{ parameters.requirements_file }}
PIP_CACHE_DIR: $(Pipeline.Workspace)/.cache/pip
HF_HOME: $(Pipeline.Workspace)/.cache/huggingface
steps:
- script: docker system df && docker system prune -a -f && docker system df
displayName: Clean docker images
continueOnError: true
- template: build-docker-image-template.yaml
parameters:
python_version: ${{ parameters.python_version }}
dockerfile: ${{ parameters.dockerfile }}
docker_image: ${{ parameters.docker_image }}
base_image: ${{ parameters.base_image }}
trt_version: ${{ parameters.trt_version }}
- script: |
docker run \
--shm-size=4g \
--gpus=all \
-v /var/run/docker.sock:/var/run/docker.sock \
-v $(Build.SourcesDirectory)/logs:/logs \
@ -47,8 +53,8 @@ jobs:
${{ parameters.torch }} \
${{ parameters.onnxruntime }} \
${{ parameters.onnxruntime_nightly }} \
test/$(requirements_file) \
test/$(testType)
test/${{ parameters.requirements_file }} \
test/${{ parameters.test_type }}
displayName: Run Tests in Docker
- task: CredScan@3

Просмотреть файл

@ -64,7 +64,6 @@ jobs:
pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)
test_type: 'unit_test'
device: 'gpu'
dockerfile: '.azure_pipelines/dockerfiles/linux-gpu.dockerfile'
onnxruntime: onnxruntime-gpu==1.19.2
requirements_file: 'requirements-test-gpu.txt'
@ -125,6 +124,8 @@ jobs:
device: 'gpu'
onnxruntime: onnxruntime-gpu
dockerfile: '.azure_pipelines/dockerfiles/linux-gpu.dockerfile'
base_image: 'nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04'
trt_version: '10.5.0.18-1+cuda12.6'
examples:
bert_cuda_gpu:
exampleFolder: bert

Просмотреть файл

@ -40,6 +40,7 @@ jobs:
pool: $(OLIVE_POOL_UBUNTU2004)
onnxruntime: onnxruntime
python_version: '3.8'
base_image: 'ubuntu:20.04'
examples:
mobilenet_qnn_toolkit:
exampleFolder: mobilenet

Просмотреть файл

@ -15,7 +15,7 @@ variables:
jobs:
# Linux unit test
- template: job_templates/olive-test-linux-template.yaml
- template: job_templates/olive-test-cpu-template.yaml
parameters:
name: Linux_CI_Unit_Test_Olive
pool: $(OLIVE_POOL_UBUNTU2004)
@ -26,7 +26,7 @@ jobs:
onnxruntime_nightly: true
# Linux GPU unit test
- template: job_templates/olive-test-linux-template.yaml
- template: job_templates/olive-test-linux-gpu-template.yaml
parameters:
name: Linux_GPU_CI_Unit_Test_Olive
pool: $(OLIVE_POOL_UBUNTU2004_GPU_V100)

Просмотреть файл

@ -13,7 +13,8 @@ pip install "$1"
# Step 2: Install ONNX Runtime (Nightly or Stable)
if [ "$3" = "true" ]; then
echo "Installing ONNX Runtime Nightly..."
pip install --pre "$2" --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt
pip install "$2" --pre --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --disable-pip-version-check
else
echo "Installing ONNX Runtime Stable..."
pip install "$2"

Просмотреть файл

@ -21,7 +21,7 @@ git+https://github.com/xiaoyu-work/TransformerCompression.git ; python_version >
# TODO(team): 55399 Switch back to the latest version once it's compatible with the pipeline
marshmallow<3.24.0
mlflow>=2.4.0
neural-compressor
neural-compressor<2.4
nncf==2.7.0
nvidia-modelopt
onnx-graphsurgeon

Просмотреть файл

@ -5,10 +5,18 @@
from pathlib import Path
from test.unit_test.utils import get_onnx_model
import onnxruntime
import pytest
from packaging import version
from olive.passes.olive_pass import create_pass_from_dict
from olive.passes.onnx.nvmo_quantization import NVModelOptQuantization
@pytest.mark.skipif(
version.parse(onnxruntime.__version__) > version.parse("1.20.1"),
reason="Fails on onnxruntime 1.21",
)
def test_nvmo_quantization(tmp_path):
ov_model = get_onnx_model()
data_dir = tmp_path / "data"

Просмотреть файл

@ -66,6 +66,9 @@ def test_invalid_ep_config(use_gpu, fp16, accelerator_spec, mock_inferece_sessio
from onnxruntime.transformers.onnx_model import OnnxModel
from packaging import version
if accelerator_spec == DEFAULT_GPU_TRT_ACCELERATOR and not mock_inferece_session:
pytest.skip("Skipping test: TRT EP does not support compiled nodes when mock_inferece_session=False")
logger = logging.getLogger("olive")
logger.propagate = True

Просмотреть файл

@ -5,9 +5,11 @@
from pathlib import Path
from test.unit_test.utils import get_onnx_model
import onnxruntime
import pytest
import torch
from onnxruntime.quantization.calibrate import CalibrationDataReader
from packaging import version
from olive.data.config import DataComponentConfig, DataConfig
from olive.data.registry import Registry
@ -37,6 +39,10 @@ def dummy_calibration_reader(dataset, batch_size, **kwargs):
return RandomDataReader()
@pytest.mark.skipif(
version.parse(onnxruntime.__version__) >= version.parse("1.20"),
reason="Fails on onnxruntime 1.20+",
)
@pytest.mark.parametrize("calibrate_method", ["MinMSE", "NonOverflow"])
def test_vitis_ai_quantization_pass(calibrate_method, tmp_path):
# setup