Add llama2 example to pipeline (#1328)

## Describe your changes

Add the llama2 example to the CI pipelines: register it in the AzureML and Linux GPU example matrices, add a `requirements-pipeline.txt` for the pipeline-only dependencies, and add two test variants, one that runs the example through an AzureML workflow host with cloud caching and one that runs it locally with a Hugging Face login.
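
The new tests boil down to loading the example config and running the Olive workflow with shortened QLoRA training. A minimal sketch (the `llama2_qlora.json` config and the `"f"` pass name come from this PR's tests):

    import json

    from olive.workflows import run as olive_run

    with open("llama2_qlora.json") as fin:
        olive_config = json.load(fin)

    # keep QLoRA fine-tuning short, as the pipeline tests do
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5

    olive_run(olive_config)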

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
Xiaoyu 2024-08-28 14:52:04 -07:00 committed by GitHub
Parent 7ff67d5e68
Commit ad157bc419
9 changed files: 176 additions and 27 deletions

View file

@@ -46,6 +46,10 @@ jobs:
       WORKSPACE_RESOURCE_GROUP: $(workspace-resource-group)
       WORKSPACE_NAME: $(workspace-name)
       MANAGED_IDENTITY_CLIENT_ID: $(olive-1es-identity-client-id)
+      PIPELINE_TEST_ACCOUNT_URL: $(pipeline-test-account-url)
+      PIPELINE_TEST_CONTAINER_NAME: $(pipeline-test-container-name)
+      KEYVAULT_NAME: $(keyvault-name)
+      HF_TOKEN: $(hf_token)
   - task: PublishTestResults@2
     condition: succeededOrFailed()
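
These pipeline variables are plain environment variables by the time pytest runs, so the new tests below can read them directly, e.g.:

    import os

    account_url = os.environ.get("PIPELINE_TEST_ACCOUNT_URL")
    container_name = os.environ.get("PIPELINE_TEST_CONTAINER_NAME")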

View file

@@ -40,6 +40,10 @@ jobs:
       resnet_vitis_ai_ptq_cpu:
         exampleFolder: resnet
         exampleName: resnet_vitis_ai_ptq_cpu_aml
+      llama2:
+        exampleFolder: llama2
+        exampleName: llama2
+        exampleRequirements: requirements-pipeline.txt
   # Windows examples test
   - template: job_templates/olive-example-template.yaml

View file

@@ -150,6 +150,10 @@ jobs:
         exampleFolder: stable_diffusion
         exampleName: stable_diffusion_cuda_gpu
         exampleRequirements: requirements-common.txt
+      llama2:
+        exampleFolder: llama2
+        exampleName: llama2
+        exampleRequirements: requirements-pipeline.txt
   # Multiple EP Linux testing
   - template: job_templates/olive-test-template.yaml
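
Each matrix entry is consumed by the examples job template. That template's YAML is not part of this diff, so the following Python rendering of its install-and-test steps is only an assumption, shown to explain how `exampleFolder`, `exampleName`, and `exampleRequirements` fit together:

    import subprocess

    # hypothetical rendering of the template's install-and-test steps
    example_folder = "llama2"                    # exampleFolder
    requirements = "requirements-pipeline.txt"   # exampleRequirements

    subprocess.run(["python", "-m", "pip", "install", "-r", requirements], check=True)
    subprocess.run(["python", "-m", "pytest", "-v", "-k", "llama2"], check=True)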

View file

@@ -16,8 +16,7 @@ dependencies:
- optimum
- scipy
- scikit-learn
- torch
- onnxruntime-genai
- torch
- --extra-index-url https://download.pytorch.org/whl/cu118
- transformers>=4.41.1
- git+https://github.com/microsoft/Olive#egg=olive-ai[gpu,azureml]

View file

@@ -0,0 +1,6 @@
-r requirements-qlora.txt
azure-ai-ml
azure-identity
azure-keyvault-secrets
azureml-fsspec
huggingface_hub

View file

@@ -1,6 +1,7 @@
 -r requirements.txt
 accelerate
 bitsandbytes
+onnxruntime_genai
 peft
 scikit-learn
 sentencepiece

View file

@@ -0,0 +1,67 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import os
import uuid

import pytest

from olive.common.utils import get_credentials

from ..utils import get_example_dir, get_gpu_compute, patch_config

account_url = os.environ.get("PIPELINE_TEST_ACCOUNT_URL")
container_name = os.environ.get("PIPELINE_TEST_CONTAINER_NAME")


@pytest.fixture(scope="module", autouse=True)
def setup():
    """Set up any state specific to the execution of the given module."""
    os.chdir(get_example_dir("llama2"))


@pytest.mark.parametrize("search_algorithm", [False])
@pytest.mark.parametrize("execution_order", [None])
@pytest.mark.parametrize("system", ["local_system"])
@pytest.mark.parametrize("cloud_cache_config", [False, {"account_url": account_url, "container_name": container_name}])
@pytest.mark.parametrize("olive_json", ["llama2_qlora.json"])
def test_llama2(search_algorithm, execution_order, system, cloud_cache_config, olive_json):
    from olive.workflows import run as olive_run

    olive_config = patch_config(olive_json, search_algorithm, execution_order, system, is_gpu=False, hf_token=True)

    # reduce qlora steps for faster test
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["per_device_train_batch_size"] = 2
    olive_config["passes"]["f"]["training_args"]["per_device_eval_batch_size"] = 2

    # add cloud cache config
    olive_config["cloud_cache_config"] = cloud_cache_config

    olive_config["systems"]["aml_system"] = get_gpu_compute(True)
    olive_config["systems"]["aml_system"]["datastores"] = container_name

    # set workflow host
    olive_config["workflow_host"] = "aml_system"

    # set a random workflow id, otherwise the test will fail due to the aml job cache
    workflow_id = str(uuid.uuid4())
    olive_config["workflow_id"] = workflow_id

    olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None))

    # assert outputs: workflow blobs are prefixed with the workflow id
    container_client = get_blob_client()
    assert any(container_client.list_blobs(workflow_id))

    # delete outputs
    for blob in container_client.list_blobs(workflow_id):
        container_client.delete_blob(blob.name)


def get_blob_client():
    from azure.storage.blob import ContainerClient

    return ContainerClient(account_url=account_url, container_name=container_name, credential=get_credentials())
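
Note that the positional argument to ContainerClient.list_blobs is the blob-name prefix (name_starts_with), so the assertion checks that the run uploaded at least one artifact under the random workflow id; spelled out:

    # equivalent to the positional call in the test
    assert any(container_client.list_blobs(name_starts_with=workflow_id))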

View file

@@ -0,0 +1,39 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import os

import pytest

from olive.common.hf.login import huggingface_login

from ..utils import assert_nodes, get_example_dir, patch_config


@pytest.fixture(scope="module", autouse=True)
def setup():
    """Set up any state specific to the execution of the given module."""
    os.chdir(get_example_dir("llama2"))


@pytest.mark.parametrize("search_algorithm", [False])
@pytest.mark.parametrize("execution_order", [None])
@pytest.mark.parametrize("system", ["local_system"])
@pytest.mark.parametrize("olive_json", ["llama2_qlora.json"])
def test_llama2(search_algorithm, execution_order, system, olive_json):
    from olive.workflows import run as olive_run

    hf_token = os.environ.get("HF_TOKEN")
    huggingface_login(hf_token)

    olive_config = patch_config(olive_json, search_algorithm, execution_order, system)

    # reduce qlora steps for faster test
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["per_device_train_batch_size"] = 2
    olive_config["passes"]["f"]["training_args"]["per_device_eval_batch_size"] = 2

    footprint = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None))
    assert_nodes(footprint)
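
The local variant logs in with the pipeline's HF_TOKEN because the llama2 weights on Hugging Face are gated. A hedged sketch of a guard one could add so the test skips rather than fails when the token is absent (pytest.mark.skipif is standard pytest; the env-var name comes from this PR):

    import os

    import pytest

    requires_hf_token = pytest.mark.skipif(os.environ.get("HF_TOKEN") is None, reason="HF_TOKEN is not set")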

View file

@@ -17,14 +17,30 @@ from olive.common.utils import run_subprocess
 def check_output(footprints):
     """Check if the search output is valid."""
+    assert_nodes(footprints)
+    assert_metrics(footprints)
+
+
+def assert_nodes(footprints):
     assert footprints, "footprints is empty. The search must have failed for all accelerator specs."
     for footprint in footprints.values():
         assert footprint.nodes
+
+
+def assert_metrics(footprints):
     for footprint in footprints.values():
         for v in footprint.nodes.values():
             assert all(metric_result.value > 0 for metric_result in v.metrics.value.values())


-def patch_config(config_json_path: str, search_algorithm: str, execution_order: str, system: str, is_gpu: bool = False):
+def patch_config(
+    config_json_path: str,
+    search_algorithm: str,
+    execution_order: str,
+    system: str,
+    is_gpu: bool = False,
+    hf_token: bool = False,
+):
     """Load the config json file and patch it with the given search algorithm, execution order and system."""
     with open(config_json_path) as fin:
         olive_config = json.load(fin)

@@ -47,7 +63,7 @@ def patch_config(config_json_path: str, search_algorithm: str, execution_order:
         update_azureml_config(olive_config)
     if system == "aml_system":
         # set aml_system
-        set_aml_system(olive_config, is_gpu=is_gpu)
+        set_aml_system(olive_config, is_gpu=is_gpu, hf_token=hf_token)
         olive_config["host"] = system
         olive_config["target"] = system
     elif system == "docker_system":

@@ -88,43 +104,52 @@ def update_azureml_config(olive_config):
     if client_id is None and not exclude_managed_identity_credential:
         raise Exception("Please set the environment variable MANAGED_IDENTITY_CLIENT_ID")
+    keyvault_name = os.environ.get("KEYVAULT_NAME")

     olive_config["azureml_client"] = {
         "subscription_id": subscription_id,
         "resource_group": resource_group,
         "workspace_name": workspace_name,
         # pipeline agents have multiple managed identities, so we need to specify the client_id
         "default_auth_params": {"managed_identity_client_id": client_id, **exclude_managed_identity_credential},
+        "keyvault_name": keyvault_name,
     }


-def set_aml_system(olive_config, is_gpu=False):
+def set_aml_system(olive_config, is_gpu=False, hf_token=False):
     """Set the aml_system in the olive config."""
     if "systems" not in olive_config:
         olive_config["systems"] = {}
-    if is_gpu:
-        olive_config["systems"]["aml_system"] = {
-            "type": "AzureML",
-            "accelerators": [{"device": "GPU", "execution_providers": ["CUDAExecutionProvider"]}],
-            "aml_compute": "gpu-cluster",
-            "aml_docker_config": {
-                "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04",
-                "conda_file_path": "conda_gpu.yaml",
-            },
-            "is_dev": True,
-        }
+    olive_config["systems"]["aml_system"] = get_gpu_compute(hf_token) if is_gpu else get_cpu_compute(hf_token)
-    else:
-        olive_config["systems"]["aml_system"] = {
-            "type": "AzureML",
-            "accelerators": [{"device": "CPU", "execution_providers": ["CPUExecutionProvider"]}],
-            "aml_compute": "cpu-cluster",
-            "aml_docker_config": {
-                "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
-                "conda_file_path": "conda.yaml",
-            },
-            "is_dev": True,
-        }
+
+
+def get_gpu_compute(hf_token):
+    return {
+        "type": "AzureML",
+        "accelerators": [{"device": "GPU", "execution_providers": ["CUDAExecutionProvider"]}],
+        "aml_compute": "gpu-cluster",
+        "aml_docker_config": {
+            "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04",
+            "conda_file_path": "conda_gpu.yaml",
+        },
+        "is_dev": True,
+        "hf_token": hf_token,
+    }
+
+
+def get_cpu_compute(hf_token):
+    return {
+        "type": "AzureML",
+        "accelerators": [{"device": "CPU", "execution_providers": ["CPUExecutionProvider"]}],
+        "aml_compute": "cpu-cluster",
+        "aml_docker_config": {
+            "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
+            "conda_file_path": "conda.yaml",
+        },
+        "is_dev": True,
+        "hf_token": hf_token,
+    }


 def set_docker_system(olive_config):
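
With this refactor, tests can either take a compute spec directly (as the new workflow-host test does with get_gpu_compute) or keep calling set_aml_system; the hf_token flag ends up in the AzureML system config, presumably so the remote job can authenticate to Hugging Face. For example:

    olive_config = {}
    set_aml_system(olive_config, is_gpu=True, hf_token=True)
    assert olive_config["systems"]["aml_system"]["hf_token"] is True
    assert olive_config["systems"]["aml_system"]["aml_compute"] == "gpu-cluster"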