Add llama2 example to pipeline (#1328)
## Describe your changes

Add llama2 example to pipeline

## Checklist before requesting a review

- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to update [example documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md) in a follow-up PR.

## (Optional) Issue link
This commit is contained in:

Parent: 7ff67d5e68
Commit: ad157bc419
@@ -46,6 +46,10 @@ jobs:

```yaml
      WORKSPACE_RESOURCE_GROUP: $(workspace-resource-group)
      WORKSPACE_NAME: $(workspace-name)
      MANAGED_IDENTITY_CLIENT_ID: $(olive-1es-identity-client-id)
      PIPELINE_TEST_ACCOUNT_URL: $(pipeline-test-account-url)
      PIPELINE_TEST_CONTAINER_NAME: $(pipeline-test-container-name)
      KEYVAULT_NAME: $(keyvault-name)
      HF_TOKEN: $(hf_token)

  - task: PublishTestResults@2
    condition: succeededOrFailed()
```
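These variables expose the pipeline's storage account, blob container, key vault, and Hugging Face token to the example tests as environment variables. As a minimal sketch (the marker name below is hypothetical and not part of this PR), a test can guard against running outside the pipeline, where these variables are unset:

```python
import os

import pytest

# Hypothetical guard: skip cloud-cache checks when the pipeline variables
# are not available, e.g. on a local developer machine.
account_url = os.environ.get("PIPELINE_TEST_ACCOUNT_URL")
container_name = os.environ.get("PIPELINE_TEST_CONTAINER_NAME")

requires_pipeline_storage = pytest.mark.skipif(
    not (account_url and container_name),
    reason="pipeline test storage variables are not set",
)
```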
@@ -40,6 +40,10 @@ jobs:

```yaml
        resnet_vitis_ai_ptq_cpu:
          exampleFolder: resnet
          exampleName: resnet_vitis_ai_ptq_cpu_aml
        llama2:
          exampleFolder: llama2
          exampleName: llama2
          exampleRequirements: requirements-pipeline.txt

  # Windows examples test
  - template: job_templates/olive-example-template.yaml
```
@@ -150,6 +150,10 @@ jobs:

```yaml
          exampleFolder: stable_diffusion
          exampleName: stable_diffusion_cuda_gpu
          exampleRequirements: requirements-common.txt
        llama2:
          exampleFolder: llama2
          exampleName: llama2
          exampleRequirements: requirements-pipeline.txt

  # Multiple EP Linux testing
  - template: job_templates/olive-test-template.yaml
```
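Each matrix entry hands `exampleFolder`, `exampleName`, and `exampleRequirements` to `job_templates/olive-example-template.yaml`. The template itself is not shown in this diff; purely as an illustration of how such values are typically consumed (paths and layout below are assumptions, not taken from the template), a runner would install the example's requirements and then run the matching test module:

```python
import subprocess
import sys

# Illustrative only: the values the pipeline matrix provides for the new llama2 entry.
example_folder = "llama2"
example_name = "llama2"
example_requirements = "requirements-pipeline.txt"

# Install the example's requirements, then run its test module (paths are illustrative).
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-r", f"examples/{example_folder}/{example_requirements}"]
)
subprocess.check_call(
    [sys.executable, "-m", "pytest", "-v", f"examples/test/test_{example_name}.py"]
)
```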
@@ -16,8 +16,7 @@ dependencies:

```yaml
  - optimum
  - scipy
  - scikit-learn
  - torch
  - onnxruntime-genai
  - torch
  - --extra-index-url https://download.pytorch.org/whl/cu118
  - transformers>=4.41.1
  - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu,azureml]
```
@@ -0,0 +1,6 @@

```text
-r requirements-qlora.txt
azure-ai-ml
azure-identity
azure-keyvault-secrets
azureml-fsspec
huggingface_hub
```
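The `azure-identity` and `azure-keyvault-secrets` packages let the pipeline tests pull secrets from the key vault named by `KEYVAULT_NAME`, and `huggingface_hub` provides the login used for the gated Llama 2 weights. A minimal sketch of that flow (the secret name `hf-token` is an assumption, not taken from this PR):

```python
import os

from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient
from huggingface_hub import login

# Resolve the key vault exposed by the pipeline and read the token secret.
keyvault_name = os.environ["KEYVAULT_NAME"]
secret_client = SecretClient(
    vault_url=f"https://{keyvault_name}.vault.azure.net",
    credential=DefaultAzureCredential(),
)
hf_token = secret_client.get_secret("hf-token").value  # assumed secret name

# Authenticate against the Hugging Face Hub for gated model downloads.
login(token=hf_token)
```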
@@ -1,6 +1,7 @@

```text
-r requirements.txt
accelerate
bitsandbytes
onnxruntime_genai
peft
scikit-learn
sentencepiece
```
@@ -0,0 +1,67 @@

```python
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import os
import uuid

import pytest

from olive.common.utils import get_credentials

from ..utils import get_example_dir, get_gpu_compute, patch_config

account_url = os.environ.get("PIPELINE_TEST_ACCOUNT_URL")
container_name = os.environ.get("PIPELINE_TEST_CONTAINER_NAME")


@pytest.fixture(scope="module", autouse=True)
def setup():
    """Setups any state specific to the execution of the given module."""
    os.chdir(get_example_dir("llama2"))


@pytest.mark.parametrize("search_algorithm", [False])
@pytest.mark.parametrize("execution_order", [None])
@pytest.mark.parametrize("system", ["local_system"])
@pytest.mark.parametrize("cloud_cache_config", [False, {"account_url": account_url, "container_name": container_name}])
@pytest.mark.parametrize("olive_json", ["llama2_qlora.json"])
def test_llama2(search_algorithm, execution_order, system, cloud_cache_config, olive_json):
    from olive.workflows import run as olive_run

    olive_config = patch_config(olive_json, search_algorithm, execution_order, system, is_gpu=False, hf_token=True)

    # reduce qlora steps for faster test
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["per_device_train_batch_size"] = 2
    olive_config["passes"]["f"]["training_args"]["per_device_eval_batch_size"] = 2

    # add cloud cache system
    olive_config["cloud_cache_config"] = cloud_cache_config

    olive_config["systems"]["aml_system"] = get_gpu_compute(True)
    olive_config["systems"]["aml_system"]["datastores"] = container_name

    # set workflow host
    olive_config["workflow_host"] = "aml_system"

    # set a random workflow id, otherwise the test will fail due to aml job cache
    workflow_id = str(uuid.uuid4())
    olive_config["workflow_id"] = workflow_id

    olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None))

    # assert outputs
    container_client = get_blob_client()
    assert any(container_client.list_blobs(workflow_id))

    # delete outputs
    for blob in container_client.list_blobs(workflow_id):
        container_client.delete_blob(blob.name)


def get_blob_client():
    from azure.storage.blob import ContainerClient

    return ContainerClient(account_url=account_url, container_name=container_name, credential=get_credentials())
```
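Note that the blob cleanup above only runs when the assertion passes; if the workflow uploads blobs but the assert fails, the outputs stay in the container. A fixture-based teardown (a sketch, not part of this PR, reusing the `get_blob_client` helper shown above) would guarantee cleanup either way:

```python
import pytest


@pytest.fixture
def workflow_blobs():
    """Yield the container client and a list of workflow ids to clean up afterwards."""
    container_client = get_blob_client()
    workflow_ids = []
    yield container_client, workflow_ids
    # Teardown runs even if the test body raised, so outputs never linger.
    for workflow_id in workflow_ids:
        for blob in container_client.list_blobs(workflow_id):
            container_client.delete_blob(blob.name)
```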
@@ -0,0 +1,39 @@

```python
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import os

import pytest

from olive.common.hf.login import huggingface_login

from ..utils import assert_nodes, get_example_dir, patch_config


@pytest.fixture(scope="module", autouse=True)
def setup():
    """Setups any state specific to the execution of the given module."""
    os.chdir(get_example_dir("llama2"))


@pytest.mark.parametrize("search_algorithm", [False])
@pytest.mark.parametrize("execution_order", [None])
@pytest.mark.parametrize("system", ["local_system"])
@pytest.mark.parametrize("olive_json", ["llama2_qlora.json"])
def test_llama2(search_algorithm, execution_order, system, olive_json):
    from olive.workflows import run as olive_run

    hf_token = os.environ.get("HF_TOKEN")
    huggingface_login(hf_token)

    olive_config = patch_config(olive_json, search_algorithm, execution_order, system)

    # reduce qlora steps for faster test
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["per_device_train_batch_size"] = 2
    olive_config["passes"]["f"]["training_args"]["per_device_eval_batch_size"] = 2

    footprint = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None))
    assert_nodes(footprint)
```
@@ -17,14 +17,30 @@ from olive.common.utils import run_subprocess

```diff
 def check_output(footprints):
     """Check if the search output is valid."""
+    assert_nodes(footprints)
+    assert_metrics(footprints)
+
+
+def assert_nodes(footprints):
     assert footprints, "footprints is empty. The search must have failed for all accelerator specs."
     for footprint in footprints.values():
         assert footprint.nodes
+
+
+def assert_metrics(footprints):
+    for footprint in footprints.values():
         for v in footprint.nodes.values():
             assert all(metric_result.value > 0 for metric_result in v.metrics.value.values())


-def patch_config(config_json_path: str, search_algorithm: str, execution_order: str, system: str, is_gpu: bool = False):
+def patch_config(
+    config_json_path: str,
+    search_algorithm: str,
+    execution_order: str,
+    system: str,
+    is_gpu: bool = False,
+    hf_token: bool = False,
+):
     """Load the config json file and patch it with the given search algorithm, execution order and system."""
     with open(config_json_path) as fin:
         olive_config = json.load(fin)
```
@@ -47,7 +63,7 @@ def patch_config(config_json_path: str, search_algorithm: str, execution_order:

```diff
     update_azureml_config(olive_config)
     if system == "aml_system":
         # set aml_system
-        set_aml_system(olive_config, is_gpu=is_gpu)
+        set_aml_system(olive_config, is_gpu=is_gpu, hf_token=hf_token)
         olive_config["host"] = system
         olive_config["target"] = system
     elif system == "docker_system":
```
@@ -88,43 +104,52 @@ def update_azureml_config(olive_config):

```diff
     if client_id is None and not exclude_managed_identity_credential:
         raise Exception("Please set the environment variable MANAGED_IDENTITY_CLIENT_ID")

+    keyvault_name = os.environ.get("KEYVAULT_NAME")
+
     olive_config["azureml_client"] = {
         "subscription_id": subscription_id,
         "resource_group": resource_group,
         "workspace_name": workspace_name,
         # pipeline agents have multiple managed identities, so we need to specify the client_id
         "default_auth_params": {"managed_identity_client_id": client_id, **exclude_managed_identity_credential},
+        "keyvault_name": keyvault_name,
     }


-def set_aml_system(olive_config, is_gpu=False):
+def set_aml_system(olive_config, is_gpu=False, hf_token=False):
     """Set the aml_system in the olive config."""
     if "systems" not in olive_config:
         olive_config["systems"] = {}

-    if is_gpu:
-        olive_config["systems"]["aml_system"] = {
-            "type": "AzureML",
-            "accelerators": [{"device": "GPU", "execution_providers": ["CUDAExecutionProvider"]}],
-            "aml_compute": "gpu-cluster",
-            "aml_docker_config": {
-                "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04",
-                "conda_file_path": "conda_gpu.yaml",
-            },
-            "is_dev": True,
-        }
+    olive_config["systems"]["aml_system"] = get_gpu_compute(hf_token) if is_gpu else get_cpu_compute(hf_token)

-    else:
-        olive_config["systems"]["aml_system"] = {
-            "type": "AzureML",
-            "accelerators": [{"device": "CPU", "execution_providers": ["CPUExecutionProvider"]}],
-            "aml_compute": "cpu-cluster",
-            "aml_docker_config": {
-                "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
-                "conda_file_path": "conda.yaml",
-            },
-            "is_dev": True,
-        }
+
+def get_gpu_compute(hf_token):
+    return {
+        "type": "AzureML",
+        "accelerators": [{"device": "GPU", "execution_providers": ["CUDAExecutionProvider"]}],
+        "aml_compute": "gpu-cluster",
+        "aml_docker_config": {
+            "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04",
+            "conda_file_path": "conda_gpu.yaml",
+        },
+        "is_dev": True,
+        "hf_token": hf_token,
+    }
+
+
+def get_cpu_compute(hf_token):
+    return {
+        "type": "AzureML",
+        "accelerators": [{"device": "CPU", "execution_providers": ["CPUExecutionProvider"]}],
+        "aml_compute": "cpu-cluster",
+        "aml_docker_config": {
+            "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
+            "conda_file_path": "conda.yaml",
+        },
+        "is_dev": True,
+        "hf_token": hf_token,
+    }


 def set_docker_system(olive_config):
```
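For reference, a short usage sketch of the refactored helpers (values are illustrative; it assumes `patch_config` and `get_gpu_compute` are imported from the example test utils shown above):

```python
# Patch an example config for an AzureML run that needs the Hugging Face token.
olive_config = patch_config(
    "llama2_qlora.json",
    search_algorithm=False,
    execution_order=None,
    system="aml_system",
    is_gpu=True,
    hf_token=True,
)

# set_aml_system now delegates to the new helpers, so the aml_system entry is
# equivalent to building it directly:
olive_config["systems"]["aml_system"] = get_gpu_compute(True)
```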