Add llama2 example to pipeline (#1328)

## Describe your changes

Add the llama2 example to the CI pipelines: register it in the AzureML and Linux GPU example matrices, add a `requirements-pipeline.txt` for the pipeline-only dependencies, and add two test variants, one that runs the example through an AzureML workflow host with cloud caching and one that runs it locally with a Hugging Face login.
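
The new tests boil down to loading the example config and running the Olive workflow with shortened QLoRA training. A minimal sketch (the `llama2_qlora.json` config and the `"f"` pass name come from this PR's tests):

    import json

    from olive.workflows import run as olive_run

    with open("llama2_qlora.json") as fin:
        olive_config = json.load(fin)

    # keep QLoRA fine-tuning short, as the pipeline tests do
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5

    olive_run(olive_config)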

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
Xiaoyu 2024-08-28 14:52:04 -07:00 committed by GitHub
Parent 7ff67d5e68
Commit ad157bc419
9 changed files: 176 additions and 27 deletions

View file

@@ -46,6 +46,10 @@ jobs:
       WORKSPACE_RESOURCE_GROUP: $(workspace-resource-group)
       WORKSPACE_NAME: $(workspace-name)
       MANAGED_IDENTITY_CLIENT_ID: $(olive-1es-identity-client-id)
+      PIPELINE_TEST_ACCOUNT_URL: $(pipeline-test-account-url)
+      PIPELINE_TEST_CONTAINER_NAME: $(pipeline-test-container-name)
+      KEYVAULT_NAME: $(keyvault-name)
+      HF_TOKEN: $(hf_token)
   - task: PublishTestResults@2
     condition: succeededOrFailed()
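
These pipeline variables are plain environment variables by the time pytest runs, so the new tests below can read them directly, e.g.:

    import os

    account_url = os.environ.get("PIPELINE_TEST_ACCOUNT_URL")
    container_name = os.environ.get("PIPELINE_TEST_CONTAINER_NAME")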

View file

@@ -40,6 +40,10 @@ jobs:
       resnet_vitis_ai_ptq_cpu:
         exampleFolder: resnet
         exampleName: resnet_vitis_ai_ptq_cpu_aml
+      llama2:
+        exampleFolder: llama2
+        exampleName: llama2
+        exampleRequirements: requirements-pipeline.txt
   # Windows examples test
   - template: job_templates/olive-example-template.yaml

View file

@@ -150,6 +150,10 @@ jobs:
         exampleFolder: stable_diffusion
         exampleName: stable_diffusion_cuda_gpu
         exampleRequirements: requirements-common.txt
+      llama2:
+        exampleFolder: llama2
+        exampleName: llama2
+        exampleRequirements: requirements-pipeline.txt
   # Multiple EP Linux testing
   - template: job_templates/olive-test-template.yaml
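
Each matrix entry is consumed by the examples job template. That template's YAML is not part of this diff, so the following Python rendering of its install-and-test steps is only an assumption, shown to explain how `exampleFolder`, `exampleName`, and `exampleRequirements` fit together:

    import subprocess

    # hypothetical rendering of the template's install-and-test steps
    example_folder = "llama2"                    # exampleFolder
    requirements = "requirements-pipeline.txt"   # exampleRequirements

    subprocess.run(["python", "-m", "pip", "install", "-r", requirements], check=True)
    subprocess.run(["python", "-m", "pytest", "-v", "-k", "llama2"], check=True)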

View file

@@ -16,8 +16,7 @@ dependencies:
- optimum
- scipy
- scikit-learn
- torch
- onnxruntime-genai
- torch
- --extra-index-url https://download.pytorch.org/whl/cu118
- transformers>=4.41.1
- git+https://github.com/microsoft/Olive#egg=olive-ai[gpu,azureml]

View file

@@ -0,0 +1,6 @@
-r requirements-qlora.txt
azure-ai-ml
azure-identity
azure-keyvault-secrets
azureml-fsspec
huggingface_hub

View file

@@ -1,6 +1,7 @@
 -r requirements.txt
 accelerate
 bitsandbytes
+onnxruntime_genai
 peft
 scikit-learn
 sentencepiece

View file

@@ -0,0 +1,67 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import os
import uuid

import pytest

from olive.common.utils import get_credentials

from ..utils import get_example_dir, get_gpu_compute, patch_config

account_url = os.environ.get("PIPELINE_TEST_ACCOUNT_URL")
container_name = os.environ.get("PIPELINE_TEST_CONTAINER_NAME")


@pytest.fixture(scope="module", autouse=True)
def setup():
    """Set up any state specific to the execution of the given module."""
    os.chdir(get_example_dir("llama2"))


@pytest.mark.parametrize("search_algorithm", [False])
@pytest.mark.parametrize("execution_order", [None])
@pytest.mark.parametrize("system", ["local_system"])
@pytest.mark.parametrize("cloud_cache_config", [False, {"account_url": account_url, "container_name": container_name}])
@pytest.mark.parametrize("olive_json", ["llama2_qlora.json"])
def test_llama2(search_algorithm, execution_order, system, cloud_cache_config, olive_json):
    from olive.workflows import run as olive_run

    olive_config = patch_config(olive_json, search_algorithm, execution_order, system, is_gpu=False, hf_token=True)

    # reduce qlora steps for faster test
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["per_device_train_batch_size"] = 2
    olive_config["passes"]["f"]["training_args"]["per_device_eval_batch_size"] = 2

    # add cloud cache config
    olive_config["cloud_cache_config"] = cloud_cache_config

    olive_config["systems"]["aml_system"] = get_gpu_compute(True)
    olive_config["systems"]["aml_system"]["datastores"] = container_name

    # set workflow host
    olive_config["workflow_host"] = "aml_system"

    # set a random workflow id, otherwise the test will fail due to the aml job cache
    workflow_id = str(uuid.uuid4())
    olive_config["workflow_id"] = workflow_id

    olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None))

    # assert outputs: workflow blobs are prefixed with the workflow id
    container_client = get_blob_client()
    assert any(container_client.list_blobs(workflow_id))

    # delete outputs
    for blob in container_client.list_blobs(workflow_id):
        container_client.delete_blob(blob.name)


def get_blob_client():
    from azure.storage.blob import ContainerClient

    return ContainerClient(account_url=account_url, container_name=container_name, credential=get_credentials())
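
Note that the positional argument to ContainerClient.list_blobs is the blob-name prefix (name_starts_with), so the assertion checks that the run uploaded at least one artifact under the random workflow id; spelled out:

    # equivalent to the positional call in the test
    assert any(container_client.list_blobs(name_starts_with=workflow_id))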

View file

@@ -0,0 +1,39 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import os

import pytest

from olive.common.hf.login import huggingface_login

from ..utils import assert_nodes, get_example_dir, patch_config


@pytest.fixture(scope="module", autouse=True)
def setup():
    """Set up any state specific to the execution of the given module."""
    os.chdir(get_example_dir("llama2"))


@pytest.mark.parametrize("search_algorithm", [False])
@pytest.mark.parametrize("execution_order", [None])
@pytest.mark.parametrize("system", ["local_system"])
@pytest.mark.parametrize("olive_json", ["llama2_qlora.json"])
def test_llama2(search_algorithm, execution_order, system, olive_json):
    from olive.workflows import run as olive_run

    hf_token = os.environ.get("HF_TOKEN")
    huggingface_login(hf_token)

    olive_config = patch_config(olive_json, search_algorithm, execution_order, system)

    # reduce qlora steps for faster test
    olive_config["passes"]["f"]["training_args"]["max_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["logging_steps"] = 5
    olive_config["passes"]["f"]["training_args"]["per_device_train_batch_size"] = 2
    olive_config["passes"]["f"]["training_args"]["per_device_eval_batch_size"] = 2

    footprint = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None))
    assert_nodes(footprint)
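
The local variant logs in with the pipeline's HF_TOKEN because the llama2 weights on Hugging Face are gated. A hedged sketch of a guard one could add so the test skips rather than fails when the token is absent (pytest.mark.skipif is standard pytest; the env-var name comes from this PR):

    import os

    import pytest

    requires_hf_token = pytest.mark.skipif(os.environ.get("HF_TOKEN") is None, reason="HF_TOKEN is not set")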

View file

@@ -17,14 +17,30 @@ from olive.common.utils import run_subprocess
 def check_output(footprints):
     """Check if the search output is valid."""
+    assert_nodes(footprints)
+    assert_metrics(footprints)
+
+
+def assert_nodes(footprints):
     assert footprints, "footprints is empty. The search must have failed for all accelerator specs."
     for footprint in footprints.values():
         assert footprint.nodes
+
+
+def assert_metrics(footprints):
     for footprint in footprints.values():
         for v in footprint.nodes.values():
             assert all(metric_result.value > 0 for metric_result in v.metrics.value.values())


-def patch_config(config_json_path: str, search_algorithm: str, execution_order: str, system: str, is_gpu: bool = False):
+def patch_config(
+    config_json_path: str,
+    search_algorithm: str,
+    execution_order: str,
+    system: str,
+    is_gpu: bool = False,
+    hf_token: bool = False,
+):
     """Load the config json file and patch it with the given search algorithm, execution order and system."""
     with open(config_json_path) as fin:
         olive_config = json.load(fin)

@@ -47,7 +63,7 @@ def patch_config(config_json_path: str, search_algorithm: str, execution_order:
         update_azureml_config(olive_config)
     if system == "aml_system":
         # set aml_system
-        set_aml_system(olive_config, is_gpu=is_gpu)
+        set_aml_system(olive_config, is_gpu=is_gpu, hf_token=hf_token)
         olive_config["host"] = system
         olive_config["target"] = system
     elif system == "docker_system":

@@ -88,43 +104,52 @@ def update_azureml_config(olive_config):
     if client_id is None and not exclude_managed_identity_credential:
         raise Exception("Please set the environment variable MANAGED_IDENTITY_CLIENT_ID")
+    keyvault_name = os.environ.get("KEYVAULT_NAME")

     olive_config["azureml_client"] = {
         "subscription_id": subscription_id,
         "resource_group": resource_group,
         "workspace_name": workspace_name,
         # pipeline agents have multiple managed identities, so we need to specify the client_id
         "default_auth_params": {"managed_identity_client_id": client_id, **exclude_managed_identity_credential},
+        "keyvault_name": keyvault_name,
     }


-def set_aml_system(olive_config, is_gpu=False):
+def set_aml_system(olive_config, is_gpu=False, hf_token=False):
     """Set the aml_system in the olive config."""
     if "systems" not in olive_config:
         olive_config["systems"] = {}
-    if is_gpu:
-        olive_config["systems"]["aml_system"] = {
-            "type": "AzureML",
-            "accelerators": [{"device": "GPU", "execution_providers": ["CUDAExecutionProvider"]}],
-            "aml_compute": "gpu-cluster",
-            "aml_docker_config": {
-                "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04",
-                "conda_file_path": "conda_gpu.yaml",
-            },
-            "is_dev": True,
-        }
+    olive_config["systems"]["aml_system"] = get_gpu_compute(hf_token) if is_gpu else get_cpu_compute(hf_token)
-    else:
-        olive_config["systems"]["aml_system"] = {
-            "type": "AzureML",
-            "accelerators": [{"device": "CPU", "execution_providers": ["CPUExecutionProvider"]}],
-            "aml_compute": "cpu-cluster",
-            "aml_docker_config": {
-                "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
-                "conda_file_path": "conda.yaml",
-            },
-            "is_dev": True,
-        }
+
+
+def get_gpu_compute(hf_token):
+    return {
+        "type": "AzureML",
+        "accelerators": [{"device": "GPU", "execution_providers": ["CUDAExecutionProvider"]}],
+        "aml_compute": "gpu-cluster",
+        "aml_docker_config": {
+            "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04",
+            "conda_file_path": "conda_gpu.yaml",
+        },
+        "is_dev": True,
+        "hf_token": hf_token,
+    }
+
+
+def get_cpu_compute(hf_token):
+    return {
+        "type": "AzureML",
+        "accelerators": [{"device": "CPU", "execution_providers": ["CPUExecutionProvider"]}],
+        "aml_compute": "cpu-cluster",
+        "aml_docker_config": {
+            "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
+            "conda_file_path": "conda.yaml",
+        },
+        "is_dev": True,
+        "hf_token": hf_token,
+    }


 def set_docker_system(olive_config):
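
With this refactor, tests can either take a compute spec directly (as the new workflow-host test does with get_gpu_compute) or keep calling set_aml_system; the hf_token flag ends up in the AzureML system config, presumably so the remote job can authenticate to Hugging Face. For example:

    olive_config = {}
    set_aml_system(olive_config, is_gpu=True, hf_token=True)
    assert olive_config["systems"]["aml_system"]["hf_token"] is True
    assert olive_config["systems"]["aml_system"]["aml_compute"] == "gpu-cluster"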