From 610a3b57bf3ce3fdda6ed26f24d30d8822bea3e6 Mon Sep 17 00:00:00 2001
From: devang-ml <47577486+devang-ml@users.noreply.github.com>
Date: Mon, 26 Aug 2024 12:33:11 -0700
Subject: [PATCH] Generate genai metadata only if ORT generate() API is used (#1332)

## Describe your changes
Generate GenAI metadata only if the ORT generate() API will be used. This adds a `--use_ort_genai` flag to `olive finetune` (and to the multi-LoRA notebook example); when the flag is not passed, the metadata-generating pass (`"m"`) is removed from the workflow and the output directory name is derived from the remaining pass keys instead of being hardcoded.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to update [example documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md) in a follow-up PR.

## (Optional) Issue link

---------

Co-authored-by: Jambay Kinley
---
 examples/llama2/llama2_multilora.ipynb |  2 +-
 olive/cli/finetune.py                  | 16 +++++++++++-----
 requirements-dev.txt                   |  2 +-
 test/unit_test/cli/test_cli.py         |  2 +-
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/examples/llama2/llama2_multilora.ipynb b/examples/llama2/llama2_multilora.ipynb
index 7b6caf6f4..a43faf6f9 100644
--- a/examples/llama2/llama2_multilora.ipynb
+++ b/examples/llama2/llama2_multilora.ipynb
@@ -98,7 +98,7 @@
     "    --train_split \"train[:4096]\" --eval_split \"train[4096:4224]\" \\\n",
     "    --text_template \"### Language: {programming_language} \\n### Question: {prompt} \\n### Answer: {response}\" \\\n",
     "    --per_device_train_batch_size 16 --per_device_eval_batch_size 16 --max_steps 150 --logging_steps 50 \\\n",
-    "    -o models/tiny-codes"
+    "    -o models/tiny-codes --use_ort_genai"
    ]
   },
   {
diff --git a/olive/cli/finetune.py b/olive/cli/finetune.py
index 32e6e5fb9..84cbaff34 100644
--- a/olive/cli/finetune.py
+++ b/olive/cli/finetune.py
@@ -41,7 +41,7 @@ class FineTuneCommand(BaseOliveCLICommand):
             help="The precision of the optimized model and adapters.",
         )

-        # model options
+        # Model options
         model_group = sub_parser.add_argument_group("model options")
         model_group.add_argument(
             "-m",
@@ -63,7 +63,7 @@ class FineTuneCommand(BaseOliveCLICommand):
             choices=["bfloat16", "float16", "float32"],
             help="The torch dtype to use for training.",
         )
-        # dataset options
+        # Dataset options
         dataset_group = sub_parser.add_argument_group("dataset options")
         dataset_group.add_argument(
             "-d",
@@ -100,7 +100,7 @@ class FineTuneCommand(BaseOliveCLICommand):
             default=1024,
             help="Maximum sequence length for the data.",
         )
-        # lora options
+        # LoRA options
         lora_group = sub_parser.add_argument_group("lora options")
         lora_group.add_argument(
             "--method",
@@ -134,7 +134,7 @@ class FineTuneCommand(BaseOliveCLICommand):
         # TODO(jambayk): what about checkpoint_dir and resume from checkpoint support? clean checkpoint dir?
         sub_parser.add_argument("--clean", action="store_true", help="Run in a clean cache directory")

-        # remote options
+        # AzureML options
         remote_group = sub_parser.add_argument_group("remote options")
         remote_group.add_argument(
             "--azureml_config",
@@ -158,6 +158,9 @@ class FineTuneCommand(BaseOliveCLICommand):
             ),
         )
         # Cloud cache doesn't support azureml resources yet, only hf-id
+        model_group.add_argument(
+            "--use_ort_genai", action="store_true", help="Use OnnxRuntime generate() API to run the model"
+        )

         sub_parser.set_defaults(func=FineTuneCommand)

@@ -179,7 +182,7 @@ class FineTuneCommand(BaseOliveCLICommand):
         # need to improve the output structure of olive run
         output_path = Path(self.args.output_path)
         output_path.mkdir(parents=True, exist_ok=True)
-        hardlink_copy_dir(Path(tempdir) / "f-c-o-e-m" / "gpu-cuda_model", output_path)
+        hardlink_copy_dir(Path(tempdir) / "-".join(run_config["passes"].keys()) / "gpu-cuda_model", output_path)

         logger.info("Model and adapters saved to %s", output_path.resolve())

@@ -242,6 +245,9 @@ class FineTuneCommand(BaseOliveCLICommand):
             config["data_configs"].append(eval_data_config)
             config["passes"]["f"]["eval_data_config"] = "eval_data"

+        if not self.args.use_ort_genai:
+            del config["passes"]["m"]
+
         if self.args.azureml_config:
             assert self.args.azureml_cluster, "AzureML cluster must be provided if using azureml_config."

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 0de2b3a27..74a12519c 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,6 +1,6 @@
 -r requirements.txt
 black
-editorconfig-checker
+editorconfig-checker==2.7.3
 flake8
 isort
 lintrunner
diff --git a/test/unit_test/cli/test_cli.py b/test/unit_test/cli/test_cli.py
index ca7fe3923..9c292d7ae 100644
--- a/test/unit_test/cli/test_cli.py
+++ b/test/unit_test/cli/test_cli.py
@@ -111,7 +111,7 @@ def test_finetune_command(mock_tempdir, mock_run, tmp_path):
     # setup
     mock_tempdir.return_value = tmpdir.resolve()

-    workflow_output_dir = tmpdir / "f-c-o-e-m" / "gpu-cuda_model"
+    workflow_output_dir = tmpdir / "f-c-o-e" / "gpu-cuda_model"
     workflow_output_dir.mkdir(parents=True)
     dummy_output = workflow_output_dir / "dummy_output"
     with open(dummy_output, "w") as f:
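A minimal sketch of the resulting behavior, for reviewers (not part of the patch; the pass dict below uses empty placeholder configs and only mirrors the pass keys `f`, `c`, `o`, `e`, `m` that the test expects): dropping the metadata pass when `--use_ort_genai` is absent is also why the expected output folder in `test_cli.py` changes from `f-c-o-e-m` to `f-c-o-e`.

```python
# Illustrative only: placeholder pass configs, not real Olive pass definitions.
passes = {"f": {}, "c": {}, "o": {}, "e": {}, "m": {}}  # "m" is the GenAI-metadata pass

use_ort_genai = False  # i.e. `olive finetune` was invoked without --use_ort_genai

if not use_ort_genai:
    # Mirrors FineTuneCommand: skip GenAI metadata generation entirely.
    del passes["m"]

# The workflow output folder is now derived from the remaining pass keys,
# so hardlink_copy_dir reads from "<tempdir>/f-c-o-e/gpu-cuda_model"
# rather than the previously hardcoded "f-c-o-e-m".
print("-".join(passes.keys()))  # -> "f-c-o-e"
```

With the flag passed (as in the updated notebook cell), all five pass keys remain and the metadata pass still runs.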