Generate genai metadata only if ORT generate() API is used (#1332)
## Describe your changes

Generate genai metadata only if ORT generate() API is used.

## Checklist before requesting a review

- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to update [example documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md) in a follow-up PR.

## (Optional) Issue link

---------

Co-authored-by: Jambay Kinley <jambaykinley@microsoft.com>
Parent: 74fc96a680
Commit: 610a3b57bf
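At its core the change is a single opt-in switch: `FineTuneCommand` gains `--use_ort_genai`, and the pass that emits GenAI metadata (keyed `"m"` in the generated run config) is dropped unless the flag is set. A minimal sketch of that pattern; only the flag name and the `"m"` key come from the diff, the config shape is illustrative:

```python
import argparse

parser = argparse.ArgumentParser()
# Opt-in flag, mirroring the one added to FineTuneCommand below.
parser.add_argument(
    "--use_ort_genai",
    action="store_true",
    help="Use OnnxRuntime generate() API to run the model",
)
args = parser.parse_args([])  # default: flag not given

# Illustrative run config; "m" stands in for the GenAI-metadata pass.
config = {"passes": {"f": {}, "c": {}, "o": {}, "e": {}, "m": {}}}
if not args.use_ort_genai:
    del config["passes"]["m"]  # skip GenAI metadata generation by default

print(list(config["passes"]))  # ['f', 'c', 'o', 'e']
```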
@@ -98,7 +98,7 @@
 " --train_split \"train[:4096]\" --eval_split \"train[4096:4224]\" \\\n",
 " --text_template \"### Language: {programming_language} \\n### Question: {prompt} \\n### Answer: {response}\" \\\n",
 " --per_device_train_batch_size 16 --per_device_eval_batch_size 16 --max_steps 150 --logging_steps 50 \\\n",
-" -o models/tiny-codes"
+" -o models/tiny-codes --use_ort_genai"
 ]
 },
 {
@@ -41,7 +41,7 @@ class FineTuneCommand(BaseOliveCLICommand):
 help="The precision of the optimized model and adapters.",
 )

-# model options
+# Model options
 model_group = sub_parser.add_argument_group("model options")
 model_group.add_argument(
 "-m",
@@ -63,7 +63,7 @@ class FineTuneCommand(BaseOliveCLICommand):
 choices=["bfloat16", "float16", "float32"],
 help="The torch dtype to use for training.",
 )
-# dataset options
+# Dataset options
 dataset_group = sub_parser.add_argument_group("dataset options")
 dataset_group.add_argument(
 "-d",
@@ -100,7 +100,7 @@ class FineTuneCommand(BaseOliveCLICommand):
 default=1024,
 help="Maximum sequence length for the data.",
 )
-# lora options
+# LoRA options
 lora_group = sub_parser.add_argument_group("lora options")
 lora_group.add_argument(
 "--method",
@@ -134,7 +134,7 @@ class FineTuneCommand(BaseOliveCLICommand):
 # TODO(jambayk): what about checkpoint_dir and resume from checkpoint support? clean checkpoint dir?
 sub_parser.add_argument("--clean", action="store_true", help="Run in a clean cache directory")

-# remote options
+# AzureML options
 remote_group = sub_parser.add_argument_group("remote options")
 remote_group.add_argument(
 "--azureml_config",
@@ -158,6 +158,9 @@ class FineTuneCommand(BaseOliveCLICommand):
 ),
 )
 # Cloud cache doesn't support azureml resources yet, only hf-id
+model_group.add_argument(
+    "--use_ort_genai", action="store_true", help="Use OnnxRuntime generate() API to run the model"
+)

 sub_parser.set_defaults(func=FineTuneCommand)

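The new flag exists because a model consumed through the ONNX Runtime generate() API needs GenAI metadata alongside the ONNX file. A rough sketch of that consumption path, assuming the onnxruntime-genai Python package of that era (the API surface has shifted between releases, so treat this as illustrative rather than canonical):

```python
import onnxruntime_genai as og

# Folder produced with --use_ort_genai, containing the model plus its GenAI metadata.
model = og.Model("models/tiny-codes")
tokenizer = og.Tokenizer(model)

params = og.GeneratorParams(model)
params.set_search_options(max_length=200)
params.input_ids = tokenizer.encode(
    "### Language: Python \n### Question: reverse a list \n### Answer:"
)

output_tokens = model.generate(params)
print(tokenizer.decode(output_tokens[0]))
```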
@@ -179,7 +182,7 @@ class FineTuneCommand(BaseOliveCLICommand):
 # need to improve the output structure of olive run
 output_path = Path(self.args.output_path)
 output_path.mkdir(parents=True, exist_ok=True)
-hardlink_copy_dir(Path(tempdir) / "f-c-o-e-m" / "gpu-cuda_model", output_path)
+hardlink_copy_dir(Path(tempdir) / "-".join(run_config["passes"].keys()) / "gpu-cuda_model", output_path)

 logger.info("Model and adapters saved to %s", output_path.resolve())

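The directory-name change above is the knock-on effect of the optional pass: with `"m"` no longer guaranteed to run, the hardcoded `"f-c-o-e-m"` name can be wrong, so it is now derived from whichever passes are actually in the config. A quick illustration (pass keys as they appear in the diff; dicts preserve insertion order in Python 3.7+):

```python
# Values elided; only the keys matter for the directory name.
with_metadata = {"f": {}, "c": {}, "o": {}, "e": {}, "m": {}}
without_metadata = {"f": {}, "c": {}, "o": {}, "e": {}}

print("-".join(with_metadata))     # f-c-o-e-m
print("-".join(without_metadata))  # f-c-o-e
```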
@@ -242,6 +245,9 @@ class FineTuneCommand(BaseOliveCLICommand):
 config["data_configs"].append(eval_data_config)
 config["passes"]["f"]["eval_data_config"] = "eval_data"

+if not self.args.use_ort_genai:
+    del config["passes"]["m"]
+
 if self.args.azureml_config:
     assert self.args.azureml_cluster, "AzureML cluster must be provided if using azureml_config."

@@ -1,6 +1,6 @@
 -r requirements.txt
 black
-editorconfig-checker
+editorconfig-checker==2.7.3
 flake8
 isort
 lintrunner
@@ -111,7 +111,7 @@ def test_finetune_command(mock_tempdir, mock_run, tmp_path):

 # setup
 mock_tempdir.return_value = tmpdir.resolve()
-workflow_output_dir = tmpdir / "f-c-o-e-m" / "gpu-cuda_model"
+workflow_output_dir = tmpdir / "f-c-o-e" / "gpu-cuda_model"
 workflow_output_dir.mkdir(parents=True)
 dummy_output = workflow_output_dir / "dummy_output"
 with open(dummy_output, "w") as f:
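The updated expectation in the test follows directly: without `--use_ort_genai`, the default run omits the `"m"` pass, so the workflow directory is named after the four remaining passes. A compressed, illustrative restatement of what the test asserts (pytest-style; the test name and config shape are mine, not Olive's):

```python
def test_default_run_omits_metadata_pass(tmp_path):
    # Without --use_ort_genai only the f/c/o/e passes remain in the config.
    run_config = {"passes": {"f": {}, "c": {}, "o": {}, "e": {}}}
    workflow_output_dir = tmp_path / "-".join(run_config["passes"]) / "gpu-cuda_model"
    workflow_output_dir.mkdir(parents=True)
    assert workflow_output_dir.parent.name == "f-c-o-e"
```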