Generate genai metadata only if ORT generate() API is used (#1332)

## Describe your changes
Generate genai metadata only if ORT generate() API is used

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link

---------

Co-authored-by: Jambay Kinley <jambaykinley@microsoft.com>
This commit is contained in:
devang-ml 2024-08-26 12:33:11 -07:00 коммит произвёл GitHub
Родитель 74fc96a680
Коммит 610a3b57bf
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
4 изменённых файлов: 14 добавлений и 8 удалений

Просмотреть файл

@ -98,7 +98,7 @@
" --train_split \"train[:4096]\" --eval_split \"train[4096:4224]\" \\\n",
" --text_template \"### Language: {programming_language} \\n### Question: {prompt} \\n### Answer: {response}\" \\\n",
" --per_device_train_batch_size 16 --per_device_eval_batch_size 16 --max_steps 150 --logging_steps 50 \\\n",
" -o models/tiny-codes"
" -o models/tiny-codes --use_ort_genai"
]
},
{

Просмотреть файл

@ -41,7 +41,7 @@ class FineTuneCommand(BaseOliveCLICommand):
help="The precision of the optimized model and adapters.",
)
# model options
# Model options
model_group = sub_parser.add_argument_group("model options")
model_group.add_argument(
"-m",
@ -63,7 +63,7 @@ class FineTuneCommand(BaseOliveCLICommand):
choices=["bfloat16", "float16", "float32"],
help="The torch dtype to use for training.",
)
# dataset options
# Dataset options
dataset_group = sub_parser.add_argument_group("dataset options")
dataset_group.add_argument(
"-d",
@ -100,7 +100,7 @@ class FineTuneCommand(BaseOliveCLICommand):
default=1024,
help="Maximum sequence length for the data.",
)
# lora options
# LoRA options
lora_group = sub_parser.add_argument_group("lora options")
lora_group.add_argument(
"--method",
@ -134,7 +134,7 @@ class FineTuneCommand(BaseOliveCLICommand):
# TODO(jambayk): what about checkpoint_dir and resume from checkpoint support? clean checkpoint dir?
sub_parser.add_argument("--clean", action="store_true", help="Run in a clean cache directory")
# remote options
# AzureML options
remote_group = sub_parser.add_argument_group("remote options")
remote_group.add_argument(
"--azureml_config",
@ -158,6 +158,9 @@ class FineTuneCommand(BaseOliveCLICommand):
),
)
# Cloud cache doesn't support azureml resources yet, only hf-id
model_group.add_argument(
    "--use_ort_genai", action="store_true", help="Use OnnxRuntime generate() API to run the model"
)
sub_parser.set_defaults(func=FineTuneCommand)
@ -179,7 +182,7 @@ class FineTuneCommand(BaseOliveCLICommand):
# need to improve the output structure of olive run
output_path = Path(self.args.output_path)
output_path.mkdir(parents=True, exist_ok=True)
hardlink_copy_dir(Path(tempdir) / "f-c-o-e-m" / "gpu-cuda_model", output_path)
hardlink_copy_dir(Path(tempdir) / "-".join(run_config["passes"].keys()) / "gpu-cuda_model", output_path)
logger.info("Model and adapters saved to %s", output_path.resolve())
@ -242,6 +245,9 @@ class FineTuneCommand(BaseOliveCLICommand):
config["data_configs"].append(eval_data_config)
config["passes"]["f"]["eval_data_config"] = "eval_data"
if not self.args.use_ort_genai:
del config["passes"]["m"]
if self.args.azureml_config:
assert self.args.azureml_cluster, "AzureML cluster must be provided if using azureml_config."

Просмотреть файл

@ -1,6 +1,6 @@
-r requirements.txt
black
editorconfig-checker
editorconfig-checker==2.7.3
flake8
isort
lintrunner

Просмотреть файл

@ -111,7 +111,7 @@ def test_finetune_command(mock_tempdir, mock_run, tmp_path):
# setup
mock_tempdir.return_value = tmpdir.resolve()
workflow_output_dir = tmpdir / "f-c-o-e-m" / "gpu-cuda_model"
workflow_output_dir = tmpdir / "f-c-o-e" / "gpu-cuda_model"
workflow_output_dir.mkdir(parents=True)
dummy_output = workflow_output_dir / "dummy_output"
with open(dummy_output, "w") as f: