Adding task_name and user_column_names for data import validations for MaaS (#3311)

This commit is contained in:
skanakamedal 2024-08-30 16:44:48 +05:30 коммит произвёл GitHub
Родитель bb35244b71
Коммит 7e08c29e06
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
5 изменённых файлов: 21 добавлений и 4 удалений

Просмотреть файл

@@ -11,6 +11,14 @@ description: FTaaS component to copy user training data to output
environment: azureml://registries/azureml/environments/acft-hf-nlp-data-import/versions/3
inputs:
task_name:
type: string
default: "ChatCompletion"
description: Finetune task name.
enum:
- "ChatCompletion"
- "TextGeneration"
# Inputs
train_file_path:
type: uri_file

Просмотреть файл

@@ -135,9 +135,11 @@ jobs:
environment_variables:
_AZUREML_CR_ENABLE_ITP_CAP: "false"
inputs:
task_name: "ChatCompletion"
train_file_path: '${{parent.inputs.train_file_path}}'
validation_file_path: '${{parent.inputs.validation_file_path}}'
system_properties: '${{parent.inputs.system_properties}}'
user_column_names: 'messages'
oss_chat_completion_finetune:
type: command
component: azureml:oss_chat_completion_finetune:0.0.21

Просмотреть файл

@@ -150,6 +150,7 @@ jobs:
environment_variables:
_AZUREML_CR_ENABLE_ITP_CAP: "false"
inputs:
task_name: "TextGeneration"
train_file_path: '${{parent.inputs.train_file_path}}'
validation_file_path: '${{parent.inputs.validation_file_path}}'
system_properties: '${{parent.inputs.system_properties}}'

Просмотреть файл

@@ -295,14 +295,16 @@ def _run_subprocess_cmd(cmd: List[str], component_name: str):
def _initiate_run():
"""Run the Data Import script."""
# Data Import
task_name = decode_param_from_env_var("task_name")
cmd = [
"python", "-m", "azureml.acft.contrib.hf.nlp.entry_point.data_import.data_import",
"--task_name", "TextGeneration",
"--task_name", task_name,
"--output_dataset", decode_output_from_env_var("output_dataset")
]
add_train_validation_file_path_input(cmd=cmd, input_name="train_file_path")
add_train_validation_file_path_input(
cmd=cmd, input_name="validation_file_path")
add_optional_param(cmd, "user_column_names")
logger.info(f"Starting the command: {cmd}")
_run_subprocess_cmd(cmd=cmd, component_name="Data Import")
@@ -322,9 +324,10 @@ def run():
if __name__ == "__main__":
task_name = decode_param_from_env_var("task_name")
# set logger
set_logging_parameters(
task_type="TextGeneration",
task_type=task_name,
acft_custom_dimensions={
LoggingLiterals.PROJECT_NAME: PROJECT_NAME,
LoggingLiterals.PROJECT_VERSION_NUMBER: VERSION,

Просмотреть файл

@@ -295,14 +295,16 @@ def _run_subprocess_cmd(cmd: List[str], component_name: str):
def _initiate_run():
"""Run the Data Import script."""
# Data Import
task_name = decode_param_from_env_var("task_name")
cmd = [
"python", "-m", "azureml.acft.contrib.hf.nlp.entry_point.data_import.data_import",
"--task_name", "TextGeneration",
"--task_name", task_name,
"--output_dataset", decode_output_from_env_var("output_dataset")
]
add_train_validation_file_path_input(cmd=cmd, input_name="train_file_path")
add_train_validation_file_path_input(
cmd=cmd, input_name="validation_file_path")
add_optional_param(cmd, "user_column_names")
logger.info(f"Starting the command: {cmd}")
_run_subprocess_cmd(cmd=cmd, component_name="Data Import")
@@ -322,9 +324,10 @@ def run():
if __name__ == "__main__":
task_name = decode_param_from_env_var("task_name")
# set logger
set_logging_parameters(
task_type="TextGeneration",
task_type=task_name,
acft_custom_dimensions={
LoggingLiterals.PROJECT_NAME: PROJECT_NAME,
LoggingLiterals.PROJECT_VERSION_NUMBER: VERSION,