This commit is contained in:
Jeff Omhover 2022-07-14 11:07:36 -07:00
Родитель b061261ce3
Коммит 574075f5f0
3 изменённых файлов: 13 добавлений и 15 удалений

Просмотреть файл

@ -1,17 +1,18 @@
# for local testing (cpu)
torchvision==0.12.0
torch==1.11.0
# torchvision==0.12.0
# torch==1.11.0
pytorch_lightning==1.6.4
transformers==4.18.0
datasets==2.0.0
rouge_score==0.0.4
sentencepiece==0.1.96
# for metrics reporting/plotting
mlflow==1.25.1
azureml-mlflow==1.41.0
matplotlib==3.5.2
tqdm==4.64.0
# matplotlib==3.5.2
# tqdm==4.64.0
psutil==5.9.0
# for unit testing
pytest==7.1.2
pytest==7.1.2

Просмотреть файл

@ -8,7 +8,7 @@ inputs:
max_samples:
value: 10000
pretrained_model_name:
value: "t5-small"
value: "microsoft/ssr-base"
num_train_epochs:
value: 5
batch_size:
@ -47,9 +47,9 @@ jobs:
--max_input_length ${{inputs.max_input_length}}
--max_target_length ${{inputs.max_target_length}}
--padding ${{inputs.padding}}
--source_prefix ${{inputs.source_prefix}}
--encodings ${{outputs.encodings}}
environment: azureml:nlp_summarization_train@latest
compute: azureml:cpu-cluster-lg
inputs:
dataset_name: "ccdv/pubmed-summarization"
dataset_config: "section"
@ -60,7 +60,6 @@ jobs:
max_target_length: 40
padding: "max_length"
pretrained_model_name: ${{parent.inputs.pretrained_model_name}}
source_prefix: "summarize: "
outputs:
encodings: ${{parent.outputs.prepared_data}}
@ -79,14 +78,12 @@ jobs:
--registered_model_name ${{inputs.registered_model_name}}
--output_dir outputs
--num_train_epochs ${{inputs.num_train_epochs}}
--disable_tqdm True
--do_train --do_eval
--source_prefix ${{inputs.source_prefix}}
--trained_model_path ${{outputs.trained_model}}
environment: azureml:nlp_summarization_train@latest
inputs:
preprocessed_datasets: ${{parent.jobs.prep_finetuning_dataset.outputs.encodings}}
registered_model_name: "t5-small-cnn-pubmed"
registered_model_name: "microsoft-ssr-base-cnn-pubmed"
#model_path: ${{parent.jobs.train_step.outputs.trained_model_path}}
pretrained_model_name: ${{parent.inputs.pretrained_model_name}}
max_samples: ${{parent.inputs.max_samples}}
@ -94,13 +91,12 @@ jobs:
num_train_epochs: ${{parent.inputs.num_train_epochs}}
per_device_train_batch_size: ${{parent.inputs.batch_size}}
per_device_eval_batch_size: ${{parent.inputs.batch_size}}
source_prefix: "summarize: "
outputs:
trained_model: ${{parent.outputs.finetuned_model}}
compute: azureml:gpu-cluster
distribution:
type: pytorch
process_count_per_instance: 1 # number of gpus
process_count_per_instance: 8 # number of gpus
resources:
instance_count: 1 # number of nodes
@ -117,8 +113,9 @@ jobs:
--trained_model_path ""
--do_eval
environment: azureml:nlp_summarization_train@latest
compute: azureml:gpu-cluster
inputs:
preprocessed_datasets: ${{parent.jobs.prep_finetuning_dataset.outputs.encodings}}
model_path: ${{parent.jobs.finetune_model.outputs.trained_model}}
max_samples: ${{parent.inputs.max_samples}}
registered_model_name: "t5-small-cnn-pubmed"
registered_model_name: "microsoft/ssr-base-cnn-pubmed"

Просмотреть файл

@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
name: nlp_summarization_train
version: dev2
version: dev3
build:
path: ../../../data-science/environment/