running pipeline e2e
This commit is contained in:
Родитель
b061261ce3
Коммит
574075f5f0
|
@ -1,17 +1,18 @@
|
|||
# for local testing (cpu)
|
||||
torchvision==0.12.0
|
||||
torch==1.11.0
|
||||
# torchvision==0.12.0
|
||||
# torch==1.11.0
|
||||
pytorch_lightning==1.6.4
|
||||
transformers==4.18.0
|
||||
datasets==2.0.0
|
||||
rouge_score==0.0.4
|
||||
sentencepiece==0.1.96
|
||||
|
||||
# for metrics reporting/plotting
|
||||
mlflow==1.25.1
|
||||
azureml-mlflow==1.41.0
|
||||
matplotlib==3.5.2
|
||||
tqdm==4.64.0
|
||||
# matplotlib==3.5.2
|
||||
# tqdm==4.64.0
|
||||
psutil==5.9.0
|
||||
|
||||
# for unit testing
|
||||
pytest==7.1.2
|
||||
pytest==7.1.2
|
||||
|
|
|
@ -8,7 +8,7 @@ inputs:
|
|||
max_samples:
|
||||
value: 10000
|
||||
pretrained_model_name:
|
||||
value: "t5-small"
|
||||
value: "microsoft/ssr-base"
|
||||
num_train_epochs:
|
||||
value: 5
|
||||
batch_size:
|
||||
|
@ -47,9 +47,9 @@ jobs:
|
|||
--max_input_length ${{inputs.max_input_length}}
|
||||
--max_target_length ${{inputs.max_target_length}}
|
||||
--padding ${{inputs.padding}}
|
||||
--source_prefix ${{inputs.source_prefix}}
|
||||
--encodings ${{outputs.encodings}}
|
||||
environment: azureml:nlp_summarization_train@latest
|
||||
compute: azureml:cpu-cluster-lg
|
||||
inputs:
|
||||
dataset_name: "ccdv/pubmed-summarization"
|
||||
dataset_config: "section"
|
||||
|
@ -60,7 +60,6 @@ jobs:
|
|||
max_target_length: 40
|
||||
padding: "max_length"
|
||||
pretrained_model_name: ${{parent.inputs.pretrained_model_name}}
|
||||
source_prefix: "summarize: "
|
||||
outputs:
|
||||
encodings: ${{parent.outputs.prepared_data}}
|
||||
|
||||
|
@ -79,14 +78,12 @@ jobs:
|
|||
--registered_model_name ${{inputs.registered_model_name}}
|
||||
--output_dir outputs
|
||||
--num_train_epochs ${{inputs.num_train_epochs}}
|
||||
--disable_tqdm True
|
||||
--do_train --do_eval
|
||||
--source_prefix ${{inputs.source_prefix}}
|
||||
--trained_model_path ${{outputs.trained_model}}
|
||||
environment: azureml:nlp_summarization_train@latest
|
||||
inputs:
|
||||
preprocessed_datasets: ${{parent.jobs.prep_finetuning_dataset.outputs.encodings}}
|
||||
registered_model_name: "t5-small-cnn-pubmed"
|
||||
registered_model_name: "microsoft-ssr-base-cnn-pubmed"
|
||||
# model_path: ${{parent.jobs.train_step.outputs.trained_model_path}}
|
||||
pretrained_model_name: ${{parent.inputs.pretrained_model_name}}
|
||||
max_samples: ${{parent.inputs.max_samples}}
|
||||
|
@ -94,13 +91,12 @@ jobs:
|
|||
num_train_epochs: ${{parent.inputs.num_train_epochs}}
|
||||
per_device_train_batch_size: ${{parent.inputs.batch_size}}
|
||||
per_device_eval_batch_size: ${{parent.inputs.batch_size}}
|
||||
source_prefix: "summarize: "
|
||||
outputs:
|
||||
trained_model: ${{parent.outputs.finetuned_model}}
|
||||
compute: azureml:gpu-cluster
|
||||
distribution:
|
||||
type: pytorch
|
||||
process_count_per_instance: 1 # number of gpus
|
||||
process_count_per_instance: 8 # number of gpus
|
||||
resources:
|
||||
instance_count: 1 # number of nodes
|
||||
|
||||
|
@ -117,8 +113,9 @@ jobs:
|
|||
--trained_model_path ""
|
||||
--do_eval
|
||||
environment: azureml:nlp_summarization_train@latest
|
||||
compute: azureml:gpu-cluster
|
||||
inputs:
|
||||
preprocessed_datasets: ${{parent.jobs.prep_finetuning_dataset.outputs.encodings}}
|
||||
model_path: ${{parent.jobs.finetune_model.outputs.trained_model}}
|
||||
max_samples: ${{parent.inputs.max_samples}}
|
||||
registered_model_name: "t5-small-cnn-pubmed"
|
||||
registered_model_name: "microsoft-ssr-base-cnn-pubmed"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
|
||||
name: nlp_summarization_train
|
||||
version: dev2
|
||||
version: dev3
|
||||
build:
|
||||
path: ../../../data-science/environment/
|
||||
|
|
Загрузка…
Ссылка в новой задаче