"""
|
|
Functions to deploy training
|
|
|
|
To run locally, use:
|
|
> cd ./code
|
|
> conda activate nlp
|
|
> python deploy/training.py
|
|
|
|
TODO: switch to AML pipelines
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import shutil
|
|
import math
|
|
from azureml.core import Workspace, Experiment
|
|
from azureml.train.dnn import PyTorch
|
|
from azureml.train.hyperdrive import (BayesianParameterSampling, RandomParameterSampling,
|
|
HyperDriveConfig, PrimaryMetricGoal,
|
|
choice, uniform, loguniform)
|
|
|
|
############################################
##### Helper Functions
############################################

def get_project_config(fn):
    """Load the project config JSON, checking the known locations relative to the repo root."""
    if os.path.isfile(f'./project/{fn}'):
        with open(f'./project/{fn}', encoding='utf-8') as fp:
            params = json.load(fp)
    elif os.path.isfile(f'../project/{fn}'):
        with open(f'../project/{fn}', encoding='utf-8') as fp:
            params = json.load(fp)
    elif os.path.isfile('config.json'):
        ## Training Config
        with open('config.json', encoding='utf-8') as fp:
            params = json.load(fp)
    elif os.path.isfile('./code/config.json'):
        ## Inference Config
        with open('./code/config.json', encoding='utf-8') as fp:
            params = json.load(fp)
    else:
        raise Exception(f'Project parameters not found -> {fn}')
    return params
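
# Illustrative shape of the project config consumed above (values are examples only;
# the real <project_name>.config.json in ./project contains additional settings):
# {
#     "language": "de",
#     "environment": "dev"
# }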


def load_env():
    # TODO:
    pass


def get_best_argument(details, argument):
    """Return the value that follows the given argument name in a run's argument list."""
    args_list = details['runDefinition']['arguments']
    for i, a in enumerate(args_list):
        if argument in a:
            return args_list[i + 1]
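
# Illustrative example only (values are made up): if the best run's details contain
#   details['runDefinition']['arguments'] == ['--model_type', 'bert', '--learning_rate', '3e-05']
# then get_best_argument(details, 'learning_rate') returns '3e-05', i.e. the value
# that follows the matching flag.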


############################################
##### Parameters
############################################

# Two stages: dev + train.
## dev: test changes, trial runs
## train: training, full runs, deployment

# PARAMETERS
project_name = "msforum_de"
single_run = True
update_model = False
compute_name = 'gpucluster-nc12'
experiment_name = project_name

## Load
params = get_project_config(f'{project_name}.config.json')
language = params.get('language')
env = params.get('environment')


############################################
##### AML Setup
############################################

## Workspace
# auth = InteractiveLoginAuthentication(tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47")
ws = Workspace.get(name='nlp-ml',
                   subscription_id='50324bce-875f-4a7b-9d3c-0e33679f5d72',
                   resource_group='nlp')
                   # ,auth=auth)

## Compute target
compute_target = ws.compute_targets[compute_name]
script_folder = "."

# TODO: load from file (load_env)
pip_packages = [
    'azureml-sdk',
    'azureml-dataprep[pandas,fuse]',
    'mlflow==1.0.0',
    'azureml-mlflow',
    'spacy',
    'transformers==2.4.1',
    'scipy',
    'numpy',
    'azure-storage-blob',
    'tqdm',
    'boto3',
    'scipy>=1.3.2',
    'sklearn',
    'seqeval',
    'dotmap==1.3.0',
    'farm==0.4.1',
    'flair==0.4.5'
]
conda_packages = [
    # 'pytorch',     # Included in the PyTorch estimator
    # 'torchvision',
    'pip==19.3.1',   # NOTE: workaround for #745 issue
    'gensim',
    'numpy',
    'pandas'
]
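
# One possible way to address the load_env() TODO above (hypothetical sketch, not part
# of the original script): read the pip requirements from a plain text file, e.g.
# 'requirements.txt', instead of hard-coding the list here.
#
# def load_env(fn='requirements.txt'):
#     with open(fn, encoding='utf-8') as fp:
#         return [line.strip() for line in fp
#                 if line.strip() and not line.startswith('#')]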


############################################
##### Training
############################################

fn_config_infer = 'config.json'
shutil.copy(f'./project/{project_name}.config.json', f'./code/{fn_config_infer}')

os.chdir('./code')

## Experiment
exp = Experiment(workspace=ws, name=experiment_name)

## Config
script_params = {
    '--task' : 1,
    # '--run_prepare' : '',
    # '--do_format' : '',
    # '--download_source' : '',
    '--use_cuda' : '',
    '--n_epochs' : 3,
    '--learning_rate' : 4e-5,
    '--model_type' : 'distilbert',
    '--max_seq_len' : 256,
    '--embeds_dropout' : 0.2,
    '--register_model' : ''
}
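
# For orientation only: the estimator passes these entries to train.py as command-line
# arguments, so the remote run is roughly equivalent to a local call such as
# (assuming train.py exposes these flags via argparse and treats the empty-string
# entries as boolean switches):
#   python train.py --task 1 --use_cuda --n_epochs 3 --learning_rate 4e-05 \
#       --model_type distilbert --max_seq_len 256 --embeds_dropout 0.2 --register_model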
est = PyTorch(source_directory=script_folder,
              compute_target=compute_target,
              script_params=script_params,
              entry_script='train.py',
              pip_packages=pip_packages,
              conda_packages=conda_packages,
              use_gpu=True)

## Run
if single_run:
    run = exp.submit(est)
    # Remove temp config
    os.remove(fn_config_infer)
    run.wait_for_completion(show_output=True)
else:
    ### Hyperparameter search space
    if language == 'en':
        model_type = choice('roberta', 'bert', 'albert')  # ,'xlm-roberta'
    elif language == 'de':
        model_type = choice('distilbert', 'bert', 'roberta')
    elif language == 'it' or language == 'es':
        model_type = choice('bert')
    elif language == 'fr':
        model_type = choice('camembert', 'bert')  # ,'xlm-roberta'
    else:
        # Fallback so model_type is always defined for languages not listed above
        model_type = choice('bert')

    param_sampling = RandomParameterSampling({
        '--learning_rate' : choice(1e-5, 2e-5, 3e-5, 4e-5),
        '--model_type' : model_type,
        '--max_seq_len' : choice(128, 256),
        '--embeds_dropout' : choice(0.1, 0.2, 0.3)
    })
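
    # Note: BayesianParameterSampling is imported above and could be swapped in for
    # RandomParameterSampling here, since this search space only uses choice(); as the
    # comment below indicates, Bayesian sampling does not support an early-termination
    # policy, which is why policy=None is passed to HyperDriveConfig.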

    ## Prepare HyperDrive Config
    hdc = HyperDriveConfig(estimator=est,
                           hyperparameter_sampling=param_sampling,
                           policy=None,  # NOTE: not possible for bayesian
                           primary_metric_name='f1macro',
                           primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                           max_total_runs=40,
                           max_concurrent_runs=4)

    ## Run hyperparameter tuning
    hyperdrive_run = exp.submit(config=hdc)
    hyperdrive_run.wait_for_completion(show_output=True)

    if update_model:
        ## Get Results
        best_run = hyperdrive_run.get_best_run_by_primary_metric()
        best_run_details = best_run.get_details()

        ## Experiment
        experiment_name = experiment_name + "-train"
        exp = Experiment(workspace=ws, name=experiment_name)

        # Parameters determined by the hyperparameter search
        script_params_hyper = {
            '--learning_rate' : get_best_argument(best_run_details, 'learning_rate'),
            '--model_type' : get_best_argument(best_run_details, 'model_type'),
            '--max_seq_len' : get_best_argument(best_run_details, 'max_seq_len'),
            '--embeds_dropout' : get_best_argument(best_run_details, 'embeds_dropout'),
            '--register_model' : ''
        }
        script_params_best = {**script_params, **script_params_hyper}

        est_best = PyTorch(source_directory=script_folder,
                           compute_target=compute_target,
                           script_params=script_params_best,
                           entry_script='train.py',
                           pip_packages=pip_packages,
                           conda_packages=conda_packages,
                           use_gpu=True)

        ## Run single training with the best parameters
        run = exp.submit(est_best)
        run.wait_for_completion(show_output=False)

    # Remove temp config
    os.remove(fn_config_infer)