Merge branch 'master' of https://github.com/microsoft/verseagility
This commit is contained in:
Коммит
3df4cf534d
15
README.md
15
README.md
|
@ -10,21 +10,6 @@
|
|||
## Live Demo
|
||||
> http://nlp-demo-app.azurewebsites.net/
|
||||
|
||||
## Deployment
|
||||
|
||||
1. Click on the button to start the resource deployment:
|
||||
<a href="https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fchristian-vorhemus%2Ffunction-app%2Fmaster%2Fazuredeploy.json" target="_blank">
|
||||
<img src="https://raw.githubusercontent.com/Azure/azure-quickstart-templates/master/1-CONTRIBUTION-GUIDE/images/deploytoazure.png"/>
|
||||
</a>
|
||||
|
||||
2. After the deployment has finished (~30 min), as a temporary workaround, add the function's "default" host key as an environment variable
|
||||
named "FunctionHostKey" in the function (if the variable already exists, replace its value) and click "Save".
|
||||
<img src="demo/functionkey.png" width="400">
|
||||
|
||||
3. When you put files in the "data" container of the storage account, they are processed and stored in Cosmos DB following the standardized output format.
|
||||
<img src="demo/data_container.png" width="400">
|
||||
|
||||
|
||||
## Naming
|
||||
### Azure
|
||||
> nlp-\<component\>-\<environment\>
|
||||
|
|
13
code/data.py
13
code/data.py
|
@ -226,11 +226,16 @@ class Data():
|
|||
logger.warning(f'SAVED: {self.fn_lookup[fn]}')
|
||||
|
||||
def load(self, fn, header=0, encoding='utf-8', file_type='dataframe'):
|
||||
if fn in self.fn_lookup:
|
||||
fn = self.fn_lookup[fn]
|
||||
if file_type == 'dataframe':
|
||||
return pd.read_csv(self.fn_lookup[fn], sep='\t', encoding=encoding, header=header)
|
||||
data = pd.read_csv(fn, sep='\t', encoding=encoding, header=header)
|
||||
elif file_type == 'list':
|
||||
with open(self.fn_lookup[fn], encoding=encoding) as f:
|
||||
with open(fn, encoding=encoding) as f:
|
||||
data = f.readlines()
|
||||
return data
|
||||
elif file_type == 'json':
|
||||
with open(fn, encoding=encoding) as f:
|
||||
data = json.load(f)
|
||||
else:
|
||||
raise Exception(f'[ERROR] - file type ({file_type}) not supported in data loader')
|
||||
raise Exception(f'[ERROR] - file type ({file_type}) not supported in data loader')
|
||||
return data
|
|
@ -133,6 +133,7 @@ farm_model_lookup = {
|
|||
'en' : 'albert-base-v2'
|
||||
},
|
||||
'distilbert' : {
|
||||
'xx' : 'distilbert-base-multilingual-cased',
|
||||
'de' : 'distilbert-base-german-cased'
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,9 +2,10 @@
|
|||
Functions to deploy pipeline
|
||||
|
||||
To run locally, use:
|
||||
> cd ./code
|
||||
> cd ./root
|
||||
> conda activate nlp
|
||||
> python deploy/pipeline.py
|
||||
> python deploy/pipeline.py --language en --do_prepare --do_train
|
||||
> python deploy/pipeline.py --language en --do_deploy
|
||||
|
||||
#NOTE: not using AML Pipelines yet,
|
||||
due to technical restrictions
|
||||
|
@ -14,6 +15,7 @@ import json
|
|||
import shutil
|
||||
import math
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
from azureml.core import Workspace, Experiment, Model
|
||||
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
|
||||
|
@ -70,16 +72,27 @@ def get_best_argument(details, argument):
|
|||
# Two stages: dev + train.
|
||||
## dev: test changes, trial runs
|
||||
## train: training, full runs, deployment
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--language",
|
||||
default='en',
|
||||
type=str,
|
||||
help="")
|
||||
parser.add_argument('--do_prepare',
|
||||
action='store_true',
|
||||
help="")
|
||||
parser.add_argument('--do_train',
|
||||
action='store_true',
|
||||
help="")
|
||||
parser.add_argument('--do_deploy',
|
||||
action='store_true',
|
||||
help="")
|
||||
args = parser.parse_args()
|
||||
|
||||
# PARAMETERS
|
||||
project_name = f"msforum_en"
|
||||
compute_name = 'gpucluster-nc12'
|
||||
project_name = f"msforum_{args.language}"
|
||||
compute_name = 'gpucluster-nc6'
|
||||
experiment_name = project_name
|
||||
|
||||
do_prepare = False
|
||||
do_train = True
|
||||
do_deploy = False
|
||||
|
||||
## Load
|
||||
params = get_project_config(f'{project_name}.config.json')
|
||||
language = params.get('language')
|
||||
|
@ -94,12 +107,13 @@ env = params.get('environment')
|
|||
# auth = MsiAuthentication()
|
||||
# except Exception as e:
|
||||
# logger.warning(e)
|
||||
# auth = InteractiveLoginAuthentication(tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47")
|
||||
# auth = None
|
||||
auth = InteractiveLoginAuthentication(tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47")
|
||||
|
||||
ws = Workspace.get(name='nlp-ml',
|
||||
subscription_id='50324bce-875f-4a7b-9d3c-0e33679f5d72',
|
||||
resource_group='nlp')
|
||||
# ,auth=auth)
|
||||
resource_group='nlp',
|
||||
auth=auth)
|
||||
|
||||
## Compute target
|
||||
compute_target = ws.compute_targets[compute_name]
|
||||
|
@ -147,7 +161,7 @@ tasks = params.get("tasks")
|
|||
##### PREPARE
|
||||
############################################
|
||||
|
||||
if do_prepare:
|
||||
if args.do_prepare:
|
||||
logging.warning(f'[INFO] Running prepare for {project_name}')
|
||||
for task in tasks:
|
||||
config = tasks.get(task)
|
||||
|
@ -167,13 +181,14 @@ if do_prepare:
|
|||
use_gpu = False
|
||||
)
|
||||
run = exp.submit(est)
|
||||
run.wait_for_completion(show_output = True)
|
||||
if args.do_train:
|
||||
run.wait_for_completion(show_output = True)
|
||||
|
||||
############################################
|
||||
##### TRAIN
|
||||
############################################
|
||||
|
||||
if do_train:
|
||||
if args.do_train:
|
||||
logging.warning(f'[INFO] Running train for {project_name}')
|
||||
for task in tasks:
|
||||
config = tasks.get(task)
|
||||
|
@ -221,11 +236,11 @@ if do_train:
|
|||
##### DEPLOY
|
||||
############################################
|
||||
|
||||
version = '0.1'
|
||||
version = '0.2'
|
||||
auth_enabled = True
|
||||
compute_type = 'ACI'
|
||||
|
||||
if do_deploy:
|
||||
if args.do_deploy:
|
||||
logging.warning(f'[INFO] Running deploy for {project_name}')
|
||||
# Fetch Models
|
||||
models = []
|
||||
|
@ -240,8 +255,12 @@ if do_deploy:
|
|||
logging.warning(f'[INFO] Added Model : {model.name} (v{model.version})')
|
||||
|
||||
# Deployment Target
|
||||
memory_gb = 2
|
||||
# ram_size = params.get('environment')
|
||||
# if ram_size is not None:
|
||||
# memory_gb = ram_size
|
||||
if compute_type == 'ACI':
|
||||
compute_config = AciWebservice.deploy_configuration(cpu_cores=2, memory_gb=6, auth_enabled=auth_enabled)
|
||||
compute_config = AciWebservice.deploy_configuration(cpu_cores=2, memory_gb=memory_gb, auth_enabled=auth_enabled)
|
||||
elif compute_type == 'AKS':
|
||||
compute_config = AksWebservice.deploy_configuration() #TODO:
|
||||
|
||||
|
@ -288,6 +307,7 @@ if do_deploy:
|
|||
# Test service
|
||||
try:
|
||||
service.run(json.dumps([{"body": "Mein Windows Vista rechner will nicht mehr - ich kriege dauernd fehler meldungen. Ich wollte mir eh einen neuen kaufen, aber ich hab kein Geld. Kann Bill Gates mir helfen?"}]))
|
||||
logging.warning(f'[SUCCESS] Service was deployed.')
|
||||
except Exception as e:
|
||||
logging.warning(f'[ERROR] Service was not deployed as expected. {e}')
|
||||
|
||||
|
|
|
@ -23,12 +23,12 @@ dependencies:
|
|||
- bs4
|
||||
##DEMO ENV
|
||||
- pillow
|
||||
- streamlit==0.48.1
|
||||
- streamlit==0.56
|
||||
# - langdetect
|
||||
# - lightgbm
|
||||
# - pandas_ml
|
||||
- numpy
|
||||
- pandas
|
||||
- pandas=1.0.1
|
||||
# - scikit-learn
|
||||
# - nltk
|
||||
# - nb_conda
|
||||
|
|
|
@ -10,10 +10,10 @@
|
|||
"1": {
|
||||
"label": "subcat",
|
||||
"type": "classification",
|
||||
"model_type": "bert",
|
||||
"model_type": "distilbert",
|
||||
"max_seq_len": 256,
|
||||
"embeds_dropout":0.3,
|
||||
"learning_rate":3e-5,
|
||||
"learning_rate":2e-5,
|
||||
"prepare": true
|
||||
},
|
||||
"3": {
|
||||
|
|
|
@ -10,10 +10,10 @@
|
|||
"1": {
|
||||
"label": "subcat",
|
||||
"type": "classification",
|
||||
"model_type": "camembert",
|
||||
"model_type": "distilbert",
|
||||
"max_seq_len": 256,
|
||||
"embeds_dropout":0.3,
|
||||
"learning_rate":3e-5,
|
||||
"learning_rate":2e-5,
|
||||
"prepare": true
|
||||
},
|
||||
"3": {
|
||||
|
|
|
@ -10,10 +10,10 @@
|
|||
"1": {
|
||||
"label": "subcat",
|
||||
"type": "classification",
|
||||
"model_type": "bert",
|
||||
"model_type": "distilbert",
|
||||
"max_seq_len": 256,
|
||||
"embeds_dropout":0.3,
|
||||
"learning_rate":3e-5,
|
||||
"learning_rate":2e-5,
|
||||
"prepare": true
|
||||
},
|
||||
"3": {
|
||||
|
|
Загрузка…
Ссылка в новой задаче