Add GenSen AzureML notebook testing
This commit is contained in:
Родитель
08d3aa2eb2
Коммит
27fd2c4c90
|
@ -82,6 +82,8 @@
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import shutil\n",
|
"import shutil\n",
|
||||||
|
"import papermill as pm\n",
|
||||||
|
"import scrapbook as sb\n",
|
||||||
"\n",
|
"\n",
|
||||||
"sys.path.append(\"../../\")\n",
|
"sys.path.append(\"../../\")\n",
|
||||||
"from utils_nlp.dataset import snli, preprocess, Split\n",
|
"from utils_nlp.dataset import snli, preprocess, Split\n",
|
||||||
|
@ -116,12 +118,26 @@
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"parameters"
|
||||||
|
]
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# Model configuration\n",
|
||||||
|
"NROWS = None\n",
|
||||||
"CACHE_DIR = \"./temp\"\n",
|
"CACHE_DIR = \"./temp\"\n",
|
||||||
"AZUREML_CONFIG_PATH = \"./.azureml\"\n",
|
"AZUREML_CONFIG_PATH = \"./.azureml\"\n",
|
||||||
"AZUREML_VERBOSE = False # Prints verbose azureml logs when True"
|
"AZUREML_VERBOSE = False # Prints verbose azureml logs when True\n",
|
||||||
|
"MAX_EPOCH = None\n",
|
||||||
|
"ENTRY_SCRIPT = \"utils_nlp/gensen/gensen_train.py\"\n",
|
||||||
|
"TRAIN_SCRIPT = \"gensen_train.py\"\n",
|
||||||
|
"CONFIG_PATH = \"gensen_config.json\"\n",
|
||||||
|
"EXPERIMENT_NAME = \"NLP-SS-GenSen-deepdive\"\n",
|
||||||
|
"UTIL_NLP_PATH = \"../../utils_nlp\"\n",
|
||||||
|
"MAX_TOTAL_RUNS = 8\n",
|
||||||
|
"MAX_CONCURRENT_RUNS = 4"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -213,9 +229,9 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"data_dir = os.path.join(CACHE_DIR, \"data\")\n",
|
"data_dir = os.path.join(CACHE_DIR, \"data\")\n",
|
||||||
"train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN)\n",
|
"train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN, nrows=NROWS)\n",
|
||||||
"dev = snli.load_pandas_df(data_dir, file_split=Split.DEV)\n",
|
"dev = snli.load_pandas_df(data_dir, file_split=Split.DEV, nrows=NROWS)\n",
|
||||||
"test = snli.load_pandas_df(data_dir, file_split=Split.TEST)"
|
"test = snli.load_pandas_df(data_dir, file_split=Split.TEST, nrows=NROWS)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -749,7 +765,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"_ = shutil.copytree(\"../../utils_nlp\", utils_folder)"
|
"_ = shutil.copytree(UTIL_NLP_PATH, utils_folder)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -758,8 +774,8 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"_ = shutil.copy(\"gensen_train.py\", os.path.join(utils_folder, \"gensen\"))\n",
|
"_ = shutil.copy(TRAIN_SCRIPT, os.path.join(utils_folder, \"gensen\"))\n",
|
||||||
"_ = shutil.copy(\"gensen_config.json\", os.path.join(utils_folder, \"gensen\"))"
|
"_ = shutil.copy(CONFIG_PATH, os.path.join(utils_folder, \"gensen\"))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -790,13 +806,14 @@
|
||||||
"script_params = {\n",
|
"script_params = {\n",
|
||||||
" \"--config\": \"utils_nlp/gensen/gensen_config.json\",\n",
|
" \"--config\": \"utils_nlp/gensen/gensen_config.json\",\n",
|
||||||
" \"--data_folder\": ws.get_default_datastore().as_mount(),\n",
|
" \"--data_folder\": ws.get_default_datastore().as_mount(),\n",
|
||||||
|
" \"--max_epoch\": MAX_EPOCH,\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"estimator = PyTorch(\n",
|
"estimator = PyTorch(\n",
|
||||||
" source_directory=project_folder,\n",
|
" source_directory=project_folder,\n",
|
||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script=\"utils_nlp/gensen/gensen_train.py\",\n",
|
" entry_script=ENTRY_SCRIPT,\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" process_count_per_node=1,\n",
|
" process_count_per_node=1,\n",
|
||||||
" distributed_training=MpiConfiguration(),\n",
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
|
@ -842,7 +859,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"experiment_name = \"pytorch-gensen\"\n",
|
"experiment_name = EXPERIMENT_NAME\n",
|
||||||
"experiment = Experiment(ws, name=experiment_name)"
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -2029,7 +2046,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"_ = run.wait_for_completion(show_output=True) # Block until the script has completed training."
|
"_ = run.wait_for_completion(show_output=AZUREML_VERBOSE) # Block until the script has completed training."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -2084,8 +2101,8 @@
|
||||||
" policy=early_termination_policy,\n",
|
" policy=early_termination_policy,\n",
|
||||||
" primary_metric_name=\"min_val_loss\",\n",
|
" primary_metric_name=\"min_val_loss\",\n",
|
||||||
" primary_metric_goal=PrimaryMetricGoal.MINIMIZE,\n",
|
" primary_metric_goal=PrimaryMetricGoal.MINIMIZE,\n",
|
||||||
" max_total_runs=8,\n",
|
" max_total_runs=MAX_TOTAL_RUNS,\n",
|
||||||
" max_concurrent_runs=4,\n",
|
" max_concurrent_runs=MAX_CONCURRENT_RUNS,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -2119,7 +2136,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"#RunDetails(hyperdrive_run).show()"
|
"RunDetails(hyperdrive_run).show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -2168,13 +2185,22 @@
|
||||||
"source": [
|
"source": [
|
||||||
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
|
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
|
||||||
"best_run_metrics = best_run.get_metrics()\n",
|
"best_run_metrics = best_run.get_metrics()\n",
|
||||||
"if AZUREML_VERBOSE:\n",
|
"print(\n",
|
||||||
" print(best_run)\n",
|
" \"Best Run:\\n Validation loss: {0:.5f} \\n Learning rate: {1:.5f} \\n\".format(\n",
|
||||||
" print(\n",
|
" best_run_metrics[\"min_val_loss\"], best_run_metrics[\"learning_rate\"]\n",
|
||||||
" \"Best Run:\\n Validation loss: {0:.5f} \\n Learning rate: {1:.5f} \\n\".format(\n",
|
" )\n",
|
||||||
" best_run_metrics[\"best_val_loss\"][-1], best_run_metrics[\"lr\"]\n",
|
")"
|
||||||
" )\n",
|
]
|
||||||
" )"
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Persist properties of the run so we can access the logged metrics later\n",
|
||||||
|
"sb.glue(\"min_val_loss\", best_run_metrics['min_val_loss'])\n",
|
||||||
|
"sb.glue(\"learning_rate\", best_run_metrics['learning_rate'])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -2197,10 +2223,11 @@
|
||||||
"name": "minxia"
|
"name": "minxia"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"celltoolbar": "Tags",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python (nlp_cpu)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "nlp_cpu"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
|
|
|
@ -595,6 +595,7 @@ def train(config, data_folder, learning_rate=0.0001, max_epoch=None):
|
||||||
)
|
)
|
||||||
if training_complete:
|
if training_complete:
|
||||||
mlflow.log_metric("min_val_loss", float(min_val_loss))
|
mlflow.log_metric("min_val_loss", float(min_val_loss))
|
||||||
|
mlflow.log_metric("learning_rate", learning_rate)
|
||||||
break
|
break
|
||||||
|
|
||||||
logging.info("Evaluating on NLI")
|
logging.info("Evaluating on NLI")
|
||||||
|
@ -633,10 +634,18 @@ if __name__ == "__main__":
|
||||||
help="Limit training to specified number of epochs.",
|
help="Limit training to specified number of epochs.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--max_epoch",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="Limit training to specified number of epochs.",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
data_path = args.data_folder
|
data_path = args.data_folder
|
||||||
lr = args.learning_rate
|
lr = args.learning_rate
|
||||||
|
|
||||||
config_file_path = args.config
|
config_file_path = args.config
|
||||||
|
max_epoch = args.max_epoch
|
||||||
config_obj = read_config(config_file_path)
|
config_obj = read_config(config_file_path)
|
||||||
train(config_obj, data_path, lr)
|
train(config_obj, data_path, lr, max_epoch)
|
||||||
|
|
|
@ -50,6 +50,9 @@ def notebooks():
|
||||||
"gensen_local": os.path.join(
|
"gensen_local": os.path.join(
|
||||||
folder_notebooks, "sentence_similarity", "gensen_local.ipynb"
|
folder_notebooks, "sentence_similarity", "gensen_local.ipynb"
|
||||||
),
|
),
|
||||||
|
"gensen_azureml": os.path.join(
|
||||||
|
folder_notebooks, "sentence_similarity", "gensen_aml_deep_dive.ipynb"
|
||||||
|
),
|
||||||
}
|
}
|
||||||
return paths
|
return paths
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче