Add GenSen AML notebook testing

This commit is contained in:
Liqun Shao 2019-07-25 18:06:22 -04:00
Родитель 08d3aa2eb2
Коммит 27fd2c4c90
3 изменённых файла: 63 добавления и 24 удаления

Просмотреть файл

@ -82,6 +82,8 @@
"import os\n",
"import pandas as pd\n",
"import shutil\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"\n",
"sys.path.append(\"../../\")\n",
"from utils_nlp.dataset import snli, preprocess, Split\n",
@ -116,12 +118,26 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# Model configuration\n",
"NROWS = None\n",
"CACHE_DIR = \"./temp\"\n",
"AZUREML_CONFIG_PATH = \"./.azureml\"\n",
"AZUREML_VERBOSE = False # Prints verbose azureml logs when True"
"AZUREML_VERBOSE = False # Prints verbose azureml logs when True\n",
"MAX_EPOCH = None\n",
"ENTRY_SCRIPT = \"utils_nlp/gensen/gensen_train.py\"\n",
"TRAIN_SCRIPT = \"gensen_train.py\"\n",
"CONFIG_PATH = \"gensen_config.json\"\n",
"EXPERIMENT_NAME = \"NLP-SS-GenSen-deepdive\"\n",
"UTIL_NLP_PATH = \"../../utils_nlp\"\n",
"MAX_TOTAL_RUNS = 8\n",
"MAX_CONCURRENT_RUNS = 4"
]
},
{
@ -213,9 +229,9 @@
"outputs": [],
"source": [
"data_dir = os.path.join(CACHE_DIR, \"data\")\n",
"train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN)\n",
"dev = snli.load_pandas_df(data_dir, file_split=Split.DEV)\n",
"test = snli.load_pandas_df(data_dir, file_split=Split.TEST)"
"train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN, nrows=NROWS)\n",
"dev = snli.load_pandas_df(data_dir, file_split=Split.DEV, nrows=NROWS)\n",
"test = snli.load_pandas_df(data_dir, file_split=Split.TEST, nrows=NROWS)"
]
},
{
@ -749,7 +765,7 @@
"metadata": {},
"outputs": [],
"source": [
"_ = shutil.copytree(\"../../utils_nlp\", utils_folder)"
"_ = shutil.copytree(UTIL_NLP_PATH, utils_folder)"
]
},
{
@ -758,8 +774,8 @@
"metadata": {},
"outputs": [],
"source": [
"_ = shutil.copy(\"gensen_train.py\", os.path.join(utils_folder, \"gensen\"))\n",
"_ = shutil.copy(\"gensen_config.json\", os.path.join(utils_folder, \"gensen\"))"
"_ = shutil.copy(TRAIN_SCRIPT, os.path.join(utils_folder, \"gensen\"))\n",
"_ = shutil.copy(CONFIG_PATH, os.path.join(utils_folder, \"gensen\"))"
]
},
{
@ -790,13 +806,14 @@
"script_params = {\n",
" \"--config\": \"utils_nlp/gensen/gensen_config.json\",\n",
" \"--data_folder\": ws.get_default_datastore().as_mount(),\n",
" \"--max_epoch\": MAX_EPOCH,\n",
"}\n",
"\n",
"estimator = PyTorch(\n",
" source_directory=project_folder,\n",
" script_params=script_params,\n",
" compute_target=compute_target,\n",
" entry_script=\"utils_nlp/gensen/gensen_train.py\",\n",
" entry_script=ENTRY_SCRIPT,\n",
" node_count=2,\n",
" process_count_per_node=1,\n",
" distributed_training=MpiConfiguration(),\n",
@ -842,7 +859,7 @@
"metadata": {},
"outputs": [],
"source": [
"experiment_name = \"pytorch-gensen\"\n",
"experiment_name = EXPERIMENT_NAME\n",
"experiment = Experiment(ws, name=experiment_name)"
]
},
@ -2029,7 +2046,7 @@
}
],
"source": [
"_ = run.wait_for_completion(show_output=True) # Block until the script has completed training."
"_ = run.wait_for_completion(show_output=AZUREML_VERBOSE) # Block until the script has completed training."
]
},
{
@ -2084,8 +2101,8 @@
" policy=early_termination_policy,\n",
" primary_metric_name=\"min_val_loss\",\n",
" primary_metric_goal=PrimaryMetricGoal.MINIMIZE,\n",
" max_total_runs=8,\n",
" max_concurrent_runs=4,\n",
" max_total_runs=MAX_TOTAL_RUNS,\n",
" max_concurrent_runs=MAX_CONCURRENT_RUNS,\n",
")"
]
},
@ -2119,7 +2136,7 @@
"metadata": {},
"outputs": [],
"source": [
"#RunDetails(hyperdrive_run).show()"
"RunDetails(hyperdrive_run).show()"
]
},
{
@ -2168,15 +2185,24 @@
"source": [
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
"best_run_metrics = best_run.get_metrics()\n",
"if AZUREML_VERBOSE:\n",
" print(best_run)\n",
"print(\n",
" \"Best Run:\\n Validation loss: {0:.5f} \\n Learning rate: {1:.5f} \\n\".format(\n",
" best_run_metrics[\"best_val_loss\"][-1], best_run_metrics[\"lr\"]\n",
" best_run_metrics[\"min_val_loss\"], best_run_metrics[\"learning_rate\"]\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist properties of the run so we can access the logged metrics later\n",
"sb.glue(\"min_val_loss\", best_run_metrics['min_val_loss'])\n",
"sb.glue(\"learning_rate\", best_run_metrics['learning_rate'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -2197,10 +2223,11 @@
"name": "minxia"
}
],
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python (nlp_cpu)",
"display_name": "Python 3",
"language": "python",
"name": "nlp_cpu"
"name": "python3"
},
"language_info": {
"codemirror_mode": {

Просмотреть файл

@ -595,6 +595,7 @@ def train(config, data_folder, learning_rate=0.0001, max_epoch=None):
)
if training_complete:
mlflow.log_metric("min_val_loss", float(min_val_loss))
mlflow.log_metric("learning_rate", learning_rate)
break
logging.info("Evaluating on NLI")
@ -633,10 +634,18 @@ if __name__ == "__main__":
help="Limit training to specified number of epochs.",
)
parser.add_argument(
"--max_epoch",
type=int,
default=None,
help="Limit training to specified number of epochs.",
)
args = parser.parse_args()
data_path = args.data_folder
lr = args.learning_rate
config_file_path = args.config
max_epoch = args.max_epoch
config_obj = read_config(config_file_path)
train(config_obj, data_path, lr)
train(config_obj, data_path, lr, max_epoch)

Просмотреть файл

@ -50,6 +50,9 @@ def notebooks():
"gensen_local": os.path.join(
folder_notebooks, "sentence_similarity", "gensen_local.ipynb"
),
"gensen_azureml": os.path.join(
folder_notebooks, "sentence_similarity", "gensen_aml_deep_dive.ipynb"
),
}
return paths