add gensen aml notebook testing

This commit is contained in:
Liqun Shao 2019-07-25 18:06:22 -04:00
Родитель 08d3aa2eb2
Коммит 27fd2c4c90
3 изменённых файлов: 63 добавлений и 24 удалений

Просмотреть файл

@ -82,6 +82,8 @@
"import os\n", "import os\n",
"import pandas as pd\n", "import pandas as pd\n",
"import shutil\n", "import shutil\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"\n", "\n",
"sys.path.append(\"../../\")\n", "sys.path.append(\"../../\")\n",
"from utils_nlp.dataset import snli, preprocess, Split\n", "from utils_nlp.dataset import snli, preprocess, Split\n",
@ -116,12 +118,26 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"metadata": {}, "metadata": {
"tags": [
"parameters"
]
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Model configuration\n",
"NROWS = None\n",
"CACHE_DIR = \"./temp\"\n", "CACHE_DIR = \"./temp\"\n",
"AZUREML_CONFIG_PATH = \"./.azureml\"\n", "AZUREML_CONFIG_PATH = \"./.azureml\"\n",
"AZUREML_VERBOSE = False # Prints verbose azureml logs when True" "AZUREML_VERBOSE = False # Prints verbose azureml logs when True\n",
"MAX_EPOCH = None\n",
"ENTRY_SCRIPT = \"utils_nlp/gensen/gensen_train.py\"\n",
"TRAIN_SCRIPT = \"gensen_train.py\"\n",
"CONFIG_PATH = \"gensen_config.json\"\n",
"EXPERIMENT_NAME = \"NLP-SS-GenSen-deepdive\"\n",
"UTIL_NLP_PATH = \"../../utils_nlp\"\n",
"MAX_TOTAL_RUNS = 8\n",
"MAX_CONCURRENT_RUNS = 4"
] ]
}, },
{ {
@ -213,9 +229,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"data_dir = os.path.join(CACHE_DIR, \"data\")\n", "data_dir = os.path.join(CACHE_DIR, \"data\")\n",
"train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN)\n", "train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN, nrows=NROWS)\n",
"dev = snli.load_pandas_df(data_dir, file_split=Split.DEV)\n", "dev = snli.load_pandas_df(data_dir, file_split=Split.DEV, nrows=NROWS)\n",
"test = snli.load_pandas_df(data_dir, file_split=Split.TEST)" "test = snli.load_pandas_df(data_dir, file_split=Split.TEST, nrows=NROWS)"
] ]
}, },
{ {
@ -749,7 +765,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"_ = shutil.copytree(\"../../utils_nlp\", utils_folder)" "_ = shutil.copytree(UTIL_NLP_PATH, utils_folder)"
] ]
}, },
{ {
@ -758,8 +774,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"_ = shutil.copy(\"gensen_train.py\", os.path.join(utils_folder, \"gensen\"))\n", "_ = shutil.copy(TRAIN_SCRIPT, os.path.join(utils_folder, \"gensen\"))\n",
"_ = shutil.copy(\"gensen_config.json\", os.path.join(utils_folder, \"gensen\"))" "_ = shutil.copy(CONFIG_PATH, os.path.join(utils_folder, \"gensen\"))"
] ]
}, },
{ {
@ -790,13 +806,14 @@
"script_params = {\n", "script_params = {\n",
" \"--config\": \"utils_nlp/gensen/gensen_config.json\",\n", " \"--config\": \"utils_nlp/gensen/gensen_config.json\",\n",
" \"--data_folder\": ws.get_default_datastore().as_mount(),\n", " \"--data_folder\": ws.get_default_datastore().as_mount(),\n",
" \"--max_epoch\": MAX_EPOCH,\n",
"}\n", "}\n",
"\n", "\n",
"estimator = PyTorch(\n", "estimator = PyTorch(\n",
" source_directory=project_folder,\n", " source_directory=project_folder,\n",
" script_params=script_params,\n", " script_params=script_params,\n",
" compute_target=compute_target,\n", " compute_target=compute_target,\n",
" entry_script=\"utils_nlp/gensen/gensen_train.py\",\n", " entry_script=ENTRY_SCRIPT,\n",
" node_count=2,\n", " node_count=2,\n",
" process_count_per_node=1,\n", " process_count_per_node=1,\n",
" distributed_training=MpiConfiguration(),\n", " distributed_training=MpiConfiguration(),\n",
@ -842,7 +859,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"experiment_name = \"pytorch-gensen\"\n", "experiment_name = EXPERIMENT_NAME\n",
"experiment = Experiment(ws, name=experiment_name)" "experiment = Experiment(ws, name=experiment_name)"
] ]
}, },
@ -2029,7 +2046,7 @@
} }
], ],
"source": [ "source": [
"_ = run.wait_for_completion(show_output=True) # Block until the script has completed training." "_ = run.wait_for_completion(show_output=AZUREML_VERBOSE) # Block until the script has completed training."
] ]
}, },
{ {
@ -2084,8 +2101,8 @@
" policy=early_termination_policy,\n", " policy=early_termination_policy,\n",
" primary_metric_name=\"min_val_loss\",\n", " primary_metric_name=\"min_val_loss\",\n",
" primary_metric_goal=PrimaryMetricGoal.MINIMIZE,\n", " primary_metric_goal=PrimaryMetricGoal.MINIMIZE,\n",
" max_total_runs=8,\n", " max_total_runs=MAX_TOTAL_RUNS,\n",
" max_concurrent_runs=4,\n", " max_concurrent_runs=MAX_CONCURRENT_RUNS,\n",
")" ")"
] ]
}, },
@ -2119,7 +2136,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"#RunDetails(hyperdrive_run).show()" "RunDetails(hyperdrive_run).show()"
] ]
}, },
{ {
@ -2168,13 +2185,22 @@
"source": [ "source": [
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n", "best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
"best_run_metrics = best_run.get_metrics()\n", "best_run_metrics = best_run.get_metrics()\n",
"if AZUREML_VERBOSE:\n", "print(\n",
" print(best_run)\n", " \"Best Run:\\n Validation loss: {0:.5f} \\n Learning rate: {1:.5f} \\n\".format(\n",
" print(\n", " best_run_metrics[\"min_val_loss\"], best_run_metrics[\"learning_rate\"]\n",
" \"Best Run:\\n Validation loss: {0:.5f} \\n Learning rate: {1:.5f} \\n\".format(\n", " )\n",
" best_run_metrics[\"best_val_loss\"][-1], best_run_metrics[\"lr\"]\n", ")"
" )\n", ]
" )" },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist properties of the run so we can access the logged metrics later\n",
"sb.glue(\"min_val_loss\", best_run_metrics['min_val_loss'])\n",
"sb.glue(\"learning_rate\", best_run_metrics['learning_rate'])"
] ]
}, },
{ {
@ -2197,10 +2223,11 @@
"name": "minxia" "name": "minxia"
} }
], ],
"celltoolbar": "Tags",
"kernelspec": { "kernelspec": {
"display_name": "Python (nlp_cpu)", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "nlp_cpu" "name": "python3"
}, },
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {

Просмотреть файл

@ -595,6 +595,7 @@ def train(config, data_folder, learning_rate=0.0001, max_epoch=None):
) )
if training_complete: if training_complete:
mlflow.log_metric("min_val_loss", float(min_val_loss)) mlflow.log_metric("min_val_loss", float(min_val_loss))
mlflow.log_metric("learning_rate", learning_rate)
break break
logging.info("Evaluating on NLI") logging.info("Evaluating on NLI")
@ -633,10 +634,18 @@ if __name__ == "__main__":
help="Limit training to specified number of epochs.", help="Limit training to specified number of epochs.",
) )
parser.add_argument(
"--max_epoch",
type=int,
default=None,
help="Limit training to specified number of epochs.",
)
args = parser.parse_args() args = parser.parse_args()
data_path = args.data_folder data_path = args.data_folder
lr = args.learning_rate lr = args.learning_rate
config_file_path = args.config config_file_path = args.config
max_epoch = args.max_epoch
config_obj = read_config(config_file_path) config_obj = read_config(config_file_path)
train(config_obj, data_path, lr) train(config_obj, data_path, lr, max_epoch)

Просмотреть файл

@ -50,6 +50,9 @@ def notebooks():
"gensen_local": os.path.join( "gensen_local": os.path.join(
folder_notebooks, "sentence_similarity", "gensen_local.ipynb" folder_notebooks, "sentence_similarity", "gensen_local.ipynb"
), ),
"gensen_azureml": os.path.join(
folder_notebooks, "sentence_similarity", "gensen_aml_deep_dive.ipynb"
),
} }
return paths return paths