add gensen aml notebook testing
This commit is contained in:
Родитель
08d3aa2eb2
Коммит
27fd2c4c90
|
@ -82,6 +82,8 @@
|
|||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"import shutil\n",
|
||||
"import papermill as pm\n",
|
||||
"import scrapbook as sb\n",
|
||||
"\n",
|
||||
"sys.path.append(\"../../\")\n",
|
||||
"from utils_nlp.dataset import snli, preprocess, Split\n",
|
||||
|
@ -116,12 +118,26 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Model configuration\n",
|
||||
"NROWS = None\n",
|
||||
"CACHE_DIR = \"./temp\"\n",
|
||||
"AZUREML_CONFIG_PATH = \"./.azureml\"\n",
|
||||
"AZUREML_VERBOSE = False # Prints verbose azureml logs when True"
|
||||
"AZUREML_VERBOSE = False # Prints verbose azureml logs when True\n",
|
||||
"MAX_EPOCH = None\n",
|
||||
"ENTRY_SCRIPT = \"utils_nlp/gensen/gensen_train.py\"\n",
|
||||
"TRAIN_SCRIPT = \"gensen_train.py\"\n",
|
||||
"CONFIG_PATH = \"gensen_config.json\"\n",
|
||||
"EXPERIMENT_NAME = \"NLP-SS-GenSen-deepdive\"\n",
|
||||
"UTIL_NLP_PATH = \"../../utils_nlp\"\n",
|
||||
"MAX_TOTAL_RUNS = 8\n",
|
||||
"MAX_CONCURRENT_RUNS = 4"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -213,9 +229,9 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"data_dir = os.path.join(CACHE_DIR, \"data\")\n",
|
||||
"train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN)\n",
|
||||
"dev = snli.load_pandas_df(data_dir, file_split=Split.DEV)\n",
|
||||
"test = snli.load_pandas_df(data_dir, file_split=Split.TEST)"
|
||||
"train = snli.load_pandas_df(data_dir, file_split=Split.TRAIN, nrows=NROWS)\n",
|
||||
"dev = snli.load_pandas_df(data_dir, file_split=Split.DEV, nrows=NROWS)\n",
|
||||
"test = snli.load_pandas_df(data_dir, file_split=Split.TEST, nrows=NROWS)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -749,7 +765,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"_ = shutil.copytree(\"../../utils_nlp\", utils_folder)"
|
||||
"_ = shutil.copytree(UTIL_NLP_PATH, utils_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -758,8 +774,8 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"_ = shutil.copy(\"gensen_train.py\", os.path.join(utils_folder, \"gensen\"))\n",
|
||||
"_ = shutil.copy(\"gensen_config.json\", os.path.join(utils_folder, \"gensen\"))"
|
||||
"_ = shutil.copy(TRAIN_SCRIPT, os.path.join(utils_folder, \"gensen\"))\n",
|
||||
"_ = shutil.copy(CONFIG_PATH, os.path.join(utils_folder, \"gensen\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -790,13 +806,14 @@
|
|||
"script_params = {\n",
|
||||
" \"--config\": \"utils_nlp/gensen/gensen_config.json\",\n",
|
||||
" \"--data_folder\": ws.get_default_datastore().as_mount(),\n",
|
||||
" \"--max_epoch\": MAX_EPOCH,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"estimator = PyTorch(\n",
|
||||
" source_directory=project_folder,\n",
|
||||
" script_params=script_params,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script=\"utils_nlp/gensen/gensen_train.py\",\n",
|
||||
" entry_script=ENTRY_SCRIPT,\n",
|
||||
" node_count=2,\n",
|
||||
" process_count_per_node=1,\n",
|
||||
" distributed_training=MpiConfiguration(),\n",
|
||||
|
@ -842,7 +859,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = \"pytorch-gensen\"\n",
|
||||
"experiment_name = EXPERIMENT_NAME\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
|
@ -2029,7 +2046,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"_ = run.wait_for_completion(show_output=True) # Block until the script has completed training."
|
||||
"_ = run.wait_for_completion(show_output=AZUREML_VERBOSE) # Block until the script has completed training."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2084,8 +2101,8 @@
|
|||
" policy=early_termination_policy,\n",
|
||||
" primary_metric_name=\"min_val_loss\",\n",
|
||||
" primary_metric_goal=PrimaryMetricGoal.MINIMIZE,\n",
|
||||
" max_total_runs=8,\n",
|
||||
" max_concurrent_runs=4,\n",
|
||||
" max_total_runs=MAX_TOTAL_RUNS,\n",
|
||||
" max_concurrent_runs=MAX_CONCURRENT_RUNS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
@ -2119,7 +2136,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#RunDetails(hyperdrive_run).show()"
|
||||
"RunDetails(hyperdrive_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2168,13 +2185,22 @@
|
|||
"source": [
|
||||
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
|
||||
"best_run_metrics = best_run.get_metrics()\n",
|
||||
"if AZUREML_VERBOSE:\n",
|
||||
" print(best_run)\n",
|
||||
" print(\n",
|
||||
" \"Best Run:\\n Validation loss: {0:.5f} \\n Learning rate: {1:.5f} \\n\".format(\n",
|
||||
" best_run_metrics[\"best_val_loss\"][-1], best_run_metrics[\"lr\"]\n",
|
||||
" )\n",
|
||||
" )"
|
||||
"print(\n",
|
||||
" \"Best Run:\\n Validation loss: {0:.5f} \\n Learning rate: {1:.5f} \\n\".format(\n",
|
||||
" best_run_metrics[\"min_val_loss\"], best_run_metrics[\"learning_rate\"]\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Persist properties of the run so we can access the logged metrics later\n",
|
||||
"sb.glue(\"min_val_loss\", best_run_metrics['min_val_loss'])\n",
|
||||
"sb.glue(\"learning_rate\", best_run_metrics['learning_rate'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2197,10 +2223,11 @@
|
|||
"name": "minxia"
|
||||
}
|
||||
],
|
||||
"celltoolbar": "Tags",
|
||||
"kernelspec": {
|
||||
"display_name": "Python (nlp_cpu)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "nlp_cpu"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
|
|
@ -595,6 +595,7 @@ def train(config, data_folder, learning_rate=0.0001, max_epoch=None):
|
|||
)
|
||||
if training_complete:
|
||||
mlflow.log_metric("min_val_loss", float(min_val_loss))
|
||||
mlflow.log_metric("learning_rate", learning_rate)
|
||||
break
|
||||
|
||||
logging.info("Evaluating on NLI")
|
||||
|
@ -633,10 +634,18 @@ if __name__ == "__main__":
|
|||
help="Limit training to specified number of epochs.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--max_epoch",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Limit training to specified number of epochs.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
data_path = args.data_folder
|
||||
lr = args.learning_rate
|
||||
|
||||
config_file_path = args.config
|
||||
max_epoch = args.max_epoch
|
||||
config_obj = read_config(config_file_path)
|
||||
train(config_obj, data_path, lr)
|
||||
train(config_obj, data_path, lr, max_epoch)
|
||||
|
|
|
@ -50,6 +50,9 @@ def notebooks():
|
|||
"gensen_local": os.path.join(
|
||||
folder_notebooks, "sentence_similarity", "gensen_local.ipynb"
|
||||
),
|
||||
"gensen_azureml": os.path.join(
|
||||
folder_notebooks, "sentence_similarity", "gensen_aml_deep_dive.ipynb"
|
||||
),
|
||||
}
|
||||
return paths
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче