Rename tests and fixture keys to match their notebook file names

2019-09-27 16:52:13 +01:00 · 2019-09-27 16:52:13 +01:00 · 6926b71a2a
--- a/examples/question_answering/bidaf_aml_deep_dive.ipynb
+++ b/examples/question_answering/bidaf_aml_deep_dive.ipynb
@ -16,7 +16,7 @@
    "# BiDAF Model Deep Dive on AzureML"
   ]
  },
-    {
+  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
@ -1002,7 +1002,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.8"
  }
 },
 "nbformat": 4,
--- a/examples/sentence_similarity/bert_senteval.ipynb
+++ b/examples/sentence_similarity/bert_senteval.ipynb
@ -7,7 +7,7 @@
    "# Parallel Experimentation with BERT on AzureML"
   ]
  },
-    {
+  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
@ -34,9 +34,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "System version: 3.6.8 |Anaconda, Inc.| (default, Dec 30 2018, 01:22:34) \n",
+      "[GCC 7.3.0]\n",
+      "AzureML version: 1.0.57\n"
+     ]
+    }
+   ],
   "source": [
    "import os\n",
    "import sys\n",
@ -49,6 +59,7 @@
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
+    "import azureml\n",
    "from azureml.core import Experiment\n",
    "from azureml.data.data_reference import DataReference\n",
    "from azureml.train.dnn import PyTorch\n",
@ -58,7 +69,11 @@
    "from utils_nlp.azureml.azureml_utils import get_or_create_workspace, get_or_create_amlcompute\n",
    "from utils_nlp.models.bert.common import Language, Tokenizer\n",
    "from utils_nlp.models.bert.sequence_encoding import BERTSentenceEncoder, PoolingStrategy\n",
-    "from utils_nlp.eval.senteval import SentEvalConfig"
+    "from utils_nlp.eval.senteval import SentEvalConfig\n",
+    "\n",
+    "%matplotlib inline\n",
+    "print(\"System version: {}\".format(sys.version))\n",
+    "print(\"AzureML version: {}\".format(azureml.core.VERSION))"
   ]
  },
  {
@ -72,6 +87,12 @@
   "outputs": [],
   "source": [
    "# azureml config\n",
+    "# NOTE: never commit real Azure identifiers (subscription id, resource\n",
+    "# group, workspace name) to source control. Fill in the placeholders\n",
+    "# below locally, or inject the values at run time (for example as\n",
+    "# papermill parameters).\n",
+    "\n",
+    "\n",
    "subscription_id = \"YOUR_SUBSCRIPTION_ID\"\n",
    "resource_group = \"YOUR_RESOURCE_GROUP_NAME\"  \n",
    "workspace_name = \"YOUR_WORKSPACE_NAME\"  \n",
@ -627,6 +648,18 @@
    "Here we aggregate the outputs from each SentEval experiment to plot the distribution of Pearson correlations reported across the different encodings. We can see that for the STS Benchmark downstream task, the first layer achieves the highest Pearson correlation on the test dataset. As suggested in [bert-as-a-service](https://github.com/hanxiao/bert-as-service), this can be interpreted as a representation that is closer to the original word embedding."
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = []  # unpickled output of each SentEval experiment, in file-name order\n",
+    "for f in sorted(glob.glob(os.path.join(CACHE_DIR, \"outputs\", \"*.pkl\"))):\n",
+    "    with open(f, \"rb\") as fh:  # close each file promptly instead of leaking the handle\n",
+    "        results.append(pickle.load(fh))  # only unpickle files produced by your own runs"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 18,
@ -656,34 +689,28 @@
    }
   ],
   "source": [
-    "%matplotlib inline\n",
+    "if len(results) == len(EXP_PARAMS[\"layer_index\"]) * len(EXP_PARAMS[\"pooling_strategy\"]):  # reshape below needs a full grid\n",
+    "    df = pd.DataFrame(  # rows: pooling strategies, columns: layer indices\n",
+    "        np.reshape(\n",
+    "            [r[\"STSBenchmark\"][\"pearson\"] for r in results],\n",
+    "            (len(EXP_PARAMS[\"layer_index\"]), len(EXP_PARAMS[\"pooling_strategy\"])),\n",
+    "        ).T,\n",
+    "        index=[s.value for s in EXP_PARAMS[\"pooling_strategy\"]],\n",
+    "        columns=EXP_PARAMS[\"layer_index\"],\n",
+    "    )\n",
+    "    fig, ax = plt.subplots(figsize=(10, 2))\n",
    "\n",
-    "results = [\n",
-    "    pickle.load(open(f, \"rb\"))\n",
-    "    for f in sorted(glob.glob(os.path.join(CACHE_DIR, \"outputs\", \"*.pkl\")))\n",
-    "]\n",
-    "\n",
-    "df = pd.DataFrame(\n",
-    "    np.reshape(\n",
-    "        [r[\"STSBenchmark\"][\"pearson\"] for r in results],\n",
-    "        (len(EXP_PARAMS[\"layer_index\"]), len(EXP_PARAMS[\"pooling_strategy\"])),\n",
-    "    ).T,\n",
-    "    index=[s.value for s in EXP_PARAMS[\"pooling_strategy\"]],\n",
-    "    columns=EXP_PARAMS[\"layer_index\"],\n",
-    ")\n",
-    "fig, ax = plt.subplots(figsize=(10, 2))\n",
-    "\n",
-    "sns.heatmap(df, annot=True, fmt=\".2g\", ax=ax).set_title(\n",
-    "    \"Pearson correlations of BERT sequence encodings on STS Benchmark\"\n",
-    ")"
+    "    sns.heatmap(df, annot=True, fmt=\".2g\", ax=ax).set_title(\n",
+    "        \"Pearson correlations of BERT sequence encodings on STS Benchmark\"\n",
+    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python (nlp_cpu)",
+   "display_name": "Python (nlp_gpu)",
   "language": "python",
-   "name": "nlp_cpu"
+   "name": "nlp_gpu"
  },
  "language_info": {
   "codemirror_mode": {
@ -695,7 +722,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.4"
+   "version": "3.6.8"
  }
 },
 "nbformat": 4,
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -37,10 +37,10 @@ def notebooks():
        ),
        "bert_encoder": os.path.join(folder_notebooks, "sentence_similarity", "bert_encoder.ipynb"),
        "gensen_local": os.path.join(folder_notebooks, "sentence_similarity", "gensen_local.ipynb"),
-        "gensen_azureml": os.path.join(
+        "gensen_aml_deep_dive": os.path.join(
            folder_notebooks, "sentence_similarity", "gensen_aml_deep_dive.ipynb"
        ),
-        "similarity_automl_local": os.path.join(
+        "automl_local_deployment_aci": os.path.join(
            folder_notebooks, "sentence_similarity", "automl_local_deployment_aci.ipynb"
        ),
        "automl_with_pipelines_deployment_aks": os.path.join(
--- a/tests/integration/test_notebooks_sentence_similarity.py
+++ b/tests/integration/test_notebooks_sentence_similarity.py
@ -33,7 +33,6 @@ def baseline_results():
    }


-
@pytest.mark.integration
 def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):
    notebook_path = notebooks["similarity_embeddings_baseline"]
@ -42,10 +41,12 @@ def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):
    for key, value in baseline_results.items():
        assert results[key] == pytest.approx(value, abs=ABS_TOL)

-        
+
@pytest.mark.gpu
@pytest.mark.integration
-@pytest.mark.skip(reason="push for release, no horovod installation automation or documentation yet")
+@pytest.mark.skip(
+    reason="push for release, no horovod installation automation or documentation yet"
+)
 def test_gensen_local(notebooks):
    notebook_path = notebooks["gensen_local"]
    pm.execute_notebook(
@ -119,10 +120,10 @@ def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):
@pytest.mark.usefixtures("teardown_service")
@pytest.mark.integration
@pytest.mark.azureml
-def test_automl_local_runs(
+def test_automl_local_deployment_aci(
    notebooks, subscription_id, resource_group, workspace_name, workspace_region
 ):
-    notebook_path = notebooks["similarity_automl_local"]
+    notebook_path = notebooks["automl_local_deployment_aci"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
@ -143,9 +144,11 @@ def test_automl_local_runs(

@pytest.mark.integration
@pytest.mark.azureml
-@pytest.mark.skip(reason="push for release, no horovod installation automation or documentation yet")
-def test_similarity_gensen_azureml_runs(notebooks):
-    notebook_path = notebooks["gensen_azureml"]
+@pytest.mark.skip(
+    reason="push for release, no horovod installation automation or documentation yet"
+)
+def test_gensen_aml_deep_dive(notebooks):
+    notebook_path = notebooks["gensen_aml_deep_dive"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,