added notebook tests

2019-08-15 00:20:04 +00:00 · 2019-08-15 00:20:04 +00:00 · 90c49a6bb7
--- a/similarity/notebooks/12_fast_retrieval.ipynb
+++ b/similarity/notebooks/12_fast_retrieval.ipynb
@ -117,7 +117,10 @@
    "\n",
    "# Image reader configuration\n",
    "BATCH_SIZE = 16\n",
-    "IM_SIZE = 300"
+    "IM_SIZE = 300\n",
+    "\n",
+    "# Number of comparisons of nearest neighbor versus exhaustive search for accuracy computation\n",
+    "NUM_RANK_ITER = 100"
   ]
  },
  {
@ -263,7 +266,7 @@
    "valid_features_list /= np.linalg.norm(valid_features_list, axis=1)[:,None]\n",
    "\n",
    "# Build nearest neighbor object using the reference set\n",
-    "nn = NearestNeighbors(algorithm='auto', metric='euclidean', n_neighbors=100)\n",
+    "nn = NearestNeighbors(algorithm='auto', metric='euclidean', n_neighbors=min(100,len(valid_features_list)))\n",
    "nn.fit(valid_features_list)\n",
    "nn"
   ]
@ -439,7 +442,7 @@
   ],
   "source": [
    "ranks = []\n",
-    "for iter in tqdm(range(100)):\n",
+    "for iter in tqdm(range(NUM_RANK_ITER)):\n",
    "    \n",
    "    # Get random query image\n",
    "    query_im_path = str(np.random.choice(data.train_ds.items))\n",
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -103,6 +103,7 @@ def similarity_notebooks():
        "11": os.path.join(
            folder_notebooks, "11_exploring_hyperparameters.ipynb"
        ),
+        "12": os.path.join(folder_notebooks, "12_fast_retrieval.ipynb"),
    }
    return paths

--- a/tests/integration/similarity/test_integration_similarity_notebooks.py
+++ b/tests/integration/similarity/test_integration_similarity_notebooks.py
@ -42,4 +42,20 @@ def test_11_notebook_run(similarity_notebooks, tiny_ic_data_path):
        kernel_name=KERNEL_NAME,
    )
    nb_output = sb.read_notebook(OUTPUT_NOTEBOOK)
-    assert min(nb_output.scraps["ranks"].data) <= 30
+    assert min(nb_output.scraps["ranks"].data) <= 30
+
+
+@pytest.mark.notebooks
+@pytest.mark.linuxgpu
+def test_12_notebook_run(similarity_notebooks):
+    notebook_path = similarity_notebooks["12"]
+    pm.execute_notebook(
+        notebook_path,
+        OUTPUT_NOTEBOOK,
+        parameters=dict(PM_VERSION=pm.__version__),
+        kernel_name=KERNEL_NAME,
+    )
+
+    nb_output = sb.read_notebook(OUTPUT_NOTEBOOK)
+    assert nb_output.scraps["median_rank"].data <= 5
+    assert nb_output.scraps["feature_dimension"].data == 512
--- a/tests/unit/similarity/test_similarity_notebooks.py
+++ b/tests/unit/similarity/test_similarity_notebooks.py
@ -65,3 +65,18 @@ def test_11_notebook_run(similarity_notebooks, tiny_ic_data_path):
        ),
        kernel_name=KERNEL_NAME,
    )
+
+@pytest.mark.notebooks
+def test_12_notebook_run(similarity_notebooks, tiny_ic_data_path):
+    notebook_path = similarity_notebooks["12"]
+    pm.execute_notebook(
+        notebook_path,
+        OUTPUT_NOTEBOOK,
+        parameters=dict(
+            PM_VERSION=pm.__version__,
+            DATA_PATH=tiny_ic_data_path,
+            IM_SIZE=30,
+            NUM_RANK_ITER=5
+        ),
+        kernel_name=KERNEL_NAME,
+    )
--- a/utils_cv/classification/model.py
+++ b/utils_cv/classification/model.py
@ -160,7 +160,7 @@ def get_preds(
    """

    # Note: In Fastai, for DatasetType.Train, only the output of complete minibatches is computed. Ie if one has 101 images, 
-    # and uses a minibatch size of 10, then len(feats) is 96 and not 101. For DatasetType.Valid this is not the case,
+    # and uses a minibatch size of 16, then len(feats) is 96 and not 101. For DatasetType.Valid this is not the case,
    # and len(feats) is as expected 101. A way around this is to use DatasetType.Fix instead when referring to the training set.
    # See e.g. issue: https://forums.fast.ai/t/get-preds-returning-less-results-than-length-of-original-dataset/34148
    if dl == DatasetType.Train: