tweaks to challenge notebook
Parent: cbcc07087e
Commit: b26a538a06
@@ -13,6 +13,7 @@ dependencies:
  - torchvision=0.14.0
  - tqdm=4.62.3
  - ipykernel=6.15.2
  - jupyter=1.0.0
  - pip:
    - thop==0.1.1.post2209072238
    - timm==0.6.12
@@ -13,41 +13,35 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"First, we need a local copy of the ORBIT dataset. If you already have a copy of the data, you can skip this step!\n",
"\n",
"In this script, we will download a local copy of the validation data only, where each image is 224x224 pixels. This will take up 4.3GB of disk space. Note, the validation data comes from 6 validation users and is used here as a starter task. For the main Challenge, you will need to use the test data which comes from a different set of 17 test users. \n",
"In this script, we will download a local copy of the validation data (already resized to 224x224 frames) as well as extra frame annotations (e.g. object bounding boxes, quality issues) for the train, validation and test data. This will take ~4.3GB of disk space. Note, the validation data comes from 6 validation users and is used here as a starter task. For the main Challenge, you will need to use the test data which comes from a different set of 17 test users. \n",
"\n",
"To download the full dataset, you can use [download_pretrained_dataset.py](scripts/download_pretrained_dataset.py). The full dataset takes up 83GB in full size (1080x1080), 54GB for the 224x224 version and 17GB for the 84x84 version."
"To download the full dataset, you can use [download_pretrained_dataset.py](scripts/download_pretrained_dataset.py). The full dataset takes up 83GB (1080x1080 frames) or 54GB (224x224 frames)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset already saved at /hdd/data/orbitdataset/orbit_benchmark/orbit_benchmark_a_224/test.\n"
]
}
],
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"DATA_ROOT = \"/hdd/data/orbitdataset/orbit_benchmark/orbit_benchmark_a_224\"\n",
"DATA_SPLIT = \"test\"\n",
"DATA_ROOT = \"orbit_benchmark_224\" # note, we are downloading the validation set already resized to 224x224 frames\n",
"DATA_SPLIT = \"validation\"\n",
"validation_path = Path(DATA_ROOT, DATA_SPLIT)\n",
"from pathlib import Path\n",
"annotation_path = Path(DATA_ROOT, 'annotations')\n",
"\n",
"# download validation split\n",
"if not validation_path.is_dir():\n",
"    validation_path.parent.mkdir(parents=True, exist_ok=True)\n",
"    print(\"Downloading validation.zip...\")\n",
"    !wget -O validation.zip https://city.figshare.com/ndownloader/files/28368351\n",
"    !wget -O validation.zip https://city.figshare.com/ndownloader/files/28368351 \n",
"\n",
"    print(\"Unzipping validation.zip...\")\n",
"    !unzip -q validation.zip -d {DATA_ROOT}\n",
@@ -58,7 +52,24 @@
"        print(f\"Dataset ready at {validation_path}.\")\n",
"        # You can now delete the zip file.\n",
"else:\n",
"    print(f\"Dataset already saved at {validation_path}.\")"
"    print(f\"Dataset already saved at {validation_path}.\")\n",
"\n",
"# download (train, validation and test) annotations\n",
"if not annotation_path.is_dir():\n",
"    annotation_path.parent.mkdir(parents=True, exist_ok=True)\n",
"    print(\"Downloading orbit_extra_annotations.zip...\")\n",
"    !wget -O orbit_extra_annotations.zip https://github.com/microsoft/ORBIT-Dataset/raw/dev/data/orbit_extra_annotations.zip\n",
"\n",
"    print(\"Unzipping orbit_extra_annotations.zip...\")\n",
"    !unzip -q orbit_extra_annotations.zip -d {annotation_path}\n",
"\n",
"    if not annotation_path.is_dir():\n",
"        raise ValueError(f\"Path {annotation_path} is not a directory.\")\n",
"    else:\n",
"        print(f\"Annotations ready at {annotation_path}.\")\n",
"        # You can now delete the zip file.\n",
"else:\n",
"    print(f\"Annotations already saved at {annotation_path}.\")"
]
},
{
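After unzipping, a quick sanity check on the extra annotations can save debugging later. The following is an illustrative sketch only: it assumes the annotations unzip as JSON files under annotation_path; the exact directory layout and JSON schema are not shown in this diff.

# Sanity-check the downloaded annotations (sketch; layout/schema are assumptions).
import json
from pathlib import Path

annotation_path = Path("orbit_benchmark_224", "annotations")
json_files = sorted(annotation_path.rglob("*.json"))
print(f"Found {len(json_files)} annotation files under {annotation_path}.")

if json_files:
    with open(json_files[0]) as f:
        example = json.load(f)
    # Print only the top-level keys so we don't dump a huge file.
    print(f"Example file: {json_files[0].name}, top-level keys: {list(example)[:5]}")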
@@ -70,45 +81,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating data queue...\n",
"Filtering context frames ['no_object_not_present_issue'].\n",
"Filtering target frames ['no_object_not_present_issue'].\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading test users from /hdd/data/orbitdataset/orbit_benchmark/orbit_benchmark_a_224/test: 100%|██████████| 17/17 [00:07<00:00, 2.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Min context frames/obj: 561 (P642 'tumble dryer')\n",
"Min target frames/obj: 76 (P198 'ipod in wallet')\n",
"Max context frames/obj: 9095 (P421 'table fan')\n",
"Max target frames/obj: 3500 (P901 'house door')\n",
"Loaded data summary: 17 users, 158 objects, 1195 videos (#context: 898, #target: 297)\n",
"Created data queue, queue uses 2 workers.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"outputs": [],
"source": [
"from data.queues import UserEpisodicDatasetQueue\n",
"\n",
@@ -126,8 +101,8 @@
"    frame_size=224, # width and height of frame \n",
"    frame_norm_method='imagenet_inception', # normalize frames using imagenet inception statistics since we're using ViT-B-32 pretrained on ImageNet-21K (see below).\n",
"    annotations_to_load=[], # do not load any frame annotations\n",
"    filter_by_annotations=[['no_object_not_present_issue'], ['no_object_not_present_issue']], # only includes context and target frames with the 'no_object_not_present_issue' tag\n",
"    num_tasks=50, # sample 50 tasks per user\n",
"    filter_by_annotations=[[], ['no_object_not_present_issue']], # only includes target frames with the 'object_not_present_issue=False' tag. Note, context frames are not filtered as extra annotations cannot be used for personalisation.\n",
"    num_tasks=10, # sample 10 tasks per user. Note, this is just for the starter task. The full challenge will require you to sample 50 tasks per user.\n",
"    test_mode=True, # sample test (rather than train) tasks\n",
"    with_cluster_labels=False, # use user's personalised object names as labels, rather than broader object categories\n",
"    with_caps=False, # do not impose any sampling caps\n",
@@ -148,17 +123,9 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checkpoint already exists at orbit_pretrained_checkpoints/orbit_cluve_protonets_cosine_vit_b_32_224_lite.pth.\n"
]
}
],
"outputs": [],
"source": [
"checkpoint_path = Path(\"orbit_pretrained_checkpoints\", \"orbit_cluve_protonets_cosine_vit_b_32_224_lite.pth\")\n",
"\n",
@@ -173,17 +140,9 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Instance of SingleStepFewShotRecogniser created on device cuda:0.\n"
]
}
],
"outputs": [],
"source": [
"import torch\n",
"from model.few_shot_recognisers import SingleStepFewShotRecogniser\n",
@@ -220,37 +179,19 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We are now going to run our data through our model. We go through each task (50 tasks per user, since we specified `num_tasks = 50` above) and use the task's context clips to create a personalized model. We then evaluate the personalized model on each frame in the task's target videos.\n",
"We are now going to run our data through our model. We go through each task (10 tasks per user, since we specified `num_tasks = 10` above) and use the task's context clips to create a personalized model for that user's task. We then evaluate the personalized model on each frame in the task's target videos.\n",
"\n",
"The results for each task will be saved to a JSON file (this is what should be submitted to the evaluation server) and the aggregate stats will be printed to the console."
"The results for each task will be saved to a JSON file (this is what should be submitted to the evaluation server) and the aggregate stats will be printed to the console. You should get a frame accuracy of 83.05 +/- 1.73% - see `Average over all videos (leaderboard metric)`."
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running evaluation...\n"
]
},
{
"ename": "ValueError",
"evalue": "Unknown format code 'd' for object of type 'float'",
"output_type": "error",
"traceback": [
"---------------------------------------------------------------------------",
"ValueError                                Traceback (most recent call last)",
"/tmp/ipykernel_89556/704779747.py in <module>\n     54         evaluator.set_current_user(task[\"task_id\"])\n     55         _,_,_,current_video_stats = evaluator.get_mean_stats(current_user=True)\n---> 56         print(f\"User {task['task_id']} ({evaluator.current_user+1}/{len(data_queue)}) {get_stats_str(current_video_stats)}, avg #context clips/task: {np.mean(num_context_clips_per_task):d}, avg #target clips/task: {np.mean(num_target_clips_per_task):d}\")\n     57     if (step+1) < num_test_tasks:\n     58         num_context_clips_per_task = []\n",
"ValueError: Unknown format code 'd' for object of type 'float'"
]
}
],
"outputs": [],
"source": [
"import numpy as np\n",
"from tqdm.notebook import tqdm\n",
"from typing import Dict, Tuple\n",
"from data.utils import attach_frame_history\n",
"from utils.eval_metrics import TestEvaluator\n",
@@ -260,18 +201,17 @@
"\n",
"metrics = ['frame_acc']\n",
"evaluator = TestEvaluator(metrics, output_dir)\n",
"num_test_tasks = data_queue.num_users * data_queue.num_tasks\n",
"\n",
"def get_stats_str(stats: Dict[str, Tuple[float, float]], dps: int=2) -> str:\n",
"    stats_str = \"\\t\".join([f\"{metric}: {stats[metric][0]*100:.{dps}f} ({stats[metric][1]*100:.{dps}f})\" for metric in metrics])\n",
"    return stats_str\n",
"\n",
"print(\"Running evaluation...\")\n",
"num_context_clips_per_task = []\n",
"num_target_clips_per_task = []\n",
"num_test_tasks = data_queue.num_users * data_queue.num_tasks\n",
"with torch.no_grad():\n",
"    for step, task in enumerate(data_queue.get_tasks()):\n",
"        context_clips = task[\"context_clips\"].to(device) # Torch tensor of shape: (N, clip_length, C, H, W), dtype float32\n",
"    for step, task in enumerate(tqdm(data_queue.get_tasks(), desc=f\"Running evaluation on {data_queue.num_tasks} tasks per test user\", total=num_test_tasks)):\n",
"        context_clips = task[\"context_clips\"].to(device) # Torch tensor of shape: (N, clip_length, C, H, W), dtype float32\n",
"        context_labels = task[\"context_labels\"].to(device) # Torch tensor of shape: (N), dtype int64\n",
"        object_list = task[\"object_list\"] # List of str of length num_objects\n",
"        num_context_clips = len(context_clips)\n",
@@ -305,7 +245,7 @@
"        if (step+1) % data_queue.num_tasks == 0:\n",
"            evaluator.set_current_user(task[\"task_id\"])\n",
"            _,_,_,current_video_stats = evaluator.get_mean_stats(current_user=True)\n",
"            print(f\"User {task['task_id']} ({evaluator.current_user+1}/{len(data_queue)}) {get_stats_str(current_video_stats)}, avg #context clips/task: {np.mean(num_context_clips_per_task):.0f}, avg #target clips/task: {np.mean(num_target_clips_per_task):.0f}\")\n",
"            tqdm.write(f\"User {task['task_id']} ({evaluator.current_user+1}/{len(data_queue)}) {get_stats_str(current_video_stats)}, avg # context clips/task: {np.mean(num_context_clips_per_task):.0f}, avg # target clips/task: {np.mean(num_target_clips_per_task):.0f}\")\n",
"            if (step+1) < num_test_tasks:\n",
"                num_context_clips_per_task = []\n",
"                num_target_clips_per_task = []\n",
@@ -321,7 +261,7 @@
"print(f\"Average over all users: {get_stats_str(stats_per_user)}\")\n",
"print(f\"Average over all objects: {get_stats_str(stats_per_obj)}\")\n",
"print(f\"Average over all tasks: {get_stats_str(stats_per_task)}\")\n",
"print(f\"Average over all videos: {get_stats_str(stats_per_video)}\")\n",
"print(f\"Average over all videos (leaderboard metric): {get_stats_str(stats_per_video)}\")\n",
"evaluator.save()\n",
"print(f\"Results saved to {evaluator.json_results_path}.\")"
]
@@ -6,3 +6,4 @@ plotly-orca==1.3.1
tqdm==4.62.3
timm==0.6.12
ipykernel==6.15.2
jupyter==1.0.0