FairMOT-01 (#553)
This commit is contained in:
Parent: aad3637bca
Commit: 68c9911e3c
@@ -20,7 +20,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization"
"## 00 Initialization"
]
},
{
@@ -45,22 +45,18 @@
"sys.path.append(\"../../\")\n",
"\n",
"import os\n",
"import os.path as osp\n",
"import time\n",
"from ipywidgets import Video\n",
"import matplotlib.pyplot as plt\n",
"from typing import Iterator\n",
"from pathlib import Path\n",
"from PIL import Image\n",
"from random import randrange\n",
"from typing import Tuple\n",
"import torch\n",
"import torchvision\n",
"from torchvision import transforms\n",
"import scrapbook as sb\n",
"\n",
"from ipywidgets import Video\n",
"from utils_cv.tracking.data import Urls\n",
"from utils_cv.tracking.dataset import TrackingDataset\n",
"from utils_cv.tracking.model import TrackingLearner\n",
"from utils_cv.tracking.model import TrackingLearner, write_video\n",
"\n",
"from utils_cv.common.data import data_path, download, unzip_url\n",
"from utils_cv.common.gpu import which_processor, is_windows\n",
"\n",
"# Change matplotlib backend so that plots are shown for windows\n",
@@ -115,10 +111,22 @@
}
],
"source": [
"EPOCHS = 2\n",
"EPOCHS = 1\n",
"LEARNING_RATE = 0.0001\n",
"BATCH_SIZE = 1\n",
"\n",
"SAVE_MODEL = True\n",
"FRAME_RATE = 30\n",
"\n",
"CONF_THRES = 0.3\n",
"TRACK_BUFFER = 300\n",
"IM_SIZE = (1080, 1920)\n",
"\n",
"TRAIN_DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)\n",
"EVAL_DATA_PATH = unzip_url(Urls.carcans_annotations_path, exist_ok=True)\n",
"\n",
"BASELINE_MODEL = \"./models/all_dla34_new.pth\"\n",
"FT_MODEL = \"./models/model_30.pth\"\n",
"\n",
"# train on the GPU or on the CPU, if a GPU is not available\n",
"device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
@@ -129,43 +137,20 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare Training Dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['labels_with_ids', '.ipynb_checkpoints', 'images']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DATA_PATH_TRAIN = \"./data/odFridgeObjects_FairMOTformat/\"\n",
"os.listdir(DATA_PATH_TRAIN)"
"## 01 Finetune a Pretrained Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Training Dataset"
"Initialize the training dataset."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
@@ -183,7 +168,7 @@
],
"source": [
"data_train = TrackingDataset(\n",
"    DATA_PATH_TRAIN,\n",
"    TRAIN_DATA_PATH,\n",
"    batch_size=BATCH_SIZE\n",
")"
]
@@ -192,12 +177,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Finetune a Pretrained Model"
"Initialize and load the model. We use the baseline FairMOT model, which can be downloaded [here](https://drive.google.com/file/d/1udpOPum8fJdoEQm6n0jsIgMMViOMFinu/view)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -209,10 +194,53 @@
}
],
"source": [
"tracker = TrackingLearner(data_train) \n",
"tracker = TrackingLearner(data_train, \"./models/fairmot_ft.pth\")\n",
"print(f\"Model: {type(tracker.model)}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading /home/jihon/computervision-recipes/scenarios/tracking/models/all_dla34.pth\n",
"loaded /home/jihon/computervision-recipes/scenarios/tracking/models/all_dla34.pth, epoch 10\n",
"Resumed optimizer with start lr 0.0001\n",
"===== Epoch: 11/11 =====\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/anaconda/envs/cv/lib/python3.7/site-packages/torch/nn/_reduction.py:43: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n",
"  warnings.warn(warning.format(ret))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"loss: 1.1128346400433464\n",
"hm_loss: 0.06353224289612051\n",
"wh_loss: 1.57920023114543\n",
"off_loss: 0.18636367223715702\n",
"id_loss: 0.8860541224528692\n",
"time: 44.016666666666666\n",
"Model saved to ./models/fairmot_ft.pth\n"
]
}
],
"source": [
"tracker.fit(num_epochs=EPOCHS, lr=LEARNING_RATE, resume=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
@@ -221,16 +249,138 @@
},
"outputs": [
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"/anaconda/envs/cv/lib/python3.7/site-packages/torch/nn/_reduction.py:43: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n",
"  warnings.warn(warning.format(ret))\n"
"Model saved to ./models/model_01.pth\n"
]
}
],
"source": [
"tracker.fit(num_epochs=EPOCHS, lr=LEARNING_RATE)"
"if SAVE_MODEL:\n",
"    tracker.save(f\"./models/model_{EPOCHS:02d}.pth\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 02 Evaluate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that `EVAL_DATA_PATH` follows the FairMOT input format."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating model...\n",
"loaded ./models/fairmot_ft.pth, epoch 11\n"
]
}
],
"source": [
"eval_results = tracker.predict(\n",
"    EVAL_DATA_PATH,\n",
"    conf_thres=CONF_THRES,\n",
"    track_buffer=TRACK_BUFFER,\n",
"    im_size=IM_SIZE,\n",
"    frame_rate=FRAME_RATE\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"eval_metrics = tracker.evaluate(eval_results, EVAL_DATA_PATH)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 03 Predict"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"input_video = download(\n",
"    Urls.carcans_video_path, osp.join(data_path(), \"carcans.mp4\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating model...\n",
"loaded ./models/fairmot_ft.pth, epoch 11\n",
"Length of the video: 251 frames\n"
]
}
],
"source": [
"test_results = tracker.predict(\n",
"    input_video,\n",
"    conf_thres=CONF_THRES,\n",
"    track_buffer=TRACK_BUFFER,\n",
"    im_size=IM_SIZE,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"output_video = osp.join(data_path(), \"carcans_output.mp4\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"write_video(test_results, input_video, output_video)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Video.from_file(output_video)"
]
}
],
@@ -34,10 +34,7 @@ def plot_thresholds(
        metric_function: The metric function
        y_pred: predicted probabilities.
        y_true: True class indices.
<<<<<<< HEAD
        samples: Number of threshold samples
=======
>>>>>>> master
        figsize: Figure size (w, h)
    """
    metric_name = metric_function.__name__
@@ -0,0 +1,17 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from typing import List
from urllib.parse import urljoin


class Urls:
    base = "https://cvbp.blob.core.windows.net/public/datasets/tracking/"

    fridge_objects_path = urljoin(base, "odFridgeObjects_FairMOT-Format.zip")
    carcans_annotations_path = urljoin(base, "carcans_vott-csv-export.zip")
    carcans_video_path = urljoin(base, "car_cans_8s.mp4")

    @classmethod
    def all(cls) -> List[str]:
        return [v for k, v in cls.__dict__.items() if k.endswith("_path")]
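A quick sketch of how the new `Urls.all()` classmethod can be used to verify the dataset blobs are reachable (this assumes the `requests` package; the loop itself is not part of the commit):

import requests

from utils_cv.tracking.data import Urls

# issue a HEAD request per dataset URL; status 200 means reachable
for url in Urls.all():
    print(url, requests.head(url).status_code)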
@@ -15,10 +15,7 @@ class TrackingDataset:
    """A multi-object tracking dataset."""

    def __init__(
        self,
        data_root: str,
        name: str = "default",
        batch_size: int = 12,
        self, data_root: str, name: str = "default", batch_size: int = 12,
    ) -> None:
        """
        Args:
@@ -2,21 +2,28 @@
# Licensed under the MIT License.

import argparse
from collections import OrderedDict
from copy import deepcopy
import glob
import requests
import os
import os.path as osp
from typing import Dict, List
import requests
from typing import Dict, List, Tuple

import torch
import torch.cuda as cuda
import torch.nn as nn
from torch.utils.data import DataLoader

import cv2
import pandas as pd
import matplotlib.pyplot as plt

from .references.fairmot.datasets.dataset.jde import LoadImages, LoadVideo
from .references.fairmot.models.model import (
    create_model,
    load_model,
    save_model,
)
from .references.fairmot.tracker.multitracker import JDETracker
from .references.fairmot.trains.train_factory import train_factory

@@ -24,7 +31,8 @@ from .references.fairmot.trains.train_factory import train_factory
from .bbox import TrackingBbox
from .dataset import TrackingDataset
from .opts import opts
from ..common.gpu import torch_device, get_gpu_str
from .plot import draw_boxes, assign_colors
from ..common.gpu import torch_device

BASELINE_URL = (
    "https://drive.google.com/open?id=1udpOPum8fJdoEQm6n0jsIgMMViOMFinu"
@@ -68,6 +76,7 @@ def _download_baseline(url, destination) -> None:

    save_response_content(response, destination)


def _get_gpu_str():
    if cuda.is_available():
        devices = [str(x) for x in range(cuda.device_count())]
@@ -75,13 +84,57 @@
    else:
        return "-1"  # cpu


def write_video(
    results: Dict[int, List[TrackingBbox]], input_video: str, output_video: str
) -> None:
    """
    Plot the predicted tracks on the input video. Write the output to {output_video}.

    Args:
        results: dictionary mapping frame id to a list of predicted TrackingBboxes
        input_video: path to the input video
        output_video: path to write out the output video
    """
    results = OrderedDict(sorted(results.items()))
    # read video and initialize new tracking video
    video = cv2.VideoCapture()
    video.open(input_video)

    image_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    image_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"MP4V")
    frame_rate = int(video.get(cv2.CAP_PROP_FPS))
    writer = cv2.VideoWriter(
        output_video, fourcc, frame_rate, (image_width, image_height)
    )

    # assign bbox color per id
    unique_ids = list(
        set([bb.track_id for frame in results.values() for bb in frame])
    )
    color_map = assign_colors(unique_ids)

    # create images and add to video writer, adapted from https://github.com/ZQPei/deep_sort_pytorch
    frame_idx = 0
    while video.grab():
        _, cur_image = video.retrieve()
        cur_tracks = results[frame_idx]
        if len(cur_tracks) > 0:
            cur_image = draw_boxes(cur_image, cur_tracks, color_map)
        writer.write(cur_image)
        frame_idx += 1

    print(f"Output saved to {output_video}.")


class TrackingLearner(object):
    """Tracking Learner for Multi-Object Tracking"""

    def __init__(
        self,
        dataset: TrackingDataset = None,
        model: nn.Module = None,
        dataset: TrackingDataset,
        model_path: str,
        arch: str = "dla_34",
        head_conv: int = None,
    ) -> None:
@@ -92,7 +145,7 @@ class TrackingLearner(object):

        Args:
            dataset: the dataset
            model: the model
            model_path: path to save model
            arch: the model architecture
                Supported architectures: resdcn_34, resdcn_50, resfpndcn_34, dla_34, hrnet_32
            head_conv: conv layer channels for output head. None maps to the default setting.
@@ -101,47 +154,27 @@
        self.opt = opts()
        self.opt.arch = arch
        self.opt.head_conv = head_conv if head_conv else -1
        self.opt.gpus = get_gpu_str()
        self.opt.gpus = _get_gpu_str()
        self.opt.device = torch_device()

        self.dataset = dataset
        self.model = model if model is not None else self.init_model()
        self.model = self.init_model()
        self.model_path = model_path

    def init_model(self) -> nn.Module:
        """
        Download and initialize the baseline FairMOT model.
        """
        model_dir = osp.join(self.opt.root_dir, "models")
        os.makedirs(model_dir, exist_ok=True)
        _download_baseline(BASELINE_URL, osp.join(model_dir, "all_dla34.pth"))
        baseline_path = osp.join(model_dir, "all_dla34.pth")
        # os.makedirs(model_dir, exist_ok=True)
        # _download_baseline(BASELINE_URL, baseline_path)
        self.opt.load_model = baseline_path

        return create_model(self.opt.arch, self.opt.heads, self.opt.head_conv)

    def load(self, path: str = None, resume=False) -> None:
        """
        Load a model from path.
        """
        if resume:
            # if resume, load optimizer and start_epoch as well as model state dict
            # set path to model_last.pth if path is not provided
            model_dir = (
                self.opt.save_dir[:-4]
                if self.opt.save_dir.endswith("TEST")
                else self.opt.save_dir
            )
            self.model, self.optimizer, self.start_epoch = load_model(
                self.model,
                path if path else osp.join(model_dir, "model_last.pth"),
                self.optimizer,
                resume,
                self.opt.lr,
                self.opt.lr_step,
            )
        else:
            # otherwise just load the model state dict
            self.model = load_model(self.model, path)

    def fit(
        self, lr: float = 1e-4, lr_step: str = "20,27", num_epochs: int = 30,
        self, lr: float = 1e-4, lr_step: str = "20,27", num_epochs: int = 30
    ) -> None:
        """
        The main training loop.
@@ -159,34 +192,60 @@
        if not self.dataset:
            raise Exception("No dataset provided")

        self.opt.lr = lr
        self.opt.lr_step = lr_step
        self.opt.num_epochs = num_epochs
        opt_fit = deepcopy(self.opt)  # copy opt to avoid bug
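        # Presumably the point of the deepcopy: per-run settings (lr, lr_step,
        # num_epochs, dataset heads) mutate only this local copy, leaving
        # self.opt untouched for later predict() / evaluate() calls.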
        opt_fit.lr = lr
        opt_fit.lr_step = lr_step
        opt_fit.num_epochs = num_epochs

        # update dataset options
        self.opt.update_dataset_info_and_set_heads(self.dataset.train_data)
        opt_fit.update_dataset_info_and_set_heads(self.dataset.train_data)

        # initialize dataloader
        train_loader = self.dataset.train_dl

        self.optimizer = torch.optim.Adam(self.model.parameters(), self.opt.lr)
        self.start_epoch = 0
        self.optimizer = torch.optim.Adam(self.model.parameters(), opt_fit.lr)
        start_epoch = 0
        print(f"Loading {opt_fit.load_model}")
        self.model = load_model(self.model, opt_fit.load_model)

        Trainer = train_factory[self.opt.task]
        trainer = Trainer(self.opt.opt, self.model, self.optimizer)
        trainer.set_device(
            self.opt.gpus, self.opt.chunk_sizes, self.opt.device
        )
        Trainer = train_factory[opt_fit.task]
        trainer = Trainer(opt_fit.opt, self.model, self.optimizer)
        trainer.set_device(opt_fit.gpus, opt_fit.chunk_sizes, opt_fit.device)

        # training loop
        for epoch in range(self.start_epoch + 1, self.opt.num_epochs + 1):
            mark = epoch if self.opt.save_all else "last"
        for epoch in range(
            start_epoch + 1, start_epoch + opt_fit.num_epochs + 1
        ):
            print(
                "=" * 5,
                f" Epoch: {epoch}/{start_epoch + opt_fit.num_epochs} ",
                "=" * 5,
            )
            self.epoch = epoch
            log_dict_train, _ = trainer.train(epoch, train_loader)
            if epoch in self.opt.lr_step:
                lr = self.opt.lr * (0.1 ** (self.opt.lr_step.index(epoch) + 1))
            for k, v in log_dict_train.items():
                print(f"{k}: {v}")
            if epoch in opt_fit.lr_step:
                lr = opt_fit.lr * (0.1 ** (opt_fit.lr_step.index(epoch) + 1))
                for param_group in self.optimizer.param_groups:
                    param_group["lr"] = lr

        # save after training because at inference-time FairMOT src reads model weights from disk
        self.save(self.model_path)

    def save(self, path) -> None:
        """
        Save the model to a specified path.
        """
        model_dir, _ = osp.split(path)
        os.makedirs(model_dir, exist_ok=True)

        save_model(path, self.epoch, self.model, self.optimizer)
        print(f"Model saved to {path}")

    def evaluate(self, results, gt) -> pd.DataFrame:
        pass

    def predict(
        self,
        im_or_video_path: str,
@@ -195,8 +254,7 @@
        nms_thres: float = 0.4,
        track_buffer: int = 30,
        min_box_area: float = 200,
        input_h: float = None,
        input_w: float = None,
        im_size: Tuple[float, float] = (None, None),
        frame_rate: int = 30,
    ) -> Dict[int, List[TrackingBbox]]:
        """
@@ -210,43 +268,47 @@
            nms_thres: iou thresh for nms
            track_buffer: tracking buffer
            min_box_area: filter out tiny boxes
            input_h: input height. Default from dataset
            input_w: input width. Default from dataset
            im_size: (input height, input width)
            frame_rate: frame rate

        Returns a list of TrackingBboxes

        Implementation inspired from code found here: https://github.com/ifzhang/FairMOT/blob/master/src/track.py
        """
        self.opt.conf_thres = conf_thres
        self.opt.det_thres = det_thres
        self.opt.nms_thres = nms_thres
        self.opt.track_buffer = track_buffer
        self.opt.min_box_area = min_box_area
        opt_pred = deepcopy(self.opt)  # copy opt to avoid bug
        opt_pred.conf_thres = conf_thres
        opt_pred.det_thres = det_thres
        opt_pred.nms_thres = nms_thres
        opt_pred.track_buffer = track_buffer
        opt_pred.min_box_area = min_box_area

        input_h, input_w = im_size
        input_height = input_h if input_h else -1
        input_width = input_w if input_w else -1
        self.opt.update_dataset_res(input_height, input_width)
        opt_pred.update_dataset_res(input_height, input_width)

        # initialize tracker
        tracker = JDETracker(self.opt.opt, frame_rate=frame_rate)
        opt_pred.load_model = self.model_path
        tracker = JDETracker(opt_pred.opt, frame_rate=frame_rate)

        # initialize dataloader
        dataloader = self.get_dataloader(im_or_video_path)
        dataloader = self._get_dataloader(
            im_or_video_path, opt_pred.input_h, opt_pred.input_w
        )

        frame_id = 0
        out = {}
        results = []
        for path, img, img0 in dataloader:
            blob = torch.from_numpy(img).cuda().unsqueeze(0)
            online_targets = self.tracker.update(blob, img0)
            online_targets = tracker.update(blob, img0)
            online_bboxes = []
            for t in online_targets:
                tlwh = t.tlwh
                tlbr = t.tlbr
                tid = t.track_id
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > self.opt.min_box_area and not vertical:
                if tlwh[2] * tlwh[3] > opt_pred.min_box_area and not vertical:
                    bb = TrackingBbox(
                        tlbr[1], tlbr[0], tlbr[3], tlbr[2], frame_id, tid
                    )

@@ -256,7 +318,9 @@

        return out

    def get_dataloader(self, im_or_video_path: str) -> DataLoader:
    def _get_dataloader(
        self, im_or_video_path: str, input_h, input_w
    ) -> DataLoader:
        """
        Creates a dataloader from images or video in the given path.

@@ -275,10 +339,8 @@
        im_format = [".jpg", ".jpeg", ".png", ".tif"]
        video_format = [".mp4", ".avi"]

        input_w = self.opt.input_w
        input_h = self.opt.input_h

        # if path is to a root directory of images

        if (
            osp.isdir(im_or_video_path)
            and len(
@@ -14,7 +14,7 @@ class opts(object):

    def __init__(
        self,
        root_dir: str = os.getcwd(),
        load_model: str = "",
        gpus: str = "0, 1",
        save_all: bool = False,
        arch: str = "dla_34",
@@ -32,13 +32,15 @@
        track_buffer: int = 30,
        min_box_area: float = 200,
        reid_dim: int = 512,
        root_dir: str = os.getcwd(),
    ) -> None:
        self._init_opt()

        self.load_model = load_model
        self.gpus = gpus
        self.save_all = save_all
        self.arch = arch
        self.head_conv = head_conv if head_conv != -1 else 256  # init default
        self.head_conv = head_conv
        self.input_h = input_h
        self.input_w = input_w
        self.lr = lr
@@ -62,10 +64,9 @@

        self._opt.task = "mot"
        self._opt.dataset = "jde"
        self._opt.resume = False
        self._opt.exp_id = "default"
        self._opt.test = False
        self._opt.load_model = ""
        self._opt.resume = False
        self._opt.num_workers = 8
        self._opt.not_cuda_benchmark = False
        self._opt.seed = 317
@@ -171,7 +172,7 @@
        self._opt.output_res = max(self._opt.output_h, self._opt.output_w)

        if self._opt.task == "mot":
            self.heads = {
            self._opt.heads = {
                "hm": self._opt.num_classes,
                "wh": 2
                if not self._opt.cat_spec_wh
@@ -179,13 +180,22 @@
                "id": self._opt.reid_dim,
            }
            if self._opt.reg_offset:
                self.heads.update({"reg": 2})
                self._opt.heads.update({"reg": 2})
            self._opt.nID = dataset.nID
            self._opt.img_size = (self._opt.input_w, self._opt.input_h)
        else:
            assert 0, "task not defined"

    ### getters and setters ###
    @property
    def load_model(self):
        return self._load_model

    @load_model.setter
    def load_model(self, value):
        self._load_model = value
        self._opt.load_model = self._load_model

    @property
    def gpus(self):
        return self._gpus
@@ -224,7 +234,7 @@

    @head_conv.setter
    def head_conv(self, value):
        self._head_conv = value
        self._head_conv = value if value != -1 else 256
        self._opt.head_conv = self._head_conv

    @property
@@ -344,14 +354,9 @@
        self._root_dir = value
        self._opt.root_dir = self._root_dir

        self._exp_dir = osp.join(self._root_dir, "exp", self._opt.task)
        self._opt.exp_dir = self._exp_dir

        self._save_dir = osp.join(self._exp_dir, self._opt.exp_id)
        self._opt.save_dir = self._save_dir

        self._debug_dir = osp.join(self._save_dir, "debug")
        self._opt.debug_dir = self._debug_dir
        self._opt.exp_dir = osp.join(self._root_dir, "exp", self._opt.task)
        self._opt.save_dir = osp.join(self._opt.exp_dir, self._opt.exp_id)
        self._opt.debug_dir = osp.join(self._opt.save_dir, "debug")

    @property
    def device(self):
@@ -362,28 +367,27 @@
        self._device = value
        self._opt.device = self._device

    @property
    def heads(self):
        return self._heads

    @heads.setter
    def heads(self, value):
        self._heads = value
        self._opt.heads = self._heads

    ### getters only ####
    @property
    def opt(self):
        return self._opt

    @property
    def resume(self):
        return self._resume

    @property
    def task(self):
        return self._opt.task

    @property
    def save_dir(self):
        return self.opt._save_dir
        return self._opt.save_dir

    @property
    def chunk_sizes(self):
        return self._opt.chunk_sizes

    @property
    def heads(self):
        return self._opt.heads
@@ -0,0 +1,74 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os.path as osp
from typing import Dict, List, Tuple
import cv2
import numpy as np

from .bbox import TrackingBbox


def draw_boxes(
    im: np.ndarray,
    cur_tracks: List[TrackingBbox],
    color_map: Dict[int, Tuple[int, int, int]],
) -> np.ndarray:
    """
    Overlay bbox and id labels onto the frame

    Args:
        im: raw frame
        cur_tracks: list of bboxes in the current frame
        color_map: dictionary mapping ids to bbox colors
    """

    cur_ids = [bb.track_id for bb in cur_tracks]
    tracks = dict(zip(cur_ids, cur_tracks))

    for label, bb in tracks.items():
        left = round(bb.left)
        top = round(bb.top)
        right = round(bb.right)
        bottom = round(bb.bottom)

        # box text and bar
        color = color_map[label]
        label = str(label)

        # last two args of getTextSize() are font_scale and thickness
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1, 1)[0]
        cv2.rectangle(im, (left, top), (right, bottom), color, 3)
        cv2.putText(
            im,
            "id_" + label,
            (left, top + t_size[1] - 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            color,
            3,
        )

    return im


def assign_colors(id_list: List[int],) -> Dict[int, Tuple[int, int, int]]:
    """
    Produce corresponding unique color palettes for unique ids

    Args:
        id_list: list of track ids
    """
    palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

    color_list = []
    id_list2 = list(range(len(id_list)))

    # adapted from https://github.com/ZQPei/deep_sort_pytorch
    for i in id_list2:
        color = [int((p * ((i + 1) ** 5 - i + 1)) % 255) for p in palette]
        color_list.append(tuple(color))

    color_map = dict(zip(id_list, color_list))

    return color_map
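The palette trick in `assign_colors` hashes each list position through three large constants, so a given position in `id_list` always yields the same RGB triple. A small standalone sketch of that determinism (not part of the commit):

palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

# position 0 in id_list maps to the same color on every run
color = tuple(int((p * ((0 + 1) ** 5 - 0 + 1)) % 255) for p in palette)
print(color)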
@@ -5,4 +5,5 @@ Our aim is to make as little edits to these files as possible, so that newer ver

The only edits made are listed below, and highlighted in the code with a "# EDITED" comment:
- Fixing import statements, e.g. "import utils" -> "from . import utils"
- Not hard-coding input resolution values in datasets/dataset/jde.py
- Setting the logging level to WARNING
@@ -10,7 +10,7 @@ def get_logger(name='root'):
    handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.setLevel(logging.WARNING)  # EDITED
    logger.addHandler(handler)
    return logger