FairMOT-01 (#553)
This commit is contained in:
Parent: aad3637bca
Commit: 68c9911e3c
@@ -20,7 +20,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization"
"## 00 Initialization"
]
},
{
@@ -45,22 +45,18 @@
"sys.path.append(\"../../\")\n",
"\n",
"import os\n",
"import os.path as osp\n",
"import time\n",
"from ipywidgets import Video\n",
"import matplotlib.pyplot as plt\n",
"from typing import Iterator\n",
"from pathlib import Path\n",
"from PIL import Image\n",
"from random import randrange\n",
"from typing import Tuple\n",
"import torch\n",
"import torchvision\n",
"from torchvision import transforms\n",
"import scrapbook as sb\n",
"\n",
"from ipywidgets import Video\n",
"from utils_cv.tracking.data import Urls\n",
"from utils_cv.tracking.dataset import TrackingDataset\n",
"from utils_cv.tracking.model import TrackingLearner\n",
"from utils_cv.tracking.model import TrackingLearner, write_video\n",
"\n",
"from utils_cv.common.data import data_path, download, unzip_url\n",
"from utils_cv.common.gpu import which_processor, is_windows\n",
"\n",
"# Change matplotlib backend so that plots are shown for windows\n",
@@ -115,10 +111,22 @@
}
],
"source": [
"EPOCHS = 2\n",
"EPOCHS = 1\n",
"LEARNING_RATE = 0.0001\n",
"BATCH_SIZE = 1\n",
"\n",
"SAVE_MODEL = True\n",
"FRAME_RATE = 30\n",
"\n",
"CONF_THRES = 0.3\n",
"TRACK_BUFFER = 300\n",
"IM_SIZE = (1080, 1920)\n",
"\n",
"TRAIN_DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)\n",
"EVAL_DATA_PATH = unzip_url(Urls.carcans_annotations_path, exist_ok=True)\n",
"\n",
"BASELINE_MODEL = \"./models/all_dla34_new.pth\"\n",
"FT_MODEL = \"./models/model_30.pth\"\n",
"\n",
"# train on the GPU or on the CPU, if a GPU is not available\n",
"device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
@@ -129,43 +137,20 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare Training Dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['labels_with_ids', '.ipynb_checkpoints', 'images']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DATA_PATH_TRAIN = \"./data/odFridgeObjects_FairMOTformat/\"\n",
"os.listdir(DATA_PATH_TRAIN)"
"## 01 Finetune a Pretrained Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Training Dataset"
"Initialize the training dataset."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
@@ -183,7 +168,7 @@
],
"source": [
"data_train = TrackingDataset(\n",
"    DATA_PATH_TRAIN,\n",
"    TRAIN_DATA_PATH,\n",
"    batch_size=BATCH_SIZE\n",
")"
]
@@ -192,12 +177,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Finetune a Pretrained Model"
"Initialize and load the model. We use the baseline FairMOT model, which can be downloaded [here](https://drive.google.com/file/d/1udpOPum8fJdoEQm6n0jsIgMMViOMFinu/view)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -209,10 +194,53 @@
}
],
"source": [
"tracker = TrackingLearner(data_train) \n",
"tracker = TrackingLearner(data_train, \"./models/fairmot_ft.pth\")\n",
"print(f\"Model: {type(tracker.model)}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading /home/jihon/computervision-recipes/scenarios/tracking/models/all_dla34.pth\n",
"loaded /home/jihon/computervision-recipes/scenarios/tracking/models/all_dla34.pth, epoch 10\n",
"Resumed optimizer with start lr 0.0001\n",
"===== Epoch: 11/11 =====\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/anaconda/envs/cv/lib/python3.7/site-packages/torch/nn/_reduction.py:43: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n",
"  warnings.warn(warning.format(ret))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"loss: 1.1128346400433464\n",
"hm_loss: 0.06353224289612051\n",
"wh_loss: 1.57920023114543\n",
"off_loss: 0.18636367223715702\n",
"id_loss: 0.8860541224528692\n",
"time: 44.016666666666666\n",
"Model saved to ./models/fairmot_ft.pth\n"
]
}
],
"source": [
"tracker.fit(num_epochs=EPOCHS, lr=LEARNING_RATE, resume=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
@@ -221,16 +249,138 @@
},
"outputs": [
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"/anaconda/envs/cv/lib/python3.7/site-packages/torch/nn/_reduction.py:43: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n",
"  warnings.warn(warning.format(ret))\n"
"Model saved to ./models/model_01.pth\n"
]
}
],
"source": [
"tracker.fit(num_epochs=EPOCHS, lr=LEARNING_RATE)"
"if SAVE_MODEL:\n",
"    tracker.save(f\"./models/model_{EPOCHS:02d}.pth\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 02 Evaluate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that `EVAL_DATA_PATH` follows the FairMOT input format."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating model...\n",
"loaded ./models/fairmot_ft.pth, epoch 11\n"
]
}
],
"source": [
"eval_results = tracker.predict(\n",
"    EVAL_DATA_PATH,\n",
"    conf_thres=CONF_THRES,\n",
"    track_buffer=TRACK_BUFFER,\n",
"    im_size=IM_SIZE,\n",
"    frame_rate=FRAME_RATE\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"eval_metrics = tracker.evaluate(eval_results, EVAL_DATA_PATH)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 03 Predict"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"input_video = download(\n",
"    Urls.carcans_video_path, osp.join(data_path(), \"carcans.mp4\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating model...\n",
"loaded ./models/fairmot_ft.pth, epoch 11\n",
"Length of the video: 251 frames\n"
]
}
],
"source": [
"test_results = tracker.predict(\n",
"    input_video,\n",
"    conf_thres=CONF_THRES,\n",
"    track_buffer=TRACK_BUFFER,\n",
"    im_size=IM_SIZE,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"output_video = osp.join(data_path(), \"carcans_output.mp4\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"write_video(test_results, input_video, output_video)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Video.from_file(output_video)"
]
}
],
@@ -34,10 +34,7 @@ def plot_thresholds(
        metric_function: The metric function
        y_pred: predicted probabilities.
        y_true: True class indices.
<<<<<<< HEAD
        samples: Number of threshold samples
=======
>>>>>>> master
        figsize: Figure size (w, h)
    """
    metric_name = metric_function.__name__
@@ -0,0 +1,17 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from typing import List
from urllib.parse import urljoin


class Urls:
    base = "https://cvbp.blob.core.windows.net/public/datasets/tracking/"

    fridge_objects_path = urljoin(base, "odFridgeObjects_FairMOT-Format.zip")
    carcans_annotations_path = urljoin(base, "carcans_vott-csv-export.zip")
    carcans_video_path = urljoin(base, "car_cans_8s.mp4")

    @classmethod
    def all(cls) -> List[str]:
        return [v for k, v in cls.__dict__.items() if k.endswith("_path")]
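A quick sketch of how the new `Urls.all()` classmethod can be used to verify the dataset blobs are reachable (this assumes the `requests` package; the loop itself is not part of the commit):

import requests

from utils_cv.tracking.data import Urls

# issue a HEAD request per dataset URL; status 200 means reachable
for url in Urls.all():
    print(url, requests.head(url).status_code)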
@@ -15,10 +15,7 @@ class TrackingDataset:
    """A multi-object tracking dataset."""

    def __init__(
        self,
        data_root: str,
        name: str = "default",
        batch_size: int = 12,
        self, data_root: str, name: str = "default", batch_size: int = 12,
    ) -> None:
        """
        Args:
@@ -2,21 +2,28 @@
# Licensed under the MIT License.

import argparse
from collections import OrderedDict
from copy import deepcopy
import glob
import requests
import os
import os.path as osp
from typing import Dict, List
import requests
from typing import Dict, List, Tuple

import torch
import torch.cuda as cuda
import torch.nn as nn
from torch.utils.data import DataLoader

import cv2
import pandas as pd
import matplotlib.pyplot as plt

from .references.fairmot.datasets.dataset.jde import LoadImages, LoadVideo
from .references.fairmot.models.model import (
    create_model,
    load_model,
    save_model,
)
from .references.fairmot.tracker.multitracker import JDETracker
from .references.fairmot.trains.train_factory import train_factory

@@ -24,7 +31,8 @@ from .references.fairmot.trains.train_factory import train_factory
from .bbox import TrackingBbox
from .dataset import TrackingDataset
from .opts import opts
from ..common.gpu import torch_device, get_gpu_str
from .plot import draw_boxes, assign_colors
from ..common.gpu import torch_device

BASELINE_URL = (
    "https://drive.google.com/open?id=1udpOPum8fJdoEQm6n0jsIgMMViOMFinu"
@@ -68,6 +76,7 @@ def _download_baseline(url, destination) -> None:

    save_response_content(response, destination)


def _get_gpu_str():
    if cuda.is_available():
        devices = [str(x) for x in range(cuda.device_count())]
@@ -75,13 +84,57 @@
    else:
        return "-1"  # cpu


def write_video(
    results: Dict[int, List[TrackingBbox]], input_video: str, output_video: str
) -> None:
    """
    Plot the predicted tracks on the input video. Write the output to {output_video}.

    Args:
        results: dictionary mapping frame id to a list of predicted TrackingBboxes
        input_video: path to the input video
        output_video: path to write out the output video
    """
    results = OrderedDict(sorted(results.items()))
    # read video and initialize new tracking video
    video = cv2.VideoCapture()
    video.open(input_video)

    image_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    image_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"MP4V")
    frame_rate = int(video.get(cv2.CAP_PROP_FPS))
    writer = cv2.VideoWriter(
        output_video, fourcc, frame_rate, (image_width, image_height)
    )

    # assign bbox color per id
    unique_ids = list(
        set([bb.track_id for frame in results.values() for bb in frame])
    )
    color_map = assign_colors(unique_ids)

    # create images and add to video writer, adapted from https://github.com/ZQPei/deep_sort_pytorch
    frame_idx = 0
    while video.grab():
        _, cur_image = video.retrieve()
        cur_tracks = results[frame_idx]
        if len(cur_tracks) > 0:
            cur_image = draw_boxes(cur_image, cur_tracks, color_map)
        writer.write(cur_image)
        frame_idx += 1

    print(f"Output saved to {output_video}.")


class TrackingLearner(object):
    """Tracking Learner for Multi-Object Tracking"""

    def __init__(
        self,
        dataset: TrackingDataset = None,
        model: nn.Module = None,
        dataset: TrackingDataset,
        model_path: str,
        arch: str = "dla_34",
        head_conv: int = None,
    ) -> None:
@@ -92,7 +145,7 @@ class TrackingLearner(object):

        Args:
            dataset: the dataset
            model: the model
            model_path: path to save model
            arch: the model architecture
                Supported architectures: resdcn_34, resdcn_50, resfpndcn_34, dla_34, hrnet_32
            head_conv: conv layer channels for output head. None maps to the default setting.
@@ -101,47 +154,27 @@
        self.opt = opts()
        self.opt.arch = arch
        self.opt.head_conv = head_conv if head_conv else -1
        self.opt.gpus = get_gpu_str()
        self.opt.gpus = _get_gpu_str()
        self.opt.device = torch_device()

        self.dataset = dataset
        self.model = model if model is not None else self.init_model()
        self.model = self.init_model()
        self.model_path = model_path

    def init_model(self) -> nn.Module:
        """
        Download and initialize the baseline FairMOT model.
        """
        model_dir = osp.join(self.opt.root_dir, "models")
        os.makedirs(model_dir, exist_ok=True)
        _download_baseline(BASELINE_URL, osp.join(model_dir, "all_dla34.pth"))
        baseline_path = osp.join(model_dir, "all_dla34.pth")
        # os.makedirs(model_dir, exist_ok=True)
        # _download_baseline(BASELINE_URL, baseline_path)
        self.opt.load_model = baseline_path

        return create_model(self.opt.arch, self.opt.heads, self.opt.head_conv)

    def load(self, path: str = None, resume=False) -> None:
        """
        Load a model from path.
        """
        if resume:
            # if resume, load optimizer and start_epoch as well as model state dict
            # set path to model_last.pth if path is not provided
            model_dir = (
                self.opt.save_dir[:-4]
                if self.opt.save_dir.endswith("TEST")
                else self.opt.save_dir
            )
            self.model, self.optimizer, self.start_epoch = load_model(
                self.model,
                path if path else osp.join(model_dir, "model_last.pth"),
                self.optimizer,
                resume,
                self.opt.lr,
                self.opt.lr_step,
            )
        else:
            # otherwise just load the model state dict
            self.model = load_model(self.model, path)

    def fit(
        self, lr: float = 1e-4, lr_step: str = "20,27", num_epochs: int = 30,
        self, lr: float = 1e-4, lr_step: str = "20,27", num_epochs: int = 30
    ) -> None:
        """
        The main training loop.
@@ -159,34 +192,60 @@
        if not self.dataset:
            raise Exception("No dataset provided")

        self.opt.lr = lr
        self.opt.lr_step = lr_step
        self.opt.num_epochs = num_epochs
        opt_fit = deepcopy(self.opt)  # copy opt to avoid bug
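        # Presumably the point of the deepcopy: per-run settings (lr, lr_step,
        # num_epochs, dataset heads) mutate only this local copy, leaving
        # self.opt untouched for later predict() / evaluate() calls.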
        opt_fit.lr = lr
        opt_fit.lr_step = lr_step
        opt_fit.num_epochs = num_epochs

        # update dataset options
        self.opt.update_dataset_info_and_set_heads(self.dataset.train_data)
        opt_fit.update_dataset_info_and_set_heads(self.dataset.train_data)

        # initialize dataloader
        train_loader = self.dataset.train_dl

        self.optimizer = torch.optim.Adam(self.model.parameters(), self.opt.lr)
        self.start_epoch = 0
        self.optimizer = torch.optim.Adam(self.model.parameters(), opt_fit.lr)
        start_epoch = 0
        print(f"Loading {opt_fit.load_model}")
        self.model = load_model(self.model, opt_fit.load_model)

        Trainer = train_factory[self.opt.task]
        trainer = Trainer(self.opt.opt, self.model, self.optimizer)
        trainer.set_device(
            self.opt.gpus, self.opt.chunk_sizes, self.opt.device
        )
        Trainer = train_factory[opt_fit.task]
        trainer = Trainer(opt_fit.opt, self.model, self.optimizer)
        trainer.set_device(opt_fit.gpus, opt_fit.chunk_sizes, opt_fit.device)

        # training loop
        for epoch in range(self.start_epoch + 1, self.opt.num_epochs + 1):
            mark = epoch if self.opt.save_all else "last"
        for epoch in range(
            start_epoch + 1, start_epoch + opt_fit.num_epochs + 1
        ):
            print(
                "=" * 5,
                f" Epoch: {epoch}/{start_epoch + opt_fit.num_epochs} ",
                "=" * 5,
            )
            self.epoch = epoch
            log_dict_train, _ = trainer.train(epoch, train_loader)
            if epoch in self.opt.lr_step:
                lr = self.opt.lr * (0.1 ** (self.opt.lr_step.index(epoch) + 1))
            for k, v in log_dict_train.items():
                print(f"{k}: {v}")
            if epoch in opt_fit.lr_step:
                lr = opt_fit.lr * (0.1 ** (opt_fit.lr_step.index(epoch) + 1))
                for param_group in self.optimizer.param_groups:
                    param_group["lr"] = lr

        # save after training because at inference-time FairMOT src reads model weights from disk
        self.save(self.model_path)

    def save(self, path) -> None:
        """
        Save the model to a specified path.
        """
        model_dir, _ = osp.split(path)
        os.makedirs(model_dir, exist_ok=True)

        save_model(path, self.epoch, self.model, self.optimizer)
        print(f"Model saved to {path}")

    def evaluate(self, results, gt) -> pd.DataFrame:
        pass

    def predict(
        self,
        im_or_video_path: str,
@@ -195,8 +254,7 @@
        nms_thres: float = 0.4,
        track_buffer: int = 30,
        min_box_area: float = 200,
        input_h: float = None,
        input_w: float = None,
        im_size: Tuple[float, float] = (None, None),
        frame_rate: int = 30,
    ) -> Dict[int, List[TrackingBbox]]:
        """
@@ -210,43 +268,47 @@
            nms_thres: iou thresh for nms
            track_buffer: tracking buffer
            min_box_area: filter out tiny boxes
            input_h: input height. Default from dataset
            input_w: input width. Default from dataset
            im_size: (input height, input width)
            frame_rate: frame rate

        Returns a list of TrackingBboxes

        Implementation inspired from code found here: https://github.com/ifzhang/FairMOT/blob/master/src/track.py
        """
        self.opt.conf_thres = conf_thres
        self.opt.det_thres = det_thres
        self.opt.nms_thres = nms_thres
        self.opt.track_buffer = track_buffer
        self.opt.min_box_area = min_box_area
        opt_pred = deepcopy(self.opt)  # copy opt to avoid bug
        opt_pred.conf_thres = conf_thres
        opt_pred.det_thres = det_thres
        opt_pred.nms_thres = nms_thres
        opt_pred.track_buffer = track_buffer
        opt_pred.min_box_area = min_box_area

        input_h, input_w = im_size
        input_height = input_h if input_h else -1
        input_width = input_w if input_w else -1
        self.opt.update_dataset_res(input_height, input_width)
        opt_pred.update_dataset_res(input_height, input_width)

        # initialize tracker
        tracker = JDETracker(self.opt.opt, frame_rate=frame_rate)
        opt_pred.load_model = self.model_path
        tracker = JDETracker(opt_pred.opt, frame_rate=frame_rate)

        # initialize dataloader
        dataloader = self.get_dataloader(im_or_video_path)
        dataloader = self._get_dataloader(
            im_or_video_path, opt_pred.input_h, opt_pred.input_w
        )

        frame_id = 0
        out = {}
        results = []
        for path, img, img0 in dataloader:
            blob = torch.from_numpy(img).cuda().unsqueeze(0)
            online_targets = self.tracker.update(blob, img0)
            online_targets = tracker.update(blob, img0)
            online_bboxes = []
            for t in online_targets:
                tlwh = t.tlwh
                tlbr = t.tlbr
                tid = t.track_id
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > self.opt.min_box_area and not vertical:
                if tlwh[2] * tlwh[3] > opt_pred.min_box_area and not vertical:
                    bb = TrackingBbox(
                        tlbr[1], tlbr[0], tlbr[3], tlbr[2], frame_id, tid
                    )

@@ -256,7 +318,9 @@

        return out

    def get_dataloader(self, im_or_video_path: str) -> DataLoader:
    def _get_dataloader(
        self, im_or_video_path: str, input_h, input_w
    ) -> DataLoader:
        """
        Creates a dataloader from images or video in the given path.

@@ -275,10 +339,8 @@
        im_format = [".jpg", ".jpeg", ".png", ".tif"]
        video_format = [".mp4", ".avi"]

        input_w = self.opt.input_w
        input_h = self.opt.input_h

        # if path is to a root directory of images

        if (
            osp.isdir(im_or_video_path)
            and len(
@@ -14,7 +14,7 @@ class opts(object):

    def __init__(
        self,
        root_dir: str = os.getcwd(),
        load_model: str = "",
        gpus: str = "0, 1",
        save_all: bool = False,
        arch: str = "dla_34",
@@ -32,13 +32,15 @@
        track_buffer: int = 30,
        min_box_area: float = 200,
        reid_dim: int = 512,
        root_dir: str = os.getcwd(),
    ) -> None:
        self._init_opt()

        self.load_model = load_model
        self.gpus = gpus
        self.save_all = save_all
        self.arch = arch
        self.head_conv = head_conv if head_conv != -1 else 256  # init default
        self.head_conv = head_conv
        self.input_h = input_h
        self.input_w = input_w
        self.lr = lr
@@ -62,10 +64,9 @@

        self._opt.task = "mot"
        self._opt.dataset = "jde"
        self._opt.resume = False
        self._opt.exp_id = "default"
        self._opt.test = False
        self._opt.load_model = ""
        self._opt.resume = False
        self._opt.num_workers = 8
        self._opt.not_cuda_benchmark = False
        self._opt.seed = 317
@@ -171,7 +172,7 @@
        self._opt.output_res = max(self._opt.output_h, self._opt.output_w)

        if self._opt.task == "mot":
            self.heads = {
            self._opt.heads = {
                "hm": self._opt.num_classes,
                "wh": 2
                if not self._opt.cat_spec_wh
@@ -179,13 +180,22 @@
                "id": self._opt.reid_dim,
            }
            if self._opt.reg_offset:
                self.heads.update({"reg": 2})
                self._opt.heads.update({"reg": 2})
            self._opt.nID = dataset.nID
            self._opt.img_size = (self._opt.input_w, self._opt.input_h)
        else:
            assert 0, "task not defined"

    ### getters and setters ###
    @property
    def load_model(self):
        return self._load_model

    @load_model.setter
    def load_model(self, value):
        self._load_model = value
        self._opt.load_model = self._load_model

    @property
    def gpus(self):
        return self._gpus
@@ -224,7 +234,7 @@

    @head_conv.setter
    def head_conv(self, value):
        self._head_conv = value
        self._head_conv = value if value != -1 else 256
        self._opt.head_conv = self._head_conv

    @property
@@ -344,14 +354,9 @@
        self._root_dir = value
        self._opt.root_dir = self._root_dir

        self._exp_dir = osp.join(self._root_dir, "exp", self._opt.task)
        self._opt.exp_dir = self._exp_dir

        self._save_dir = osp.join(self._exp_dir, self._opt.exp_id)
        self._opt.save_dir = self._save_dir

        self._debug_dir = osp.join(self._save_dir, "debug")
        self._opt.debug_dir = self._debug_dir
        self._opt.exp_dir = osp.join(self._root_dir, "exp", self._opt.task)
        self._opt.save_dir = osp.join(self._opt.exp_dir, self._opt.exp_id)
        self._opt.debug_dir = osp.join(self._opt.save_dir, "debug")

    @property
    def device(self):
@@ -362,28 +367,27 @@
        self._device = value
        self._opt.device = self._device

    @property
    def heads(self):
        return self._heads

    @heads.setter
    def heads(self, value):
        self._heads = value
        self._opt.heads = self._heads

    ### getters only ####
    @property
    def opt(self):
        return self._opt

    @property
    def resume(self):
        return self._resume

    @property
    def task(self):
        return self._opt.task

    @property
    def save_dir(self):
        return self.opt._save_dir
        return self._opt.save_dir

    @property
    def chunk_sizes(self):
        return self._opt.chunk_sizes

    @property
    def heads(self):
        return self._opt.heads
@@ -0,0 +1,74 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os.path as osp
from typing import Dict, List, Tuple
import cv2
import numpy as np

from .bbox import TrackingBbox


def draw_boxes(
    im: np.ndarray,
    cur_tracks: List[TrackingBbox],
    color_map: Dict[int, Tuple[int, int, int]],
) -> np.ndarray:
    """
    Overlay bbox and id labels onto the frame

    Args:
        im: raw frame
        cur_tracks: list of bboxes in the current frame
        color_map: dictionary mapping ids to bbox colors
    """

    cur_ids = [bb.track_id for bb in cur_tracks]
    tracks = dict(zip(cur_ids, cur_tracks))

    for label, bb in tracks.items():
        left = round(bb.left)
        top = round(bb.top)
        right = round(bb.right)
        bottom = round(bb.bottom)

        # box text and bar
        color = color_map[label]
        label = str(label)

        # last two args of getTextSize() are font_scale and thickness
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1, 1)[0]
        cv2.rectangle(im, (left, top), (right, bottom), color, 3)
        cv2.putText(
            im,
            "id_" + label,
            (left, top + t_size[1] - 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            color,
            3,
        )

    return im


def assign_colors(id_list: List[int],) -> Dict[int, Tuple[int, int, int]]:
    """
    Produce corresponding unique color palettes for unique ids

    Args:
        id_list: list of track ids
    """
    palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

    color_list = []
    id_list2 = list(range(len(id_list)))

    # adapted from https://github.com/ZQPei/deep_sort_pytorch
    for i in id_list2:
        color = [int((p * ((i + 1) ** 5 - i + 1)) % 255) for p in palette]
        color_list.append(tuple(color))

    color_map = dict(zip(id_list, color_list))

    return color_map
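The palette trick in `assign_colors` hashes each list position through three large constants, so a given position in `id_list` always yields the same RGB triple. A small standalone sketch of that determinism (not part of the commit):

palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

# position 0 in id_list maps to the same color on every run
color = tuple(int((p * ((0 + 1) ** 5 - 0 + 1)) % 255) for p in palette)
print(color)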
@@ -5,4 +5,5 @@ Our aim is to make as little edits to these files as possible, so that newer ver

The only edits made are listed below, and highlighted in the code with a "# EDITED" comment:
- Fixing import statements, e.g. "import utils" -> "from . import utils"
- Not hard-coding input resolution values in datasets/dataset/jde.py
- Setting the logging level to WARNING
@@ -10,7 +10,7 @@ def get_logger(name='root'):
    handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.setLevel(logging.WARNING)  # EDITED
    logger.addHandler(handler)
    return logger