From 0c86c649a6d7f4dc38a5f41beae7f4ee6880ce27 Mon Sep 17 00:00:00 2001 From: Bugra Tekin Date: Mon, 21 Oct 2019 10:13:13 +0200 Subject: [PATCH] add cfg for multi-obj --- .../cfg/ape_occlusion.data | 7 + multi_obj_pose_estimation/cfg/benchvise.data | 9 +- .../cfg/can_occlusion.data | 9 +- .../cfg/cat_occlusion.data | 9 +- .../cfg/driller_occlusion.data | 9 +- .../cfg/duck_occlusion.data | 9 +- .../cfg/eggbox_occlusion.data | 9 +- .../cfg/glue_occlusion.data | 9 +- .../cfg/holepuncher_occlusion.data | 9 +- multi_obj_pose_estimation/cfg/occlusion.data | 7 + .../cfg/yolo-pose-multi.cfg | 11 +- multi_obj_pose_estimation/valid_multi.ipynb | 336 +++++++----------- 12 files changed, 207 insertions(+), 226 deletions(-) diff --git a/multi_obj_pose_estimation/cfg/ape_occlusion.data b/multi_obj_pose_estimation/cfg/ape_occlusion.data index 994a750..61f0a4c 100644 --- a/multi_obj_pose_estimation/cfg/ape_occlusion.data +++ b/multi_obj_pose_estimation/cfg/ape_occlusion.data @@ -3,3 +3,10 @@ mesh = ../LINEMOD/ape/ape.ply backup = backup_multi name = ape diam = 0.103 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/benchvise.data b/multi_obj_pose_estimation/cfg/benchvise.data index d40ade9..25e698d 100644 --- a/multi_obj_pose_estimation/cfg/benchvise.data +++ b/multi_obj_pose_estimation/cfg/benchvise.data @@ -4,4 +4,11 @@ backup = backup_multi mesh = ../LINEMOD/benchvise/benchvise.ply tr_range = ../LINEMOD/benchvise/training_range.txt name = benchvise -diam = 0.286908 \ No newline at end of file +diam = 0.286908 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/can_occlusion.data b/multi_obj_pose_estimation/cfg/can_occlusion.data index bcf80ac..ebbfa40 100644 --- a/multi_obj_pose_estimation/cfg/can_occlusion.data +++ b/multi_obj_pose_estimation/cfg/can_occlusion.data @@ -2,4 +2,11 @@ valid = ../LINEMOD/can/test_occlusion.txt mesh = ../LINEMOD/can/can.ply backup = backup_multi name = can -diam = 0.202 \ No newline at end of file +diam = 0.202 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/cat_occlusion.data b/multi_obj_pose_estimation/cfg/cat_occlusion.data index f26b2ab..661e5a7 100644 --- a/multi_obj_pose_estimation/cfg/cat_occlusion.data +++ b/multi_obj_pose_estimation/cfg/cat_occlusion.data @@ -2,4 +2,11 @@ valid = ../LINEMOD/cat/test_occlusion.txt mesh = ../LINEMOD/cat/cat.ply backup = backup_multi name = cat -diam = 0.155 \ No newline at end of file +diam = 0.155 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/driller_occlusion.data b/multi_obj_pose_estimation/cfg/driller_occlusion.data index 1d89f38..415a1f2 100644 --- a/multi_obj_pose_estimation/cfg/driller_occlusion.data +++ b/multi_obj_pose_estimation/cfg/driller_occlusion.data @@ -2,4 +2,11 @@ valid = ../LINEMOD/driller/test_occlusion.txt mesh = ../LINEMOD/driller/driller.ply backup = backup_multi name = driller -diam = 0.262 \ No newline at end of file +diam = 0.262 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/duck_occlusion.data b/multi_obj_pose_estimation/cfg/duck_occlusion.data index 8a4154a..b4e922e 100644 --- a/multi_obj_pose_estimation/cfg/duck_occlusion.data +++ b/multi_obj_pose_estimation/cfg/duck_occlusion.data @@ -2,4 +2,11 @@ valid = ../LINEMOD/duck/test_occlusion.txt mesh = ../LINEMOD/duck/duck.ply backup = backup_multi name = duck -diam = 0.109 \ No newline at end of file +diam = 0.109 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/eggbox_occlusion.data b/multi_obj_pose_estimation/cfg/eggbox_occlusion.data index f50b09a..eaceef8 100644 --- a/multi_obj_pose_estimation/cfg/eggbox_occlusion.data +++ b/multi_obj_pose_estimation/cfg/eggbox_occlusion.data @@ -2,4 +2,11 @@ valid = ../LINEMOD/eggbox/test_occlusion.txt mesh = ../LINEMOD/eggbox/eggbox.ply backup = backup_multi name = eggbox -diam = 0.176364 \ No newline at end of file +diam = 0.176364 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/glue_occlusion.data b/multi_obj_pose_estimation/cfg/glue_occlusion.data index 721bc56..dfb0fbd 100644 --- a/multi_obj_pose_estimation/cfg/glue_occlusion.data +++ b/multi_obj_pose_estimation/cfg/glue_occlusion.data @@ -2,4 +2,11 @@ valid = ../LINEMOD/glue/test_occlusion.txt mesh = ../LINEMOD/glue/glue.ply backup = backup_multi name = glue -diam = 0.176 \ No newline at end of file +diam = 0.176 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data b/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data index c119964..6dc256f 100644 --- a/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data +++ b/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data @@ -2,4 +2,11 @@ valid = ../LINEMOD/holepuncher/test_occlusion.txt mesh = ../LINEMOD/holepuncher/holepuncher.ply backup = backup_multi name = holepuncher -diam = 0.162 \ No newline at end of file +diam = 0.162 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/occlusion.data b/multi_obj_pose_estimation/cfg/occlusion.data index 21aee04..d7d6cb9 100644 --- a/multi_obj_pose_estimation/cfg/occlusion.data +++ b/multi_obj_pose_estimation/cfg/occlusion.data @@ -21,3 +21,10 @@ diam6 = 0.262 diam7 = 0.109 diam9 = 0.176 diam10 = 0.162 +gpus = 0 +im_width = 640 +im_height = 480 +fx = 572.4114 +fy = 573.5704 +u0 = 325.2611 +v0 = 242.0489 \ No newline at end of file diff --git a/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg b/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg index 6ecde14..4c2431e 100644 --- a/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg +++ b/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg @@ -1,27 +1,24 @@ [net] -# Testing batch=32 subdivisions=8 -# Training -# batch=64 -# subdivisions=8 height=416 width=416 channels=3 +num_keypoints=9 momentum=0.9 decay=0.0005 angle=0 saturation = 1.5 exposure = 1.5 hue=.1 - learning_rate=0.001 burn_in=1000 max_batches = 80200 policy=steps steps=-1,100,20000,30000 -# steps=-1,180,360,540 scales=0.1,10,.1,.1 +conf_thresh = 0.05 +max_epochs = 500 [convolutional] batch_normalize=1 @@ -235,13 +232,11 @@ activation=leaky size=1 stride=1 pad=1 -# filters=125 filters=160 activation=linear [region] -# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851 bias_match=1 classes=13 diff --git a/multi_obj_pose_estimation/valid_multi.ipynb b/multi_obj_pose_estimation/valid_multi.ipynb index 7e4f6e7..06b2a16 100644 --- a/multi_obj_pose_estimation/valid_multi.ipynb +++ b/multi_obj_pose_estimation/valid_multi.ipynb @@ -2,203 +2,137 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import os\n", "os.sys.path.append('..')\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"\n", "import torch\n", - "from torch.autograd import Variable\n", - "from torchvision import datasets, transforms\n", - "from scipy.misc import imsave\n", - "import scipy.io\n", - "import warnings\n", - "import sys\n", - "warnings.filterwarnings(\"ignore\")\n", "import matplotlib.pyplot as plt\n", "import scipy.misc\n", + "import warnings\n", + "import sys\n", + "import argparse\n", + "warnings.filterwarnings(\"ignore\")\n", + "from torch.autograd import Variable\n", + "from torchvision import datasets, transforms\n", "\n", - "from darknet_multi import Darknet\n", - "from utils import *\n", "import dataset_multi\n", - "from MeshPly import MeshPly" + "from darknet_multi import Darknet\n", + "from utils_multi import *\n", + "from cfg import parse_cfg\n", + "from MeshPly import MeshPly\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.misc import imsave\n", + "import scipy.io\n", + "import scipy.misc" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2018-05-06 14:09:50 Testing ape...\n", - "2018-05-06 14:10:15 Acc using 5 px 2D Projection = 7.01%\n", - "2018-05-06 14:10:15 Acc using 10 px 2D Projection = 40.43%\n", - "2018-05-06 14:10:15 Acc using 15 px 2D Projection = 59.83%\n", - "2018-05-06 14:10:15 Acc using 20 px 2D Projection = 68.55%\n", - "2018-05-06 14:10:15 Acc using 25 px 2D Projection = 72.05%\n", - "2018-05-06 14:10:15 Acc using 30 px 2D Projection = 73.68%\n", - "2018-05-06 14:10:15 Acc using 35 px 2D Projection = 74.53%\n", - "2018-05-06 14:10:15 Acc using 40 px 2D Projection = 75.13%\n", - "2018-05-06 14:10:15 Acc using 45 px 2D Projection = 75.73%\n", - "2018-05-06 14:10:15 Acc using 50 px 2D Projection = 76.50%\n", - "2018-05-06 14:10:18 Testing can...\n", - "2018-05-06 14:10:47 Acc using 5 px 2D Projection = 11.18%\n", - "2018-05-06 14:10:47 Acc using 10 px 2D Projection = 57.83%\n", - "2018-05-06 14:10:47 Acc using 15 px 2D Projection = 79.95%\n", - "2018-05-06 14:10:47 Acc using 20 px 2D Projection = 85.75%\n", - "2018-05-06 14:10:47 Acc using 25 px 2D Projection = 88.73%\n", - "2018-05-06 14:10:47 Acc using 30 px 2D Projection = 90.39%\n", - "2018-05-06 14:10:47 Acc using 35 px 2D Projection = 91.80%\n", - "2018-05-06 14:10:47 Acc using 40 px 2D Projection = 93.21%\n", - "2018-05-06 14:10:47 Acc using 45 px 2D Projection = 93.62%\n", - "2018-05-06 14:10:47 Acc using 50 px 2D Projection = 93.79%\n", - "2018-05-06 14:10:50 Testing cat...\n", - "2018-05-06 14:11:16 Acc using 5 px 2D Projection = 3.62%\n", - "2018-05-06 14:11:16 Acc using 10 px 2D Projection = 23.25%\n", - "2018-05-06 14:11:16 Acc using 15 px 2D Projection = 39.51%\n", - "2018-05-06 14:11:16 Acc using 20 px 2D Projection = 49.45%\n", - "2018-05-06 14:11:16 Acc using 25 px 2D Projection = 54.76%\n", - "2018-05-06 14:11:16 Acc using 30 px 2D Projection = 57.96%\n", - "2018-05-06 14:11:16 Acc using 35 px 2D Projection = 59.56%\n", - "2018-05-06 14:11:16 Acc using 40 px 2D Projection = 60.99%\n", - "2018-05-06 14:11:16 Acc using 45 px 2D Projection = 62.51%\n", - "2018-05-06 14:11:16 Acc using 50 px 2D Projection = 63.27%\n", - "2018-05-06 14:11:19 Testing duck...\n", - "2018-05-06 14:11:42 Acc using 5 px 2D Projection = 5.07%\n", - "2018-05-06 14:11:42 Acc using 10 px 2D Projection = 18.20%\n", - "2018-05-06 14:11:42 Acc using 15 px 2D Projection = 30.88%\n", - "2018-05-06 14:11:42 Acc using 20 px 2D Projection = 55.12%\n", - "2018-05-06 14:11:42 Acc using 25 px 2D Projection = 75.15%\n", - "2018-05-06 14:11:42 Acc using 30 px 2D Projection = 81.45%\n", - "2018-05-06 14:11:42 Acc using 35 px 2D Projection = 83.20%\n", - "2018-05-06 14:11:42 Acc using 40 px 2D Projection = 83.64%\n", - "2018-05-06 14:11:42 Acc using 45 px 2D Projection = 83.90%\n", - "2018-05-06 14:11:42 Acc using 50 px 2D Projection = 84.16%\n", - "2018-05-06 14:11:45 Testing driller...\n", - "2018-05-06 14:12:10 Acc using 5 px 2D Projection = 1.40%\n", - "2018-05-06 14:12:10 Acc using 10 px 2D Projection = 17.38%\n", - "2018-05-06 14:12:10 Acc using 15 px 2D Projection = 39.87%\n", - "2018-05-06 14:12:10 Acc using 20 px 2D Projection = 62.93%\n", - "2018-05-06 14:12:10 Acc using 25 px 2D Projection = 80.64%\n", - "2018-05-06 14:12:10 Acc using 30 px 2D Projection = 89.87%\n", - "2018-05-06 14:12:10 Acc using 35 px 2D Projection = 94.89%\n", - "2018-05-06 14:12:10 Acc using 40 px 2D Projection = 95.88%\n", - "2018-05-06 14:12:10 Acc using 45 px 2D Projection = 96.54%\n", - "2018-05-06 14:12:10 Acc using 50 px 2D Projection = 96.87%\n", - "2018-05-06 14:12:13 Testing glue...\n", - "2018-05-06 14:12:31 Acc using 5 px 2D Projection = 6.53%\n", - "2018-05-06 14:12:31 Acc using 10 px 2D Projection = 26.91%\n", - "2018-05-06 14:12:31 Acc using 15 px 2D Projection = 39.65%\n", - "2018-05-06 14:12:31 Acc using 20 px 2D Projection = 46.18%\n", - "2018-05-06 14:12:31 Acc using 25 px 2D Projection = 49.50%\n", - "2018-05-06 14:12:31 Acc using 30 px 2D Projection = 51.83%\n", - "2018-05-06 14:12:31 Acc using 35 px 2D Projection = 53.05%\n", - "2018-05-06 14:12:31 Acc using 40 px 2D Projection = 53.16%\n", - "2018-05-06 14:12:31 Acc using 45 px 2D Projection = 53.93%\n", - "2018-05-06 14:12:31 Acc using 50 px 2D Projection = 54.71%\n", - "2018-05-06 14:12:45 Testing holepuncher...\n", - "2018-05-06 14:19:31 Acc using 5 px 2D Projection = 8.26%\n", - "2018-05-06 14:19:31 Acc using 10 px 2D Projection = 39.50%\n", - "2018-05-06 14:19:31 Acc using 15 px 2D Projection = 53.31%\n", - "2018-05-06 14:19:31 Acc using 20 px 2D Projection = 62.56%\n", - "2018-05-06 14:19:31 Acc using 25 px 2D Projection = 68.02%\n", - "2018-05-06 14:19:31 Acc using 30 px 2D Projection = 74.71%\n", - "2018-05-06 14:19:31 Acc using 35 px 2D Projection = 80.74%\n", - "2018-05-06 14:19:31 Acc using 40 px 2D Projection = 85.62%\n", - "2018-05-06 14:19:31 Acc using 45 px 2D Projection = 89.59%\n", - "2018-05-06 14:19:31 Acc using 50 px 2D Projection = 91.49%\n" + "2019-10-18 17:00:04 Testing ape...\n", + "2019-10-18 17:01:38 Acc using 5 px 2D Projection = 6.07%\n", + "2019-10-18 17:01:38 Acc using 10 px 2D Projection = 39.32%\n", + "2019-10-18 17:01:38 Acc using 15 px 2D Projection = 59.83%\n", + "2019-10-18 17:01:38 Acc using 20 px 2D Projection = 68.29%\n", + "2019-10-18 17:01:38 Acc using 25 px 2D Projection = 72.74%\n", + "2019-10-18 17:01:38 Acc using 30 px 2D Projection = 74.96%\n", + "2019-10-18 17:01:38 Acc using 35 px 2D Projection = 75.64%\n", + "2019-10-18 17:01:38 Acc using 40 px 2D Projection = 76.32%\n", + "2019-10-18 17:01:38 Acc using 45 px 2D Projection = 76.67%\n", + "2019-10-18 17:01:38 Acc using 50 px 2D Projection = 78.03%\n", + "2019-10-18 17:01:39 Testing can...\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 146\u001b[0m \u001b[0mvalid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdatacfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelcfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweightfile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 147\u001b[0m \u001b[0mdatacfg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'cfg/can_occlusion.data'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 148\u001b[1;33m \u001b[0mvalid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdatacfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelcfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweightfile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 149\u001b[0m \u001b[0mdatacfg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'cfg/cat_occlusion.data'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[0mvalid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdatacfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelcfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweightfile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m\u001b[0m in \u001b[0;36mvalid\u001b[1;34m(datacfg, cfgfile, weightfile)\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[1;31m# Using confidence threshold, eliminate low-confidence predictions\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[0mtrgt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_labels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 76\u001b[1;33m \u001b[0mall_boxes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_multi_region_boxes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconf_thresh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_classes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_keypoints\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0manchors\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_anchors\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrgt\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0monly_objectness\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 77\u001b[0m \u001b[0mt4\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 78\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Documents\\Code\\singleshot6Dpose\\multi_obj_pose_estimation\\utils_multi.py\u001b[0m in \u001b[0;36mget_multi_region_boxes\u001b[1;34m(output, conf_thresh, num_classes, num_keypoints, anchors, num_anchors, correspondingclass, only_objectness, validation)\u001b[0m\n\u001b[0;32m 330\u001b[0m \u001b[0mmax_ind\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mind\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 331\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 332\u001b[1;33m \u001b[1;32mif\u001b[0m \u001b[0mconf\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mconf_thresh\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 333\u001b[0m \u001b[0mbcx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[0mbcy\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ - "def valid(datacfg, cfgfile, weightfile, conf_th):\n", + "def valid(datacfg, cfgfile, weightfile):\n", " def truths_length(truths):\n", " for i in range(50):\n", " if truths[i][1] == 0:\n", " return i\n", "\n", - " # Parse configuration files\n", - " options = read_data_cfg(datacfg)\n", - " valid_images = options['valid']\n", - " meshname = options['mesh']\n", - " backupdir = options['backup']\n", - " name = options['name']\n", - " prefix = 'results'\n", - " # Read object model information, get 3D bounding box corners\n", - " mesh = MeshPly(meshname)\n", - " vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n", - " corners3D = get_3D_corners(vertices)\n", - " # Read intrinsic camera parameters\n", - " internal_calibration = get_camera_intrinsic()\n", + " # Parse data configuration files\n", + " data_options = read_data_cfg(datacfg)\n", + " valid_images = data_options['valid']\n", + " meshname = data_options['mesh']\n", + " name = data_options['name']\n", + " im_width = int(data_options['im_width'])\n", + " im_height = int(data_options['im_height']) \n", + " fx = float(data_options['fx'])\n", + " fy = float(data_options['fy'])\n", + " u0 = float(data_options['u0'])\n", + " v0 = float(data_options['v0'])\n", + " \n", + " # Parse net configuration file\n", + " net_options = parse_cfg(cfgfile)[0]\n", + " loss_options = parse_cfg(cfgfile)[-1]\n", + " conf_thresh = float(net_options['conf_thresh'])\n", + " num_keypoints = int(net_options['num_keypoints'])\n", + " num_classes = int(loss_options['classes'])\n", + " num_anchors = int(loss_options['num'])\n", + " anchors = [float(anchor) for anchor in loss_options['anchors'].split(',')]\n", "\n", - " # Get validation file names\n", - " with open(valid_images) as fp:\n", + " # Read object model information, get 3D bounding box corners, get intrinsics\n", + " mesh = MeshPly(meshname)\n", + " vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n", + " corners3D = get_3D_corners(vertices)\n", + " diam = float(data_options['diam'])\n", + " intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy) # camera params\n", + "\n", + " # Network I/O params\n", + " num_labels = 2*num_keypoints+3 # +2 for width, height, +1 for object class\n", + " errs_2d = [] # to save\n", + " with open(valid_images) as fp: # validation file names\n", " tmp_files = fp.readlines()\n", " valid_files = [item.rstrip() for item in tmp_files]\n", - " \n", + "\n", + " # Compute-related Parameters\n", + " use_cuda = True # whether to use cuda or no\n", + " kwargs = {'num_workers': 4, 'pin_memory': True} # number of workers etc.\n", + "\n", " # Specicy model, load pretrained weights, pass to GPU and set the module in evaluation mode\n", " model = Darknet(cfgfile)\n", " model.load_weights(weightfile)\n", " model.cuda()\n", " model.eval()\n", "\n", - " # Get the parser for the test dataset\n", - " valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height),\n", - " shuffle=False,\n", - " objclass=name,\n", - " transform=transforms.Compose([\n", - " transforms.ToTensor(),\n", - " ]))\n", - " valid_batchsize = 1\n", - "\n", - " # Specify the number of workers for multiple processing, get the dataloader for the test dataset\n", - " kwargs = {'num_workers': 4, 'pin_memory': True}\n", - " test_loader = torch.utils.data.DataLoader(\n", - " valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs) \n", - "\n", - " # Parameters\n", - " visualize = False\n", - " use_cuda = True\n", - " num_classes = 13\n", - " anchors = [1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851]\n", - " num_anchors = 5\n", - " eps = 1e-5\n", - " conf_thresh = conf_th\n", - " iou_thresh = 0.5\n", - "\n", - " # Parameters to save\n", - " errs_2d = []\n", - " edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4], [3, 7], [4, 8], [5, 6], [5, 7], [6, 8], [7, 8]]\n", - " edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]\n", + " # Get the dataloader for the test dataset\n", + " valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height), shuffle=False, objclass=name, transform=transforms.Compose([transforms.ToTensor(),]))\n", + " test_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=False, **kwargs) \n", "\n", " # Iterate through test batches (Batch size for test data is 1)\n", - " count = 0\n", " logging('Testing {}...'.format(name))\n", " for batch_idx, (data, target) in enumerate(test_loader):\n", " \n", - " # Images\n", - " img = data[0, :, :, :]\n", - " img = img.numpy().squeeze()\n", - " img = np.transpose(img, (1, 2, 0))\n", - " \n", " t1 = time.time()\n", " # Pass data to GPU\n", " if use_cuda:\n", " data = data.cuda()\n", - " target = target.cuda()\n", + " # target = target.cuda()\n", " \n", " # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference\n", " data = Variable(data, volatile=True)\n", @@ -209,8 +143,8 @@ " t3 = time.time()\n", " \n", " # Using confidence threshold, eliminate low-confidence predictions\n", - " trgt = target[0].view(-1, 21)\n", - " all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0) \n", + " trgt = target[0].view(-1, num_labels)\n", + " all_boxes = get_multi_region_boxes(output, conf_thresh, num_classes, num_keypoints, anchors, num_anchors, int(trgt[0][0]), only_objectness=0) \n", " t4 = time.time()\n", " \n", " # Iterate through all images in the batch\n", @@ -220,95 +154,77 @@ " boxes = all_boxes[i]\n", " \n", " # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)\n", - " truths = target[i].view(-1, 21)\n", + " truths = target[i].view(-1, num_labels)\n", " \n", " # Get how many object are present in the scene\n", " num_gts = truths_length(truths)\n", "\n", " # Iterate through each ground-truth object\n", " for k in range(num_gts):\n", - " box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6], \n", - " truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12], \n", - " truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]\n", - " best_conf_est = -1\n", + " box_gt = list()\n", + " for j in range(1, num_labels):\n", + " box_gt.append(truths[k][j])\n", + " box_gt.extend([1.0, 1.0])\n", + " box_gt.append(truths[k][0])\n", " \n", - "\n", " # If the prediction has the highest confidence, choose it as our prediction\n", + " best_conf_est = -sys.maxsize\n", " for j in range(len(boxes)):\n", - " if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):\n", - " best_conf_est = boxes[j][18]\n", + " if (boxes[j][2*num_keypoints] > best_conf_est) and (boxes[j][2*num_keypoints+2] == int(truths[k][0])):\n", + " best_conf_est = boxes[j][2*num_keypoints]\n", " box_pr = boxes[j]\n", - " bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))\n", - " bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))\n", - " iou = bbox_iou(bb2d_gt, bb2d_pr)\n", - " match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))\n", + " match = corner_confidence(box_gt[:2*num_keypoints], torch.FloatTensor(boxes[j][:2*num_keypoints]))\n", " \n", " # Denormalize the corner predictions \n", - " corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')\n", - " corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')\n", - " corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640\n", - " corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480 \n", - " corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640\n", - " corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480\n", - " corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of the corners in OCCLUSION\n", + " corners2D_gt = np.array(np.reshape(box_gt[:2*num_keypoints], [-1, 2]), dtype='float32')\n", + " corners2D_pr = np.array(np.reshape(box_pr[:2*num_keypoints], [-1, 2]), dtype='float32')\n", + " corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width\n", + " corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height \n", + " corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width\n", + " corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height\n", + " corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of corners\n", " \n", " # Compute [R|t] by pnp\n", " objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')\n", - " K = np.array(internal_calibration, dtype='float32')\n", + " K = np.array(intrinsic_calibration, dtype='float32')\n", " R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)\n", " R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)\n", " \n", " # Compute pixel error\n", " Rt_gt = np.concatenate((R_gt, t_gt), axis=1)\n", " Rt_pr = np.concatenate((R_pr, t_pr), axis=1)\n", - " proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration) \n", - " proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration) \n", - " proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration)) \n", - " proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration)) \n", + " proj_2d_gt = compute_projection(vertices, Rt_gt, intrinsic_calibration) \n", + " proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration) \n", + " proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, intrinsic_calibration)) \n", + " proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, intrinsic_calibration)) \n", " norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)\n", " pixel_dist = np.mean(norm)\n", " errs_2d.append(pixel_dist)\n", "\n", - " \n", - " if visualize:\n", - " # Visualize\n", - " plt.xlim((0, 640))\n", - " plt.ylim((0, 480))\n", - " plt.imshow(scipy.misc.imresize(img, (480, 640)))\n", - " # Projections\n", - " for edge in edges_corners:\n", - " plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1], color='g', linewidth=3.0)\n", - " plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1], color='b', linewidth=3.0)\n", - " plt.gca().invert_yaxis()\n", - " plt.show()\n", - "\n", " t5 = time.time()\n", "\n", " # Compute 2D projection score\n", + " eps = 1e-5\n", " for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:\n", " acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)\n", " # Print test statistics\n", " logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))\n", "\n", - "conf_th = 0.05\n", - "cfgfile = 'cfg/yolo-pose-multi.cfg'\n", - "weightfile = 'backup_multi/model_backup2.weights'\n", + "modelcfg = 'cfg/yolo-pose-multi.cfg'\n", "datacfg = 'cfg/ape_occlusion.data'\n", - "valid(datacfg, cfgfile, weightfile, conf_th)\n", - "datacfg = 'cfg/can_occlusion.data'\n", - "valid(datacfg, cfgfile, weightfile, conf_th)\n", - "datacfg = 'cfg/cat_occlusion.data'\n", - "valid(datacfg, cfgfile, weightfile, conf_th)\n", - "datacfg = 'cfg/duck_occlusion.data'\n", - "valid(datacfg, cfgfile, weightfile, conf_th)\n", - "datacfg = 'cfg/driller_occlusion.data'\n", - "valid(datacfg, cfgfile, weightfile, conf_th)\n", - "datacfg = 'cfg/glue_occlusion.data'\n", - "valid(datacfg, cfgfile, weightfile, conf_th)\n", - "datacfg = 'cfg/holepuncher_occlusion.data'\n", - "valid(datacfg, cfgfile, weightfile, conf_th)\n", + "weightfile = 'backup_multi/model_backup.weights'\n", "\n", - " " + "valid(datacfg, modelcfg, weightfile)\n", + "datacfg = 'cfg/can_occlusion.data'\n", + "valid(datacfg, modelcfg, weightfile)\n", + "datacfg = 'cfg/cat_occlusion.data'\n", + "valid(datacfg, modelcfg, weightfile)\n", + "datacfg = 'cfg/duck_occlusion.data'\n", + "valid(datacfg, modelcfg, weightfile)\n", + "datacfg = 'cfg/glue_occlusion.data'\n", + "valid(datacfg, modelcfg, weightfile)\n", + "datacfg = 'cfg/holepuncher_occlusion.data'\n", + "valid(datacfg, modelcfg, weightfile)" ] }, { @@ -321,21 +237,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.6.6" } }, "nbformat": 4,