From 0c86c649a6d7f4dc38a5f41beae7f4ee6880ce27 Mon Sep 17 00:00:00 2001
From: Bugra Tekin <butekin@microsoft.com>
Date: Mon, 21 Oct 2019 10:13:13 +0200
Subject: [PATCH] Add camera/image params to multi-obj cfgs and read them in valid_multi

---
 .../cfg/ape_occlusion.data                    |   7 +
 multi_obj_pose_estimation/cfg/benchvise.data  |   9 +-
 .../cfg/can_occlusion.data                    |   9 +-
 .../cfg/cat_occlusion.data                    |   9 +-
 .../cfg/driller_occlusion.data                |   9 +-
 .../cfg/duck_occlusion.data                   |   9 +-
 .../cfg/eggbox_occlusion.data                 |   9 +-
 .../cfg/glue_occlusion.data                   |   9 +-
 .../cfg/holepuncher_occlusion.data            |   9 +-
 multi_obj_pose_estimation/cfg/occlusion.data  |   7 +
 .../cfg/yolo-pose-multi.cfg                   |  11 +-
 multi_obj_pose_estimation/valid_multi.ipynb   | 336 +++++++-----------
 12 files changed, 207 insertions(+), 226 deletions(-)

diff --git a/multi_obj_pose_estimation/cfg/ape_occlusion.data b/multi_obj_pose_estimation/cfg/ape_occlusion.data
index 994a750..61f0a4c 100644
--- a/multi_obj_pose_estimation/cfg/ape_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/ape_occlusion.data
@@ -3,3 +3,10 @@ mesh = ../LINEMOD/ape/ape.ply
 backup = backup_multi
 name = ape
 diam = 0.103
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/benchvise.data b/multi_obj_pose_estimation/cfg/benchvise.data
index d40ade9..25e698d 100644
--- a/multi_obj_pose_estimation/cfg/benchvise.data
+++ b/multi_obj_pose_estimation/cfg/benchvise.data
@@ -4,4 +4,11 @@ backup = backup_multi
 mesh = ../LINEMOD/benchvise/benchvise.ply
 tr_range = ../LINEMOD/benchvise/training_range.txt
 name = benchvise
-diam = 0.286908
\ No newline at end of file
+diam = 0.286908
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/can_occlusion.data b/multi_obj_pose_estimation/cfg/can_occlusion.data
index bcf80ac..ebbfa40 100644
--- a/multi_obj_pose_estimation/cfg/can_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/can_occlusion.data
@@ -2,4 +2,11 @@ valid  = ../LINEMOD/can/test_occlusion.txt
 mesh = ../LINEMOD/can/can.ply
 backup = backup_multi
 name = can
-diam = 0.202
\ No newline at end of file
+diam = 0.202
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/cat_occlusion.data b/multi_obj_pose_estimation/cfg/cat_occlusion.data
index f26b2ab..661e5a7 100644
--- a/multi_obj_pose_estimation/cfg/cat_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/cat_occlusion.data
@@ -2,4 +2,11 @@ valid  = ../LINEMOD/cat/test_occlusion.txt
 mesh = ../LINEMOD/cat/cat.ply
 backup = backup_multi
 name = cat
-diam = 0.155
\ No newline at end of file
+diam = 0.155
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/driller_occlusion.data b/multi_obj_pose_estimation/cfg/driller_occlusion.data
index 1d89f38..415a1f2 100644
--- a/multi_obj_pose_estimation/cfg/driller_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/driller_occlusion.data
@@ -2,4 +2,11 @@ valid  = ../LINEMOD/driller/test_occlusion.txt
 mesh = ../LINEMOD/driller/driller.ply
 backup = backup_multi
 name = driller
-diam = 0.262
\ No newline at end of file
+diam = 0.262
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/duck_occlusion.data b/multi_obj_pose_estimation/cfg/duck_occlusion.data
index 8a4154a..b4e922e 100644
--- a/multi_obj_pose_estimation/cfg/duck_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/duck_occlusion.data
@@ -2,4 +2,11 @@ valid  = ../LINEMOD/duck/test_occlusion.txt
 mesh = ../LINEMOD/duck/duck.ply
 backup = backup_multi
 name = duck
-diam = 0.109
\ No newline at end of file
+diam = 0.109
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/eggbox_occlusion.data b/multi_obj_pose_estimation/cfg/eggbox_occlusion.data
index f50b09a..eaceef8 100644
--- a/multi_obj_pose_estimation/cfg/eggbox_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/eggbox_occlusion.data
@@ -2,4 +2,11 @@ valid  = ../LINEMOD/eggbox/test_occlusion.txt
 mesh = ../LINEMOD/eggbox/eggbox.ply
 backup = backup_multi
 name = eggbox
-diam = 0.176364
\ No newline at end of file
+diam = 0.176364
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/glue_occlusion.data b/multi_obj_pose_estimation/cfg/glue_occlusion.data
index 721bc56..dfb0fbd 100644
--- a/multi_obj_pose_estimation/cfg/glue_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/glue_occlusion.data
@@ -2,4 +2,11 @@ valid  = ../LINEMOD/glue/test_occlusion.txt
 mesh = ../LINEMOD/glue/glue.ply
 backup = backup_multi
 name = glue
-diam = 0.176
\ No newline at end of file
+diam = 0.176
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data b/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data
index c119964..6dc256f 100644
--- a/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data
+++ b/multi_obj_pose_estimation/cfg/holepuncher_occlusion.data
@@ -2,4 +2,11 @@ valid  = ../LINEMOD/holepuncher/test_occlusion.txt
 mesh = ../LINEMOD/holepuncher/holepuncher.ply
 backup = backup_multi
 name = holepuncher
-diam = 0.162
\ No newline at end of file
+diam = 0.162
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/occlusion.data b/multi_obj_pose_estimation/cfg/occlusion.data
index 21aee04..d7d6cb9 100644
--- a/multi_obj_pose_estimation/cfg/occlusion.data
+++ b/multi_obj_pose_estimation/cfg/occlusion.data
@@ -21,3 +21,10 @@ diam6 = 0.262
 diam7 = 0.109
 diam9 = 0.176
 diam10 = 0.162
+gpus = 0
+im_width = 640
+im_height = 480
+fx = 572.4114 
+fy = 573.5704
+u0 = 325.2611
+v0 = 242.0489
\ No newline at end of file
diff --git a/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg b/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg
index 6ecde14..4c2431e 100644
--- a/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg
+++ b/multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg
@@ -1,27 +1,24 @@
 [net]
-# Testing
 batch=32
 subdivisions=8
-# Training
-# batch=64
-# subdivisions=8
 height=416
 width=416
 channels=3
+num_keypoints=9
 momentum=0.9
 decay=0.0005
 angle=0
 saturation = 1.5
 exposure = 1.5
 hue=.1
-
 learning_rate=0.001
 burn_in=1000
 max_batches = 80200
 policy=steps
 steps=-1,100,20000,30000
-# steps=-1,180,360,540
 scales=0.1,10,.1,.1
+conf_thresh = 0.05
+max_epochs = 500
 
 [convolutional]
 batch_normalize=1
@@ -235,13 +232,11 @@ activation=leaky
 size=1
 stride=1
 pad=1
-# filters=125
 filters=160
 activation=linear
 
 
 [region]
-# anchors =  1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
 anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851
 bias_match=1
 classes=13
diff --git a/multi_obj_pose_estimation/valid_multi.ipynb b/multi_obj_pose_estimation/valid_multi.ipynb
index 7e4f6e7..06b2a16 100644
--- a/multi_obj_pose_estimation/valid_multi.ipynb
+++ b/multi_obj_pose_estimation/valid_multi.ipynb
@@ -2,203 +2,137 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
     "import os\n",
     "os.sys.path.append('..')\n",
-    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"\n",
     "import torch\n",
-    "from torch.autograd import Variable\n",
-    "from torchvision import datasets, transforms\n",
-    "from scipy.misc import imsave\n",
-    "import scipy.io\n",
-    "import warnings\n",
-    "import sys\n",
-    "warnings.filterwarnings(\"ignore\")\n",
     "import matplotlib.pyplot as plt\n",
     "import scipy.misc\n",
+    "import warnings\n",
+    "import sys\n",
+    "import argparse\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "from torch.autograd import Variable\n",
+    "from torchvision import datasets, transforms\n",
     "\n",
-    "from darknet_multi import Darknet\n",
-    "from utils import *\n",
     "import dataset_multi\n",
-    "from MeshPly import MeshPly"
+    "from darknet_multi import Darknet\n",
+    "from utils_multi import *\n",
+    "from cfg import parse_cfg\n",
+    "from MeshPly import MeshPly\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "from scipy.misc import imsave\n",
+    "import scipy.io\n",
+    "import scipy.misc"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2018-05-06 14:09:50 Testing ape...\n",
-      "2018-05-06 14:10:15    Acc using 5 px 2D Projection = 7.01%\n",
-      "2018-05-06 14:10:15    Acc using 10 px 2D Projection = 40.43%\n",
-      "2018-05-06 14:10:15    Acc using 15 px 2D Projection = 59.83%\n",
-      "2018-05-06 14:10:15    Acc using 20 px 2D Projection = 68.55%\n",
-      "2018-05-06 14:10:15    Acc using 25 px 2D Projection = 72.05%\n",
-      "2018-05-06 14:10:15    Acc using 30 px 2D Projection = 73.68%\n",
-      "2018-05-06 14:10:15    Acc using 35 px 2D Projection = 74.53%\n",
-      "2018-05-06 14:10:15    Acc using 40 px 2D Projection = 75.13%\n",
-      "2018-05-06 14:10:15    Acc using 45 px 2D Projection = 75.73%\n",
-      "2018-05-06 14:10:15    Acc using 50 px 2D Projection = 76.50%\n",
-      "2018-05-06 14:10:18 Testing can...\n",
-      "2018-05-06 14:10:47    Acc using 5 px 2D Projection = 11.18%\n",
-      "2018-05-06 14:10:47    Acc using 10 px 2D Projection = 57.83%\n",
-      "2018-05-06 14:10:47    Acc using 15 px 2D Projection = 79.95%\n",
-      "2018-05-06 14:10:47    Acc using 20 px 2D Projection = 85.75%\n",
-      "2018-05-06 14:10:47    Acc using 25 px 2D Projection = 88.73%\n",
-      "2018-05-06 14:10:47    Acc using 30 px 2D Projection = 90.39%\n",
-      "2018-05-06 14:10:47    Acc using 35 px 2D Projection = 91.80%\n",
-      "2018-05-06 14:10:47    Acc using 40 px 2D Projection = 93.21%\n",
-      "2018-05-06 14:10:47    Acc using 45 px 2D Projection = 93.62%\n",
-      "2018-05-06 14:10:47    Acc using 50 px 2D Projection = 93.79%\n",
-      "2018-05-06 14:10:50 Testing cat...\n",
-      "2018-05-06 14:11:16    Acc using 5 px 2D Projection = 3.62%\n",
-      "2018-05-06 14:11:16    Acc using 10 px 2D Projection = 23.25%\n",
-      "2018-05-06 14:11:16    Acc using 15 px 2D Projection = 39.51%\n",
-      "2018-05-06 14:11:16    Acc using 20 px 2D Projection = 49.45%\n",
-      "2018-05-06 14:11:16    Acc using 25 px 2D Projection = 54.76%\n",
-      "2018-05-06 14:11:16    Acc using 30 px 2D Projection = 57.96%\n",
-      "2018-05-06 14:11:16    Acc using 35 px 2D Projection = 59.56%\n",
-      "2018-05-06 14:11:16    Acc using 40 px 2D Projection = 60.99%\n",
-      "2018-05-06 14:11:16    Acc using 45 px 2D Projection = 62.51%\n",
-      "2018-05-06 14:11:16    Acc using 50 px 2D Projection = 63.27%\n",
-      "2018-05-06 14:11:19 Testing duck...\n",
-      "2018-05-06 14:11:42    Acc using 5 px 2D Projection = 5.07%\n",
-      "2018-05-06 14:11:42    Acc using 10 px 2D Projection = 18.20%\n",
-      "2018-05-06 14:11:42    Acc using 15 px 2D Projection = 30.88%\n",
-      "2018-05-06 14:11:42    Acc using 20 px 2D Projection = 55.12%\n",
-      "2018-05-06 14:11:42    Acc using 25 px 2D Projection = 75.15%\n",
-      "2018-05-06 14:11:42    Acc using 30 px 2D Projection = 81.45%\n",
-      "2018-05-06 14:11:42    Acc using 35 px 2D Projection = 83.20%\n",
-      "2018-05-06 14:11:42    Acc using 40 px 2D Projection = 83.64%\n",
-      "2018-05-06 14:11:42    Acc using 45 px 2D Projection = 83.90%\n",
-      "2018-05-06 14:11:42    Acc using 50 px 2D Projection = 84.16%\n",
-      "2018-05-06 14:11:45 Testing driller...\n",
-      "2018-05-06 14:12:10    Acc using 5 px 2D Projection = 1.40%\n",
-      "2018-05-06 14:12:10    Acc using 10 px 2D Projection = 17.38%\n",
-      "2018-05-06 14:12:10    Acc using 15 px 2D Projection = 39.87%\n",
-      "2018-05-06 14:12:10    Acc using 20 px 2D Projection = 62.93%\n",
-      "2018-05-06 14:12:10    Acc using 25 px 2D Projection = 80.64%\n",
-      "2018-05-06 14:12:10    Acc using 30 px 2D Projection = 89.87%\n",
-      "2018-05-06 14:12:10    Acc using 35 px 2D Projection = 94.89%\n",
-      "2018-05-06 14:12:10    Acc using 40 px 2D Projection = 95.88%\n",
-      "2018-05-06 14:12:10    Acc using 45 px 2D Projection = 96.54%\n",
-      "2018-05-06 14:12:10    Acc using 50 px 2D Projection = 96.87%\n",
-      "2018-05-06 14:12:13 Testing glue...\n",
-      "2018-05-06 14:12:31    Acc using 5 px 2D Projection = 6.53%\n",
-      "2018-05-06 14:12:31    Acc using 10 px 2D Projection = 26.91%\n",
-      "2018-05-06 14:12:31    Acc using 15 px 2D Projection = 39.65%\n",
-      "2018-05-06 14:12:31    Acc using 20 px 2D Projection = 46.18%\n",
-      "2018-05-06 14:12:31    Acc using 25 px 2D Projection = 49.50%\n",
-      "2018-05-06 14:12:31    Acc using 30 px 2D Projection = 51.83%\n",
-      "2018-05-06 14:12:31    Acc using 35 px 2D Projection = 53.05%\n",
-      "2018-05-06 14:12:31    Acc using 40 px 2D Projection = 53.16%\n",
-      "2018-05-06 14:12:31    Acc using 45 px 2D Projection = 53.93%\n",
-      "2018-05-06 14:12:31    Acc using 50 px 2D Projection = 54.71%\n",
-      "2018-05-06 14:12:45 Testing holepuncher...\n",
-      "2018-05-06 14:19:31    Acc using 5 px 2D Projection = 8.26%\n",
-      "2018-05-06 14:19:31    Acc using 10 px 2D Projection = 39.50%\n",
-      "2018-05-06 14:19:31    Acc using 15 px 2D Projection = 53.31%\n",
-      "2018-05-06 14:19:31    Acc using 20 px 2D Projection = 62.56%\n",
-      "2018-05-06 14:19:31    Acc using 25 px 2D Projection = 68.02%\n",
-      "2018-05-06 14:19:31    Acc using 30 px 2D Projection = 74.71%\n",
-      "2018-05-06 14:19:31    Acc using 35 px 2D Projection = 80.74%\n",
-      "2018-05-06 14:19:31    Acc using 40 px 2D Projection = 85.62%\n",
-      "2018-05-06 14:19:31    Acc using 45 px 2D Projection = 89.59%\n",
-      "2018-05-06 14:19:31    Acc using 50 px 2D Projection = 91.49%\n"
+      "2019-10-18 17:00:04 Testing ape...\n",
+      "2019-10-18 17:01:38    Acc using 5 px 2D Projection = 6.07%\n",
+      "2019-10-18 17:01:38    Acc using 10 px 2D Projection = 39.32%\n",
+      "2019-10-18 17:01:38    Acc using 15 px 2D Projection = 59.83%\n",
+      "2019-10-18 17:01:38    Acc using 20 px 2D Projection = 68.29%\n",
+      "2019-10-18 17:01:38    Acc using 25 px 2D Projection = 72.74%\n",
+      "2019-10-18 17:01:38    Acc using 30 px 2D Projection = 74.96%\n",
+      "2019-10-18 17:01:38    Acc using 35 px 2D Projection = 75.64%\n",
+      "2019-10-18 17:01:38    Acc using 40 px 2D Projection = 76.32%\n",
+      "2019-10-18 17:01:38    Acc using 45 px 2D Projection = 76.67%\n",
+      "2019-10-18 17:01:38    Acc using 50 px 2D Projection = 78.03%\n",
+      "2019-10-18 17:01:39 Testing can...\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-5-27dc0bb0bf4c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m    146\u001b[0m \u001b[0mvalid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdatacfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelcfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweightfile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    147\u001b[0m \u001b[0mdatacfg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'cfg/can_occlusion.data'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 148\u001b[1;33m \u001b[0mvalid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdatacfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelcfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweightfile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    149\u001b[0m \u001b[0mdatacfg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'cfg/cat_occlusion.data'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    150\u001b[0m \u001b[0mvalid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdatacfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelcfg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweightfile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m<ipython-input-5-27dc0bb0bf4c>\u001b[0m in \u001b[0;36mvalid\u001b[1;34m(datacfg, cfgfile, weightfile)\u001b[0m\n\u001b[0;32m     74\u001b[0m         \u001b[1;31m# Using confidence threshold, eliminate low-confidence predictions\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     75\u001b[0m         \u001b[0mtrgt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_labels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 76\u001b[1;33m         \u001b[0mall_boxes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_multi_region_boxes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconf_thresh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_classes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_keypoints\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0manchors\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnum_anchors\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrgt\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0monly_objectness\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     77\u001b[0m         \u001b[0mt4\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     78\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Documents\\Code\\singleshot6Dpose\\multi_obj_pose_estimation\\utils_multi.py\u001b[0m in \u001b[0;36mget_multi_region_boxes\u001b[1;34m(output, conf_thresh, num_classes, num_keypoints, anchors, num_anchors, correspondingclass, only_objectness, validation)\u001b[0m\n\u001b[0;32m    330\u001b[0m                         \u001b[0mmax_ind\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mind\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    331\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 332\u001b[1;33m                     \u001b[1;32mif\u001b[0m \u001b[0mconf\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mconf_thresh\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    333\u001b[0m                         \u001b[0mbcx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    334\u001b[0m                         \u001b[0mbcy\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
      ]
     }
    ],
    "source": [
-    "def valid(datacfg, cfgfile, weightfile, conf_th):\n",
+    "def valid(datacfg, cfgfile, weightfile):\n",
     "    def truths_length(truths):\n",
     "        for i in range(50):\n",
     "            if truths[i][1] == 0:\n",
     "                return i\n",
     "\n",
-    "    # Parse configuration files\n",
-    "    options       = read_data_cfg(datacfg)\n",
-    "    valid_images  = options['valid']\n",
-    "    meshname      = options['mesh']\n",
-    "    backupdir     = options['backup']\n",
-    "    name          = options['name']\n",
-    "    prefix        = 'results'\n",
-    "    # Read object model information, get 3D bounding box corners\n",
-    "    mesh          = MeshPly(meshname)\n",
-    "    vertices      = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n",
-    "    corners3D     = get_3D_corners(vertices)\n",
-    "    # Read intrinsic camera parameters\n",
-    "    internal_calibration = get_camera_intrinsic()\n",
+    "    # Parse data configuration files\n",
+    "    data_options = read_data_cfg(datacfg)\n",
+    "    valid_images = data_options['valid']\n",
+    "    meshname     = data_options['mesh']\n",
+    "    name         = data_options['name']\n",
+    "    im_width     = int(data_options['im_width'])\n",
+    "    im_height    = int(data_options['im_height']) \n",
+    "    fx           = float(data_options['fx'])\n",
+    "    fy           = float(data_options['fy'])\n",
+    "    u0           = float(data_options['u0'])\n",
+    "    v0           = float(data_options['v0'])\n",
+    "    \n",
+    "    # Parse net configuration file\n",
+    "    net_options   = parse_cfg(cfgfile)[0]\n",
+    "    loss_options  = parse_cfg(cfgfile)[-1]\n",
+    "    conf_thresh   = float(net_options['conf_thresh'])\n",
+    "    num_keypoints = int(net_options['num_keypoints'])\n",
+    "    num_classes   = int(loss_options['classes'])\n",
+    "    num_anchors   = int(loss_options['num'])\n",
+    "    anchors       = [float(anchor) for anchor in loss_options['anchors'].split(',')]\n",
     "\n",
-    "    # Get validation file names\n",
-    "    with open(valid_images) as fp:\n",
+    "    # Read object model information, get 3D bounding box corners, get intrinsics\n",
+    "    mesh                  = MeshPly(meshname)\n",
+    "    vertices              = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n",
+    "    corners3D             = get_3D_corners(vertices)\n",
+    "    diam                  = float(data_options['diam'])\n",
+    "    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy) # camera params\n",
+    "\n",
+    "    # Network I/O params\n",
+    "    num_labels = 2*num_keypoints+3 # +2 for width, height, +1 for object class\n",
+    "    errs_2d = []  # to save\n",
+    "    with open(valid_images) as fp:     # validation file names\n",
     "        tmp_files = fp.readlines()\n",
     "        valid_files = [item.rstrip() for item in tmp_files]\n",
-    "    \n",
+    "\n",
+    "    # Compute-related Parameters\n",
+    "    use_cuda = True # whether to use cuda or not\n",
+    "    kwargs = {'num_workers': 4, 'pin_memory': True} # number of workers etc.\n",
+    "\n",
     "    # Specicy model, load pretrained weights, pass to GPU and set the module in evaluation mode\n",
     "    model = Darknet(cfgfile)\n",
     "    model.load_weights(weightfile)\n",
     "    model.cuda()\n",
     "    model.eval()\n",
     "\n",
-    "    # Get the parser for the test dataset\n",
-    "    valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height),\n",
-    "                       shuffle=False,\n",
-    "                       objclass=name,\n",
-    "                       transform=transforms.Compose([\n",
-    "                           transforms.ToTensor(),\n",
-    "                       ]))\n",
-    "    valid_batchsize = 1\n",
-    "\n",
-    "    # Specify the number of workers for multiple processing, get the dataloader for the test dataset\n",
-    "    kwargs = {'num_workers': 4, 'pin_memory': True}\n",
-    "    test_loader = torch.utils.data.DataLoader(\n",
-    "        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs) \n",
-    "\n",
-    "    # Parameters\n",
-    "    visualize       = False\n",
-    "    use_cuda        = True\n",
-    "    num_classes     = 13\n",
-    "    anchors         = [1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851]\n",
-    "    num_anchors     = 5\n",
-    "    eps             = 1e-5\n",
-    "    conf_thresh     = conf_th\n",
-    "    iou_thresh      = 0.5\n",
-    "\n",
-    "    # Parameters to save\n",
-    "    errs_2d = []\n",
-    "    edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4], [3, 7], [4, 8], [5, 6], [5, 7], [6, 8], [7, 8]]\n",
-    "    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]\n",
+    "    # Get the dataloader for the test dataset\n",
+    "    valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height), shuffle=False, objclass=name, transform=transforms.Compose([transforms.ToTensor(),]))\n",
+    "    test_loader   = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=False, **kwargs) \n",
     "\n",
     "    # Iterate through test batches (Batch size for test data is 1)\n",
-    "    count = 0\n",
     "    logging('Testing {}...'.format(name))\n",
     "    for batch_idx, (data, target) in enumerate(test_loader):\n",
     "        \n",
-    "        # Images\n",
-    "        img = data[0, :, :, :]\n",
-    "        img = img.numpy().squeeze()\n",
-    "        img = np.transpose(img, (1, 2, 0))\n",
-    "        \n",
     "        t1 = time.time()\n",
     "        # Pass data to GPU\n",
     "        if use_cuda:\n",
     "            data = data.cuda()\n",
-    "            target = target.cuda()\n",
+    "            # target = target.cuda()\n",
     "        \n",
     "        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference\n",
     "        data = Variable(data, volatile=True)\n",
@@ -209,8 +143,8 @@
     "        t3 = time.time()\n",
     "        \n",
     "        # Using confidence threshold, eliminate low-confidence predictions\n",
-    "        trgt = target[0].view(-1, 21)\n",
-    "        all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)        \n",
+    "        trgt = target[0].view(-1, num_labels)\n",
+    "        all_boxes = get_multi_region_boxes(output, conf_thresh, num_classes, num_keypoints, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)        \n",
     "        t4 = time.time()\n",
     "        \n",
     "        # Iterate through all images in the batch\n",
@@ -220,95 +154,77 @@
     "            boxes   = all_boxes[i]\n",
     "            \n",
     "            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)\n",
-    "            truths  = target[i].view(-1, 21)\n",
+    "            truths  = target[i].view(-1, num_labels)\n",
     "            \n",
     "            # Get how many object are present in the scene\n",
     "            num_gts = truths_length(truths)\n",
     "\n",
     "            # Iterate through each ground-truth object\n",
     "            for k in range(num_gts):\n",
-    "                box_gt        = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6], \n",
-    "                                truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12], \n",
-    "                                truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]\n",
-    "                best_conf_est = -1\n",
+    "                box_gt = list()\n",
+    "                for j in range(1, num_labels):\n",
+    "                    box_gt.append(truths[k][j])\n",
+    "                box_gt.extend([1.0, 1.0])\n",
+    "                box_gt.append(truths[k][0])\n",
     "                \n",
-    "\n",
     "                # If the prediction has the highest confidence, choose it as our prediction\n",
+    "                best_conf_est = -sys.maxsize\n",
     "                for j in range(len(boxes)):\n",
-    "                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):\n",
-    "                        best_conf_est = boxes[j][18]\n",
+    "                    if (boxes[j][2*num_keypoints] > best_conf_est) and (boxes[j][2*num_keypoints+2] == int(truths[k][0])):\n",
+    "                        best_conf_est = boxes[j][2*num_keypoints]\n",
     "                        box_pr        = boxes[j]\n",
-    "                        bb2d_gt       = get_2d_bb(box_gt[:18], output.size(3))\n",
-    "                        bb2d_pr       = get_2d_bb(box_pr[:18], output.size(3))\n",
-    "                        iou           = bbox_iou(bb2d_gt, bb2d_pr)\n",
-    "                        match         = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))\n",
+    "                        match         = corner_confidence(box_gt[:2*num_keypoints], torch.FloatTensor(boxes[j][:2*num_keypoints]))\n",
     "                    \n",
     "                # Denormalize the corner predictions \n",
-    "                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')\n",
-    "                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')\n",
-    "                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640\n",
-    "                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480               \n",
-    "                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640\n",
-    "                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480\n",
-    "                corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of the corners in OCCLUSION\n",
+    "                corners2D_gt = np.array(np.reshape(box_gt[:2*num_keypoints], [-1, 2]), dtype='float32')\n",
+    "                corners2D_pr = np.array(np.reshape(box_pr[:2*num_keypoints], [-1, 2]), dtype='float32')\n",
+    "                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width\n",
+    "                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height               \n",
+    "                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width\n",
+    "                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height\n",
+    "                corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of corners\n",
     "                \n",
     "                # Compute [R|t] by pnp\n",
     "                objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')\n",
-    "                K = np.array(internal_calibration, dtype='float32')\n",
+    "                K = np.array(intrinsic_calibration, dtype='float32')\n",
     "                R_gt, t_gt = pnp(objpoints3D,  corners2D_gt_corrected, K)\n",
     "                R_pr, t_pr = pnp(objpoints3D,  corners2D_pr, K)\n",
     "                \n",
     "                # Compute pixel error\n",
     "                Rt_gt        = np.concatenate((R_gt, t_gt), axis=1)\n",
     "                Rt_pr        = np.concatenate((R_pr, t_pr), axis=1)\n",
-    "                proj_2d_gt   = compute_projection(vertices, Rt_gt, internal_calibration) \n",
-    "                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration) \n",
-    "                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration)) \n",
-    "                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration)) \n",
+    "                proj_2d_gt   = compute_projection(vertices, Rt_gt, intrinsic_calibration) \n",
+    "                proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration) \n",
+    "                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, intrinsic_calibration)) \n",
+    "                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, intrinsic_calibration)) \n",
     "                norm         = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)\n",
     "                pixel_dist   = np.mean(norm)\n",
     "                errs_2d.append(pixel_dist)\n",
     "\n",
-    "                \n",
-    "                if visualize:\n",
-    "                    # Visualize\n",
-    "                    plt.xlim((0, 640))\n",
-    "                    plt.ylim((0, 480))\n",
-    "                    plt.imshow(scipy.misc.imresize(img, (480, 640)))\n",
-    "                    # Projections\n",
-    "                    for edge in edges_corners:\n",
-    "                        plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1], color='g', linewidth=3.0)\n",
-    "                        plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1], color='b', linewidth=3.0)\n",
-    "                    plt.gca().invert_yaxis()\n",
-    "                    plt.show()\n",
-    "\n",
     "        t5 = time.time()\n",
     "\n",
     "    # Compute 2D projection score\n",
+    "    eps = 1e-5\n",
     "    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:\n",
     "        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)\n",
     "        # Print test statistics\n",
     "        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))\n",
     "\n",
-    "conf_th = 0.05\n",
-    "cfgfile = 'cfg/yolo-pose-multi.cfg'\n",
-    "weightfile = 'backup_multi/model_backup2.weights'\n",
+    "modelcfg = 'cfg/yolo-pose-multi.cfg'\n",
     "datacfg = 'cfg/ape_occlusion.data'\n",
-    "valid(datacfg, cfgfile, weightfile, conf_th)\n",
-    "datacfg = 'cfg/can_occlusion.data'\n",
-    "valid(datacfg, cfgfile, weightfile, conf_th)\n",
-    "datacfg = 'cfg/cat_occlusion.data'\n",
-    "valid(datacfg, cfgfile, weightfile, conf_th)\n",
-    "datacfg = 'cfg/duck_occlusion.data'\n",
-    "valid(datacfg, cfgfile, weightfile, conf_th)\n",
-    "datacfg = 'cfg/driller_occlusion.data'\n",
-    "valid(datacfg, cfgfile, weightfile, conf_th)\n",
-    "datacfg = 'cfg/glue_occlusion.data'\n",
-    "valid(datacfg, cfgfile, weightfile, conf_th)\n",
-    "datacfg = 'cfg/holepuncher_occlusion.data'\n",
-    "valid(datacfg, cfgfile, weightfile, conf_th)\n",
+    "weightfile = 'backup_multi/model_backup.weights'\n",
     "\n",
-    "    "
+    "valid(datacfg, modelcfg, weightfile)\n",
+    "datacfg = 'cfg/can_occlusion.data'\n",
+    "valid(datacfg, modelcfg, weightfile)\n",
+    "datacfg = 'cfg/cat_occlusion.data'\n",
+    "valid(datacfg, modelcfg, weightfile)\n",
+    "datacfg = 'cfg/duck_occlusion.data'\n",
+    "valid(datacfg, modelcfg, weightfile)\n",
+    "datacfg = 'cfg/glue_occlusion.data'\n",
+    "valid(datacfg, modelcfg, weightfile)\n",
+    "datacfg = 'cfg/holepuncher_occlusion.data'\n",
+    "valid(datacfg, modelcfg, weightfile)"
    ]
   },
   {
@@ -321,21 +237,21 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 2",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "python2"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
   }
  },
  "nbformat": 4,