singleshotpose/valid.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import os\n",
"import time\n",
"import torch\n",
"from torch.autograd import Variable\n",
"from torchvision import datasets, transforms\n",
"import scipy.io\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"import matplotlib.pyplot as plt\n",
"import scipy.misc\n",
"\n",
"from darknet import Darknet\n",
"import dataset\n",
"from utils import *\n",
"from MeshPly import MeshPly\n",
"\n",
"# Create new directory\n",
"def makedirs(path):\n",
" if not os.path.exists( path ):\n",
" os.makedirs( path )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def valid(datacfg, modelcfg, weightfile):\n",
" def truths_length(truths, max_num_gt=50):\n",
" for i in range(max_num_gt):\n",
" if truths[i][1] == 0:\n",
" return i\n",
"\n",
" # Parse configuration files\n",
" data_options = read_data_cfg(datacfg)\n",
" valid_images = data_options['valid']\n",
" meshname = data_options['mesh']\n",
" backupdir = data_options['backup']\n",
" name = data_options['name']\n",
" gpus = data_options['gpus'] \n",
" fx = float(data_options['fx'])\n",
" fy = float(data_options['fy'])\n",
" u0 = float(data_options['u0'])\n",
" v0 = float(data_options['v0'])\n",
" im_width = int(data_options['width'])\n",
" im_height = int(data_options['height'])\n",
" if not os.path.exists(backupdir):\n",
" makedirs(backupdir)\n",
"\n",
" # Parameters\n",
" seed = int(time.time())\n",
" os.environ['CUDA_VISIBLE_DEVICES'] = gpus\n",
" torch.cuda.manual_seed(seed)\n",
" save = False\n",
" visualize = True\n",
" testtime = True\n",
" num_classes = 1\n",
" testing_samples = 0.0\n",
" edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]\n",
" if save:\n",
" makedirs(backupdir + '/test')\n",
" makedirs(backupdir + '/test/gt')\n",
" makedirs(backupdir + '/test/pr')\n",
" # To save\n",
" testing_error_trans = 0.0\n",
" testing_error_angle = 0.0\n",
" testing_error_pixel = 0.0\n",
" errs_2d = []\n",
" errs_3d = []\n",
" errs_trans = []\n",
" errs_angle = []\n",
" errs_corner2D = []\n",
" preds_trans = []\n",
" preds_rot = []\n",
" preds_corners2D = []\n",
" gts_trans = []\n",
" gts_rot = []\n",
" gts_corners2D = []\n",
"\n",
" # Read object model information, get 3D bounding box corners\n",
" mesh = MeshPly(meshname)\n",
" vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n",
" corners3D = get_3D_corners(vertices)\n",
" try:\n",
" diam = float(options['diam'])\n",
" except:\n",
" diam = calc_pts_diameter(np.array(mesh.vertices))\n",
" \n",
" # Read intrinsic camera parameters\n",
" intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)\n",
"\n",
" # Get validation file names\n",
" with open(valid_images) as fp:\n",
" tmp_files = fp.readlines()\n",
" valid_files = [item.rstrip() for item in tmp_files]\n",
" \n",
" # Specicy model, load pretrained weights, pass to GPU and set the module in evaluation mode\n",
" model = Darknet(modelcfg)\n",
" model.print_network()\n",
" model.load_weights(weightfile)\n",
" model.cuda()\n",
" model.eval()\n",
" test_width = model.test_width\n",
" test_height = model.test_height\n",
" num_keypoints = model.num_keypoints \n",
" num_labels = num_keypoints * 2 + 3\n",
"\n",
" # Get the parser for the test dataset\n",
" valid_dataset = dataset.listDataset(valid_images, \n",
" shape=(test_width, test_height),\n",
" shuffle=False,\n",
" transform=transforms.Compose([transforms.ToTensor(),]))\n",
"\n",
" # Specify the number of workers for multiple processing, get the dataloader for the test dataset\n",
" kwargs = {'num_workers': 4, 'pin_memory': True}\n",
" test_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=False, **kwargs) \n",
"\n",
" logging(\" Testing {}...\".format(name))\n",
" logging(\" Number of test samples: %d\" % len(test_loader.dataset))\n",
" # Iterate through test batches (Batch size for test data is 1)\n",
" count = 0\n",
" for batch_idx, (data, target) in enumerate(test_loader):\n",
" \n",
" # Images\n",
" img = data[0, :, :, :]\n",
" img = img.numpy().squeeze()\n",
" img = np.transpose(img, (1, 2, 0))\n",
" \n",
" t1 = time.time()\n",
" # Pass data to GPU\n",
" data = data.cuda()\n",
" target = target.cuda()\n",
" # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference\n",
" data = Variable(data, volatile=True)\n",
" t2 = time.time()\n",
" # Forward pass\n",
" output = model(data).data \n",
" t3 = time.time()\n",
" # Using confidence threshold, eliminate low-confidence predictions\n",
" all_boxes = get_region_boxes(output, num_classes, num_keypoints) \n",
" t4 = time.time()\n",
" # Evaluation\n",
" # Iterate through all batch elements\n",
" for box_pr, target in zip([all_boxes], [target[0]]):\n",
" # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)\n",
" truths = target.view(-1, num_keypoints*2+3)\n",
" # Get how many objects are present in the scene\n",
" num_gts = truths_length(truths)\n",
" # Iterate through each ground-truth object\n",
" for k in range(num_gts):\n",
" box_gt = list()\n",
" for j in range(1, 2*num_keypoints+1):\n",
" box_gt.append(truths[k][j])\n",
" box_gt.extend([1.0, 1.0])\n",
" box_gt.append(truths[k][0])\n",
"\n",
" # Denormalize the corner predictions \n",
" corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')\n",
" corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')\n",
" corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width\n",
" corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height \n",
" corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width\n",
" corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height\n",
" preds_corners2D.append(corners2D_pr)\n",
" gts_corners2D.append(corners2D_gt)\n",
"\n",
" # Compute corner prediction error\n",
" corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr, axis=1)\n",
" corner_dist = np.mean(corner_norm)\n",
" errs_corner2D.append(corner_dist)\n",
" \n",
" # Compute [R|t] by pnp\n",
" R_gt, t_gt = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'), corners2D_gt, np.array(intrinsic_calibration, dtype='float32'))\n",
" R_pr, t_pr = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'), corners2D_pr, np.array(intrinsic_calibration, dtype='float32'))\n",
" \n",
" # Compute translation error\n",
" trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))\n",
" errs_trans.append(trans_dist)\n",
" \n",
" # Compute angle error\n",
" angle_dist = calcAngularDistance(R_gt, R_pr)\n",
" errs_angle.append(angle_dist)\n",
" \n",
" # Compute pixel error\n",
" Rt_gt = np.concatenate((R_gt, t_gt), axis=1)\n",
" Rt_pr = np.concatenate((R_pr, t_pr), axis=1)\n",
" proj_2d_gt = compute_projection(vertices, Rt_gt, intrinsic_calibration)\n",
" proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration) \n",
" proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, intrinsic_calibration)) \n",
" proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, intrinsic_calibration)) \n",
" norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)\n",
" pixel_dist = np.mean(norm)\n",
" errs_2d.append(pixel_dist)\n",
"\n",
" if visualize:\n",
" # Visualize\n",
" plt.xlim((0, im_width))\n",
" plt.ylim((0, im_height))\n",
" plt.imshow(scipy.misc.imresize(img, (im_height, im_width)))\n",
" # Projections\n",
" for edge in edges_corners:\n",
" plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1], color='g', linewidth=3.0)\n",
" plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1], color='b', linewidth=3.0)\n",
" plt.gca().invert_yaxis()\n",
" plt.show()\n",
" \n",
" # Compute 3D distances\n",
" transform_3d_gt = compute_transformation(vertices, Rt_gt) \n",
" transform_3d_pred = compute_transformation(vertices, Rt_pr) \n",
" norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred, axis=0)\n",
" vertex_dist = np.mean(norm3d) \n",
" errs_3d.append(vertex_dist) \n",
"\n",
" # Sum errors\n",
" testing_error_trans += trans_dist\n",
" testing_error_angle += angle_dist\n",
" testing_error_pixel += pixel_dist\n",
" testing_samples += 1\n",
" count = count + 1\n",
"\n",
" if save:\n",
" preds_trans.append(t_pr)\n",
" gts_trans.append(t_gt)\n",
" preds_rot.append(R_pr)\n",
" gts_rot.append(R_gt)\n",
"\n",
" np.savetxt(backupdir + '/test/gt/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_gt, dtype='float32'))\n",
" np.savetxt(backupdir + '/test/gt/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_gt, dtype='float32'))\n",
" np.savetxt(backupdir + '/test/pr/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_pr, dtype='float32'))\n",
" np.savetxt(backupdir + '/test/pr/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_pr, dtype='float32'))\n",
" np.savetxt(backupdir + '/test/gt/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_gt, dtype='float32'))\n",
" np.savetxt(backupdir + '/test/pr/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_pr, dtype='float32'))\n",
"\n",
"\n",
" t5 = time.time()\n",
"\n",
" # Compute 2D projection error, 6D pose error, 5cm5degree error\n",
" px_threshold = 5 # 5 pixel threshold for 2D reprojection error is standard in recent sota 6D object pose estimation works \n",
" eps = 1e-5\n",
" acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)\n",
" acc5cm5deg = len(np.where((np.array(errs_trans) <= 0.05) & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans)+eps)\n",
" acc3d10 = len(np.where(np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d)+eps)\n",
" acc5cm5deg = len(np.where((np.array(errs_trans) <= 0.05) & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans)+eps)\n",
" corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)[0]) * 100. / (len(errs_corner2D)+eps)\n",
" mean_err_2d = np.mean(errs_2d)\n",
" mean_corner_err_2d = np.mean(errs_corner2D)\n",
" nts = float(testing_samples)\n",
"\n",
" if testtime:\n",
" print('-----------------------------------')\n",
" print(' tensor to cuda : %f' % (t2 - t1))\n",
" print(' forward pass : %f' % (t3 - t2))\n",
" print('get_region_boxes : %f' % (t4 - t3))\n",
" print(' prediction time : %f' % (t4 - t1))\n",
" print(' eval : %f' % (t5 - t4))\n",
" print('-----------------------------------')\n",
"\n",
" # Print test statistics\n",
" logging('Results of {}'.format(name))\n",
" logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))\n",
" logging(' Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.format(diam * 0.1, acc3d10))\n",
" logging(' Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))\n",
" logging(\" Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f\" % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))\n",
" logging(' Translation error: %f m, angle error: %f degree, pixel error: % f pix' % (testing_error_trans/nts, testing_error_angle/nts, testing_error_pixel/nts) )\n",
"\n",
" if save:\n",
" predfile = backupdir + '/predictions_linemod_' + name + '.mat'\n",
" scipy.io.savemat(predfile, {'R_gts': gts_rot, 't_gts':gts_trans, 'corner_gts': gts_corners2D, 'R_prs': preds_rot, 't_prs':preds_trans, 'corner_prs': preds_corners2D})\n",
"\n",
"datacfg = 'cfg/ape.data'\n",
"modelcfg = 'cfg/yolo-pose.cfg'\n",
"weightfile = 'backup/ape/model_backup.weights'\n",
"valid(datacfg, modelcfg, weightfile)\n",
" "
]
},
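{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: the standard 3x3 pinhole intrinsic matrix that get_camera_intrinsic(u0, v0, fx, fy)\n",
"# is expected to return. This cell only illustrates its shape; the _demo values below are\n",
"# examples, not read from the .data config that valid() actually uses.\n",
"import numpy as np\n",
"\n",
"fx_demo, fy_demo = 572.41, 573.57   # example focal lengths in pixels\n",
"u0_demo, v0_demo = 325.26, 242.05   # example principal point in pixels\n",
"K_demo = np.array([[fx_demo, 0.0,     u0_demo],\n",
"                   [0.0,     fy_demo, v0_demo],\n",
"                   [0.0,     0.0,     1.0]], dtype='float32')\n",
"print(K_demo)"
]
},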
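{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch of the PnP step in valid(): pnp() from utils is the repo's own helper, so this cell\n",
"# only illustrates the underlying idea with OpenCV's solvePnP on synthetic data. Everything\n",
"# suffixed with _demo is hypothetical, and cv2 is an extra dependency assumed here.\n",
"import cv2\n",
"import numpy as np\n",
"\n",
"# 9 reference points: object centroid + the 8 corners of a toy 0.1 m cube\n",
"xs = (-0.05, 0.05)\n",
"corners3D_demo = np.array([[x, y, z] for x in xs for y in xs for z in xs], dtype='float32')\n",
"points3D_demo = np.concatenate((np.zeros((1, 3), dtype='float32'), corners3D_demo), axis=0)\n",
"\n",
"# A made-up ground-truth pose used to synthesize the 2D observations\n",
"K_demo = np.array([[572.4, 0, 325.3], [0, 573.6, 242.0], [0, 0, 1]], dtype='float32')\n",
"dist_demo = np.zeros((4, 1), dtype='float32')   # assume no lens distortion\n",
"rvec_true = np.array([[0.1], [0.2], [0.3]], dtype='float32')\n",
"tvec_true = np.array([[0.0], [0.0], [0.6]], dtype='float32')\n",
"points2D_demo, _ = cv2.projectPoints(points3D_demo, rvec_true, tvec_true, K_demo, dist_demo)\n",
"points2D_demo = points2D_demo.reshape(-1, 2).astype('float32')\n",
"\n",
"# Recover [R|t] from the 2D-3D correspondences, which is what pnp() does for the 9 predicted keypoints\n",
"_, rvec, tvec = cv2.solvePnP(points3D_demo, points2D_demo, K_demo, dist_demo, flags=cv2.SOLVEPNP_ITERATIVE)\n",
"R_demo, _ = cv2.Rodrigues(rvec)\n",
"print('R =\\n', R_demo)\n",
"print('t =', tvec.ravel())"
]
},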
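{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch of the three accuracy metrics reported by valid(), on tiny hand-made error lists.\n",
"# The thresholds mirror the code above: 5 px reprojection error, 10% of the model diameter\n",
"# for the 3D vertex (ADD-style) metric, and 5 cm / 5 degrees for the pose metric. All the\n",
"# _demo numbers are hypothetical and only show how the thresholding works.\n",
"import numpy as np\n",
"\n",
"errs_2d_demo = np.array([1.8, 3.2, 4.9, 7.5])          # mean 2D reprojection errors (px)\n",
"errs_3d_demo = np.array([0.004, 0.011, 0.008, 0.02])   # mean 3D vertex errors (m)\n",
"errs_trans_demo = np.array([0.01, 0.03, 0.06, 0.02])   # translation errors (m)\n",
"errs_angle_demo = np.array([2.0, 4.5, 3.0, 6.0])       # rotation errors (deg)\n",
"diam_demo = 0.10                                        # hypothetical object diameter (m)\n",
"\n",
"acc_2d_demo = 100.0 * np.mean(errs_2d_demo <= 5)\n",
"acc_add_demo = 100.0 * np.mean(errs_3d_demo <= 0.1 * diam_demo)\n",
"acc_5cm5deg_demo = 100.0 * np.mean((errs_trans_demo <= 0.05) & (errs_angle_demo <= 5))\n",
"print('2D projection acc: %.1f%%, ADD(0.1d) acc: %.1f%%, 5cm5deg acc: %.1f%%'\n",
"      % (acc_2d_demo, acc_add_demo, acc_5cm5deg_demo))"
]
},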
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}