{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import os\n",
"import time\n",
"import torch\n",
"import numpy as np  # explicit import (np was previously only available via 'from utils import *')\n",
"from torch.autograd import Variable\n",
"from torchvision import datasets, transforms\n",
"import scipy.io\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"import matplotlib.pyplot as plt\n",
"import scipy.misc\n",
"\n",
"from darknet import Darknet\n",
"import dataset\n",
"from utils import *\n",
"from MeshPly import MeshPly\n",
"\n",
"# Create new directory\n",
"def makedirs(path):\n",
"    if not os.path.exists(path):\n",
"        os.makedirs(path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def valid(datacfg, modelcfg, weightfile):\n",
"    def truths_length(truths, max_num_gt=50):\n",
"        for i in range(max_num_gt):\n",
"            if truths[i][1] == 0:\n",
"                return i\n",
"        return max_num_gt  # every row is a filled ground-truth entry\n",
"\n",
"    # Parse configuration files\n",
"    data_options = read_data_cfg(datacfg)\n",
"    valid_images = data_options['valid']\n",
"    meshname = data_options['mesh']\n",
"    backupdir = data_options['backup']\n",
"    name = data_options['name']\n",
"    gpus = data_options['gpus']\n",
"    fx = float(data_options['fx'])\n",
"    fy = float(data_options['fy'])\n",
"    u0 = float(data_options['u0'])\n",
"    v0 = float(data_options['v0'])\n",
"    im_width = int(data_options['width'])\n",
"    im_height = int(data_options['height'])\n",
"    if not os.path.exists(backupdir):\n",
"        makedirs(backupdir)\n",
"\n",
"    # Parameters\n",
"    seed = int(time.time())\n",
"    os.environ['CUDA_VISIBLE_DEVICES'] = gpus\n",
"    torch.cuda.manual_seed(seed)\n",
"    save = False\n",
"    visualize = True\n",
"    testtime = True\n",
"    num_classes = 1\n",
"    testing_samples = 0.0\n",
" edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]\n",
"    if save:\n",
"        makedirs(backupdir + '/test')\n",
"        makedirs(backupdir + '/test/gt')\n",
"        makedirs(backupdir + '/test/pr')\n",
"    # Accumulators for errors and (optionally saved) predictions\n",
"    testing_error_trans = 0.0\n",
"    testing_error_angle = 0.0\n",
"    testing_error_pixel = 0.0\n",
"    errs_2d = []\n",
"    errs_3d = []\n",
"    errs_trans = []\n",
"    errs_angle = []\n",
"    errs_corner2D = []\n",
"    preds_trans = []\n",
"    preds_rot = []\n",
"    preds_corners2D = []\n",
"    gts_trans = []\n",
"    gts_rot = []\n",
"    gts_corners2D = []\n",
"\n",
"    # Read object model information, get 3D bounding box corners\n",
"    mesh = MeshPly(meshname)\n",
"    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n",
" corners3D = get_3D_corners(vertices)\n",
"    try:\n",
"        diam = float(data_options['diam'])\n",
"    except KeyError:\n",
"        diam = calc_pts_diameter(np.array(mesh.vertices))\n",
"\n",
"    # Read intrinsic camera parameters\n",
" intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)\n",
"\n",
"    # Get validation file names\n",
"    with open(valid_images) as fp:\n",
"        tmp_files = fp.readlines()\n",
"        valid_files = [item.rstrip() for item in tmp_files]\n",
"\n",
"    # Specify the model, load pretrained weights, pass to GPU and set the module in evaluation mode\n",
"    model = Darknet(modelcfg)\n",
"    model.print_network()\n",
"    model.load_weights(weightfile)\n",
"    model.cuda()\n",
"    model.eval()\n",
"    test_width = model.test_width\n",
"    test_height = model.test_height\n",
"    num_keypoints = model.num_keypoints\n",
"    num_labels = num_keypoints * 2 + 3\n",
"\n",
"    # Get the parser for the test dataset\n",
"    valid_dataset = dataset.listDataset(valid_images,\n",
"                                        shape=(test_width, test_height),\n",
"                                        shuffle=False,\n",
"                                        transform=transforms.Compose([transforms.ToTensor(),]))\n",
"\n",
"    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset\n",
"    kwargs = {'num_workers': 4, 'pin_memory': True}\n",
"    test_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=False, **kwargs)\n",
"\n",
"    logging(\" Testing {}...\".format(name))\n",
"    logging(\" Number of test samples: %d\" % len(test_loader.dataset))\n",
"    # Iterate through test batches (batch size for test data is 1)\n",
"    count = 0\n",
"    for batch_idx, (data, target) in enumerate(test_loader):\n",
"\n",
"        # Images\n",
"        img = data[0, :, :, :]\n",
"        img = img.numpy().squeeze()\n",
"        img = np.transpose(img, (1, 2, 0))\n",
"\n",
"        t1 = time.time()\n",
"        # Pass data to GPU\n",
"        data = data.cuda()\n",
"        target = target.cuda()\n",
"        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference\n",
" data = Variable(data, volatile=True)\n",
"        t2 = time.time()\n",
"        # Forward pass\n",
"        output = model(data).data\n",
"        t3 = time.time()\n",
"        # Using confidence threshold, eliminate low-confidence predictions\n",
" all_boxes = get_region_boxes(output, num_classes, num_keypoints) \n",
"        t4 = time.time()\n",
"        # Evaluation\n",
"        # Iterate through all batch elements\n",
"        for box_pr, target in zip([all_boxes], [target[0]]):\n",
"            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)\n",
" truths = target.view(-1, num_keypoints*2+3)\n",
"            # Get how many objects are present in the scene\n",
"            num_gts = truths_length(truths)\n",
"            # Iterate through each ground-truth object\n",
"            for k in range(num_gts):\n",
"                box_gt = list()\n",
"                for j in range(1, 2*num_keypoints+1):\n",
"                    box_gt.append(truths[k][j])\n",
"                box_gt.extend([1.0, 1.0])\n",
" box_gt.append(truths[k][0])\n",
"\n",
"                # Denormalize the corner predictions\n",
"                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')\n",
"                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')\n",
"                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width\n",
"                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height\n",
"                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width\n",
"                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height\n",
"                preds_corners2D.append(corners2D_pr)\n",
"                gts_corners2D.append(corners2D_gt)\n",
"\n",
"                # Compute corner prediction error\n",
"                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr, axis=1)\n",
"                corner_dist = np.mean(corner_norm)\n",
"                errs_corner2D.append(corner_dist)\n",
"\n",
"                # Compute [R|t] by pnp\n",
"                R_gt, t_gt = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'), corners2D_gt, np.array(intrinsic_calibration, dtype='float32'))\n",
" R_pr, t_pr = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'), corners2D_pr, np.array(intrinsic_calibration, dtype='float32'))\n",
"\n",
"                # Compute translation error\n",
"                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))\n",
"                errs_trans.append(trans_dist)\n",
"\n",
"                # Compute angle error\n",
"                angle_dist = calcAngularDistance(R_gt, R_pr)\n",
"                errs_angle.append(angle_dist)\n",
"\n",
"                # Compute pixel error\n",
"                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)\n",
"                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)\n",
"                proj_2d_gt = compute_projection(vertices, Rt_gt, intrinsic_calibration)\n",
"                proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration)\n",
"                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, intrinsic_calibration))\n",
"                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, intrinsic_calibration))\n",
"                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)\n",
"                pixel_dist = np.mean(norm)\n",
"                errs_2d.append(pixel_dist)\n",
"\n",
"                if visualize:\n",
"                    # Visualize\n",
"                    plt.xlim((0, im_width))\n",
" plt.ylim((0, im_height))\n",
"                    plt.imshow(scipy.misc.imresize(img, (im_height, im_width)))\n",
"                    # Projections\n",
"                    for edge in edges_corners:\n",
"                        plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1], color='g', linewidth=3.0)\n",
"                        plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1], color='b', linewidth=3.0)\n",
"                    plt.gca().invert_yaxis()\n",
"                    plt.show()\n",
"\n",
"                # Compute 3D distances\n",
"                transform_3d_gt = compute_transformation(vertices, Rt_gt)\n",
"                transform_3d_pred = compute_transformation(vertices, Rt_pr)\n",
"                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred, axis=0)\n",
"                vertex_dist = np.mean(norm3d)\n",
"                errs_3d.append(vertex_dist)\n",
"\n",
"                # Sum errors\n",
"                testing_error_trans += trans_dist\n",
"                testing_error_angle += angle_dist\n",
"                testing_error_pixel += pixel_dist\n",
"                testing_samples += 1\n",
"                count = count + 1\n",
"\n",
"                if save:\n",
"                    preds_trans.append(t_pr)\n",
"                    gts_trans.append(t_gt)\n",
"                    preds_rot.append(R_pr)\n",
"                    gts_rot.append(R_gt)\n",
"\n",
"                    np.savetxt(backupdir + '/test/gt/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_gt, dtype='float32'))\n",
"                    np.savetxt(backupdir + '/test/gt/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_gt, dtype='float32'))\n",
"                    np.savetxt(backupdir + '/test/pr/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_pr, dtype='float32'))\n",
"                    np.savetxt(backupdir + '/test/pr/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_pr, dtype='float32'))\n",
"                    np.savetxt(backupdir + '/test/gt/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_gt, dtype='float32'))\n",
"                    np.savetxt(backupdir + '/test/pr/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_pr, dtype='float32'))\n",
"\n",
"\n",
"        t5 = time.time()\n",
"\n",
"    # Compute 2D projection error, 6D pose error, 5cm5degree error\n",
"    px_threshold = 5  # a 5 pixel threshold for the 2D reprojection error is standard in recent state-of-the-art 6D object pose estimation works\n",
"    eps = 1e-5\n",
"    acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)\n",
"    acc5cm5deg = len(np.where((np.array(errs_trans) <= 0.05) & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans)+eps)\n",
" acc3d10 = len(np.where(np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d)+eps)\n",
" corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)[0]) * 100. / (len(errs_corner2D)+eps)\n",
"    mean_err_2d = np.mean(errs_2d)\n",
"    mean_corner_err_2d = np.mean(errs_corner2D)\n",
"    nts = float(testing_samples)\n",
"\n",
"    if testtime:\n",
"        print('-----------------------------------')\n",
"        print('  tensor to cuda : %f' % (t2 - t1))\n",
"        print('    forward pass : %f' % (t3 - t2))\n",
"        print('get_region_boxes : %f' % (t4 - t3))\n",
"        print(' prediction time : %f' % (t4 - t1))\n",
"        print('            eval : %f' % (t5 - t4))\n",
"        print('-----------------------------------')\n",
"\n",
"    # Print test statistics\n",
"    logging('Results of {}'.format(name))\n",
"    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))\n",
"    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.format(diam * 0.1, acc3d10))\n",
"    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))\n",
"    logging(\"   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f\" % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))\n",
"    logging('   Translation error: %f m, angle error: %f degree, pixel error: %f pix' % (testing_error_trans/nts, testing_error_angle/nts, testing_error_pixel/nts))\n",
"\n",
"    if save:\n",
"        predfile = backupdir + '/predictions_linemod_' + name + '.mat'\n",
"        scipy.io.savemat(predfile, {'R_gts': gts_rot, 't_gts': gts_trans, 'corner_gts': gts_corners2D, 'R_prs': preds_rot, 't_prs': preds_trans, 'corner_prs': preds_corners2D})\n",
"\n",
"datacfg = 'cfg/ape.data'\n",
"modelcfg = 'cfg/yolo-pose.cfg'\n",
"weightfile = 'backup/ape/model_backup.weights'\n",
"valid(datacfg, modelcfg, weightfile)\n",
"    "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}