import sys
import os
import time
import math
import torch
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
import torch.nn.functional as F
import cv2
from scipy import spatial
import struct
import imghdr
# Create new directory
def makedirs(path):
    '''Create directory ``path`` (including missing parents) if it is absent.

    Nothing happens when ``path`` already exists. Creation is attempted
    directly instead of testing for existence first, so a directory created
    concurrently by another process does not make this call fail.
    '''
    try:
        os.makedirs(path, exist_ok=True)
    except FileExistsError:
        # Raised when ``path`` exists but is not a directory. The previous
        # exists-then-create logic left such a path untouched, so keep that;
        # anything else (e.g. a dangling symlink) is still reported.
        if not os.path.exists(path):
            raise
def get_all_files(directory):
    '''Recursively collect the paths of all regular files below ``directory``.

    Entries are visited in ``os.listdir`` order; the files of a
    sub-directory appear at the position of that sub-directory.

    directory : path of the directory to scan
    return    : list of file paths, each prefixed with ``directory``
    '''
    files = []
    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)
        if os.path.isfile(full_path):
            files.append(full_path)
        elif os.path.isdir(full_path):
            # Only descend into real directories; recursing into anything
            # else would make os.listdir raise.
            files.extend(get_all_files(full_path))
    return files
def calcAngularDistance(gt_rot, pr_rot):
    '''Return the angle, in degrees, of the relative rotation between two rotation matrices.

    gt_rot : ground-truth rotation matrix (presumably 3 x 3 -- the trace
             formula below is the one for 3-D rotations)
    pr_rot : predicted rotation matrix of the same shape
    return : angle in degrees, in [0, 180]
    '''
    rotDiff = np.dot(gt_rot, np.transpose(pr_rot))
    trace = np.trace(rotDiff)
    # Round-off can push the cosine marginally outside [-1, 1], which would
    # make arccos return NaN for (nearly) identical or opposite rotations.
    cos_angle = np.clip((trace - 1.0) / 2.0, -1.0, 1.0)
    return np.rad2deg(np.arccos(cos_angle))
def get_camera_intrinsic(u0, v0, fx, fy):
    '''Assemble the 3 x 3 matrix [[fx, 0, u0], [0, fy, v0], [0, 0, 1]].

    u0, v0 : values placed in the last column of the first and second row
    fx, fy : values placed on the diagonal of the first and second row
    return : numpy array of shape (3, 3)
    '''
    row_x = [fx, 0.0, u0]
    row_y = [0.0, fy, v0]
    row_h = [0.0, 0.0, 1.0]
    return np.array([row_x, row_y, row_h])
def compute_projection(points_3D, transformation, internal_calibration):
    '''Project 3-D points into the image plane.

    points_3D            : array with one point per column; it is multiplied
                           from the left by ``transformation`` (presumably
                           homogeneous 4 x N coordinates -- TODO confirm)
    transformation       : matrix applied to the points first (presumably a
                           3 x 4 [R|t] pose -- TODO confirm)
    internal_calibration : matrix applied after the transformation
                           (presumably the 3 x 3 camera intrinsics)
    return               : float32 array of shape (2, N) holding the first two
                           rows of the projection divided by its third row
    '''
    projections_2d = np.zeros((2, points_3D.shape[1]), dtype='float32')
    camera_projection = (internal_calibration.dot(transformation)).dot(points_3D)
    # Perspective division by the third (depth) row.
    projections_2d[:] = camera_projection[:2, :] / camera_projection[2, :]
    return projections_2d
def compute_transformation(points_3D, transformation):
    '''Apply ``transformation`` to ``points_3D`` by left matrix multiplication.

    points_3D      : array with one point per column
    transformation : matrix whose column count matches the row count of
                     ``points_3D``
    return         : ``transformation.dot(points_3D)``
    '''
    return transformation.dot(points_3D)
def calc_pts_diameter(pts):
    '''Return the largest Euclidean distance between any two rows of ``pts``.

    pts    : 2-D array with one point per row
    return : the maximum pairwise distance, or -1 when ``pts`` has no rows
    '''
    longest = -1
    num_pts = pts.shape[0]
    for start in range(num_pts):
        # Compare point ``start`` only with itself and the points after it;
        # earlier pairs were covered by previous iterations.
        anchor = np.tile(np.array([pts[start, :]]), [num_pts - start, 1])
        offsets = anchor - pts[start:, :]
        farthest = math.sqrt(np.sum(offsets * offsets, axis=1).max())
        longest = max(longest, farthest)
    return longest
def adi(pts_est, pts_gt):
    '''Mean distance from every point of ``pts_gt`` to its nearest point in ``pts_est``.

    pts_est : points indexed by the k-d tree, one point per row
    pts_gt  : query points, one point per row
    return  : mean of the nearest-neighbour distances
    '''
    tree = spatial.cKDTree(pts_est)
    nearest_dists, _ = tree.query(pts_gt, k=1)
    return nearest_dists.mean()
def get_3D_corners(vertices):
    '''Return the 8 corners of the axis-aligned bounding box of ``vertices``.

    vertices : array whose rows 0, 1 and 2 hold the x, y and z coordinates
    return   : array of shape (4, 8); each column is one corner (x, y, z)
               followed by a constant 1. Corners are ordered with x varying
               slowest and z fastest, minimum before maximum.
    '''
    x_lo, x_hi = np.min(vertices[0, :]), np.max(vertices[0, :])
    y_lo, y_hi = np.min(vertices[1, :]), np.max(vertices[1, :])
    z_lo, z_hi = np.min(vertices[2, :]), np.max(vertices[2, :])

    box = np.array([[x, y, z]
                    for x in (x_lo, x_hi)
                    for y in (y_lo, y_hi)
                    for z in (z_lo, z_hi)])

    # Append a row of ones below the transposed (3, 8) corner matrix.
    return np.vstack((box.T, np.ones((1, 8))))
def pnp(points_3D, points_2D, cameraMatrix):
    '''Estimate a pose from 3D-2D correspondences with ``cv2.solvePnP``.

    points_3D    : array with one 3-D point per row
    points_2D    : array with one 2-D point per row; only the first two
                   columns are used
    cameraMatrix : camera matrix handed to ``cv2.solvePnP``
    return       : (R, t) -- the rotation matrix obtained from the Rodrigues
                   vector, and the translation returned by ``cv2.solvePnP``

    Distortion coefficients are read from the function attribute
    ``pnp.distCoeffs`` when it has been set; otherwise zeros are used.
    '''
    distCoeffs = getattr(pnp, 'distCoeffs', None)
    if distCoeffs is None:
        distCoeffs = np.zeros((8, 1), dtype='float32')

    # The check must compare the two inputs with each other; comparing
    # points_2D with itself can never fail.
    assert points_3D.shape[0] == points_2D.shape[0], 'points 3D and points 2D must have same number of vertices'

    _, R_exp, t = cv2.solvePnP(points_3D,
                               np.ascontiguousarray(points_2D[:, :2]).reshape((-1, 1, 2)),
                               cameraMatrix,
                               distCoeffs)
    R, _ = cv2.Rodrigues(R_exp)
    return R, t
def get_2d_bb(box, size):
    '''Build a scaled [x, y, w, h] box from a flat list of 2-D points.

    box    : flat sequence x0, y0, x1, y1, ...; the first pair supplies the
             returned x and y, all pairs together define the extent
    size   : factor every returned value is multiplied by
    return : [box[0]*size, box[1]*size, width*size, height*size]
    '''
    points = np.reshape(box, [-1, 2])
    xs = points[:, 0]
    ys = points[:, 1]
    width = np.max(xs) - np.min(xs)
    height = np.max(ys) - np.min(ys)
    return [box[0] * size, box[1] * size, width * size, height * size]
def compute_2d_bb(pts):
    '''Return the axis-aligned bounding box of 2-D points as [cx, cy, w, h].

    pts    : array whose row 0 holds x coordinates and row 1 y coordinates
    return : [center x, center y, width, height]
    '''
    xs = pts[0, :]
    ys = pts[1, :]
    left, right = np.min(xs), np.max(xs)
    top, bottom = np.min(ys), np.max(ys)
    return [(right + left) / 2.0,
            (bottom + top) / 2.0,
            right - left,
            bottom - top]
def compute_2d_bb_from_orig_pix(pts, size, im_width=640.0, im_height=480.0):
    '''Bounding box [cx, cy, w, h] of pixel points, normalized and rescaled.

    pts       : array whose row 0 holds x and row 1 holds y pixel coordinates
    size      : factor applied to the normalized box
    im_width  : value the x coordinates are divided by (default 640, the
                constant that used to be hard-coded here)
    im_height : value the y coordinates are divided by (default 480)
    return    : [cx*size, cy*size, w*size, h*size]
    '''
    min_x = np.min(pts[0, :]) / im_width
    max_x = np.max(pts[0, :]) / im_width
    min_y = np.min(pts[1, :]) / im_height
    max_y = np.max(pts[1, :]) / im_height
    w = max_x - min_x
    h = max_y - min_y
    cx = (max_x + min_x) / 2.0
    cy = (max_y + min_y) / 2.0
    return [cx * size, cy * size, w * size, h * size]
def corner_confidences(gt_corners, pr_corners, th=80, sharpness=2, im_width=640, im_height=480):
    ''' gt_corners: Ground-truth 2D projections of the 3D bounding box corners, shape: (2*K x nA), type: torch.FloatTensor
        pr_corners: Prediction for the 2D projections of the 3D bounding box corners, shape: (2*K x nA), type: torch.FloatTensor
        th        : distance threshold in pixels, type: int
        sharpness : sharpness of the exponential that assigns a confidence value to the distance
        im_width  : factor converting the x coordinates to pixels
        im_height : factor converting the y coordinates to pixels
        -----------
        return    : a torch.FloatTensor of shape (nA,) holding, for every column,
                    the mean confidence over its K keypoints. A keypoint scores 1
                    at zero distance, decays exponentially, and scores 0 at or
                    beyond ``th``.
    '''
    nA = gt_corners.size(1)
    diff = gt_corners - pr_corners
    num_keypoints = diff.size(0) // 2
    # Rows alternate x, y per keypoint -> regroup as (nA, K, 2).
    diff = diff.t().contiguous().view(nA, num_keypoints, 2)
    pixel_scale = diff.new_tensor([im_width, im_height])
    # Keep the (nA, K) shape explicitly; squeezing it would make the
    # threshold comparison broadcast wrongly when K == 1 or nA == 1.
    dist = torch.sqrt(torch.sum((diff * pixel_scale) ** 2, dim=2))

    mask = (dist < th).type_as(dist)
    conf = torch.exp(sharpness * (1 - dist / th)) - 1
    conf0 = math.exp(sharpness) - 1  # value of ``conf`` at zero distance
    conf = mask * (conf / conf0)  # nA x K
    return torch.mean(conf, dim=1)
def corner_confidence(gt_corners, pr_corners, th=80, sharpness=2, im_width=640, im_height=480):
    ''' gt_corners: Ground-truth 2D projections of the 3D bounding box corners, flat sequence x0, y0, x1, y1, ... (list, array or tensor)
        pr_corners: Prediction for the 2D projections of the 3D bounding box corners, same layout (list, array or tensor)
        th        : distance threshold in pixels, type: int
        sharpness : sharpness of the exponential that assigns a confidence value to the distance
        im_width  : factor converting the x coordinates to pixels
        im_height : factor converting the y coordinates to pixels
        -----------
        return    : a 0-dim torch tensor with the mean confidence over all
                    keypoints; a keypoint scores ~1 at zero distance and 0 at
                    or beyond ``th``.
    '''
    gt = torch.as_tensor(gt_corners, dtype=torch.float32)
    pr = torch.as_tensor(pr_corners, dtype=torch.float32)
    diff = (gt - pr).reshape(-1, 2)
    pixel_scale = diff.new_tensor([im_width, im_height])
    dist = torch.sqrt(torch.sum((diff * pixel_scale) ** 2, dim=1))

    eps = 1e-5  # keeps the normalizer non-zero when sharpness is 0
    mask = (dist < th).type_as(dist)
    conf = torch.exp(sharpness * (1.0 - dist / th)) - 1
    conf0 = math.exp(sharpness) - 1 + eps
    # Divide by a scalar: a (K, 1) normalizer would broadcast to a (K, K) matrix.
    return torch.mean(mask * (conf / conf0))
def sigmoid(x):
    '''Return the logistic function 1 / (1 + exp(-x)) of a scalar ``x``.

    The negative branch uses the equivalent form exp(x) / (1 + exp(x)) so
    that large negative inputs underflow to 0.0 instead of making
    ``math.exp`` raise OverflowError.
    '''
    if x >= 0:
        return 1.0 / (math.exp(-x) + 1.)
    exp_x = math.exp(x)
    return exp_x / (exp_x + 1.)
def softmax(x):
    '''Softmax over all elements of tensor ``x``.

    The global maximum is subtracted before exponentiating; the result is
    normalized by the sum over every element.
    '''
    shifted = x - torch.max(x)
    weights = torch.exp(shifted)
    return weights / weights.sum()
def fix_corner_order(corners2D_gt):
    '''Reorder the rows of a 9-row corner array.

    Output row i is input row [0, 1, 3, 5, 7, 2, 4, 6, 8][i].

    corners2D_gt : array with at least 9 rows
    return       : float32 array of shape (9, 2)
    '''
    source_rows = [0, 1, 3, 5, 7, 2, 4, 6, 8]
    corners2D_gt_corrected = np.zeros((9, 2), dtype='float32')
    for target_row, source_row in enumerate(source_rows):
        corners2D_gt_corrected[target_row, :] = corners2D_gt[source_row, :]
    return corners2D_gt_corrected
def convert2cpu(gpu_matrix):
    '''Copy ``gpu_matrix`` into a new ``torch.FloatTensor`` of the same size.'''
    host_copy = torch.FloatTensor(gpu_matrix.size())
    host_copy.copy_(gpu_matrix)
    return host_copy
def convert2cpu_long(gpu_matrix):
    '''Copy ``gpu_matrix`` into a new ``torch.LongTensor`` of the same size.'''
    host_copy = torch.LongTensor(gpu_matrix.size())
    host_copy.copy_(gpu_matrix)
    return host_copy
def get_region_boxes(output, num_classes, num_keypoints, only_objectness=1, validation=True):
    '''Decode a network output map into the single most confident keypoint box.

    output          : tensor of shape (batch, C, h, w) or (C, h, w), where
                      C == 2*num_keypoints + 1 + num_classes
    num_classes     : number of class channels
    num_keypoints   : number of (x, y) keypoint channel pairs
    only_objectness : when truthy, cells are ranked by the detection
                      confidence alone; otherwise by detection confidence
                      times the best class confidence
    validation      : kept for interface compatibility; not used
    return          : flat list
                      [x0, y0, ..., x{K-1}, y{K-1}, det_conf, cls_conf, cls_id]
                      for the best cell over the whole batch; keypoints are
                      divided by the grid width / height. Entries are 0-dim
                      tensors.

    Raises ValueError when no cell can be selected (empty grid, or no
    confidence compares greater than the initial threshold, e.g. all NaN).
    '''
    anchor_dim = 1
    if output.dim() == 3:
        output = output.unsqueeze(0)
    batch = output.size(0)
    num_channels = 2*num_keypoints + 1 + num_classes
    assert(output.size(1) == num_channels*anchor_dim)
    h = output.size(2)
    w = output.size(3)
    num_cells = batch*anchor_dim*h*w

    # Activation: regroup to (channels, cells) so each channel is one flat row.
    output = output.view(batch*anchor_dim, num_channels, h*w).transpose(0, 1).contiguous().view(num_channels, num_cells)
    # Build the cell offsets on the same device as the network output rather
    # than unconditionally on the GPU.
    grid_x = torch.linspace(0, w-1, w).repeat(h, 1).repeat(batch*anchor_dim, 1, 1).view(num_cells).to(output.device)
    grid_y = torch.linspace(0, h-1, h).repeat(w, 1).t().repeat(batch*anchor_dim, 1, 1).view(num_cells).to(output.device)

    xs = list()
    ys = list()
    # Only the first keypoint is squashed into its own cell with a sigmoid.
    xs.append(torch.sigmoid(output[0]) + grid_x)
    ys.append(torch.sigmoid(output[1]) + grid_y)
    for j in range(1, num_keypoints):
        xs.append(output[2*j + 0] + grid_x)
        ys.append(output[2*j + 1] + grid_y)
    det_confs = torch.sigmoid(output[2*num_keypoints])
    cls_scores = output[2*num_keypoints+1:2*num_keypoints+1+num_classes].transpose(0, 1)
    cls_confs = torch.softmax(cls_scores, dim=1).detach()
    cls_max_confs, cls_max_ids = torch.max(cls_confs, 1)
    cls_max_confs = cls_max_confs.view(-1)
    cls_max_ids = cls_max_ids.view(-1)

    # GPU to CPU
    det_confs = convert2cpu(det_confs)
    cls_max_confs = convert2cpu(cls_max_confs)
    cls_max_ids = convert2cpu_long(cls_max_ids)
    for j in range(num_keypoints):
        xs[j] = convert2cpu(xs[j])
        ys[j] = convert2cpu(ys[j])

    # Boxes filter: keep the first cell with the strictly highest confidence.
    # With anchor_dim == 1 the flat index order equals the (batch, row,
    # column) scan order.
    if only_objectness:
        confs = det_confs
    else:
        confs = det_confs * cls_max_confs
    best_ind = None
    max_conf = -sys.maxsize
    for ind, conf in enumerate(confs.detach().tolist()):
        if conf > max_conf:
            max_conf = conf
            best_ind = ind
    if best_ind is None:
        raise ValueError('get_region_boxes: no grid cell could be selected')

    box = list()
    for j in range(num_keypoints):
        box.append(xs[j][best_ind]/w)
        box.append(ys[j][best_ind]/h)
    box.append(det_confs[best_ind])
    box.append(cls_max_confs[best_ind])
    box.append(cls_max_ids[best_ind])
    return box
def read_truths(lab_path, num_keypoints=9):
    '''Load a label file as a 2-D array with one row per ground-truth object.

    lab_path      : path of a whitespace-separated numeric text file
    num_keypoints : number of keypoints per object; each row then holds
                    2*num_keypoints + 3 values
    return        : array of shape (num_objects, 2*num_keypoints + 3), or an
                    empty 1-D array when the file has zero size
    '''
    if not os.path.getsize(lab_path):
        return np.array([])

    num_labels = 2*num_keypoints + 3  # +2 for width, height, +1 for class label
    values = np.loadtxt(lab_path)
    # Reshape so that a file with a single row still yields a 2-D array.
    return values.reshape(values.size//num_labels, num_labels)
def read_truths_args(lab_path, num_keypoints=9):
    '''Return the leading 2*num_keypoints + 1 values of every label row, flattened.

    lab_path      : path of the label file, parsed with ``read_truths``
    num_keypoints : number of keypoints per object; forwarded to
                    ``read_truths`` so both agree on the row layout
    return        : 1-D array with the kept values of all rows concatenated
                    (the trailing two values of each row are dropped); empty
                    when the file holds no labels
    '''
    num_labels = 2 * num_keypoints + 1
    truths = read_truths(lab_path, num_keypoints)
    if truths.size == 0:
        return np.array([])
    return truths[:, :num_labels].reshape(-1)
def read_pose(lab_path):
    '''Load a numeric text file with ``np.loadtxt``.

    lab_path : path of a whitespace-separated numeric text file
    return   : the loaded array (shape as produced by ``np.loadtxt``), or an
               empty 1-D array when the file has zero size
    '''
    if not os.path.getsize(lab_path):
        return np.array([])
    return np.loadtxt(lab_path)
def load_class_names(namesfile):
    '''Read class names from a text file, one name per line.

    namesfile : path of the names file
    return    : list with one entry per line, in file order, with trailing
                whitespace (including the newline) removed
    '''
    with open(namesfile, 'r') as fp:
        # Each stripped line must actually be collected; without that the
        # function always returned an empty list.
        return [line.rstrip() for line in fp]
def image2torch(img):
    '''Convert an image object into a float tensor of shape (1, 3, height, width).

    img    : object exposing ``width``, ``height`` and ``tobytes()``; the
             byte buffer is interpreted as height x width x 3 values
             (presumably an RGB PIL image -- TODO confirm with callers)
    return : float tensor with the byte values divided by 255
    '''
    width, height = img.width, img.height
    raw = torch.ByteStorage.from_buffer(img.tobytes())
    pixels = torch.ByteTensor(raw).view(height, width, 3)
    # Channel-last (H, W, 3) -> channel-first (3, H, W).
    channel_first = pixels.permute(2, 0, 1).contiguous()
    batched = channel_first.view(1, 3, height, width)
    return batched.float().div(255.0)
def read_data_cfg(datacfg):
    '''Parse a ``key = value`` configuration file into a dict of strings.

    datacfg : path of the configuration file
    return  : dict mapping stripped keys to stripped values, pre-populated
              with the defaults ``gpus='0'`` and ``num_workers='10'``

    Blank lines are skipped. Only the first ``=`` separates key and value,
    so values may themselves contain ``=``. A non-blank line without ``=``
    raises ValueError.
    '''
    options = dict()
    options['gpus'] = '0'
    options['num_workers'] = '10'
    with open(datacfg, 'r') as fp:
        for line in fp:
            line = line.strip()
            if line == '':
                continue
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options
def scale_bboxes(bboxes, width, height):
    '''Return a deep copy of ``bboxes`` with the first four entries of each box scaled.

    bboxes : sequence of boxes; entries 0 and 2 of each box are multiplied by
             ``width``, entries 1 and 3 by ``height``
    width  : factor for entries 0 and 2
    height : factor for entries 1 and 3
    return : the scaled copy; ``bboxes`` itself is left unmodified
    '''
    import copy
    dets = copy.deepcopy(bboxes)
    for det in dets:
        det[0] = det[0] * width
        det[1] = det[1] * height
        det[2] = det[2] * width
        det[3] = det[3] * height
    return dets
def file_lines(thefilepath):
    '''Count the newline bytes in a file.

    thefilepath : path of the file, read in binary mode in 8 MiB chunks
    return      : number of ``\\n`` bytes; a final line without a trailing
                  newline is therefore not counted
    '''
    count = 0
    # The context manager closes the file even if a read fails.
    with open(thefilepath, 'rb') as thefile:
        while True:
            buffer = thefile.read(8192*1024)
            if not buffer:
                break
            count += buffer.count(b'\n')
    return count
def get_image_size(fname):
    '''Determine the image type of fname and return its size.
    from draco

    The type is recognised from the file's leading bytes (PNG, GIF or JPEG).

    fname  : path of the image file
    return : (width, height), or None when the file is shorter than 24
             bytes, is of another type, or its JPEG segments cannot be parsed
    '''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return None
        if head.startswith(b'\x89PNG\r\n\x1a\n'):
            # Width and height are the first two fields of the IHDR chunk.
            width, height = struct.unpack('>ii', head[16:24])
        elif head[:6] in (b'GIF87a', b'GIF89a'):
            width, height = struct.unpack('<HH', head[6:10])
        elif head.startswith(b'\xff\xd8'):
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk the segments until a start-of-frame marker. 0xC4, 0xC8
                # and 0xCC lie in the 0xC0-0xCF range but are not SOF markers.
                while not 0xc0 <= ftype <= 0xcf or ftype in (0xc4, 0xc8, 0xcc):
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                    if size < 0:
                        # A segment length includes its own two bytes.
                        return None
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except (struct.error, TypeError, ValueError, OSError):
                # Truncated or malformed data: ord(b'') raises TypeError,
                # short reads raise struct.error.
                return None
        else:
            return None
        return width, height
def logging(message):
    '''Print ``message`` to stdout, prefixed with the current local time as "YYYY-MM-DD HH:MM:SS".'''
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('%s %s' % (timestamp, message))
