update training code
Parent
05d0786aea
Commit
3002ea2d52
Binary file not shown.

@@ -0,0 +1,76 @@
import tensorflow as tf
from tensorflow.contrib.data import prefetch_to_device, shuffle_and_repeat, map_and_batch
import os
import glob
import numpy as np
import cv2
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
###############################################################################################
# data loader for training stage
###############################################################################################
def _parse_function(image_path,lm_path,mask_path):

    # input image
    x = tf.read_file(image_path)
    img = tf.image.decode_png(x, channels=3)
    img = tf.cast(img,tf.float32)
    img = img[:,:,::-1]

    # ground truth landmark
    x2 = tf.read_file(lm_path)
    lm = tf.decode_raw(x2,tf.float64)
    lm = tf.cast(lm,tf.float32)
    lm = tf.reshape(lm,[68,2])

    # skin mask
    x3 = tf.read_file(mask_path)
    mask = tf.image.decode_png(x3, channels=3)
    mask = tf.cast(mask,tf.float32)

    return img,lm,mask

def check_lm_bin(dataset,lm_path):
    if not os.path.isdir(os.path.join(dataset,'lm_bin')):
        os.makedirs(os.path.join(dataset,'lm_bin'))
        for i in range(len(lm_path)):
            lm = np.loadtxt(lm_path[i])
            lm = np.reshape(lm,[-1])
            lm.tofile(os.path.join(dataset,'lm_bin',lm_path[i].split('/')[-1].replace('txt','bin')))

def load_dataset(opt,train=True):
    if train:
        data_path = opt.data_path
    else:
        data_path = opt.val_data_path
    image_path_all = []
    lm_path_all = []
    mask_path_all = []

    for dataset in data_path:
        image_path = glob.glob(dataset + '/' + '*.png')
        image_path.sort()
        lm_path_ = [os.path.join(dataset,'lm',f.split('/')[-1].replace('png','txt')) for f in image_path]
        lm_path_.sort()
        mask_path = [os.path.join(dataset,'mask',f.split('/')[-1]) for f in image_path]
        mask_path.sort()

        # check if landmark binary files exist
        check_lm_bin(dataset,lm_path_)

        lm_path = [os.path.join(dataset,'lm_bin',f.split('/')[-1].replace('png','bin')) for f in image_path]
        lm_path.sort()

        image_path_all += image_path
        mask_path_all += mask_path
        lm_path_all += lm_path

    dataset_num = len(image_path_all)

    dataset = tf.data.Dataset.from_tensor_slices((image_path_all,lm_path_all,mask_path_all))
    dataset = dataset. \
        apply(shuffle_and_repeat(dataset_num)). \
        apply(map_and_batch(_parse_function, opt.batch_size, num_parallel_batches=4, drop_remainder=True)). \
        apply(prefetch_to_device('/gpu:0', None)) # when using dataset.prefetch, use buffer_size=None to let it detect the optimal buffer size

    inputs_iterator = dataset.make_one_shot_iterator()
    return inputs_iterator
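For reference, a minimal usage sketch (not part of this commit; it assumes the Option class from the options module shown below) of how the one-shot iterator returned by load_dataset can be consumed:

    opt = Option()
    iterator = load_dataset(opt, train=True)
    imgs, lms, masks = iterator.get_next()  # [batch,224,224,3], [batch,68,2], [batch,224,224,3]
    with tf.Session(config=opt.config) as sess:
        img_batch, lm_batch, mask_batch = sess.run([imgs, lms, masks])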
demo.py
@@ -7,18 +7,25 @@ import cv2
import platform
from scipy.io import loadmat,savemat

from preprocess_img import Preprocess
from load_data import *
from preprocess_img import align_img
from utils import *
from face_decoder import Face3D
from options import Option

is_windows = platform.system() == "Windows"

def load_graph(graph_filename):
    with tf.gfile.GFile(graph_filename,'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
def restore_weights(sess,opt):
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()

    return graph_def
    # add batch normalization params into trainable variables
    bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
    bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
    var_list += bn_moving_vars

    # create saver to save and restore weights
    saver = tf.train.Saver(var_list = var_list)
    saver.restore(sess,opt.pretrain_weights)

def demo():
    # input and output folder

@@ -36,22 +43,32 @@ def demo():

    # read standard landmarks for preprocessing images
    lm3D = load_lm3d()
    batchsize = 1
    n = 0

    # build reconstruction model
    with tf.Graph().as_default() as graph,tf.device('/cpu:0'):

        opt = Option()
        opt.batch_size = 1
        opt.is_train = False
        FaceReconstructor = Face3D()
        images = tf.placeholder(name = 'input_imgs', shape = [batchsize,224,224,3], dtype = tf.float32)
        graph_def = load_graph('network/FaceReconModel.pb')
        tf.import_graph_def(graph_def,name='resnet',input_map={'input_imgs:0': images})
        images = tf.placeholder(name = 'input_imgs', shape = [opt.batch_size,224,224,3], dtype = tf.float32)

        # output coefficients of R-Net (dim = 257)
        coeff = graph.get_tensor_by_name('resnet/coeff:0')
        if opt.use_pb and os.path.isfile('network/FaceReconModel.pb'):
            print('Using pre-trained .pb file.')
            use_pb = True
            graph_def = load_graph('network/FaceReconModel.pb')
            tf.import_graph_def(graph_def,name='resnet',input_map={'input_imgs:0': images})
            # output coefficients of R-Net (dim = 257)
            coeff = graph.get_tensor_by_name('resnet/coeff:0')
        else:
            print('Using pre-trained .ckpt file: %s'%opt.pretrain_weights)
            use_pb = False
            import networks
            coeff = networks.R_Net(images,is_training=False)

        # reconstructing faces
        FaceReconstructor.Reconstruction_Block(coeff,batchsize)
        FaceReconstructor.Reconstruction_Block(coeff,opt)
        face_shape = FaceReconstructor.face_shape_t
        face_texture = FaceReconstructor.face_texture
        face_color = FaceReconstructor.face_color

@@ -61,6 +78,9 @@ def demo():

        with tf.Session() as sess:
            if not use_pb:
                restore_weights(sess,opt)

            print('reconstructing...')
            for file in img_list:
                n += 1

@@ -68,7 +88,7 @@ def demo():
                # load images and corresponding 5 facial landmarks
                img,lm = load_img(file,file.replace('png','txt').replace('jpg','txt'))
                # preprocess input image
                input_img,lm_new,transform_params = Preprocess(img,lm,lm3D)
                input_img,lm_new,transform_params = align_img(img,lm,lm3D)

                coeff_,face_shape_,face_texture_,face_color_,landmarks_2d_,recon_img_,tri_ = sess.run([coeff,\
                    face_shape,face_texture,face_color,landmarks_2d,recon_img,tri],feed_dict = {images: input_img})
face_decoder.py
@@ -7,40 +7,42 @@ import platform
is_windows = platform.system() == "Windows"

if not is_windows:
    import mesh_renderer

from renderer import mesh_renderer
###############################################################################################
# Reconstruct 3D face based on output coefficients and facemodel
###############################################################################################

# BFM 3D face model
class BFM():
    def __init__(self,model_path = 'BFM/BFM_model_front.mat'):
    def __init__(self,model_path = './BFM/BFM_model_front.mat'):
        model = loadmat(model_path)
        self.meanshape = tf.constant(model['meanshape']) # mean face shape. [3*N,1]
        self.idBase = tf.constant(model['idBase']) # identity basis. [3*N,80]
        self.exBase = tf.constant(model['exBase'].astype(np.float32)) # expression basis. [3*N,64]
        self.meantex = tf.constant(model['meantex']) # mean face texture. [3*N,1] (0-255)
        self.texBase = tf.constant(model['texBase']) # texture basis. [3*N,80]
        self.point_buf = tf.constant(model['point_buf']) # indices of the triangles each vertex lies in. starts from 1. [N,8]
        self.face_buf = tf.constant(model['tri']) # vertex indices in each triangle. starts from 1. [F,3]
        self.keypoints = tf.squeeze(tf.constant(model['keypoints'])) # vertex indices of the 68 facial landmarks. starts from 1. [68,1]
        self.point_buf = tf.constant(model['point_buf']) # indices of the faces each vertex lies in. starts from 1. [N,8]
        self.face_buf = tf.constant(model['tri']) # vertex indices for each face. starts from 1. [F,3]
        self.front_mask_render = tf.squeeze(tf.constant(model['frontmask2_idx'])) # vertex indices of a small face region used to compute photometric error. starts from 1.
        self.mask_face_buf = tf.constant(model['tri_mask2']) # vertex indices for each face in the small face region. starts from 1. [f,3]
        self.skin_mask = tf.squeeze(tf.constant(model['skinmask'])) # vertex indices of the pre-defined skin region used to compute reflectance loss
        self.keypoints = tf.squeeze(tf.constant(model['keypoints'])) # vertex indices of the 68 landmarks. starts from 1. [68,1]

# Analytic 3D face reconstructor
# Analytic 3D face
class Face3D():
    def __init__(self):
        facemodel = BFM()
        self.facemodel = facemodel

    # analytic 3D face reconstruction with coefficients from R-Net
    def Reconstruction_Block(self,coeff,batchsize):
    def Reconstruction_Block(self,coeff,opt):
        # coeff: [batchsize,257] reconstruction coefficients
        id_coeff,ex_coeff,tex_coeff,angles,translation,gamma = self.Split_coeff(coeff)

        id_coeff,ex_coeff,tex_coeff,angles,translation,gamma,camera_scale,f_scale = self.Split_coeff(coeff)
        # [batchsize,N,3] canonical face shape in BFM space
        face_shape = self.Shape_formation_block(id_coeff,ex_coeff,self.facemodel)
        # [batchsize,N,3] vertex texture (in RGB order)
        face_texture = self.Texture_formation_block(tex_coeff,self.facemodel)
        self.face_texture = face_texture
        # [batchsize,3,3] rotation matrix for face shape
        rotation = self.Compute_rotation_matrix(angles)
        # [batchsize,N,3] vertex normal

@@ -49,38 +51,44 @@ class Face3D():

        # apply rigid transformation to face shape using predicted rotation and translation
        face_shape_t = self.Rigid_transform_block(face_shape,rotation,translation)
        self.face_shape_t = face_shape_t
        # compute 2d landmark projections
        # landmark_p: [batchsize,68,2]
        face_landmark_t = self.Compute_landmark(face_shape_t,self.facemodel)
        landmark_p = self.Projection_block(face_landmark_t) # 256*256 image
        landmark_p = tf.stack([landmark_p[:,:,0],223. - landmark_p[:,:,1]],axis = 2)
        self.landmark_p = landmark_p
        landmark_p = self.Projection_block(face_landmark_t,camera_scale,f_scale)

        # [batchsize,N,3] vertex color (in RGB order)
        face_color = self.Illumination_block(face_texture, norm_r, gamma)

        # reconstruction images and region masks for computing photometric loss
        render_imgs,img_mask,img_mask_crop = self.Render_block(face_shape_t,norm_r,face_color,camera_scale,f_scale,self.facemodel,opt.batch_size,opt.is_train)

        self.id_coeff = id_coeff
        self.ex_coeff = ex_coeff
        self.tex_coeff = tex_coeff
        self.f_scale = f_scale
        self.gamma = gamma
        self.face_shape = face_shape
        self.face_shape_t = face_shape_t
        self.face_texture = face_texture
        self.face_color = face_color
        self.landmark_p = landmark_p
        self.render_imgs = render_imgs
        self.img_mask = img_mask
        self.img_mask_crop = img_mask_crop

        # reconstruction images
        if not is_windows:
            render_imgs = self.Render_block(face_shape_t,norm_r,face_color,self.facemodel,batchsize)
            render_imgs = tf.clip_by_value(render_imgs,0,255)
            render_imgs = tf.cast(render_imgs,tf.float32)
            self.render_imgs = render_imgs
        else:
            self.render_imgs = []

    ######################################################################################################
    #----------------------------------------------------------------------------------------------
    def Split_coeff(self,coeff):

        id_coeff = coeff[:,:80] # identity
        ex_coeff = coeff[:,80:144] # expression
        tex_coeff = coeff[:,144:224] # texture
        angles = coeff[:,224:227] # euler angles for pose
        gamma = coeff[:,227:254] # lighting
        translation = coeff[:,254:257] # translation

        return id_coeff,ex_coeff,tex_coeff,angles,translation,gamma
        id_coeff = coeff[:,:80]
        ex_coeff = coeff[:,80:144]
        tex_coeff = coeff[:,144:224]
        angles = coeff[:,224:227]
        gamma = coeff[:,227:254]
        translation = coeff[:,254:257]
        camera_scale = tf.ones([tf.shape(coeff)[0],1])
        f_scale = tf.ones([tf.shape(coeff)[0],1])

        return id_coeff,ex_coeff,tex_coeff,angles,translation,gamma,camera_scale,f_scale
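        # The 257 coefficients thus split as 80 (identity) + 64 (expression) + 80 (texture)
        # + 3 (angles) + 27 (lighting) + 3 (translation) = 257; camera_scale and f_scale are
        # not regressed by R-Net here but fixed to 1.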

    def Shape_formation_block(self,id_coeff,ex_coeff,facemodel):
        face_shape = tf.einsum('ij,aj->ai',facemodel.idBase,id_coeff) + \

@@ -170,31 +178,27 @@ class Face3D():
        # R = RzRyRx
        rotation = tf.matmul(tf.matmul(rotation_Z,rotation_Y),rotation_X)

        # the face shape is stored as N*3, so we return the transpose of R; the rotated shape can then be computed as face_shape*R
        rotation = tf.transpose(rotation, perm = [0,2,1])

        return rotation

    def Projection_block(self,face_shape,focal=1015.0,half_image_width=112.):
    def Projection_block(self,face_shape,camera_scale,f_scale):

        # pre-defined camera focal length for perspective projection
        focal = tf.constant(focal)
        # focal = tf.constant(400.0)
        focal = tf.constant(1015.0)
        focal = focal*f_scale
        focal = tf.reshape(focal,[-1,1])
        batchsize = tf.shape(face_shape)[0]
        # center = tf.constant(112.0)
        batchsize = tf.shape(focal)[0]

        # define camera position
        camera_pos = tf.reshape(tf.constant([0.0,0.0,10.0]),[1,1,3])
        camera_pos = tf.reshape(tf.constant([0.0,0.0,10.0]),[1,1,3])*tf.reshape(camera_scale,[-1,1,1])
        reverse_z = tf.tile(tf.reshape(tf.constant([1.0,0,0,0,1,0,0,0,-1.0]),[1,3,3]),[tf.shape(face_shape)[0],1,1])

        # compute projection matrix
        p_matrix = tf.concat([focal*tf.ones([batchsize,1]),tf.zeros([batchsize,1]),half_image_width*tf.ones([batchsize,1]),tf.zeros([batchsize,1]),\
            focal*tf.ones([batchsize,1]),half_image_width*tf.ones([batchsize,1]),tf.zeros([batchsize,2]),tf.ones([batchsize,1])],axis = 1)
        # p_matrix = tf.tile(tf.reshape(p_matrix,[1,3,3]),[tf.shape(face_shape)[0],1,1])
        p_matrix = tf.concat([focal,tf.zeros([batchsize,1]),112.*tf.ones([batchsize,1]),tf.zeros([batchsize,1]),focal,112.*tf.ones([batchsize,1]),tf.zeros([batchsize,2]),tf.ones([batchsize,1])],axis = 1)
        p_matrix = tf.reshape(p_matrix,[-1,3,3])

        # convert z in canonical space to the distance to camera
        reverse_z = tf.tile(tf.reshape(tf.constant([1.0,0,0,0,1,0,0,0,-1.0]),[1,3,3]),[tf.shape(face_shape)[0],1,1])
        # convert z in world space to the distance to camera
        face_shape = tf.matmul(face_shape,reverse_z) + camera_pos
        aug_projection = tf.matmul(face_shape,tf.transpose(p_matrix,[0,2,1]))
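        # (hunk truncated here) The pixel coordinates presumably follow from the usual
        # perspective divide of the homogeneous projection, i.e. something like
        # face_projection = aug_projection[:,:,:2] / aug_projection[:,:,2:]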

@@ -256,51 +260,84 @@ class Face3D():

        return face_shape_t

    def Render_block(self,face_shape,face_norm,face_color,facemodel,batchsize):
    def Render_block(self,face_shape,face_norm,face_color,camera_scale,f_scale,facemodel,batchsize,is_train=True):
        if is_train and is_windows:
            raise ValueError('Training is not supported in a Windows environment.')

        if is_windows:
            return [],[],[]

        # render reconstruction images
        n_vex = int(facemodel.idBase.shape[0].value/3)
        fov_y = 2*tf.atan(112/(1015.))*180./m.pi + tf.zeros([batchsize])

        fov_y = 2*tf.atan(112./(1015.*f_scale))*180./m.pi
        fov_y = tf.reshape(fov_y,[batchsize])
        # full face region
        face_shape = tf.reshape(face_shape,[batchsize,n_vex,3])
        face_norm = tf.reshape(face_norm,[batchsize,n_vex,3])
        face_color = tf.reshape(face_color,[batchsize,n_vex,3])

        # camera settings
        # same as in Projection_block
        camera_position = tf.constant([[0,0,10.0]]) + tf.zeros([batchsize,3])
        camera_lookat = tf.constant([[0,0,0.0]]) + tf.zeros([batchsize,3])
        camera_up = tf.constant([[0,1.0,0]]) + tf.zeros([batchsize,3])
        # pre-defined cropped face region
        mask_face_shape = tf.gather(face_shape,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1)
        mask_face_norm = tf.gather(face_norm,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1)
        mask_face_color = tf.gather(face_color,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1)

        # set light source position (intensities are set to 0 because the vertex colors are already computed)
        light_positions = tf.reshape(tf.constant([0,0,1e5]),[1,1,3]) + tf.zeros([batchsize,1,3])
        light_intensities = tf.reshape(tf.constant([0.0,0.0,0.0]),[1,1,3]) + tf.zeros([batchsize,1,3])
        ambient_color = tf.reshape(tf.constant([1.0,1,1]),[1,3]) + tf.zeros([batchsize,3])
        # camera settings
        camera_position = tf.constant([[0,0,10.0]])*tf.reshape(camera_scale,[-1,1])
        camera_lookat = tf.constant([0,0,0.0])
        camera_up = tf.constant([0,1.0,0])

        # set light source position (intensities are set to 0 because the vertex colors are already computed)
        light_positions = tf.tile(tf.reshape(tf.constant([0,0,1e5]),[1,1,3]),[batchsize,1,1])
        light_intensities = tf.tile(tf.reshape(tf.constant([0.0,0.0,0.0]),[1,1,3]),[batchsize,1,1])
        ambient_color = tf.tile(tf.reshape(tf.constant([1.0,1,1]),[1,3]),[batchsize,1])

        near_clip = 0.01*tf.ones([batchsize])
        far_clip = 50*tf.ones([batchsize])
        # use tf_mesh_renderer for rasterization (https://github.com/google/tf_mesh_renderer)
        # img: [batchsize,224,224,4] images in RGBA order (0-255)

        if not is_windows:
            with tf.device('/cpu:0'):
                img = mesh_renderer.mesh_renderer(face_shape,
                    tf.cast(facemodel.face_buf-1,tf.int32),
                    face_norm,
                    face_color,
                    camera_position = camera_position,
                    camera_lookat = camera_lookat,
                    camera_up = camera_up,
                    light_positions = light_positions,
                    light_intensities = light_intensities,
                    image_width = 224,
                    image_height = 224,
                    fov_y = fov_y, #12.5936
                    ambient_color = ambient_color,
                    near_clip = near_clip,
                    far_clip = far_clip)
                return img
        else:
            return np.zeros([224, 224], dtype=np.int32)
        # img: [batchsize,224,224,3] images in RGB order (0-255)
        # mask: [batchsize,224,224,1] transparency for img ({0,1} value)
        img_rgba = mesh_renderer.mesh_renderer(face_shape,
            tf.cast(facemodel.face_buf-1,tf.int32),
            face_norm,
            face_color,
            camera_position = camera_position,
            camera_lookat = camera_lookat,
            camera_up = camera_up,
            light_positions = light_positions,
            light_intensities = light_intensities,
            image_width = 224,
            image_height = 224,
            fov_y = fov_y,
            near_clip = 0.01,
            far_clip = 50.0,
            ambient_color = ambient_color)

        img = img_rgba[:,:,:,:3]
        mask = img_rgba[:,:,:,3:]

        img = tf.cast(img[:,:,:,::-1],tf.float32) # convert RGB to BGR
        mask = tf.cast(mask,tf.float32) # full face region

        if is_train:
            # compute mask for small face region
            img_crop_rgba = mesh_renderer.mesh_renderer(mask_face_shape,
                tf.cast(facemodel.mask_face_buf-1,tf.int32),
                mask_face_norm,
                mask_face_color,
                camera_position = camera_position,
                camera_lookat = camera_lookat,
                camera_up = camera_up,
                light_positions = light_positions,
                light_intensities = light_intensities,
                image_width = 224,
                image_height = 224,
                fov_y = fov_y,
                near_clip = 0.01,
                far_clip = 50.0,
                ambient_color = ambient_color)

            mask_f = img_crop_rgba[:,:,:,3:]
            mask_f = tf.cast(mask_f,tf.float32) # small face region
            return img,mask,mask_f

        img_rgba = tf.cast(tf.clip_by_value(img_rgba,0,255),tf.float32)

        return img_rgba,mask,mask
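        # Note that at inference (is_train=False) the cropped-region render is skipped,
        # so the full-face mask is returned in both mask slots.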

@@ -0,0 +1,247 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Contains the definition of the Inception Resnet V1 architecture.
As described in http://arxiv.org/abs/1602.07261.
  Inception-v4, Inception-ResNet and the Impact of Residual Connections
    on Learning
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import tensorflow.contrib.slim as slim


# Inception-Resnet-A
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
    """Builds the 35x35 resnet block."""
    with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
        with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
            tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
            tower_conv2_1 = slim.conv2d(tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3')
            tower_conv2_2 = slim.conv2d(tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3')
        mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3)
        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                         activation_fn=None, scope='Conv2d_1x1')
        net += scale * up
        if activation_fn:
            net = activation_fn(net)
    return net

# Inception-Resnet-B
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
    """Builds the 17x17 resnet block."""
    with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):
        with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 128, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 128, [1, 7],
                                        scope='Conv2d_0b_1x7')
            tower_conv1_2 = slim.conv2d(tower_conv1_1, 128, [7, 1],
                                        scope='Conv2d_0c_7x1')
        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                         activation_fn=None, scope='Conv2d_1x1')
        net += scale * up
        if activation_fn:
            net = activation_fn(net)
    return net


# Inception-Resnet-C
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
    """Builds the 8x8 resnet block."""
    with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
        with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 192, [1, 3],
                                        scope='Conv2d_0b_1x3')
            tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [3, 1],
                                        scope='Conv2d_0c_3x1')
        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                         activation_fn=None, scope='Conv2d_1x1')
        net += scale * up
        if activation_fn:
            net = activation_fn(net)
    return net

def reduction_a(net, k, l, m, n):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID',
                                 scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3,
                                    scope='Conv2d_0b_3x3')
        tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3,
                                    stride=2, padding='VALID',
                                    scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
    return net

def reduction_b(net):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                   padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2,
                                    padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3,
                                    scope='Conv2d_0b_3x3')
        tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2,
                                    padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_3'):
        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv_1, tower_conv1_1,
                     tower_conv2_2, tower_pool], 3)
    return net

def inference(images, keep_probability, phase_train=True,
              bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
    }

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=slim.initializers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        return inception_resnet_v1(images, is_training=phase_train,
              dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)


def inception_resnet_v1(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      num_classes: number of predicted classes.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):

                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)
                end_points['Mixed_5a'] = net

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)
                end_points['Mixed_6b'] = net

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                end_points['Mixed_8a'] = net

                net = block8(net, activation_fn=None)
                end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    #pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points

@@ -0,0 +1,76 @@
import tensorflow as tf
from scipy.io import loadmat,savemat
###############################################################################################
# Define losses for training
###############################################################################################

# photometric loss
# input_imgs and render_imgs are [batchsize,h,w,3] BGR images
# img_mask is a [batchsize,h,w,1] attention mask
def Photo_loss(input_imgs,render_imgs,img_mask):

    input_imgs = tf.cast(input_imgs,tf.float32)

    # img_mask = tf.squeeze(img_mask,3)
    img_mask = tf.stop_gradient(img_mask[:,:,:,0])

    # photo loss with skin attention
    photo_loss = tf.sqrt(tf.reduce_sum(tf.square(input_imgs - render_imgs),axis = 3))*img_mask/255
    photo_loss = tf.reduce_sum(photo_loss) / tf.maximum(tf.reduce_sum(img_mask),1.0)

    return photo_loss
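# The per-pixel l2 color error (divided by 255 to normalize the 0-255 range) is averaged
# only over pixels where the attention mask is nonzero; tf.maximum(...,1.0) guards against
# division by zero when the mask is empty.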

# perceptual loss
# id_feature and id_label are [batchsize, c] identity features of the reconstructed and input images
def Perceptual_loss(id_feature,id_label):
    id_feature = tf.nn.l2_normalize(id_feature, dim = 1)
    id_label = tf.nn.l2_normalize(id_label, dim = 1)
    # cosine similarity
    sim = tf.reduce_sum(id_feature*id_label,1)
    loss = tf.reduce_sum(tf.maximum(0.0,1.0 - sim))/tf.cast(tf.shape(id_feature)[0],tf.float32)

    return loss
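# Both feature vectors are l2-normalized, so sim is the cosine similarity in [-1,1]:
# identical identity features give a per-sample loss of 0, orthogonal ones give 1.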

# landmark loss
# landmark_p and landmark_label are [batchsize, 68, 2] landmark projections of the reconstructed and input images
def Landmark_loss(landmark_p,landmark_label):

    # set higher weights for landmarks around the mouth and nose regions
    landmark_weight = tf.concat([tf.ones([1,28]),20*tf.ones([1,3]),tf.ones([1,29]),20*tf.ones([1,8])],axis = 1)
    landmark_weight = tf.tile(landmark_weight,[tf.shape(landmark_p)[0],1])

    landmark_loss = tf.reduce_sum(tf.reduce_sum(tf.square(landmark_p-landmark_label),2)*landmark_weight)/(68.0*tf.cast(tf.shape(landmark_p)[0],tf.float32))

    return landmark_loss

# coefficient regularization to ensure plausible 3d faces
def Regulation_loss(id_coeff,ex_coeff,tex_coeff,opt):
    w_ex = opt.w_ex
    w_tex = opt.w_tex

    regulation_loss = tf.nn.l2_loss(id_coeff) + w_ex * tf.nn.l2_loss(ex_coeff) + w_tex * tf.nn.l2_loss(tex_coeff)
    regulation_loss = 2*regulation_loss / tf.cast(tf.shape(id_coeff)[0],tf.float32)

    return regulation_loss

# albedo regularization to ensure a uniform skin albedo
def Reflectance_loss(face_texture,facemodel):
    skin_mask = facemodel.skin_mask
    skin_mask = tf.reshape(skin_mask,[1,tf.shape(skin_mask)[0],1])

    texture_mean = tf.reduce_sum(face_texture*skin_mask,1)/tf.reduce_sum(skin_mask)
    texture_mean = tf.expand_dims(texture_mean,1)

    # minimize texture variance within the pre-defined skin region
    reflectance_loss = tf.reduce_sum(tf.square((face_texture - texture_mean)*skin_mask/255.0))/(tf.cast(tf.shape(face_texture)[0],tf.float32)*tf.reduce_sum(skin_mask))

    return reflectance_loss

# gamma regularization to ensure nearly-monochromatic lighting
def Gamma_loss(gamma):
    gamma = tf.reshape(gamma,[-1,3,9])
    gamma_mean = tf.reduce_mean(gamma,1, keep_dims = True)

    gamma_loss = tf.reduce_mean(tf.square(gamma - gamma_mean))

    return gamma_loss
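
For reference, a minimal sketch (not part of this commit) of how these terms combine into the training objective; it mirrors the weighted sum used in the reconstruction model below, with weights from the options module:

    loss = opt.w_photo*photo_loss + opt.w_lm*landmark_loss + opt.w_id*perceptual_loss \
         + opt.w_reg*regulation_loss + opt.w_ref*reflectance_loss + opt.w_gamma*gamma_loss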

@@ -0,0 +1,87 @@
import tensorflow as tf
from tensorflow.contrib.slim.nets import resnet_v1
slim = tf.contrib.slim
from inception_resnet_v1 import inception_resnet_v1
###############################################################################################
# Define R-Net and Perceptual-Net for 3D face reconstruction
###############################################################################################

def R_Net(inputs,is_training=True):
    # input: [Batchsize,H,W,C], 0-255, BGR image
    inputs = tf.cast(inputs,tf.float32)
    # standard ResNet50 backbone (without the last classification FC layer)
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net,end_points = resnet_v1.resnet_v1_50(inputs,is_training = is_training ,reuse = tf.AUTO_REUSE)

    # modified FC layer with 257 channels for reconstruction coefficients
    net_id = slim.conv2d(net, 80, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer = tf.zeros_initializer(),
        scope='fc-id')
    net_ex = slim.conv2d(net, 64, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer = tf.zeros_initializer(),
        scope='fc-ex')
    net_tex = slim.conv2d(net, 80, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer = tf.zeros_initializer(),
        scope='fc-tex')
    net_angles = slim.conv2d(net, 3, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer = tf.zeros_initializer(),
        scope='fc-angles')
    net_gamma = slim.conv2d(net, 27, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer = tf.zeros_initializer(),
        scope='fc-gamma')
    net_t_xy = slim.conv2d(net, 2, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer = tf.zeros_initializer(),
        scope='fc-XY')
    net_t_z = slim.conv2d(net, 1, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer = tf.zeros_initializer(),
        scope='fc-Z')

    net_id = tf.squeeze(net_id, [1,2], name='fc-id/squeezed')
    net_ex = tf.squeeze(net_ex, [1,2], name='fc-ex/squeezed')
    net_tex = tf.squeeze(net_tex, [1,2], name='fc-tex/squeezed')
    net_angles = tf.squeeze(net_angles,[1,2], name='fc-angles/squeezed')
    net_gamma = tf.squeeze(net_gamma,[1,2], name='fc-gamma/squeezed')
    net_t_xy = tf.squeeze(net_t_xy,[1,2], name='fc-XY/squeezed')
    net_t_z = tf.squeeze(net_t_z,[1,2], name='fc-Z/squeezed')

    net_ = tf.concat([net_id,net_ex,net_tex,net_angles,net_gamma,net_t_xy,net_t_z], axis = 1)

    return net_
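    # The output layout matches Split_coeff in face_decoder.py:
    # 80 (id) + 64 (ex) + 80 (tex) + 3 (angles) + 27 (gamma) + 2 (t_xy) + 1 (t_z) = 257 channels.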


def Perceptual_Net(input_imgs):
    # input_imgs: [Batchsize,H,W,C], 0-255, BGR image

    input_imgs = tf.reshape(input_imgs,[-1,224,224,3])
    input_imgs = tf.cast(input_imgs,tf.float32)
    input_imgs = tf.clip_by_value(input_imgs,0,255)
    input_imgs = (input_imgs - 127.5)/128.0

    # standard FaceNet backbone
    batch_norm_params = {
        'decay': 0.995,
        'epsilon': 0.001,
        'updates_collections': None}

    with slim.arg_scope([slim.conv2d, slim.fully_connected],weights_initializer=slim.initializers.xavier_initializer(),
        weights_regularizer=slim.l2_regularizer(0.0),
        normalizer_fn=slim.batch_norm,
        normalizer_params=batch_norm_params):
        feature_128,_ = inception_resnet_v1(input_imgs, bottleneck_layer_size=128, is_training=False, reuse=tf.AUTO_REUSE)

    # output the last FC-layer feature (before classification) as the identity feature
    return feature_128

@@ -0,0 +1,60 @@
import numpy as np
import tensorflow as tf
import os

# training options

class Option():
    def __init__(self):
        #--------------------------------------------------------------------------------------
        self.model_dir = 'result'
        self.model_name = 'model_test2'
        self.data_path = ['./processed_data']
        self.val_data_path = ['./processed_data']

        self.model_save_path = os.path.join(self.model_dir,self.model_name)
        if not os.path.exists(self.model_save_path):
            os.makedirs(self.model_save_path)

        self.summary_dir = os.path.join(self.model_save_path,'summary')

        self.train_summary_path = os.path.join(self.summary_dir, 'train')
        self.val_summary_path = os.path.join(self.summary_dir, 'val')
        #---------------------------------------------------------------------------------------
        # visible gpu settings
        self.config = tf.ConfigProto()
        self.config.gpu_options.visible_device_list = '0'
        self.is_train = True
        self.use_pb = True
        #---------------------------------------------------------------------------------------
        # training parameters

        self.w_photo = 1.92
        self.w_lm = 1.6e-3
        self.w_id = 0.2

        self.w_reg = 3.0e-4
        self.w_ref = 5.0

        self.w_gamma = 10.0

        self.w_ex = 0.8
        self.w_tex = 1.7e-2

        self.batch_size = 16
        self.boundaries = [100000]
        lr = [1e-4,2e-5]
        self.global_step = tf.Variable(0,name='global_step',trainable = False)
        self.lr = tf.train.piecewise_constant(self.global_step,self.boundaries,lr)
        self.augment = True
        self.train_maxiter = 200000
        self.train_summary_iter = 50
        self.image_summary_iter = 200
        self.val_summary_iter = 1000
        self.save_iter = 10000
        #---------------------------------------------------------------------------------------
        # initial weights for resnet and facenet
        self.R_net_weights = os.path.join('./weights/resnet','resnet_v1_50.ckpt')
        self.Perceptual_net_weights = './weights/id_net/model-20170512-110547.ckpt-250000'

        self.pretrain_weights = os.path.join('train/model_test2','iter_100000.ckpt')
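        # With boundaries = [100000] and lr = [1e-4, 2e-5], tf.train.piecewise_constant keeps
        # the learning rate at 1e-4 until global_step reaches 100000 and then drops it to 2e-5,
        # so with train_maxiter = 200000 the run spends 100k iterations at each rate.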

@@ -1,6 +1,14 @@
import numpy as np
from scipy.io import loadmat,savemat
from PIL import Image
from skin import skinmask
import argparse
from utils import *
import os
import glob
import tensorflow as tf

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# solve the least-squares problem
def POS(xp,x):

@@ -27,7 +35,8 @@ def POS(xp,x):

    return t,s

def process_img(img,lm,t,s,target_size = 224.):
# resize and crop images
def resize_n_crop_img(img,lm,t,s,target_size = 224.):
    w0,h0 = img.size
    w = (w0/s*102).astype(np.int32)
    h = (h0/s*102).astype(np.int32)

@@ -49,7 +58,7 @@ def process_img(img,lm,t,s,target_size = 224.):


# resize and crop input images before sending them to the R-Net
def Preprocess(img,lm,lm3D):
def align_img(img,lm,lm3D):

    w0,h0 = img.size


@@ -60,9 +69,83 @@ def Preprocess(img,lm,lm3D):
    t,s = POS(lm.transpose(),lm3D.transpose())

    # process the image
    img_new,lm_new = process_img(img,lm,t,s)
    img_new,lm_new = resize_n_crop_img(img,lm,t,s)
    lm_new = np.stack([lm_new[:,0],223 - lm_new[:,1]], axis = 1)
    trans_params = np.array([w0,h0,102.0/s,t[0],t[1]])

    return img_new,lm_new,trans_params
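    # trans_params records the original image size (w0,h0), the isotropic scale 102.0/s,
    # and the translation t, so the alignment can presumably be inverted later to map
    # results back onto the original image.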

# detect 68 face landmarks for aligned images
def get_68landmark(img,detector,sess):

    input_img = detector.get_tensor_by_name('input_imgs:0')
    lm = detector.get_tensor_by_name('landmark:0')

    landmark = sess.run(lm,feed_dict={input_img:img})
    landmark = np.reshape(landmark,[68,2])
    landmark = np.stack([landmark[:,1],223-landmark[:,0]],axis=1)

    return landmark

# get skin attention mask for aligned images
def get_skinmask(img):

    img = np.squeeze(img,0)
    skin_img = skinmask(img)
    return skin_img

def parse_args():
    desc = "Data preprocessing for Deep3DRecon."
    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('--img_path', type=str, default='./input', help='original images folder')
    parser.add_argument('--save_path', type=str, default='./processed_data', help='custom path to save processed images and labels')

    return parser.parse_args()

# training data pre-processing
def preprocessing():

    args = parse_args()
    image_path = args.img_path
    save_path = args.save_path
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.isdir(os.path.join(save_path,'lm')):
        os.makedirs(os.path.join(save_path,'lm'))
    if not os.path.isdir(os.path.join(save_path,'lm_bin')):
        os.makedirs(os.path.join(save_path,'lm_bin'))
    if not os.path.isdir(os.path.join(save_path,'mask')):
        os.makedirs(os.path.join(save_path,'mask'))

    img_list = sorted(glob.glob(image_path + '/' + '*.png'))
    img_list += sorted(glob.glob(image_path + '/' + '*.jpg'))

    lm3D = load_lm3d()

    with tf.Graph().as_default() as graph, tf.device('/gpu:0'):
        lm_detector = load_graph(os.path.join('network','landmark68_detector.pb'))
        tf.import_graph_def(lm_detector,name='')
        sess = tf.InteractiveSession()

        for file in img_list:

            print(file)
            name = file.split('/')[-1].replace('.png','').replace('.jpg','')
            img,lm5p = load_img(file,file.replace('png','txt').replace('jpg','txt'))
            img_align,_,_ = align_img(img,lm5p,lm3D) # [1,224,224,3] BGR image

            lm68p = get_68landmark(img_align,graph,sess)
            lm68p = lm68p.astype(np.float64)
            skin_mask = get_skinmask(img_align)

            Image.fromarray(img_align.squeeze(0)[:,:,::-1].astype(np.uint8),'RGB').save(os.path.join(save_path,name+'.png'))
            Image.fromarray(skin_mask.astype(np.uint8)).save(os.path.join(save_path,'mask',name+'.png'))

            np.savetxt(os.path.join(save_path,'lm',name+'.txt'),lm68p)
            lm_bin = np.reshape(lm68p,[-1])
            lm_bin.tofile(os.path.join(save_path,'lm_bin',name+'.bin'))

if __name__ == '__main__':
    preprocessing()

@@ -0,0 +1,86 @@
import tensorflow as tf
import face_decoder
import networks
import losses
from utils import *
###############################################################################################
# model for single image face reconstruction
###############################################################################################
class Reconstruction_model():
    # initialization
    def __init__(self,opt):
        self.Face3D = face_decoder.Face3D() # analytic 3D face object
        self.opt = opt # training options
        self.Optimizer = tf.train.AdamOptimizer(learning_rate = opt.lr) # optimizer

    # load input data from queue
    def set_input(self,input_iterator):
        self.imgs,self.lm_labels,self.attention_masks = input_iterator.get_next()

    # forward process of the model
    def forward(self,is_train = True):

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            self.coeff = networks.R_Net(self.imgs,is_training=is_train)

            self.Face3D.Reconstruction_Block(self.coeff,self.opt)

            self.id_labels = networks.Perceptual_Net(self.imgs)
            self.id_features = networks.Perceptual_Net(self.Face3D.render_imgs)

            self.photo_loss = losses.Photo_loss(self.imgs,self.Face3D.render_imgs,self.Face3D.img_mask_crop*self.attention_masks)
            self.landmark_loss = losses.Landmark_loss(self.Face3D.landmark_p,self.lm_labels)
            self.perceptual_loss = losses.Perceptual_loss(self.id_features,self.id_labels)

            self.reg_loss = losses.Regulation_loss(self.Face3D.id_coeff,self.Face3D.ex_coeff,self.Face3D.tex_coeff,self.opt)
            self.reflect_loss = losses.Reflectance_loss(self.Face3D.face_texture,self.Face3D.facemodel)
            self.gamma_loss = losses.Gamma_loss(self.Face3D.gamma)

            self.loss = self.opt.w_photo*self.photo_loss + self.opt.w_lm*self.landmark_loss + self.opt.w_id*self.perceptual_loss\
                + self.opt.w_reg*self.reg_loss + self.opt.w_ref*self.reflect_loss + self.opt.w_gamma*self.gamma_loss

    # backward process
    def backward(self,is_train = True):
        if is_train:
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            var_list = tf.trainable_variables()
            update_var_list = [v for v in var_list if 'resnet_v1_50' in v.name or 'fc-' in v.name]
            grads = tf.gradients(self.loss,update_var_list)
            # build train_op with update_ops to ensure the bn parameters are updated
            with tf.control_dependencies(update_ops):
                self.train_op = self.Optimizer.apply_gradients(zip(grads,update_var_list),global_step = self.opt.global_step)

        # if not in the training stage, avoid updating variables
        else:
            pass

    # forward and backward
    def step(self, is_train = True):
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            self.forward(is_train = is_train)
            self.backward(is_train = is_train)

    # statistics summarization
    def summarize(self):

        # scalar and histogram stats
        stat = [
            tf.summary.scalar('reflect_error',self.reflect_loss),
            tf.summary.scalar('gamma_error',self.gamma_loss),
            tf.summary.scalar('id_sim_error',self.perceptual_loss),
            tf.summary.scalar('lm_error',tf.sqrt(self.landmark_loss)),
            tf.summary.scalar('photo_error',self.photo_loss),
            tf.summary.scalar('train_error',self.loss),
            tf.summary.histogram('id_coeff',self.Face3D.id_coeff),
            tf.summary.histogram('ex_coeff',self.Face3D.ex_coeff),
            tf.summary.histogram('tex_coeff',self.Face3D.tex_coeff)]

        self.summary_stat = tf.summary.merge(stat)
        # composite the face region of the reconstructed images over the input images
        render_imgs = self.Face3D.render_imgs[:,:,:,::-1]*self.Face3D.img_mask + tf.cast(self.imgs[:,:,:,::-1],tf.float32)*(1-self.Face3D.img_mask)
        render_imgs = tf.clip_by_value(render_imgs,0,255)
        render_imgs = tf.cast(render_imgs,tf.uint8)
        # image stats
        img_stat = [tf.summary.image('imgs',tf.concat([tf.cast(self.imgs[:,:,:,::-1],tf.uint8),render_imgs],axis = 2), max_outputs = 8)]
        self.summary_img = tf.summary.merge(img_stat)
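
For reference, a hypothetical sketch (not part of this commit) of how these pieces might be wired together in a training script, assuming the data loader and options shown earlier:

    opt = Option()
    model = Reconstruction_model(opt)
    model.set_input(load_dataset(opt, train=True))
    model.step(is_train=True)
    model.summarize()
    with tf.Session(config=opt.config) as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(opt.train_maxiter):
            _, loss_ = sess.run([model.train_op, model.loss])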

@@ -0,0 +1 @@
#.

@@ -0,0 +1,152 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Collection of TF functions for managing 3D camera matrices."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import tensorflow as tf


def perspective(aspect_ratio, fov_y, near_clip, far_clip):
  """Computes perspective transformation matrices.

  Functionality mimes gluPerspective (third_party/GL/glu/include/GLU/glu.h).

  Args:
    aspect_ratio: float value specifying the image aspect ratio (width/height).
    fov_y: 1-D float32 Tensor with shape [batch_size] specifying output vertical
      field of views in degrees.
    near_clip: 1-D float32 Tensor with shape [batch_size] specifying near
      clipping plane distance.
    far_clip: 1-D float32 Tensor with shape [batch_size] specifying far clipping
      plane distance.

  Returns:
    A [batch_size, 4, 4] float tensor that maps from right-handed points in eye
    space to left-handed points in clip space.
  """
  # The multiplication of fov_y by pi/360.0 simultaneously converts to radians
  # and adds the half-angle factor of .5.
  focal_lengths_y = 1.0 / tf.tan(fov_y * (math.pi / 360.0))
  depth_range = far_clip - near_clip
  p_22 = -(far_clip + near_clip) / depth_range
  p_23 = -2.0 * (far_clip * near_clip / depth_range)

  zeros = tf.zeros_like(p_23, dtype=tf.float32)
  # pyformat: disable
  perspective_transform = tf.concat(
      [
          focal_lengths_y / aspect_ratio, zeros, zeros, zeros,
          zeros, focal_lengths_y, zeros, zeros,
          zeros, zeros, p_22, p_23,
          zeros, zeros, -tf.ones_like(p_23, dtype=tf.float32), zeros
      ], axis=0)
  # pyformat: enable
  perspective_transform = tf.reshape(perspective_transform, [4, 4, -1])
  return tf.transpose(perspective_transform, [2, 0, 1])
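# Note: face_decoder.Render_block derives the fov_y fed into this projection from the
# pinhole focal length used in Projection_block: fov_y = 2*atan(112/1015)*180/pi ≈ 12.59
# degrees for a 224x224 image with focal length 1015 (the 12.5936 noted in face_decoder.py).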


def look_at(eye, center, world_up):
  """Computes camera viewing matrices.

  Functionality mimes gluLookAt (third_party/GL/glu/include/GLU/glu.h).

  Args:
    eye: 2-D float32 tensor with shape [batch_size, 3] containing the XYZ world
      space position of the camera.
    center: 2-D float32 tensor with shape [batch_size, 3] containing a position
      along the center of the camera's gaze.
    world_up: 2-D float32 tensor with shape [batch_size, 3] specifying the
      world's up direction; the output camera will have no tilt with respect
      to this direction.

  Returns:
    A [batch_size, 4, 4] float tensor containing a right-handed camera
    extrinsics matrix that maps points from world space to points in eye space.
  """
  batch_size = center.shape[0].value
  vector_degeneracy_cutoff = 1e-6
  forward = center - eye
  forward_norm = tf.norm(forward, ord='euclidean', axis=1, keep_dims=True)
  # tf.assert_greater(
  #     forward_norm,
  #     vector_degeneracy_cutoff,
  #     message='Camera matrix is degenerate because eye and center are close.')
  forward = tf.divide(forward, forward_norm)

  to_side = tf.cross(forward, world_up)
  to_side_norm = tf.norm(to_side, ord='euclidean', axis=1, keep_dims=True)
  # tf.assert_greater(
  #     to_side_norm,
  #     vector_degeneracy_cutoff,
  #     message='Camera matrix is degenerate because up and gaze are close or'
  #             'because up is degenerate.')
  to_side = tf.divide(to_side, to_side_norm)
  cam_up = tf.cross(to_side, forward)

  w_column = tf.constant(
      batch_size * [[0., 0., 0., 1.]], dtype=tf.float32)  # [batch_size, 4]
  w_column = tf.reshape(w_column, [batch_size, 4, 1])
  view_rotation = tf.stack(
      [to_side, cam_up, -forward,
       tf.zeros_like(to_side, dtype=tf.float32)],
      axis=1)  # [batch_size, 4, 3] matrix
  view_rotation = tf.concat(
      [view_rotation, w_column], axis=2)  # [batch_size, 4, 4]

  identity_batch = tf.tile(tf.expand_dims(tf.eye(3), 0), [batch_size, 1, 1])
  view_translation = tf.concat([identity_batch, tf.expand_dims(-eye, 2)], 2)
  view_translation = tf.concat(
      [view_translation,
       tf.reshape(w_column, [batch_size, 1, 4])], 1)
  camera_matrices = tf.matmul(view_rotation, view_translation)
  return camera_matrices


def euler_matrices(angles):
  """Computes a XYZ Tait-Bryan (improper Euler angle) rotation.

  Returns 4x4 matrices for convenient multiplication with other transformations.

  Args:
    angles: a [batch_size, 3] tensor containing X, Y, and Z angles in radians.

  Returns:
    a [batch_size, 4, 4] tensor of matrices.
  """
  s = tf.sin(angles)
  c = tf.cos(angles)
  # Rename variables for readability in the matrix definition below.
  c0, c1, c2 = (c[:, 0], c[:, 1], c[:, 2])
  s0, s1, s2 = (s[:, 0], s[:, 1], s[:, 2])

  zeros = tf.zeros_like(s[:, 0])
  ones = tf.ones_like(s[:, 0])

  # pyformat: disable
  flattened = tf.concat(
      [
          c2 * c1, c2 * s1 * s0 - c0 * s2, s2 * s0 + c2 * c0 * s1, zeros,
          c1 * s2, c2 * c0 + s2 * s1 * s0, c0 * s2 * s1 - c2 * s0, zeros,
          -s1, c1 * s0, c1 * c0, zeros,
          zeros, zeros, zeros, ones
      ],
      axis=0)
  # pyformat: enable
  reshaped = tf.reshape(flattened, [4, 4, -1])
  return tf.transpose(reshaped, [2, 0, 1])
|
|
@ -0,0 +1,404 @@
|
|||
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Differentiable 3-D rendering of a triangle mesh."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from renderer import camera_utils
from renderer import rasterize_triangles

def phong_shader(normals,
                 alphas,
                 pixel_positions,
                 light_positions,
                 light_intensities,
                 diffuse_colors=None,
                 camera_position=None,
                 specular_colors=None,
                 shininess_coefficients=None,
                 ambient_color=None):
  """Computes pixelwise lighting from rasterized buffers with the Phong model.

  Args:
    normals: a 4D float32 tensor with shape [batch_size, image_height,
        image_width, 3]. The inner dimension is the world space XYZ normal for
        the corresponding pixel. Should be already normalized.
    alphas: a 3D float32 tensor with shape [batch_size, image_height,
        image_width]. The inner dimension is the alpha value (transparency)
        for the corresponding pixel.
    pixel_positions: a 4D float32 tensor with shape [batch_size, image_height,
        image_width, 3]. The inner dimension is the world space XYZ position
        for the corresponding pixel.
    light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The
        XYZ position of each light in the scene. In the same coordinate space
        as pixel_positions.
    light_intensities: a 3D tensor with shape [batch_size, light_count, 3].
        The RGB intensity values for each light. Intensities may be above one.
    diffuse_colors: a 4D float32 tensor with shape [batch_size, image_height,
        image_width, 3]. The inner dimension is the diffuse RGB coefficients
        at a pixel in the range [0, 1].
    camera_position: a 2D tensor with shape [batch_size, 3]. The XYZ camera
        position in the scene. If supplied, specular reflections will be
        computed. If not supplied, specular_colors and shininess_coefficients
        are expected to be None. In the same coordinate space as
        pixel_positions.
    specular_colors: a 4D float32 tensor with shape [batch_size, image_height,
        image_width, 3]. The inner dimension is the specular RGB coefficients
        at a pixel in the range [0, 1]. If None, assumed to be tf.zeros().
    shininess_coefficients: a 3D float32 tensor that is broadcast to shape
        [batch_size, image_height, image_width]. The inner dimension is the
        shininess coefficient for the object at a pixel. Dimensions that are
        constant can be given length 1, so [batch_size, 1, 1] and [1, 1, 1]
        are also valid input shapes.
    ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
        color, which is added to each pixel before tone mapping. If None, it
        is assumed to be tf.zeros().

  Returns:
    A 4D float32 tensor of shape [batch_size, image_height, image_width, 4]
    containing the lit RGBA color values for each image at each pixel. Colors
    are in the range [0, 1].

  Raises:
    ValueError: An invalid argument to the method is detected.
  """
  batch_size, image_height, image_width = [s.value for s in normals.shape[:-1]]
  light_count = light_positions.shape[1].value
  pixel_count = image_height * image_width
  # Reshape all values to easily do pixelwise computations:
  normals = tf.reshape(normals, [batch_size, -1, 3])
  alphas = tf.reshape(alphas, [batch_size, -1, 1])
  diffuse_colors = tf.reshape(diffuse_colors, [batch_size, -1, 3])
  if camera_position is not None:
    specular_colors = tf.reshape(specular_colors, [batch_size, -1, 3])

  # Ambient component
  output_colors = tf.zeros([batch_size, image_height * image_width, 3])
  if ambient_color is not None:
    ambient_reshaped = tf.expand_dims(ambient_color, axis=1)
    output_colors = tf.add(output_colors, ambient_reshaped * diffuse_colors)

  # Diffuse component
  pixel_positions = tf.reshape(pixel_positions, [batch_size, -1, 3])
  per_light_pixel_positions = tf.stack(
      [pixel_positions] * light_count,
      axis=1)  # [batch_size, light_count, pixel_count, 3]
  directions_to_lights = tf.nn.l2_normalize(
      tf.expand_dims(light_positions, axis=2) - per_light_pixel_positions,
      dim=3)  # [batch_size, light_count, pixel_count, 3]
  # A light contributes only when it faces the surface, i.e. when the dot
  # product of the normal and the light direction is nonnegative; the same
  # mask also gates the specular term below:
  normals_dot_lights = tf.clip_by_value(
      tf.reduce_sum(
          tf.expand_dims(normals, axis=1) * directions_to_lights, axis=3), 0.0,
      1.0)  # [batch_size, light_count, pixel_count]
  diffuse_output = tf.expand_dims(
      diffuse_colors, axis=1) * tf.expand_dims(
          normals_dot_lights, axis=3) * tf.expand_dims(
              light_intensities, axis=2)
  diffuse_output = tf.reduce_sum(
      diffuse_output, axis=1)  # [batch_size, pixel_count, 3]
  output_colors = tf.add(output_colors, diffuse_output)

  # Specular component
  if camera_position is not None:
    camera_position = tf.reshape(camera_position, [batch_size, 1, 3])
    mirror_reflection_direction = tf.nn.l2_normalize(
        2.0 * tf.expand_dims(normals_dot_lights, axis=3) * tf.expand_dims(
            normals, axis=1) - directions_to_lights,
        dim=3)
    direction_to_camera = tf.nn.l2_normalize(
        camera_position - pixel_positions, dim=2)
    reflection_direction_dot_camera_direction = tf.reduce_sum(
        tf.expand_dims(direction_to_camera, axis=1) *
        mirror_reflection_direction,
        axis=3)
    # The specular component should only contribute when the reflection is
    # external:
    reflection_direction_dot_camera_direction = tf.clip_by_value(
        tf.nn.l2_normalize(reflection_direction_dot_camera_direction, dim=2),
        0.0, 1.0)
    # The specular component should also only contribute when the diffuse
    # component contributes. Note: tf.not_equal is required here; the Python
    # != operator does not compare TF1 tensors elementwise.
    reflection_direction_dot_camera_direction = tf.where(
        tf.not_equal(normals_dot_lights, 0.0),
        reflection_direction_dot_camera_direction,
        tf.zeros_like(
            reflection_direction_dot_camera_direction, dtype=tf.float32))
    # Reshape to support broadcasting the shininess coefficient, which rarely
    # varies per-vertex:
    reflection_direction_dot_camera_direction = tf.reshape(
        reflection_direction_dot_camera_direction,
        [batch_size, light_count, image_height, image_width])
    shininess_coefficients = tf.expand_dims(shininess_coefficients, axis=1)
    specularity = tf.reshape(
        tf.pow(reflection_direction_dot_camera_direction,
               shininess_coefficients),
        [batch_size, light_count, pixel_count, 1])
    specular_output = tf.expand_dims(
        specular_colors, axis=1) * specularity * tf.expand_dims(
            light_intensities, axis=2)
    specular_output = tf.reduce_sum(specular_output, axis=1)
    output_colors = tf.add(output_colors, specular_output)
  rgb_images = tf.reshape(output_colors,
                          [batch_size, image_height, image_width, 3])
  alpha_images = tf.reshape(alphas, [batch_size, image_height, image_width, 1])
  valid_rgb_values = tf.concat(3 * [alpha_images > 0.5], axis=3)
  rgb_images = tf.where(valid_rgb_values, rgb_images,
                        tf.zeros_like(rgb_images, dtype=tf.float32))
  return tf.reverse(tf.concat([rgb_images, alpha_images], axis=3), axis=[1])
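

# --- Illustrative sketch (not part of the original commit): a one-pixel,
# diffuse-only check of phong_shader. With a unit-intensity light facing the
# surface normal head-on, the lit color equals the diffuse color (alpha = 1).
def _check_phong_diffuse():
  import numpy as np
  normals = tf.constant([[[[0.0, 0.0, 1.0]]]])        # [1, 1, 1, 3]
  alphas = tf.ones([1, 1, 1])
  pixel_positions = tf.zeros([1, 1, 1, 3])
  light_positions = tf.constant([[[0.0, 0.0, 1.0]]])  # [1, 1, 3]
  light_intensities = tf.ones([1, 1, 3])
  diffuse_colors = tf.constant([[[[1.0, 0.0, 0.0]]]])
  rgba = phong_shader(normals, alphas, pixel_positions, light_positions,
                      light_intensities, diffuse_colors=diffuse_colors)
  with tf.Session() as sess:
    out = sess.run(rgba)
  assert np.allclose(out[0, 0, 0], [1.0, 0.0, 0.0, 1.0], atol=1e-5)
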
def tone_mapper(image, gamma):
  """Applies gamma correction to the input image.

  Tone maps the input image batch in order to make scenes with a high dynamic
  range viewable. The gamma correction factor is computed separately per
  image, but is shared between all provided channels. The exact function
  computed is:

  image_out = A*image_in^gamma, where A is an image-wide constant computed so
  that the maximum image value is approximately 1. The correction is applied
  to all channels.

  Args:
    image: 4-D float32 tensor with shape [batch_size, image_height,
        image_width, channel_count]. The batch of images to tone map.
    gamma: 0-D float32 nonnegative tensor. Values of gamma below one compress
        relative contrast in the image, and values above one increase it. A
        value of 1 is equivalent to scaling the image to have a maximum value
        of 1.
  Returns:
    4-D float32 tensor with shape [batch_size, image_height, image_width,
    channel_count]. Contains the gamma-corrected images, clipped to the range
    [0, 1].
  """
  batch_size = image.shape[0].value
  corrected_image = tf.pow(image, gamma)
  image_max = tf.reduce_max(
      tf.reshape(corrected_image, [batch_size, -1]), axis=1)
  scaled_image = tf.divide(corrected_image,
                           tf.reshape(image_max, [batch_size, 1, 1, 1]))
  return tf.clip_by_value(scaled_image, 0.0, 1.0)
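

# --- Illustrative sketch (not part of the original commit): tone_mapper on a
# constant image. For a constant 4.0 image and gamma = 0.5, tf.pow gives 2.0
# everywhere, the per-image maximum is 2.0, and the rescaled output is 1.0.
def _check_tone_mapper():
  import numpy as np
  image = tf.fill([1, 2, 2, 3], 4.0)
  with tf.Session() as sess:
    out = sess.run(tone_mapper(image, tf.constant(0.5)))
  assert np.allclose(out, 1.0)
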
def mesh_renderer(vertices,
                  triangles,
                  normals,
                  diffuse_colors,
                  camera_position,
                  camera_lookat,
                  camera_up,
                  light_positions,
                  light_intensities,
                  image_width,
                  image_height,
                  specular_colors=None,
                  shininess_coefficients=None,
                  ambient_color=None,
                  fov_y=40.0,
                  near_clip=0.01,
                  far_clip=50.0):
  """Renders an input scene using Phong shading, and returns an output image.

  Args:
    vertices: 3-D float32 tensor with shape [batch_size, vertex_count, 3].
        Each triplet is an xyz position in world space.
    triangles: 2-D int32 tensor with shape [triangle_count, 3]. Each triplet
        should contain vertex indices describing a triangle such that the
        triangle's normal points toward the viewer if the forward order of
        the triplet defines a clockwise winding of the vertices. Gradients
        with respect to this tensor are not available.
    normals: 3-D float32 tensor with shape [batch_size, vertex_count, 3].
        Each triplet is the xyz vertex normal for its corresponding vertex.
        Each vector is assumed to be already normalized.
    diffuse_colors: 3-D float32 tensor with shape [batch_size, vertex_count,
        3]. The RGB diffuse reflection in the range [0, 1] for each vertex.
    camera_position: 2-D tensor with shape [batch_size, 3] or 1-D tensor with
        shape [3] specifying the XYZ world space camera position.
    camera_lookat: 2-D tensor with shape [batch_size, 3] or 1-D tensor with
        shape [3] containing an XYZ point along the center of the camera's
        gaze.
    camera_up: 2-D tensor with shape [batch_size, 3] or 1-D tensor with shape
        [3] containing the up direction for the camera. The camera will have
        no tilt with respect to this direction.
    light_positions: a 3-D tensor with shape [batch_size, light_count, 3].
        The XYZ position of each light in the scene. In the same coordinate
        space as pixel_positions.
    light_intensities: a 3-D tensor with shape [batch_size, light_count, 3].
        The RGB intensity values for each light. Intensities may be above one.
    image_width: int specifying desired output image width in pixels.
    image_height: int specifying desired output image height in pixels.
    specular_colors: 3-D float32 tensor with shape [batch_size, vertex_count,
        3]. The RGB specular reflection in the range [0, 1] for each vertex.
        If supplied, specular reflections will be computed, and both
        specular_colors and shininess_coefficients are expected.
    shininess_coefficients: a 0D-2D float32 tensor with maximum shape
        [batch_size, vertex_count]. The Phong shininess coefficient of each
        vertex. A 0D tensor or float gives a constant shininess coefficient
        across all batches and images. A 1D tensor must have shape
        [batch_size], and a single shininess coefficient per image is used.
    ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
        color, which is added to each pixel in the scene. If None, it is
        assumed to be black.
    fov_y: float, 0D tensor, or 1D tensor with shape [batch_size] specifying
        desired output image y field of view in degrees.
    near_clip: float, 0D tensor, or 1D tensor with shape [batch_size]
        specifying near clipping plane distance.
    far_clip: float, 0D tensor, or 1D tensor with shape [batch_size]
        specifying far clipping plane distance.

  Returns:
    A 4-D float32 tensor of shape [batch_size, image_height, image_width, 4]
    containing the lit RGBA color values for each image at each pixel. RGB
    colors are the intensity values before tonemapping and can be in the
    range [0, infinity]. Clipping to the range [0, 1] with tf.clip_by_value
    is likely reasonable for both viewing and training most scenes. More
    complex scenes with multiple lights should tone map color values for
    display only. One simple tonemapping approach is to rescale color values
    as x/(1+x); gamma compression is another common technique. Alpha values
    are zero for background pixels and near one for mesh pixels.

  Raises:
    ValueError: An invalid argument to the method is detected.
  """
  if len(vertices.shape) != 3:
    raise ValueError('Vertices must have shape [batch_size, vertex_count, 3].')
  batch_size = vertices.shape[0].value
  if len(normals.shape) != 3:
    raise ValueError('Normals must have shape [batch_size, vertex_count, 3].')
  if len(light_positions.shape) != 3:
    raise ValueError(
        'Light_positions must have shape [batch_size, light_count, 3].')
  if len(light_intensities.shape) != 3:
    raise ValueError(
        'Light_intensities must have shape [batch_size, light_count, 3].')
  if len(diffuse_colors.shape) != 3:
    raise ValueError(
        'vertex_diffuse_colors must have shape [batch_size, vertex_count, 3].')
  if (ambient_color is not None and
      ambient_color.get_shape().as_list() != [batch_size, 3]):
    raise ValueError('Ambient_color must have shape [batch_size, 3].')
  if camera_position.get_shape().as_list() == [3]:
    camera_position = tf.tile(
        tf.expand_dims(camera_position, axis=0), [batch_size, 1])
  elif camera_position.get_shape().as_list() != [batch_size, 3]:
    raise ValueError('Camera_position must have shape [batch_size, 3]')
  if camera_lookat.get_shape().as_list() == [3]:
    camera_lookat = tf.tile(
        tf.expand_dims(camera_lookat, axis=0), [batch_size, 1])
  elif camera_lookat.get_shape().as_list() != [batch_size, 3]:
    raise ValueError('Camera_lookat must have shape [batch_size, 3]')
  if camera_up.get_shape().as_list() == [3]:
    camera_up = tf.tile(tf.expand_dims(camera_up, axis=0), [batch_size, 1])
  elif camera_up.get_shape().as_list() != [batch_size, 3]:
    raise ValueError('Camera_up must have shape [batch_size, 3]')
  if isinstance(fov_y, float):
    fov_y = tf.constant(batch_size * [fov_y], dtype=tf.float32)
  elif not fov_y.get_shape().as_list():
    fov_y = tf.tile(tf.expand_dims(fov_y, 0), [batch_size])
  elif fov_y.get_shape().as_list() != [batch_size]:
    raise ValueError('Fov_y must be a float, a 0D tensor, or a 1D tensor '
                     'with shape [batch_size]')
  if isinstance(near_clip, float):
    near_clip = tf.constant(batch_size * [near_clip], dtype=tf.float32)
  elif not near_clip.get_shape().as_list():
    near_clip = tf.tile(tf.expand_dims(near_clip, 0), [batch_size])
  elif near_clip.get_shape().as_list() != [batch_size]:
    raise ValueError('Near_clip must be a float, a 0D tensor, or a 1D tensor '
                     'with shape [batch_size]')
  if isinstance(far_clip, float):
    far_clip = tf.constant(batch_size * [far_clip], dtype=tf.float32)
  elif not far_clip.get_shape().as_list():
    far_clip = tf.tile(tf.expand_dims(far_clip, 0), [batch_size])
  elif far_clip.get_shape().as_list() != [batch_size]:
    raise ValueError('Far_clip must be a float, a 0D tensor, or a 1D tensor '
                     'with shape [batch_size]')
  if specular_colors is not None and shininess_coefficients is None:
    raise ValueError(
        'Specular colors were supplied without shininess coefficients.')
  if shininess_coefficients is not None and specular_colors is None:
    raise ValueError(
        'Shininess coefficients were supplied without specular colors.')
  if specular_colors is not None:
    # Since a 0-D float32 tensor is accepted, also accept a float.
    if isinstance(shininess_coefficients, float):
      shininess_coefficients = tf.constant(
          shininess_coefficients, dtype=tf.float32)
    if len(specular_colors.shape) != 3:
      raise ValueError('The specular colors must have shape [batch_size, '
                       'vertex_count, 3].')
    if len(shininess_coefficients.shape) > 2:
      raise ValueError('The shininess coefficients must have shape at most '
                       '[batch_size, vertex_count].')
    # If we don't have per-vertex coefficients, we can just reshape the
    # input shininess to broadcast later, rather than interpolating an
    # additional vertex attribute:
    if len(shininess_coefficients.shape) < 2:
      vertex_attributes = tf.concat(
          [normals, vertices, diffuse_colors, specular_colors], axis=2)
    else:
      vertex_attributes = tf.concat(
          [
              normals, vertices, diffuse_colors, specular_colors,
              tf.expand_dims(shininess_coefficients, axis=2)
          ],
          axis=2)
  else:
    vertex_attributes = tf.concat([normals, vertices, diffuse_colors], axis=2)

  camera_matrices = camera_utils.look_at(camera_position, camera_lookat,
                                         camera_up)

  perspective_transforms = camera_utils.perspective(image_width / image_height,
                                                    fov_y, near_clip, far_clip)

  clip_space_transforms = tf.matmul(perspective_transforms, camera_matrices)

  pixel_attributes, alphas = rasterize_triangles.rasterize_triangles(
      vertices, vertex_attributes, triangles, clip_space_transforms,
      image_width, image_height, [-1] * vertex_attributes.shape[2].value)

  # Extract the interpolated vertex attributes from the pixel buffer and
  # supply them to the shader:
  pixel_normals = tf.nn.l2_normalize(pixel_attributes[:, :, :, 0:3], dim=3)
  pixel_positions = pixel_attributes[:, :, :, 3:6]
  diffuse_colors = pixel_attributes[:, :, :, 6:9]
  if specular_colors is not None:
    specular_colors = pixel_attributes[:, :, :, 9:12]
    # Retrieve the interpolated shininess coefficients if necessary, or just
    # reshape our input for broadcasting:
    if len(shininess_coefficients.shape) == 2:
      shininess_coefficients = pixel_attributes[:, :, :, 12]
    else:
      shininess_coefficients = tf.reshape(shininess_coefficients, [-1, 1, 1])

  renders = phong_shader(
      normals=pixel_normals,
      alphas=alphas,
      pixel_positions=pixel_positions,
      light_positions=light_positions,
      light_intensities=light_intensities,
      diffuse_colors=diffuse_colors,
      camera_position=camera_position if specular_colors is not None else None,
      specular_colors=specular_colors,
      shininess_coefficients=shininess_coefficients,
      ambient_color=ambient_color)
  return renders
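

# --- Illustrative sketch (not part of the original commit): rendering one
# triangle end-to-end. This assumes the compiled rasterizer kernel (.so) can
# be loaded; all scene values are arbitrary examples.
def _render_single_triangle():
  vertices = tf.constant([[[-0.5, -0.5, 0.0],
                           [0.5, -0.5, 0.0],
                           [0.0, 0.5, 0.0]]])           # [1, 3, 3]
  triangles = tf.constant([[0, 1, 2]], dtype=tf.int32)  # [1, 3]
  normals = tf.tile(tf.constant([[[0.0, 0.0, 1.0]]]), [1, 3, 1])
  diffuse_colors = tf.ones([1, 3, 3])
  renders = mesh_renderer(
      vertices, triangles, normals, diffuse_colors,
      camera_position=tf.constant([0.0, 0.0, 3.0]),
      camera_lookat=tf.zeros([3]),
      camera_up=tf.constant([0.0, 1.0, 0.0]),
      light_positions=tf.constant([[[0.0, 0.0, 5.0]]]),
      light_intensities=tf.ones([1, 1, 3]),
      image_width=64, image_height=64)
  with tf.Session() as sess:
    return sess.run(renders)  # [1, 64, 64, 4] RGBA
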
@@ -0,0 +1,190 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Differentiable triangle rasterizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf


# rasterize_triangles_module = tf.load_op_library(
#     os.path.join(os.environ['TEST_SRCDIR'],
#     'tf_mesh_renderer/mesh_renderer/kernels/rasterize_triangles_kernel.so'))

rasterize_triangles_module = tf.load_op_library('./renderer/rasterize_triangles_kernel_1.so')


# This epsilon should be smaller than any valid barycentric reweighting factor
# (i.e. the per-pixel reweighting factor used to correct for the effects of
# perspective-incorrect barycentric interpolation). It is necessary primarily
# because the reweighting factor will be 0 for pixels outside the mesh, and we
# need to ensure the image color and gradient outside the region of the mesh
# are 0.
_MINIMUM_REWEIGHTING_THRESHOLD = 1e-6

# This epsilon is the minimum absolute value of a homogeneous coordinate
# before it is clipped. It should be sufficiently large such that the output
# of the perspective divide step with this denominator still has good working
# precision with 32 bit arithmetic, and sufficiently small so that in practice
# vertices are almost never close enough to a clipping plane to be
# thresholded.
_MINIMUM_PERSPECTIVE_DIVIDE_THRESHOLD = 1e-6

def rasterize_triangles(vertices, attributes, triangles, projection_matrices,
                        image_width, image_height, background_value):
  """Rasterizes the input scene and computes interpolated vertex attributes.

  NOTE: the rasterizer does no triangle clipping. Triangles that lie outside
  the viewing frustum (esp. behind the camera) may be drawn incorrectly.

  Args:
    vertices: 3-D float32 tensor with shape [batch_size, vertex_count, 3].
        Each triplet is an xyz position in model space.
    attributes: 3-D float32 tensor with shape [batch_size, vertex_count,
        attribute_count]. Each vertex attribute is interpolated across the
        triangle using barycentric interpolation.
    triangles: 2-D int32 tensor with shape [triangle_count, 3]. Each triplet
        should contain vertex indices describing a triangle such that the
        triangle's normal points toward the viewer if the forward order of
        the triplet defines a clockwise winding of the vertices. Gradients
        with respect to this tensor are not available.
    projection_matrices: 3-D float tensor with shape [batch_size, 4, 4]
        containing model-view-perspective projection matrices.
    image_width: int specifying desired output image width in pixels.
    image_height: int specifying desired output image height in pixels.
    background_value: a 1-D float32 tensor with shape [attribute_count].
        Pixels that lie outside all triangles take this value.

  Returns:
    A tuple (attribute_images, alphas):
    attribute_images: a 4-D float32 tensor with shape [batch_size,
        image_height, image_width, attribute_count], containing the
        interpolated vertex attributes at each pixel, composited over
        background_value.
    alphas: a 4-D float32 tensor with shape [batch_size, image_height,
        image_width, 1], approximately one where geometry was rasterized and
        exactly zero elsewhere.

  Raises:
    ValueError: An invalid argument to the method is detected.
  """
  if not image_width > 0:
    raise ValueError('Image width must be > 0.')
  if not image_height > 0:
    raise ValueError('Image height must be > 0.')
  if len(vertices.shape) != 3:
    raise ValueError('The vertex buffer must be 3D.')
  batch_size = vertices.shape[0].value
  vertex_count = vertices.shape[1].value

  # We map the coordinates to normalized device coordinates before passing
  # the scene to the rendering kernel to keep as many ops in tensorflow as
  # possible.

  homogeneous_coord = tf.ones([batch_size, vertex_count, 1], dtype=tf.float32)
  vertices_homogeneous = tf.concat([vertices, homogeneous_coord], 2)

  # Vertices are given in row-major order, but the transformation pipeline is
  # column major:
  clip_space_points = tf.matmul(
      vertices_homogeneous, projection_matrices, transpose_b=True)

  # Perspective divide, first thresholding the homogeneous coordinate to avoid
  # the possibility of NaNs:
  clip_space_points_w = tf.maximum(
      tf.abs(clip_space_points[:, :, 3:4]),
      _MINIMUM_PERSPECTIVE_DIVIDE_THRESHOLD) * tf.sign(
          clip_space_points[:, :, 3:4])
  normalized_device_coordinates = (
      clip_space_points[:, :, 0:3] / clip_space_points_w)

  per_image_uncorrected_barycentric_coordinates = []
  per_image_vertex_ids = []
  for im in range(batch_size):
    barycentric_coords, triangle_ids, _ = (
        rasterize_triangles_module.rasterize_triangles(
            normalized_device_coordinates[im, :, :], triangles, image_width,
            image_height))
    per_image_uncorrected_barycentric_coordinates.append(
        tf.reshape(barycentric_coords, [-1, 3]))

    # Gathers the vertex indices now because the indices don't contain a batch
    # identifier, and reindexes the vertex ids to point to a (batch, vertex_id)
    # location.
    vertex_ids = tf.gather(triangles, tf.reshape(triangle_ids, [-1]))
    reindexed_ids = tf.add(vertex_ids, im * vertex_count)
    per_image_vertex_ids.append(reindexed_ids)

  uncorrected_barycentric_coordinates = tf.concat(
      per_image_uncorrected_barycentric_coordinates, axis=0)
  vertex_ids = tf.concat(per_image_vertex_ids, axis=0)

  # Indexes with each pixel's clip-space triangle's extrema (the pixel's
  # 'corner points') ids to get the relevant properties for deferred shading.
  flattened_vertex_attributes = tf.reshape(attributes,
                                           [batch_size * vertex_count, -1])
  corner_attributes = tf.gather(flattened_vertex_attributes, vertex_ids)

  # Barycentric interpolation is linear in the reciprocal of the homogeneous
  # W coordinate, so we use these weights to correct for the effects of
  # perspective distortion after rasterization.
  perspective_distortion_weights = tf.reciprocal(
      tf.reshape(clip_space_points_w, [-1]))
  corner_distortion_weights = tf.gather(perspective_distortion_weights,
                                        vertex_ids)

  # Apply perspective correction to the barycentric coordinates. This step is
  # required since the rasterizer receives normalized-device coordinates
  # (i.e., after perspective division), so it can't apply perspective
  # correction to the interpolated values.
  weighted_barycentric_coordinates = tf.multiply(
      uncorrected_barycentric_coordinates, corner_distortion_weights)
  barycentric_reweighting_factor = tf.reduce_sum(
      weighted_barycentric_coordinates, axis=1)

  corrected_barycentric_coordinates = tf.divide(
      weighted_barycentric_coordinates,
      tf.expand_dims(
          tf.maximum(barycentric_reweighting_factor,
                     _MINIMUM_REWEIGHTING_THRESHOLD),
          axis=1))

  # Computes the pixel attributes by interpolating the known attributes at
  # the corner points of the triangle with the corrected barycentric
  # coordinates.
  weighted_vertex_attributes = tf.multiply(
      corner_attributes,
      tf.expand_dims(corrected_barycentric_coordinates, axis=2))
  summed_attributes = tf.reduce_sum(weighted_vertex_attributes, axis=1)
  attribute_images = tf.reshape(summed_attributes,
                                [batch_size, image_height, image_width, -1])

  # Barycentric coordinates should approximately sum to one where there is
  # rendered geometry, but be exactly zero where there is not.
  alphas = tf.clip_by_value(
      tf.reduce_sum(2.0 * corrected_barycentric_coordinates, axis=1), 0.0, 1.0)
  alphas = tf.reshape(alphas, [batch_size, image_height, image_width, 1])

  attributes_with_background = (
      alphas * attribute_images + (1.0 - alphas) * background_value)

  return attributes_with_background, alphas
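

# --- Illustrative sketch (not part of the original commit): the perspective
# correction above in plain NumPy. Screen-space barycentric weights are
# rescaled by 1/w of each corner and renormalized, mirroring the in-graph
# reweighting.
def _perspective_correct_barycentrics(bary, corner_w, eps=1e-6):
  import numpy as np
  # bary: [pixel_count, 3] screen-space barycentrics;
  # corner_w: [pixel_count, 3] clip-space w of the three corners.
  weighted = bary / corner_w
  return weighted / np.maximum(weighted.sum(axis=1, keepdims=True), eps)
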
@tf.RegisterGradient('RasterizeTriangles')
def _rasterize_triangles_grad(op, df_dbarys, df_dids, df_dz):
  # Gradients are only supported for barycentric coordinates. Gradients for
  # the z-buffer are possible as well but not currently implemented.
  del df_dids, df_dz
  return rasterize_triangles_module.rasterize_triangles_grad(
      op.inputs[0], op.inputs[1], op.outputs[0], op.outputs[1], df_dbarys,
      op.get_attr('image_width'), op.get_attr('image_height')), None
@@ -0,0 +1,103 @@
import math
import numpy as np

class GMM:
    def __init__(self, dim, num, w, mu, cov, cov_det, cov_inv):
        self.dim = dim  # feature dimension
        self.num = num  # number of Gaussian components
        self.w = w  # weights of Gaussian components (a list of scalars)
        self.mu = mu  # means of Gaussian components (a list of 1 x dim vectors)
        self.cov = cov  # covariance matrices of Gaussian components (a list of dim x dim matrices)
        self.cov_det = cov_det  # pre-computed determinants of the covariance matrices (a list of scalars)
        self.cov_inv = cov_inv  # pre-computed inverse covariance matrices (a list of dim x dim matrices)

        self.factor = [0] * num
        for i in range(self.num):
            self.factor[i] = (2 * math.pi) ** (self.dim / 2) * self.cov_det[i] ** 0.5

    def likelihood(self, data):
        assert data.shape[1] == self.dim
        N = data.shape[0]
        lh = np.zeros(N)

        for i in range(self.num):
            data_ = data - self.mu[i]

            tmp = np.matmul(data_, self.cov_inv[i]) * data_
            tmp = np.sum(tmp, axis=1)
            power = -0.5 * tmp

            p = np.exp(power)  # vectorized over all N samples
            p = p / self.factor[i]
            lh += p * self.w[i]

        return lh
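

# --- Illustrative sketch (not part of the original commit): a one-component,
# one-dimensional GMM checked against the closed form
# N(x; 0, 1) = exp(-x^2 / 2) / sqrt(2 * pi).
def _check_gmm_likelihood():
    g = GMM(dim=1, num=1, w=[1.0], mu=[np.zeros(1)], cov=[np.eye(1)],
            cov_det=[1.0], cov_inv=[np.eye(1)])
    x = np.array([[0.0], [1.0]])
    expected = np.exp(-0.5 * x[:, 0] ** 2) / math.sqrt(2 * math.pi)
    assert np.allclose(g.likelihood(x), expected)
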
def _rgb2ycbcr(rgb):
    m = np.array([[65.481, 128.553, 24.966],
                  [-37.797, -74.203, 112],
                  [112, -93.786, -18.214]])
    shape = rgb.shape
    rgb = rgb.reshape((shape[0] * shape[1], 3))
    ycbcr = np.dot(rgb, m.transpose() / 255.)
    ycbcr[:, 0] += 16.
    ycbcr[:, 1:] += 128.
    return ycbcr.reshape(shape)


def _bgr2ycbcr(bgr):
    rgb = bgr[..., ::-1]
    return _rgb2ycbcr(rgb)


gmm_skin_w = [0.24063933, 0.16365987, 0.26034665, 0.33535415]
gmm_skin_mu = [np.array([113.71862, 103.39613, 164.08226]),
               np.array([150.19858, 105.18467, 155.51428]),
               np.array([183.92976, 107.62468, 152.71820]),
               np.array([114.90524, 113.59782, 151.38217])]
gmm_skin_cov_det = [5692842.5, 5851930.5, 2329131., 1585971.]
gmm_skin_cov_inv = [np.array([[0.0019472069, 0.0020450759, -0.00060243998], [0.0020450759, 0.017700525, 0.0051420014], [-0.00060243998, 0.0051420014, 0.0081308950]]),
                    np.array([[0.0027110141, 0.0011036990, 0.0023122299], [0.0011036990, 0.010707724, 0.010742856], [0.0023122299, 0.010742856, 0.017481629]]),
                    np.array([[0.0048026871, 0.00022935172, 0.0077668377], [0.00022935172, 0.011729696, 0.0081661865], [0.0077668377, 0.0081661865, 0.025374353]]),
                    np.array([[0.0011989699, 0.0022453172, -0.0010748957], [0.0022453172, 0.047758564, 0.020332102], [-0.0010748957, 0.020332102, 0.024502251]])]

gmm_skin = GMM(3, 4, gmm_skin_w, gmm_skin_mu, [], gmm_skin_cov_det, gmm_skin_cov_inv)

gmm_nonskin_w = [0.12791070, 0.31130761, 0.34245777, 0.21832393]
gmm_nonskin_mu = [np.array([99.200851, 112.07533, 140.20602]),
                  np.array([110.91392, 125.52969, 130.19237]),
                  np.array([129.75864, 129.96107, 126.96808]),
                  np.array([112.29587, 128.85121, 129.05431])]
gmm_nonskin_cov_det = [458703648., 6466488., 90611376., 133097.63]
gmm_nonskin_cov_inv = [np.array([[0.00085371657, 0.00071197288, 0.00023958916], [0.00071197288, 0.0025935620, 0.00076557708], [0.00023958916, 0.00076557708, 0.0015042332]]),
                       np.array([[0.00024650150, 0.00045542428, 0.00015019422], [0.00045542428, 0.026412144, 0.018419769], [0.00015019422, 0.018419769, 0.037497383]]),
                       np.array([[0.00037054974, 0.00038146760, 0.00040408765], [0.00038146760, 0.0085505722, 0.0079136286], [0.00040408765, 0.0079136286, 0.010982352]]),
                       np.array([[0.00013709733, 0.00051228428, 0.00012777430], [0.00051228428, 0.28237113, 0.10528370], [0.00012777430, 0.10528370, 0.23468947]])]

gmm_nonskin = GMM(3, 4, gmm_nonskin_w, gmm_nonskin_mu, [], gmm_nonskin_cov_det, gmm_nonskin_cov_inv)

prior_skin = 0.8
prior_nonskin = 1 - prior_skin


# calculate a skin attention mask for a BGR image
def skinmask(imbgr):
    im = _bgr2ycbcr(imbgr)

    data = im.reshape((-1, 3))

    lh_skin = gmm_skin.likelihood(data)
    lh_nonskin = gmm_nonskin.likelihood(data)

    tmp1 = prior_skin * lh_skin
    tmp2 = prior_nonskin * lh_nonskin
    post_skin = tmp1 / (tmp1 + tmp2)  # posterior probability of skin

    post_skin = post_skin.reshape((im.shape[0], im.shape[1]))

    post_skin = np.round(post_skin * 255)
    post_skin = post_skin.astype(np.uint8)
    post_skin = np.tile(np.expand_dims(post_skin, 2), [1, 1, 3])  # reshape to H x W x 3

    return post_skin
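

# --- Illustrative usage (not part of the original commit): computing a skin
# attention mask for an image loaded with OpenCV. The path is a placeholder.
def _demo_skinmask(image_path='input.png'):
    import cv2
    imbgr = cv2.imread(image_path)  # H x W x 3, BGR uint8
    return skinmask(imbgr)          # H x W x 3, uint8 mask in [0, 255]
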
@@ -0,0 +1,140 @@
import tensorflow as tf
import numpy as np
import os
from options import Option
from reconstruction_model import *
from data_loader import *
from utils import *
import argparse
###############################################################################################
# training stage
###############################################################################################


os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# training data and validation data
def parse_args():
    desc = "Training stage for Deep3DRecon."
    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('--data_path', type=str, default='./processed_data', help='training data folder')
    parser.add_argument('--val_data_path', type=str, default='./processed_data', help='validation data folder')

    return parser.parse_args()

# initialize weights for resnet and facenet
def restore_weights_and_initialize(opt):
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()

    # add batch normalization moving statistics (which are not trainable)
    # to the variable list handled by the savers
    bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
    bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
    var_list += bn_moving_vars

    # create savers to save and restore weights
    resnet_vars = [v for v in var_list if 'resnet_v1_50' in v.name]
    facenet_vars = [v for v in var_list if 'InceptionResnetV1' in v.name]
    saver_resnet = tf.train.Saver(var_list=resnet_vars)
    saver_facenet = tf.train.Saver(var_list=facenet_vars)

    saver = tf.train.Saver(var_list=resnet_vars + [v for v in var_list if 'fc-' in v.name], max_to_keep=50)

    # create session
    sess = tf.InteractiveSession(config=opt.config)

    # create summary writers
    train_writer = tf.summary.FileWriter(opt.train_summary_path, sess.graph)
    val_writer = tf.summary.FileWriter(opt.val_summary_path, sess.graph)

    # initialization
    tf.global_variables_initializer().run()
    tf.local_variables_initializer().run()

    saver_resnet.restore(sess, opt.R_net_weights)
    saver_facenet.restore(sess, opt.Perceptual_net_weights)

    return saver, train_writer, val_writer, sess

# main function for training
def train():

    # transfer the original BFM09 face model to our model if needed
    if not os.path.isfile('./BFM/BFM_model_front.mat'):
        transferBFM09()

    with tf.Graph().as_default() as graph:

        # training options
        args = parse_args()
        opt = Option()
        opt.data_path = [args.data_path]
        opt.val_data_path = [args.val_data_path]

        # load training data into queue
        train_iterator = load_dataset(opt)
        # create reconstruction model
        model = Reconstruction_model(opt)
        # send training data to the model
        model.set_input(train_iterator)
        # update model variables with training data
        model.step(is_train=True)
        # summarize training statistics
        model.summarize()

        # several training statistics to be saved
        train_stat = model.summary_stat
        train_img_stat = model.summary_img
        train_op = model.train_op
        photo_error = model.photo_loss
        lm_error = model.landmark_loss
        id_error = model.perceptual_loss

        # load validation data into queue
        val_iterator = load_dataset(opt, train=False)
        # send validation data to the model
        model.set_input(val_iterator)
        # only do a forward pass without updating model variables
        model.step(is_train=False)
        # summarize validation statistics
        model.summarize()
        val_stat = model.summary_stat
        val_img_stat = model.summary_img

        # initialization
        saver, train_writer, val_writer, sess = restore_weights_and_initialize(opt)

        # freeze the graph to ensure no new op will be added during training
        sess.graph.finalize()

        # training loop
        for i in range(opt.train_maxiter):
            _, ph_loss, lm_loss, id_loss = sess.run([train_op, photo_error, lm_error, id_error])
            print('Iter: %d; lm_loss: %f; photo_loss: %f; id_loss: %f\n' % (i, np.sqrt(lm_loss), ph_loss, id_loss))

            # summarize training stats every <train_summary_iter> iterations
            if np.mod(i, opt.train_summary_iter) == 0:
                train_summary = sess.run(train_stat)
                train_writer.add_summary(train_summary, i)

            # summarize image stats every <image_summary_iter> iterations
            if np.mod(i, opt.image_summary_iter) == 0:
                train_img_summary = sess.run(train_img_stat)
                train_writer.add_summary(train_img_summary, i)

            # summarize validation stats every <val_summary_iter> iterations
            if np.mod(i, opt.val_summary_iter) == 0:
                val_summary, val_img_summary = sess.run([val_stat, val_img_stat])
                val_writer.add_summary(val_summary, i)
                val_writer.add_summary(val_img_summary, i)

            # save model variables every <save_iter> iterations
            if np.mod(i, opt.save_iter) == 0:
                saver.save(sess, os.path.join(opt.model_save_path, 'iter_%d.ckpt' % i))


if __name__ == '__main__':
    train()
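
# --- Illustrative note (not part of the original commit): a typical
# invocation, with placeholder data folders produced by the preprocessing
# step:
#
#   python train.py --data_path ./processed_data --val_data_path ./processed_data
#
# Progress can then be monitored with TensorBoard pointed at
# opt.train_summary_path and opt.val_summary_path.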
@@ -1,4 +1,5 @@
import numpy as np
import tensorflow as tf
from PIL import Image
from scipy.io import loadmat, savemat
from array import array

@@ -125,4 +126,12 @@ def save_obj(path,v,f,c):
    for i in range(len(f)):
        file.write('f %d %d %d\n' % (f[i, 0], f[i, 1], f[i, 2]))

    file.close()

# load a .pb file into a tensorflow graph
def load_graph(graph_filename):
    with tf.gfile.GFile(graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    return graph_def
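
# --- Illustrative sketch (not part of the original commit): importing the
# returned GraphDef into the default graph. The .pb path is a placeholder.
def _demo_load_graph(pb_path='model.pb'):
    graph_def = load_graph(pb_path)
    tf.import_graph_def(graph_def, name='')
    return tf.get_default_graph()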