This commit is contained in:
Yu Deng 2020-12-04 15:36:24 +08:00
Parent 05d0786aea
Commit 3002ea2d52
18 changed files with 1870 additions and 99 deletions

Binary data
BFM/BFM_model_front.mat Normal file

Binary file not shown.

76
data_loader.py Normal file
View file

@@ -0,0 +1,76 @@
import tensorflow as tf
from tensorflow.contrib.data import prefetch_to_device, shuffle_and_repeat, map_and_batch
import os
import glob
import numpy as np
import cv2
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
###############################################################################################
# data loader for training stage
###############################################################################################
def _parse_function(image_path,lm_path,mask_path):
# input image
x = tf.read_file(image_path)
img = tf.image.decode_png(x, channels=3)
img = tf.cast(img,tf.float32)
img = img[:,:,::-1]
# ground truth landmark
x2 = tf.read_file(lm_path)
lm = tf.decode_raw(x2,tf.float64)
lm = tf.cast(lm,tf.float32)
lm = tf.reshape(lm,[68,2])
# skin mask
x3 = tf.read_file(mask_path)
mask = tf.image.decode_png(x3, channels=3)
mask = tf.cast(mask,tf.float32)
return img,lm,mask
def check_lm_bin(dataset,lm_path):
if not os.path.isdir(os.path.join(dataset,'lm_bin')):
os.makedirs(os.path.join(dataset,'lm_bin'))
for i in range(len(lm_path)):
lm = np.loadtxt(lm_path[i])
lm = np.reshape(lm,[-1])
lm.tofile(os.path.join(dataset,'lm_bin',lm_path[i].split('/')[-1].replace('txt','bin')))
def load_dataset(opt,train=True):
if train:
data_path = opt.data_path
else:
data_path = opt.val_data_path
image_path_all = []
lm_path_all = []
mask_path_all = []
for dataset in data_path:
image_path = glob.glob(dataset + '/' + '*.png')
image_path.sort()
lm_path_ = [os.path.join(dataset,'lm',f.split('/')[-1].replace('png','txt')) for f in image_path]
lm_path_.sort()
mask_path = [os.path.join(dataset,'mask',f.split('/')[-1]) for f in image_path]
mask_path.sort()
# check if landmark binary files exist
check_lm_bin(dataset,lm_path_)
lm_path = [os.path.join(dataset,'lm_bin',f.split('/')[-1].replace('png','bin')) for f in image_path]
lm_path.sort()
image_path_all += image_path
mask_path_all += mask_path
lm_path_all += lm_path
dataset_num = len(image_path_all)
dataset = tf.data.Dataset.from_tensor_slices((image_path_all,lm_path_all,mask_path_all))
dataset = dataset. \
apply(shuffle_and_repeat(dataset_num)). \
apply(map_and_batch(_parse_function, opt.batch_size, num_parallel_batches=4, drop_remainder=True)). \
apply(prefetch_to_device('/gpu:0', None)) # When using dataset.prefetch, use buffer_size=None to let it detect optimal buffer size
inputs_iterator = dataset.make_one_shot_iterator()
return inputs_iterator
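For context only (not part of this commit): a minimal TF 1.x sketch of consuming the iterator returned by load_dataset, reusing the imports above and assuming an opt object shaped like options.Option (data_path, batch_size, config).
# Illustrative driver, not in the repository.
def peek_one_batch(opt):
    iterator = load_dataset(opt, train=True)
    imgs, lms, masks = iterator.get_next()  # aligned BGR images, [B,68,2] landmarks, skin masks
    with tf.Session(config=opt.config) as sess:
        img_np, lm_np, mask_np = sess.run([imgs, lms, masks])
        print(img_np.shape, lm_np.shape, mask_np.shape)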

50
demo.py
View file

@@ -7,18 +7,25 @@ import cv2
import platform
from scipy.io import loadmat,savemat
from preprocess_img import Preprocess
from load_data import *
from preprocess_img import align_img
from utils import *
from face_decoder import Face3D
from options import Option
is_windows = platform.system() == "Windows"
def load_graph(graph_filename):
with tf.gfile.GFile(graph_filename,'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
def restore_weights(sess,opt):
var_list = tf.trainable_variables()
g_list = tf.global_variables()
return graph_def
# add batch normalization params into trainable variables
bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
var_list +=bn_moving_vars
# create saver to save and restore weights
saver = tf.train.Saver(var_list = var_list)
saver.restore(sess,opt.pretrain_weights)
def demo():
# input and output folder
@@ -36,22 +43,32 @@ def demo():
# read standard landmarks for preprocessing images
lm3D = load_lm3d()
batchsize = 1
n = 0
# build reconstruction model
with tf.Graph().as_default() as graph,tf.device('/cpu:0'):
opt = Option()
opt.batch_size = 1
opt.is_train = False
FaceReconstructor = Face3D()
images = tf.placeholder(name = 'input_imgs', shape = [batchsize,224,224,3], dtype = tf.float32)
graph_def = load_graph('network/FaceReconModel.pb')
tf.import_graph_def(graph_def,name='resnet',input_map={'input_imgs:0': images})
images = tf.placeholder(name = 'input_imgs', shape = [opt.batch_size,224,224,3], dtype = tf.float32)
# output coefficients of R-Net (dim = 257)
coeff = graph.get_tensor_by_name('resnet/coeff:0')
if opt.use_pb and os.path.isfile('network/FaceReconModel.pb'):
print('Using pre-trained .pb file.')
use_pb = True
graph_def = load_graph('network/FaceReconModel.pb')
tf.import_graph_def(graph_def,name='resnet',input_map={'input_imgs:0': images})
# output coefficients of R-Net (dim = 257)
coeff = graph.get_tensor_by_name('resnet/coeff:0')
else:
print('Using pre-trained .ckpt file: %s'%opt.pretrain_weights)
use_pb = False
import networks
coeff = networks.R_Net(images,is_training=False)
# reconstructing faces
FaceReconstructor.Reconstruction_Block(coeff,batchsize)
FaceReconstructor.Reconstruction_Block(coeff,opt)
face_shape = FaceReconstructor.face_shape_t
face_texture = FaceReconstructor.face_texture
face_color = FaceReconstructor.face_color
@@ -61,6 +78,9 @@ def demo():
with tf.Session() as sess:
if not use_pb:
restore_weights(sess,opt)
print('reconstructing...')
for file in img_list:
n += 1
@@ -68,7 +88,7 @@ def demo():
# load images and corresponding 5 facial landmarks
img,lm = load_img(file,file.replace('png','txt').replace('jpg','txt'))
# preprocess input image
input_img,lm_new,transform_params = Preprocess(img,lm,lm3D)
input_img,lm_new,transform_params = align_img(img,lm,lm3D)
coeff_,face_shape_,face_texture_,face_color_,landmarks_2d_,recon_img_,tri_ = sess.run([coeff,\
face_shape,face_texture,face_color,landmarks_2d,recon_img,tri],feed_dict = {images: input_img})
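Reader note (illustrative; the slice boundaries match Split_coeff in face_decoder.py below): the 257-dimensional R-Net output referenced here splits as
# identity     coeff[:,   0: 80]   80
# expression   coeff[:,  80:144]   64
# texture      coeff[:, 144:224]   80
# pose angles  coeff[:, 224:227]    3
# lighting     coeff[:, 227:254]   27
# translation  coeff[:, 254:257]    3
#                          total   257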

View file

@@ -7,40 +7,42 @@ import platform
is_windows = platform.system() == "Windows"
if not is_windows:
import mesh_renderer
from renderer import mesh_renderer
###############################################################################################
# Reconstruct 3D face based on output coefficients and facemodel
###############################################################################################
# BFM 3D face model
class BFM():
def __init__(self,model_path = 'BFM/BFM_model_front.mat'):
def __init__(self,model_path = './BFM/BFM_model_front.mat'):
model = loadmat(model_path)
self.meanshape = tf.constant(model['meanshape']) # mean face shape. [3*N,1]
self.idBase = tf.constant(model['idBase']) # identity basis. [3*N,80]
self.exBase = tf.constant(model['exBase'].astype(np.float32)) # expression basis. [3*N,64]
self.meantex = tf.constant(model['meantex']) # mean face texture. [3*N,1] (0-255)
self.texBase = tf.constant(model['texBase']) # texture basis. [3*N,80]
self.point_buf = tf.constant(model['point_buf']) # triangle indices for each vertex that lies in. starts from 1. [N,8]
self.face_buf = tf.constant(model['tri']) # vertex indices in each triangle. starts from 1. [F,3]
self.keypoints = tf.squeeze(tf.constant(model['keypoints'])) # vertex indices of 68 facial landmarks. starts from 1. [68,1]
self.point_buf = tf.constant(model['point_buf']) # face indices for each vertex that lies in. starts from 1. [N,8]
self.face_buf = tf.constant(model['tri']) # vertex indices for each face. starts from 1. [F,3]
self.front_mask_render = tf.squeeze(tf.constant(model['frontmask2_idx'])) # vertex indices for small face region to compute photometric error. starts from 1.
self.mask_face_buf = tf.constant(model['tri_mask2']) # vertex indices for each face from small face region. starts from 1. [f,3]
self.skin_mask = tf.squeeze(tf.constant(model['skinmask'])) # vertex indices for pre-defined skin region to compute reflectance loss
self.keypoints = tf.squeeze(tf.constant(model['keypoints'])) # vertex indices for 68 landmarks. starts from 1. [68,1]
# Analytic 3D face reconstructor
# Analytic 3D face
class Face3D():
def __init__(self):
facemodel = BFM()
self.facemodel = facemodel
# analytic 3D face reconstructions with coefficients from R-Net
def Reconstruction_Block(self,coeff,batchsize):
def Reconstruction_Block(self,coeff,opt):
#coeff: [batchsize,257] reconstruction coefficients
id_coeff,ex_coeff,tex_coeff,angles,translation,gamma = self.Split_coeff(coeff)
id_coeff,ex_coeff,tex_coeff,angles,translation,gamma,camera_scale,f_scale = self.Split_coeff(coeff)
# [batchsize,N,3] canonical face shape in BFM space
face_shape = self.Shape_formation_block(id_coeff,ex_coeff,self.facemodel)
# [batchsize,N,3] vertex texture (in RGB order)
face_texture = self.Texture_formation_block(tex_coeff,self.facemodel)
self.face_texture = face_texture
# [batchsize,3,3] rotation matrix for face shape
rotation = self.Compute_rotation_matrix(angles)
# [batchsize,N,3] vertex normal
@@ -49,38 +51,44 @@ class Face3D():
# do rigid transformation for face shape using predicted rotation and translation
face_shape_t = self.Rigid_transform_block(face_shape,rotation,translation)
self.face_shape_t = face_shape_t
# compute 2d landmark projections
# landmark_p: [batchsize,68,2]
face_landmark_t = self.Compute_landmark(face_shape_t,self.facemodel)
landmark_p = self.Projection_block(face_landmark_t) # 256*256 image
landmark_p = tf.stack([landmark_p[:,:,0],223. - landmark_p[:,:,1]],axis = 2)
self.landmark_p = landmark_p
landmark_p = self.Projection_block(face_landmark_t,camera_scale,f_scale)
# [batchsize,N,3] vertex color (in RGB order)
face_color = self.Illumination_block(face_texture, norm_r, gamma)
# reconstruction images and region masks for computing photometric loss
render_imgs,img_mask,img_mask_crop = self.Render_block(face_shape_t,norm_r,face_color,camera_scale,f_scale,self.facemodel,opt.batch_size,opt.is_train)
self.id_coeff = id_coeff
self.ex_coeff = ex_coeff
self.tex_coeff = tex_coeff
self.f_scale = f_scale
self.gamma = gamma
self.face_shape = face_shape
self.face_shape_t = face_shape_t
self.face_texture = face_texture
self.face_color = face_color
self.landmark_p = landmark_p
self.render_imgs = render_imgs
self.img_mask = img_mask
self.img_mask_crop = img_mask_crop
# reconstruction images
if not is_windows:
render_imgs = self.Render_block(face_shape_t,norm_r,face_color,self.facemodel,batchsize)
render_imgs = tf.clip_by_value(render_imgs,0,255)
render_imgs = tf.cast(render_imgs,tf.float32)
self.render_imgs = render_imgs
else:
self.render_imgs = []
######################################################################################################
#----------------------------------------------------------------------------------------------
def Split_coeff(self,coeff):
id_coeff = coeff[:,:80] #identity
ex_coeff = coeff[:,80:144] #expression
tex_coeff = coeff[:,144:224] #texture
angles = coeff[:,224:227] #euler angles for pose
gamma = coeff[:,227:254] #lighting
translation = coeff[:,254:257] #translation
return id_coeff,ex_coeff,tex_coeff,angles,translation,gamma
id_coeff = coeff[:,:80]
ex_coeff = coeff[:,80:144]
tex_coeff = coeff[:,144:224]
angles = coeff[:,224:227]
gamma = coeff[:,227:254]
translation = coeff[:,254:257]
camera_scale = tf.ones([tf.shape(coeff)[0],1])
f_scale = tf.ones([tf.shape(coeff)[0],1])
return id_coeff,ex_coeff,tex_coeff,angles,translation,gamma,camera_scale,f_scale
def Shape_formation_block(self,id_coeff,ex_coeff,facemodel):
face_shape = tf.einsum('ij,aj->ai',facemodel.idBase,id_coeff) + \
@@ -170,31 +178,27 @@ class Face3D():
# R = RzRyRx
rotation = tf.matmul(tf.matmul(rotation_Z,rotation_Y),rotation_X)
# our face shape is stored as N*3, so we use the transpose of R; rotated shapes can then be computed as face_shape*R
rotation = tf.transpose(rotation, perm = [0,2,1])
return rotation
def Projection_block(self,face_shape,focal=1015.0,half_image_width=112.):
def Projection_block(self,face_shape,camera_scale,f_scale):
# pre-defined camera focal length for perspective projection
focal = tf.constant(focal)
# focal = tf.constant(400.0)
focal = tf.constant(1015.0)
focal = focal*f_scale
focal = tf.reshape(focal,[-1,1])
batchsize = tf.shape(face_shape)[0]
# center = tf.constant(112.0)
batchsize = tf.shape(focal)[0]
# define camera position
camera_pos = tf.reshape(tf.constant([0.0,0.0,10.0]),[1,1,3])
camera_pos = tf.reshape(tf.constant([0.0,0.0,10.0]),[1,1,3])*tf.reshape(camera_scale,[-1,1,1])
reverse_z = tf.tile(tf.reshape(tf.constant([1.0,0,0,0,1,0,0,0,-1.0]),[1,3,3]),[tf.shape(face_shape)[0],1,1])
# compute projection matrix
p_matrix = tf.concat([focal*tf.ones([batchsize,1]),tf.zeros([batchsize,1]),half_image_width*tf.ones([batchsize,1]),tf.zeros([batchsize,1]),\
focal*tf.ones([batchsize,1]),half_image_width*tf.ones([batchsize,1]),tf.zeros([batchsize,2]),tf.ones([batchsize,1])],axis = 1)
# p_matrix = tf.tile(tf.reshape(p_matrix,[1,3,3]),[tf.shape(face_shape)[0],1,1])
p_matrix = tf.concat([focal,tf.zeros([batchsize,1]),112.*tf.ones([batchsize,1]),tf.zeros([batchsize,1]),focal,112.*tf.ones([batchsize,1]),tf.zeros([batchsize,2]),tf.ones([batchsize,1])],axis = 1)
p_matrix = tf.reshape(p_matrix,[-1,3,3])
# convert z in canonical space to the distance to camera
reverse_z = tf.tile(tf.reshape(tf.constant([1.0,0,0,0,1,0,0,0,-1.0]),[1,3,3]),[tf.shape(face_shape)[0],1,1])
# convert z in world space to the distance to camera
face_shape = tf.matmul(face_shape,reverse_z) + camera_pos
aug_projection = tf.matmul(face_shape,tf.transpose(p_matrix,[0,2,1]))
@@ -256,51 +260,84 @@ class Face3D():
return face_shape_t
def Render_block(self,face_shape,face_norm,face_color,facemodel,batchsize):
def Render_block(self,face_shape,face_norm,face_color,camera_scale,f_scale,facemodel,batchsize,is_train=True):
if is_train and is_windows:
raise ValueError('Training is not supported in a Windows environment.')
if is_windows:
return [],[],[]
# render reconstruction images
n_vex = int(facemodel.idBase.shape[0].value/3)
fov_y = 2*tf.atan(112/(1015.))*180./m.pi + tf.zeros([batchsize])
fov_y = 2*tf.atan(112./(1015.*f_scale))*180./m.pi
fov_y = tf.reshape(fov_y,[batchsize])
# full face region
face_shape = tf.reshape(face_shape,[batchsize,n_vex,3])
face_norm = tf.reshape(face_norm,[batchsize,n_vex,3])
face_color = tf.reshape(face_color,[batchsize,n_vex,3])
# camera settings
# same as in Projection_block
camera_position = tf.constant([[0,0,10.0]]) + tf.zeros([batchsize,3])
camera_lookat = tf.constant([[0,0,0.0]]) + tf.zeros([batchsize,3])
camera_up = tf.constant([[0,1.0,0]]) + tf.zeros([batchsize,3])
# pre-defined cropped face region
mask_face_shape = tf.gather(face_shape,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1)
mask_face_norm = tf.gather(face_norm,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1)
mask_face_color = tf.gather(face_color,tf.cast(facemodel.front_mask_render-1,tf.int32),axis = 1)
# set light source position (intensities are set to 0 because the vertex colors are already computed)
light_positions = tf.reshape(tf.constant([0,0,1e5]),[1,1,3]) + tf.zeros([batchsize,1,3])
light_intensities = tf.reshape(tf.constant([0.0,0.0,0.0]),[1,1,3])+tf.zeros([batchsize,1,3])
ambient_color = tf.reshape(tf.constant([1.0,1,1]),[1,3])+ tf.zeros([batchsize,3])
# camera settings
camera_position = tf.constant([[0,0,10.0]])*tf.reshape(camera_scale,[-1,1])
camera_lookat = tf.constant([0,0,0.0])
camera_up = tf.constant([0,1.0,0])
# set light source position (intensities are set to 0 because the vertex colors are already computed)
light_positions = tf.tile(tf.reshape(tf.constant([0,0,1e5]),[1,1,3]),[batchsize,1,1])
light_intensities = tf.tile(tf.reshape(tf.constant([0.0,0.0,0.0]),[1,1,3]),[batchsize,1,1])
ambient_color = tf.tile(tf.reshape(tf.constant([1.0,1,1]),[1,3]),[batchsize,1])
near_clip = 0.01*tf.ones([batchsize])
far_clip = 50*tf.ones([batchsize])
#using tf_mesh_renderer for rasterization (https://github.com/google/tf_mesh_renderer)
# img: [batchsize,224,224,4] images in RGBA order (0-255)
if not is_windows:
with tf.device('/cpu:0'):
img = mesh_renderer.mesh_renderer(face_shape,
tf.cast(facemodel.face_buf-1,tf.int32),
face_norm,
face_color,
camera_position = camera_position,
camera_lookat = camera_lookat,
camera_up = camera_up,
light_positions = light_positions,
light_intensities = light_intensities,
image_width = 224,
image_height = 224,
fov_y = fov_y, #12.5936
ambient_color = ambient_color,
near_clip = near_clip,
far_clip = far_clip)
return img
else:
return np.zeros([224, 224], dtype=np.int32)
# img: [batchsize,224,224,3] images in RGB order (0-255)
# mask:[batchsize,224,224,1] transparency for img ({0,1} value)
img_rgba = mesh_renderer.mesh_renderer(face_shape,
tf.cast(facemodel.face_buf-1,tf.int32),
face_norm,
face_color,
camera_position = camera_position,
camera_lookat = camera_lookat,
camera_up = camera_up,
light_positions = light_positions,
light_intensities = light_intensities,
image_width = 224,
image_height = 224,
fov_y = fov_y,
near_clip = 0.01,
far_clip = 50.0,
ambient_color = ambient_color)
img = img_rgba[:,:,:,:3]
mask = img_rgba[:,:,:,3:]
img = tf.cast(img[:,:,:,::-1],tf.float32) # convert RGB to BGR
mask = tf.cast(mask,tf.float32) # full face region
if is_train:
# compute mask for small face region
img_crop_rgba = mesh_renderer.mesh_renderer(mask_face_shape,
tf.cast(facemodel.mask_face_buf-1,tf.int32),
mask_face_norm,
mask_face_color,
camera_position = camera_position,
camera_lookat = camera_lookat,
camera_up = camera_up,
light_positions = light_positions,
light_intensities = light_intensities,
image_width = 224,
image_height = 224,
fov_y = fov_y,
near_clip = 0.01,
far_clip = 50.0,
ambient_color = ambient_color)
mask_f = img_crop_rgba[:,:,:,3:]
mask_f = tf.cast(mask_f,tf.float32) # small face region
return img,mask,mask_f
img_rgba = tf.cast(tf.clip_by_value(img_rgba,0,255),tf.float32)
return img_rgba,mask,mask
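A quick numeric check of the field of view used above (the 12.5936 noted in the removed code): with the fixed focal length of 1015 and half image size of 112 pixels,
import math
fov_y = 2 * math.atan(112. / 1015.) * 180. / math.pi  # f_scale = 1
print(fov_y)  # ~12.5936 degrees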

247
inception_resnet_v1.py Normal file
View file

@@ -0,0 +1,247 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception Resnet V1 architecture.
As described in http://arxiv.org/abs/1602.07261.
Inception-v4, Inception-ResNet and the Impact of Residual Connections
on Learning
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import tensorflow.contrib.slim as slim
# Inception-Resnet-A
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
"""Builds the 35x35 resnet block."""
with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
with tf.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
with tf.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
with tf.variable_scope('Branch_2'):
tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
tower_conv2_1 = slim.conv2d(tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3')
tower_conv2_2 = slim.conv2d(tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3')
mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3)
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1')
net += scale * up
if activation_fn:
net = activation_fn(net)
return net
# Inception-Resnet-B
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
"""Builds the 17x17 resnet block."""
with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):
with tf.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 128, 1, scope='Conv2d_1x1')
with tf.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 128, [1, 7],
scope='Conv2d_0b_1x7')
tower_conv1_2 = slim.conv2d(tower_conv1_1, 128, [7, 1],
scope='Conv2d_0c_7x1')
mixed = tf.concat([tower_conv, tower_conv1_2], 3)
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1')
net += scale * up
if activation_fn:
net = activation_fn(net)
return net
# Inception-Resnet-C
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
"""Builds the 8x8 resnet block."""
with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
with tf.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
with tf.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 192, [1, 3],
scope='Conv2d_0b_1x3')
tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [3, 1],
scope='Conv2d_0c_3x1')
mixed = tf.concat([tower_conv, tower_conv1_2], 3)
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1')
net += scale * up
if activation_fn:
net = activation_fn(net)
return net
def reduction_a(net, k, l, m, n):
with tf.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID',
scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3,
scope='Conv2d_0b_3x3')
tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3,
stride=2, padding='VALID',
scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_2'):
tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
return net
def reduction_b(net):
with tf.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_1'):
tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_2'):
tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3,
scope='Conv2d_0b_3x3')
tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_3'):
tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
net = tf.concat([tower_conv_1, tower_conv1_1,
tower_conv2_2, tower_pool], 3)
return net
def inference(images, keep_probability, phase_train=True,
bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
batch_norm_params = {
# Decay for the moving averages.
'decay': 0.995,
# epsilon to prevent 0s in variance.
'epsilon': 0.001,
# force in-place updates of mean and variance estimates
'updates_collections': None,
# Moving averages ends up in the trainable variables collection
'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
}
with slim.arg_scope([slim.conv2d, slim.fully_connected],
weights_initializer=slim.initializers.xavier_initializer(),
weights_regularizer=slim.l2_regularizer(weight_decay),
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params):
return inception_resnet_v1(images, is_training=phase_train,
dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)
def inception_resnet_v1(inputs, is_training=True,
dropout_keep_prob=0.8,
bottleneck_layer_size=128,
reuse=None,
scope='InceptionResnetV1'):
"""Creates the Inception Resnet V1 model.
Args:
inputs: a 4-D tensor of size [batch_size, height, width, 3].
num_classes: number of predicted classes.
is_training: whether is training or not.
dropout_keep_prob: float, the fraction to keep before final layer.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
Returns:
logits: the logits outputs of the model.
end_points: the set of end_points from the inception model.
"""
end_points = {}
with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training):
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'):
# 149 x 149 x 32
net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
scope='Conv2d_1a_3x3')
end_points['Conv2d_1a_3x3'] = net
# 147 x 147 x 32
net = slim.conv2d(net, 32, 3, padding='VALID',
scope='Conv2d_2a_3x3')
end_points['Conv2d_2a_3x3'] = net
# 147 x 147 x 64
net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
end_points['Conv2d_2b_3x3'] = net
# 73 x 73 x 64
net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
scope='MaxPool_3a_3x3')
end_points['MaxPool_3a_3x3'] = net
# 73 x 73 x 80
net = slim.conv2d(net, 80, 1, padding='VALID',
scope='Conv2d_3b_1x1')
end_points['Conv2d_3b_1x1'] = net
# 71 x 71 x 192
net = slim.conv2d(net, 192, 3, padding='VALID',
scope='Conv2d_4a_3x3')
end_points['Conv2d_4a_3x3'] = net
# 35 x 35 x 256
net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',
scope='Conv2d_4b_3x3')
end_points['Conv2d_4b_3x3'] = net
# 5 x Inception-resnet-A
net = slim.repeat(net, 5, block35, scale=0.17)
end_points['Mixed_5a'] = net
# Reduction-A
with tf.variable_scope('Mixed_6a'):
net = reduction_a(net, 192, 192, 256, 384)
end_points['Mixed_6a'] = net
# 10 x Inception-Resnet-B
net = slim.repeat(net, 10, block17, scale=0.10)
end_points['Mixed_6b'] = net
# Reduction-B
with tf.variable_scope('Mixed_7a'):
net = reduction_b(net)
end_points['Mixed_7a'] = net
# 5 x Inception-Resnet-C
net = slim.repeat(net, 5, block8, scale=0.20)
end_points['Mixed_8a'] = net
net = block8(net, activation_fn=None)
end_points['Mixed_8b'] = net
with tf.variable_scope('Logits'):
end_points['PrePool'] = net
#pylint: disable=no-member
net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
scope='AvgPool_1a_8x8')
net = slim.flatten(net)
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='Dropout')
end_points['PreLogitsFlatten'] = net
net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
scope='Bottleneck', reuse=False)
return net, end_points
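An illustrative call of this backbone under TF 1.x (the placeholder is hypothetical; in this commit the network is actually wrapped by Perceptual_Net in networks.py):
imgs = tf.placeholder(tf.float32, [None, 224, 224, 3], name='face_imgs')  # hypothetical input
feat, end_points = inception_resnet_v1(imgs, is_training=False, bottleneck_layer_size=128)
# feat: [batch, 128] bottleneck feature used as the identity embedding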

76
losses.py Normal file
View file

@@ -0,0 +1,76 @@
import tensorflow as tf
from scipy.io import loadmat,savemat
###############################################################################################
# Define losses for training
###############################################################################################
# photometric loss
# input_imgs and render_imgs are [batchsize,h,w,3] BGR images
# img_mask are [batchsize,h,w,1] attention masks
def Photo_loss(input_imgs,render_imgs,img_mask):
input_imgs = tf.cast(input_imgs,tf.float32)
# img_mask = tf.squeeze(img_mask,3)
img_mask = tf.stop_gradient(img_mask[:,:,:,0])
# photo loss with skin attention
photo_loss = tf.sqrt(tf.reduce_sum(tf.square(input_imgs - render_imgs),axis = 3))*img_mask/255
photo_loss = tf.reduce_sum(photo_loss) / tf.maximum(tf.reduce_sum(img_mask),1.0)
return photo_loss
# perceptual loss
# id_feature and id_label are [batchsize, c] identity features for reconstruction images and input images
def Perceptual_loss(id_feature,id_label):
id_feature = tf.nn.l2_normalize(id_feature, dim = 1)
id_label = tf.nn.l2_normalize(id_label, dim = 1)
# cosine similarity
sim = tf.reduce_sum(id_feature*id_label,1)
loss = tf.reduce_sum(tf.maximum(0.0,1.0 - sim))/tf.cast(tf.shape(id_feature)[0],tf.float32)
return loss
# landmark loss
# landmark_p and landmark_label are [batchsize, 68, 2] landmark projections for reconstruction images and input images
def Landmark_loss(landmark_p,landmark_label):
# we set higher weights for landmarks around the mouth and nose regions
landmark_weight = tf.concat([tf.ones([1,28]),20*tf.ones([1,3]),tf.ones([1,29]),20*tf.ones([1,8])],axis = 1)
landmark_weight = tf.tile(landmark_weight,[tf.shape(landmark_p)[0],1])
landmark_loss = tf.reduce_sum(tf.reduce_sum(tf.square(landmark_p-landmark_label),2)*landmark_weight)/(68.0*tf.cast(tf.shape(landmark_p)[0],tf.float32))
return landmark_loss
# coefficient regularization to ensure plausible 3d faces
def Regulation_loss(id_coeff,ex_coeff,tex_coeff,opt):
w_ex = opt.w_ex
w_tex = opt.w_tex
regulation_loss = tf.nn.l2_loss(id_coeff) + w_ex * tf.nn.l2_loss(ex_coeff) + w_tex * tf.nn.l2_loss(tex_coeff)
regulation_loss = 2*regulation_loss/ tf.cast(tf.shape(id_coeff)[0],tf.float32)
return regulation_loss
# albedo regularization to encourage a uniform skin albedo
def Reflectance_loss(face_texture,facemodel):
skin_mask = facemodel.skin_mask
skin_mask = tf.reshape(skin_mask,[1,tf.shape(skin_mask)[0],1])
texture_mean = tf.reduce_sum(face_texture*skin_mask,1)/tf.reduce_sum(skin_mask)
texture_mean = tf.expand_dims(texture_mean,1)
# minimize texture variance for pre-defined skin region
reflectance_loss = tf.reduce_sum(tf.square((face_texture - texture_mean)*skin_mask/255.0))/(tf.cast(tf.shape(face_texture)[0],tf.float32)*tf.reduce_sum(skin_mask))
return reflectance_loss
# gamma regularization to ensure a nearly-monochromatic light
def Gamma_loss(gamma):
gamma = tf.reshape(gamma,[-1,3,9])
gamma_mean = tf.reduce_mean(gamma,1, keep_dims = True)
gamma_loss = tf.reduce_mean(tf.square(gamma - gamma_mean))
return gamma_loss
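For reference, the terms above are combined with the weights defined in options.py; a minimal sketch of the weighted sum (the same combination appears in reconstruction_model.py below):
def total_loss(opt, photo, lm, id_sim, reg, refl, gamma):
    # w_photo, w_lm, w_id, w_reg, w_ref, w_gamma are fields of Option
    return (opt.w_photo * photo + opt.w_lm * lm + opt.w_id * id_sim
            + opt.w_reg * reg + opt.w_ref * refl + opt.w_gamma * gamma)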

View file

87
networks.py Normal file
View file

@@ -0,0 +1,87 @@
import tensorflow as tf
from tensorflow.contrib.slim.nets import resnet_v1
slim = tf.contrib.slim
from inception_resnet_v1 import inception_resnet_v1
###############################################################################################
#Define R-Net and Perceptual-Net for 3D face reconstruction
###############################################################################################
def R_Net(inputs,is_training=True):
#input: [Batchsize,H,W,C], 0-255, BGR image
inputs = tf.cast(inputs,tf.float32)
# standard ResNet-50 backbone (without the final classification FC layer)
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
net,end_points = resnet_v1.resnet_v1_50(inputs,is_training = is_training ,reuse = tf.AUTO_REUSE)
# Modified FC layer with 257 channels for reconstruction coefficients
net_id = slim.conv2d(net, 80, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer = tf.zeros_initializer(),
scope='fc-id')
net_ex = slim.conv2d(net, 64, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer = tf.zeros_initializer(),
scope='fc-ex')
net_tex = slim.conv2d(net, 80, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer = tf.zeros_initializer(),
scope='fc-tex')
net_angles = slim.conv2d(net, 3, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer = tf.zeros_initializer(),
scope='fc-angles')
net_gamma = slim.conv2d(net, 27, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer = tf.zeros_initializer(),
scope='fc-gamma')
net_t_xy = slim.conv2d(net, 2, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer = tf.zeros_initializer(),
scope='fc-XY')
net_t_z = slim.conv2d(net, 1, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer = tf.zeros_initializer(),
scope='fc-Z')
net_id = tf.squeeze(net_id, [1,2], name='fc-id/squeezed')
net_ex = tf.squeeze(net_ex, [1,2], name='fc-ex/squeezed')
net_tex = tf.squeeze(net_tex, [1,2],name='fc-tex/squeezed')
net_angles = tf.squeeze(net_angles,[1,2], name='fc-angles/squeezed')
net_gamma = tf.squeeze(net_gamma,[1,2], name='fc-gamma/squeezed')
net_t_xy = tf.squeeze(net_t_xy,[1,2], name='fc-XY/squeezed')
net_t_z = tf.squeeze(net_t_z,[1,2], name='fc-Z/squeezed')
net_ = tf.concat([net_id,net_ex,net_tex,net_angles,net_gamma,net_t_xy,net_t_z], axis = 1)
return net_
def Perceptual_Net(input_imgs):
#input_imgs: [Batchsize,H,W,C], 0-255, BGR image
input_imgs = tf.reshape(input_imgs,[-1,224,224,3])
input_imgs = tf.cast(input_imgs,tf.float32)
input_imgs = tf.clip_by_value(input_imgs,0,255)
input_imgs = (input_imgs - 127.5)/128.0
# standard FaceNet backbone
batch_norm_params = {
'decay': 0.995,
'epsilon': 0.001,
'updates_collections': None}
with slim.arg_scope([slim.conv2d, slim.fully_connected],weights_initializer=slim.initializers.xavier_initializer(),
weights_regularizer=slim.l2_regularizer(0.0),
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params):
feature_128,_ = inception_resnet_v1(input_imgs, bottleneck_layer_size=128, is_training=False, reuse=tf.AUTO_REUSE)
# output the last FC layer feature(before classification) as identity feature
return feature_128
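A minimal usage sketch (TF 1.x; placeholder name illustrative), mirroring the .ckpt branch added to demo.py:
images = tf.placeholder(tf.float32, [1, 224, 224, 3], name='input_imgs')
coeff = R_Net(images, is_training=False)  # [1, 257] reconstruction coefficients
id_feat = Perceptual_Net(images)          # [1, 128] identity feature for the perceptual loss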

60
options.py Normal file
View file

@@ -0,0 +1,60 @@
import numpy as np
import tensorflow as tf
import os
# training options
class Option():
def __init__(self):
#--------------------------------------------------------------------------------------
self.model_dir = 'result'
self.model_name = 'model_test2'
self.data_path = ['./processed_data']
self.val_data_path = ['./processed_data']
self.model_save_path = os.path.join(self.model_dir,self.model_name)
if not os.path.exists(self.model_save_path):
os.makedirs(self.model_save_path)
self.summary_dir = os.path.join(self.model_save_path,'summary')
self.train_summary_path = os.path.join(self.summary_dir, 'train')
self.val_summary_path = os.path.join(self.summary_dir, 'val')
#---------------------------------------------------------------------------------------
# visible gpu settings
self.config = tf.ConfigProto()
self.config.gpu_options.visible_device_list = '0'
self.is_train = True
self.use_pb = True
#---------------------------------------------------------------------------------------
# training parameters
self.w_photo = 1.92
self.w_lm = 1.6e-3
self.w_id = 0.2
self.w_reg = 3.0e-4
self.w_ref = 5.0
self.w_gamma = 10.0
self.w_ex = 0.8
self.w_tex = 1.7e-2
self.batch_size = 16
self.boundaries = [100000]
lr = [1e-4,2e-5]
self.global_step = tf.Variable(0,name='global_step',trainable = False)
self.lr = tf.train.piecewise_constant(self.global_step,self.boundaries,lr)
self.augment = True
self.train_maxiter = 200000
self.train_summary_iter = 50
self.image_summary_iter = 200
self.val_summary_iter = 1000
self.save_iter = 10000
#---------------------------------------------------------------------------------------
# initial weights for resnet and facenet
self.R_net_weights = os.path.join('./weights/resnet','resnet_v1_50.ckpt')
self.Perceptual_net_weights = './weights/id_net/model-20170512-110547.ckpt-250000'
self.pretrain_weights = os.path.join('train/model_test2','iter_100000.ckpt')
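At inference time the same options object is reused with a few fields overridden, as demo.py does above; for example:
opt = Option()
opt.batch_size = 1     # single-image reconstruction
opt.is_train = False   # Render_block then skips the extra face-region mask pass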

View file

@@ -1,6 +1,14 @@
import numpy as np
from scipy.io import loadmat,savemat
from PIL import Image
from skin import skinmask
import argparse
from utils import *
import os
import glob
import tensorflow as tf
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# solve a least-squares problem for pose (translation and scale)
def POS(xp,x):
@@ -27,7 +35,8 @@ def POS(xp,x):
return t,s
def process_img(img,lm,t,s,target_size = 224.):
# resize and crop images
def resize_n_crop_img(img,lm,t,s,target_size = 224.):
w0,h0 = img.size
w = (w0/s*102).astype(np.int32)
h = (h0/s*102).astype(np.int32)
@@ -49,7 +58,7 @@ def process_img(img,lm,t,s,target_size = 224.):
# resize and crop input images before sending to the R-Net
def Preprocess(img,lm,lm3D):
def align_img(img,lm,lm3D):
w0,h0 = img.size
@@ -60,9 +69,83 @@ def Preprocess(img,lm,lm3D):
t,s = POS(lm.transpose(),lm3D.transpose())
# processing the image
img_new,lm_new = process_img(img,lm,t,s)
img_new,lm_new = resize_n_crop_img(img,lm,t,s)
lm_new = np.stack([lm_new[:,0],223 - lm_new[:,1]], axis = 1)
trans_params = np.array([w0,h0,102.0/s,t[0],t[1]])
return img_new,lm_new,trans_params
# detect 68 face landmarks for aligned images
def get_68landmark(img,detector,sess):
input_img = detector.get_tensor_by_name('input_imgs:0')
lm = detector.get_tensor_by_name('landmark:0')
landmark = sess.run(lm,feed_dict={input_img:img})
landmark = np.reshape(landmark,[68,2])
landmark = np.stack([landmark[:,1],223-landmark[:,0]],axis=1)
return landmark
# get skin attention mask for aligned images
def get_skinmask(img):
img = np.squeeze(img,0)
skin_img = skinmask(img)
return skin_img
def parse_args():
desc = "Data preprocessing for Deep3DRecon."
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('--img_path', type=str, default='./input', help='original images folder')
parser.add_argument('--save_path', type=str, default='./processed_data', help='custom path to save processed images and labels')
return parser.parse_args()
# training data pre-processing
def preprocessing():
args = parse_args()
image_path = args.img_path
save_path = args.save_path
if not os.path.isdir(save_path):
os.makedirs(save_path)
if not os.path.isdir(os.path.join(save_path,'lm')):
os.makedirs(os.path.join(save_path,'lm'))
if not os.path.isdir(os.path.join(save_path,'lm_bin')):
os.makedirs(os.path.join(save_path,'lm_bin'))
if not os.path.isdir(os.path.join(save_path,'mask')):
os.makedirs(os.path.join(save_path,'mask'))
img_list = sorted(glob.glob(image_path + '/' + '*.png'))
img_list += sorted(glob.glob(image_path + '/' + '*.jpg'))
lm3D = load_lm3d()
with tf.Graph().as_default() as graph, tf.device('/gpu:0'):
lm_detector = load_graph(os.path.join('network','landmark68_detector.pb'))
tf.import_graph_def(lm_detector,name='')
sess = tf.InteractiveSession()
for file in img_list:
print(file)
name = file.split('/')[-1].replace('.png','').replace('.jpg','')
img,lm5p = load_img(file,file.replace('png','txt').replace('jpg','txt'))
img_align,_,_ = align_img(img,lm5p,lm3D) # [1,224,224,3] BGR image
lm68p = get_68landmark(img_align,graph,sess)
lm68p = lm68p.astype(np.float64)
skin_mask = get_skinmask(img_align)
Image.fromarray(img_align.squeeze(0)[:,:,::-1].astype(np.uint8),'RGB').save(os.path.join(save_path,name+'.png'))
Image.fromarray(skin_mask.astype(np.uint8)).save(os.path.join(save_path,'mask',name+'.png'))
np.savetxt(os.path.join(save_path,'lm',name+'.txt'),lm68p)
lm_bin = np.reshape(lm68p,[-1])
lm_bin.tofile(os.path.join(save_path,'lm_bin',name+'.bin'))
if __name__ == '__main__':
preprocessing()

86
reconstruction_model.py Normal file
View file

@@ -0,0 +1,86 @@
import tensorflow as tf
import face_decoder
import networks
import losses
from utils import *
###############################################################################################
# model for single image face reconstruction
###############################################################################################
class Reconstruction_model():
# initialization
def __init__(self,opt):
self.Face3D = face_decoder.Face3D() #analytic 3D face object
self.opt = opt # training options
self.Optimizer = tf.train.AdamOptimizer(learning_rate = opt.lr) # optimizer
# load input data from queue
def set_input(self,input_iterator):
self.imgs,self.lm_labels,self.attention_masks = input_iterator.get_next()
# forward process of the model
def forward(self,is_train = True):
with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
self.coeff = networks.R_Net(self.imgs,is_training=is_train)
self.Face3D.Reconstruction_Block(self.coeff,self.opt)
self.id_labels = networks.Perceptual_Net(self.imgs)
self.id_features = networks.Perceptual_Net(self.Face3D.render_imgs)
self.photo_loss = losses.Photo_loss(self.imgs,self.Face3D.render_imgs,self.Face3D.img_mask_crop*self.attention_masks)
self.landmark_loss = losses.Landmark_loss(self.Face3D.landmark_p,self.lm_labels)
self.perceptual_loss = losses.Perceptual_loss(self.id_features,self.id_labels)
self.reg_loss = losses.Regulation_loss(self.Face3D.id_coeff,self.Face3D.ex_coeff,self.Face3D.tex_coeff,self.opt)
self.reflect_loss = losses.Reflectance_loss(self.Face3D.face_texture,self.Face3D.facemodel)
self.gamma_loss = losses.Gamma_loss(self.Face3D.gamma)
self.loss = self.opt.w_photo*self.photo_loss + self.opt.w_lm*self.landmark_loss + self.opt.w_id*self.perceptual_loss\
+ self.opt.w_reg*self.reg_loss + self.opt.w_ref*self.reflect_loss + self.opt.w_gamma*self.gamma_loss
# backward process
def backward(self,is_train = True):
if is_train:
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
var_list = tf.trainable_variables()
update_var_list = [v for v in var_list if 'resnet_v1_50' in v.name or 'fc-' in v.name]
grads = tf.gradients(self.loss,update_var_list)
# get train_op with update_ops to ensure updating for bn parameters
with tf.control_dependencies(update_ops):
self.train_op = self.Optimizer.apply_gradients(zip(grads,update_var_list),global_step = self.opt.global_step)
# if not training stage, avoid updating variables
else:
pass
# forward and backward
def step(self, is_train = True):
with tf.variable_scope(tf.get_variable_scope()) as scope:
self.forward(is_train = is_train)
self.backward(is_train = is_train)
# statistics summarization
def summarize(self):
# scalar and histogram stats
stat = [
tf.summary.scalar('reflect_error',self.reflect_loss),
tf.summary.scalar('gamma_error',self.gamma_loss),
tf.summary.scalar('id_sim_error',self.perceptual_loss),
tf.summary.scalar('lm_error',tf.sqrt(self.landmark_loss)),
tf.summary.scalar('photo_error',self.photo_loss),
tf.summary.scalar('train_error',self.loss),
tf.summary.histogram('id_coeff',self.Face3D.id_coeff),
tf.summary.histogram('ex_coeff',self.Face3D.ex_coeff),
tf.summary.histogram('tex_coeff',self.Face3D.tex_coeff)]
self.summary_stat = tf.summary.merge(stat)
# combine face region of reconstruction images with input images
render_imgs = self.Face3D.render_imgs[:,:,:,::-1]*self.Face3D.img_mask + tf.cast(self.imgs[:,:,:,::-1],tf.float32)*(1-self.Face3D.img_mask)
render_imgs = tf.clip_by_value(render_imgs,0,255)
render_imgs = tf.cast(render_imgs,tf.uint8)
# image stats
img_stat = [tf.summary.image('imgs',tf.concat([tf.cast(self.imgs[:,:,:,::-1],tf.uint8),render_imgs],axis = 2), max_outputs = 8)]
self.summary_img = tf.summary.merge(img_stat)
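The training driver is not shown in this section; from the class API above, a typical TF 1.x loop would look roughly like the sketch below (file and variable names are illustrative assumptions, and a real driver would also restore the initial R_net_weights / Perceptual_net_weights before training):
# Illustrative training loop, not part of this diff.
import tensorflow as tf
from data_loader import load_dataset
from options import Option
from reconstruction_model import Reconstruction_model

opt = Option()
model = Reconstruction_model(opt)
model.set_input(load_dataset(opt, train=True))
model.step(is_train=True)   # builds the forward graph, losses and train_op
model.summarize()

with tf.Session(config=opt.config) as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(opt.train_summary_path, sess.graph)
    for it in range(opt.train_maxiter):
        _, loss_val, summary = sess.run([model.train_op, model.loss, model.summary_stat])
        if it % opt.train_summary_iter == 0:
            writer.add_summary(summary, it)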

1
renderer/__init__.py Normal file
View file

@@ -0,0 +1 @@
#.

152
renderer/camera_utils.py Normal file
View file

@@ -0,0 +1,152 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Collection of TF functions for managing 3D camera matrices."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow as tf
def perspective(aspect_ratio, fov_y, near_clip, far_clip):
"""Computes perspective transformation matrices.
Functionality mimes gluPerspective (third_party/GL/glu/include/GLU/glu.h).
Args:
aspect_ratio: float value specifying the image aspect ratio (width/height).
fov_y: 1-D float32 Tensor with shape [batch_size] specifying output vertical
field of views in degrees.
near_clip: 1-D float32 Tensor with shape [batch_size] specifying near
clipping plane distance.
far_clip: 1-D float32 Tensor with shape [batch_size] specifying far clipping
plane distance.
Returns:
A [batch_size, 4, 4] float tensor that maps from right-handed points in eye
space to left-handed points in clip space.
"""
# The multiplication of fov_y by pi/360.0 simultaneously converts to radians
# and adds the half-angle factor of .5.
focal_lengths_y = 1.0 / tf.tan(fov_y * (math.pi / 360.0))
depth_range = far_clip - near_clip
p_22 = -(far_clip + near_clip) / depth_range
p_23 = -2.0 * (far_clip * near_clip / depth_range)
zeros = tf.zeros_like(p_23, dtype=tf.float32)
# pyformat: disable
perspective_transform = tf.concat(
[
focal_lengths_y / aspect_ratio, zeros, zeros, zeros,
zeros, focal_lengths_y, zeros, zeros,
zeros, zeros, p_22, p_23,
zeros, zeros, -tf.ones_like(p_23, dtype=tf.float32), zeros
], axis=0)
# pyformat: enable
perspective_transform = tf.reshape(perspective_transform, [4, 4, -1])
return tf.transpose(perspective_transform, [2, 0, 1])
def look_at(eye, center, world_up):
"""Computes camera viewing matrices.
Functionality mimes gluLookAt (third_party/GL/glu/include/GLU/glu.h).
Args:
eye: 2-D float32 tensor with shape [batch_size, 3] containing the XYZ world
space position of the camera.
center: 2-D float32 tensor with shape [batch_size, 3] containing a position
along the center of the camera's gaze.
world_up: 2-D float32 tensor with shape [batch_size, 3] specifying the
world's up direction; the output camera will have no tilt with respect
to this direction.
Returns:
A [batch_size, 4, 4] float tensor containing a right-handed camera
extrinsics matrix that maps points from world space to points in eye space.
"""
batch_size = center.shape[0].value
vector_degeneracy_cutoff = 1e-6
forward = center - eye
forward_norm = tf.norm(forward, ord='euclidean', axis=1, keep_dims=True)
# tf.assert_greater(
# forward_norm,
# vector_degeneracy_cutoff,
# message='Camera matrix is degenerate because eye and center are close.')
forward = tf.divide(forward, forward_norm)
to_side = tf.cross(forward, world_up)
to_side_norm = tf.norm(to_side, ord='euclidean', axis=1, keep_dims=True)
# tf.assert_greater(
# to_side_norm,
# vector_degeneracy_cutoff,
# message='Camera matrix is degenerate because up and gaze are close or'
# 'because up is degenerate.')
to_side = tf.divide(to_side, to_side_norm)
cam_up = tf.cross(to_side, forward)
w_column = tf.constant(
batch_size * [[0., 0., 0., 1.]], dtype=tf.float32) # [batch_size, 4]
w_column = tf.reshape(w_column, [batch_size, 4, 1])
view_rotation = tf.stack(
[to_side, cam_up, -forward,
tf.zeros_like(to_side, dtype=tf.float32)],
axis=1) # [batch_size, 4, 3] matrix
view_rotation = tf.concat(
[view_rotation, w_column], axis=2) # [batch_size, 4, 4]
identity_batch = tf.tile(tf.expand_dims(tf.eye(3), 0), [batch_size, 1, 1])
view_translation = tf.concat([identity_batch, tf.expand_dims(-eye, 2)], 2)
view_translation = tf.concat(
[view_translation,
tf.reshape(w_column, [batch_size, 1, 4])], 1)
camera_matrices = tf.matmul(view_rotation, view_translation)
return camera_matrices
def euler_matrices(angles):
"""Computes a XYZ Tait-Bryan (improper Euler angle) rotation.
Returns 4x4 matrices for convenient multiplication with other transformations.
Args:
angles: a [batch_size, 3] tensor containing X, Y, and Z angles in radians.
Returns:
a [batch_size, 4, 4] tensor of matrices.
"""
s = tf.sin(angles)
c = tf.cos(angles)
# Rename variables for readability in the matrix definition below.
c0, c1, c2 = (c[:, 0], c[:, 1], c[:, 2])
s0, s1, s2 = (s[:, 0], s[:, 1], s[:, 2])
zeros = tf.zeros_like(s[:, 0])
ones = tf.ones_like(s[:, 0])
# pyformat: disable
flattened = tf.concat(
[
c2 * c1, c2 * s1 * s0 - c0 * s2, s2 * s0 + c2 * c0 * s1, zeros,
c1 * s2, c2 * c0 + s2 * s1 * s0, c0 * s2 * s1 - c2 * s0, zeros,
-s1, c1 * s0, c1 * c0, zeros,
zeros, zeros, zeros, ones
],
axis=0)
# pyformat: enable
reshaped = tf.reshape(flattened, [4, 4, -1])
return tf.transpose(reshaped, [2, 0, 1])
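A small usage sketch for the two camera helpers above (TF 1.x; the camera values mirror the ones used in face_decoder.py):
eye = tf.constant([[0.0, 0.0, 10.0]])    # camera position
center = tf.constant([[0.0, 0.0, 0.0]])  # look-at target
world_up = tf.constant([[0.0, 1.0, 0.0]])
extrinsics = look_at(eye, center, world_up)               # [1, 4, 4] world -> eye
proj = perspective(aspect_ratio=1.0,
                   fov_y=tf.constant([12.5936]),
                   near_clip=tf.constant([0.01]),
                   far_clip=tf.constant([50.0]))          # [1, 4, 4] eye -> clip
clip_from_world = tf.matmul(proj, extrinsics)             # full camera transform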

404
renderer/mesh_renderer.py Normal file
View file

@@ -0,0 +1,404 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Differentiable 3-D rendering of a triangle mesh."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from renderer import camera_utils
from renderer import rasterize_triangles
def phong_shader(normals,
alphas,
pixel_positions,
light_positions,
light_intensities,
diffuse_colors=None,
camera_position=None,
specular_colors=None,
shininess_coefficients=None,
ambient_color=None):
"""Computes pixelwise lighting from rasterized buffers with the Phong model.
Args:
normals: a 4D float32 tensor with shape [batch_size, image_height,
image_width, 3]. The inner dimension is the world space XYZ normal for
the corresponding pixel. Should be already normalized.
alphas: a 3D float32 tensor with shape [batch_size, image_height,
image_width]. The inner dimension is the alpha value (transparency)
for the corresponding pixel.
pixel_positions: a 4D float32 tensor with shape [batch_size, image_height,
image_width, 3]. The inner dimension is the world space XYZ position for
the corresponding pixel.
light_positions: a 3D tensor with shape [batch_size, light_count, 3]. The
XYZ position of each light in the scene. In the same coordinate space as
pixel_positions.
light_intensities: a 3D tensor with shape [batch_size, light_count, 3]. The
RGB intensity values for each light. Intensities may be above one.
diffuse_colors: a 4D float32 tensor with shape [batch_size, image_height,
image_width, 3]. The inner dimension is the diffuse RGB coefficients at
a pixel in the range [0, 1].
camera_position: a 1D tensor with shape [batch_size, 3]. The XYZ camera
position in the scene. If supplied, specular reflections will be
computed. If not supplied, specular_colors and shininess_coefficients
are expected to be None. In the same coordinate space as
pixel_positions.
specular_colors: a 4D float32 tensor with shape [batch_size, image_height,
image_width, 3]. The inner dimension is the specular RGB coefficients at
a pixel in the range [0, 1]. If None, assumed to be tf.zeros()
shininess_coefficients: A 3D float32 tensor that is broadcasted to shape
[batch_size, image_height, image_width]. The inner dimension is the
shininess coefficient for the object at a pixel. Dimensions that are
constant can be given length 1, so [batch_size, 1, 1] and [1, 1, 1] are
also valid input shapes.
ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
color, which is added to each pixel before tone mapping. If None, it is
assumed to be tf.zeros().
Returns:
A 4D float32 tensor of shape [batch_size, image_height, image_width, 4]
containing the lit RGBA color values for each image at each pixel. Colors
are in the range [0,1].
Raises:
ValueError: An invalid argument to the method is detected.
"""
batch_size, image_height, image_width = [s.value for s in normals.shape[:-1]]
light_count = light_positions.shape[1].value
pixel_count = image_height * image_width
# Reshape all values to easily do pixelwise computations:
normals = tf.reshape(normals, [batch_size, -1, 3])
alphas = tf.reshape(alphas, [batch_size, -1, 1])
diffuse_colors = tf.reshape(diffuse_colors, [batch_size, -1, 3])
if camera_position is not None:
specular_colors = tf.reshape(specular_colors, [batch_size, -1, 3])
# Ambient component
output_colors = tf.zeros([batch_size, image_height * image_width, 3])
if ambient_color is not None:
ambient_reshaped = tf.expand_dims(ambient_color, axis=1)
output_colors = tf.add(output_colors, ambient_reshaped * diffuse_colors)
# Diffuse component
pixel_positions = tf.reshape(pixel_positions, [batch_size, -1, 3])
per_light_pixel_positions = tf.stack(
[pixel_positions] * light_count,
axis=1) # [batch_size, light_count, pixel_count, 3]
directions_to_lights = tf.nn.l2_normalize(
tf.expand_dims(light_positions, axis=2) - per_light_pixel_positions,
dim=3) # [batch_size, light_count, pixel_count, 3]
# The specular component should only contribute when the light and normal
# face one another (i.e. the dot product is nonnegative):
normals_dot_lights = tf.clip_by_value(
tf.reduce_sum(
tf.expand_dims(normals, axis=1) * directions_to_lights, axis=3), 0.0,
1.0) # [batch_size, light_count, pixel_count]
diffuse_output = tf.expand_dims(
diffuse_colors, axis=1) * tf.expand_dims(
normals_dot_lights, axis=3) * tf.expand_dims(
light_intensities, axis=2)
diffuse_output = tf.reduce_sum(
diffuse_output, axis=1) # [batch_size, pixel_count, 3]
output_colors = tf.add(output_colors, diffuse_output)
# Specular component
if camera_position is not None:
camera_position = tf.reshape(camera_position, [batch_size, 1, 3])
mirror_reflection_direction = tf.nn.l2_normalize(
2.0 * tf.expand_dims(normals_dot_lights, axis=3) * tf.expand_dims(
normals, axis=1) - directions_to_lights,
dim=3)
direction_to_camera = tf.nn.l2_normalize(
camera_position - pixel_positions, dim=2)
reflection_direction_dot_camera_direction = tf.reduce_sum(
tf.expand_dims(direction_to_camera, axis=1) *
mirror_reflection_direction,
axis=3)
# The specular component should only contribute when the reflection is
# external:
reflection_direction_dot_camera_direction = tf.clip_by_value(
tf.nn.l2_normalize(reflection_direction_dot_camera_direction, dim=2),
0.0, 1.0)
# The specular component should also only contribute when the diffuse
# component contributes:
reflection_direction_dot_camera_direction = tf.where(
normals_dot_lights != 0.0, reflection_direction_dot_camera_direction,
tf.zeros_like(
reflection_direction_dot_camera_direction, dtype=tf.float32))
# Reshape to support broadcasting the shininess coefficient, which rarely
# varies per-vertex:
reflection_direction_dot_camera_direction = tf.reshape(
reflection_direction_dot_camera_direction,
[batch_size, light_count, image_height, image_width])
shininess_coefficients = tf.expand_dims(shininess_coefficients, axis=1)
specularity = tf.reshape(
tf.pow(reflection_direction_dot_camera_direction,
shininess_coefficients),
[batch_size, light_count, pixel_count, 1])
specular_output = tf.expand_dims(
specular_colors, axis=1) * specularity * tf.expand_dims(
light_intensities, axis=2)
specular_output = tf.reduce_sum(specular_output, axis=1)
output_colors = tf.add(output_colors, specular_output)
rgb_images = tf.reshape(output_colors,
[batch_size, image_height, image_width, 3])
alpha_images = tf.reshape(alphas, [batch_size, image_height, image_width, 1])
valid_rgb_values = tf.concat(3 * [alpha_images > 0.5], axis=3)
rgb_images = tf.where(valid_rgb_values, rgb_images,
tf.zeros_like(rgb_images, dtype=tf.float32))
return tf.reverse(tf.concat([rgb_images, alpha_images], axis=3), axis=[1])
def tone_mapper(image, gamma):
"""Applies gamma correction to the input image.
Tone maps the input image batch in order to make scenes with a high dynamic
range viewable. The gamma correction factor is computed separately per image,
but is shared between all provided channels. The exact function computed is:
image_out = A*image_in^gamma, where A is an image-wide constant computed so
that the maximum image value is approximately 1. The correction is applied
to all channels.
Args:
image: 4-D float32 tensor with shape [batch_size, image_height,
image_width, channel_count]. The batch of images to tone map.
gamma: 0-D float32 nonnegative tensor. Values of gamma below one compress
relative contrast in the image, and values above one increase it. A
value of 1 is equivalent to scaling the image to have a maximum value
of 1.
Returns:
4-D float32 tensor with shape [batch_size, image_height, image_width,
channel_count]. Contains the gamma-corrected images, clipped to the range
[0, 1].
"""
batch_size = image.shape[0].value
corrected_image = tf.pow(image, gamma)
image_max = tf.reduce_max(
tf.reshape(corrected_image, [batch_size, -1]), axis=1)
scaled_image = tf.divide(corrected_image,
tf.reshape(image_max, [batch_size, 1, 1, 1]))
return tf.clip_by_value(scaled_image, 0.0, 1.0)
def mesh_renderer(vertices,
triangles,
normals,
diffuse_colors,
camera_position,
camera_lookat,
camera_up,
light_positions,
light_intensities,
image_width,
image_height,
specular_colors=None,
shininess_coefficients=None,
ambient_color=None,
fov_y=40.0,
near_clip=0.01,
far_clip=50.0):
"""Renders an input scene using phong shading, and returns an output image.
Args:
vertices: 3-D float32 tensor with shape [batch_size, vertex_count, 3]. Each
triplet is an xyz position in world space.
triangles: 2-D int32 tensor with shape [triangle_count, 3]. Each triplet
should contain vertex indices describing a triangle such that the
triangle's normal points toward the viewer if the forward order of the
triplet defines a clockwise winding of the vertices. Gradients with
respect to this tensor are not available.
normals: 3-D float32 tensor with shape [batch_size, vertex_count, 3]. Each
triplet is the xyz vertex normal for its corresponding vertex. Each
vector is assumed to be already normalized.
diffuse_colors: 3-D float32 tensor with shape [batch_size,
vertex_count, 3]. The RGB diffuse reflection in the range [0,1] for
each vertex.
camera_position: 2-D tensor with shape [batch_size, 3] or 1-D tensor with
shape [3] specifying the XYZ world space camera position.
camera_lookat: 2-D tensor with shape [batch_size, 3] or 1-D tensor with
shape [3] containing an XYZ point along the center of the camera's gaze.
camera_up: 2-D tensor with shape [batch_size, 3] or 1-D tensor with shape
[3] containing the up direction for the camera. The camera will have no
tilt with respect to this direction.
light_positions: a 3-D tensor with shape [batch_size, light_count, 3]. The
XYZ position of each light in the scene. In the same coordinate space as
pixel_positions.
light_intensities: a 3-D tensor with shape [batch_size, light_count, 3]. The
RGB intensity values for each light. Intensities may be above one.
image_width: int specifying desired output image width in pixels.
image_height: int specifying desired output image height in pixels.
specular_colors: 3-D float32 tensor with shape [batch_size,
vertex_count, 3]. The RGB specular reflection in the range [0, 1] for
each vertex. If supplied, specular reflections will be computed, and
both specular_colors and shininess_coefficients are expected.
shininess_coefficients: a 0D-2D float32 tensor with maximum shape
[batch_size, vertex_count]. The phong shininess coefficient of each
vertex. A 0D tensor or float gives a constant shininess coefficient
across all batches and images. A 1D tensor must have shape [batch_size],
and a single shininess coefficient per image is used.
ambient_color: a 2D tensor with shape [batch_size, 3]. The RGB ambient
color, which is added to each pixel in the scene. If None, it is
assumed to be black.
fov_y: float, 0D tensor, or 1D tensor with shape [batch_size] specifying
desired output image y field of view in degrees.
near_clip: float, 0D tensor, or 1D tensor with shape [batch_size] specifying
near clipping plane distance.
far_clip: float, 0D tensor, or 1D tensor with shape [batch_size] specifying
far clipping plane distance.
Returns:
A 4-D float32 tensor of shape [batch_size, image_height, image_width, 4]
containing the lit RGBA color values for each image at each pixel. RGB
colors are the intensity values before tonemapping and can be in the range
[0, infinity]. Clipping to the range [0,1] with tf.clip_by_value is likely
reasonable for both viewing and training most scenes. More complex scenes
with multiple lights should tone map color values for display only. One
simple tonemapping approach is to rescale color values as x/(1+x); gamma
compression is another common technique. Alpha values are zero for
background pixels and near one for mesh pixels.
Raises:
ValueError: An invalid argument to the method is detected.
"""
if len(vertices.shape) != 3:
raise ValueError('Vertices must have shape [batch_size, vertex_count, 3].')
batch_size = vertices.shape[0].value
# print(batch_size)
if len(normals.shape) != 3:
raise ValueError('Normals must have shape [batch_size, vertex_count, 3].')
if len(light_positions.shape) != 3:
raise ValueError(
'Light_positions must have shape [batch_size, light_count, 3].')
if len(light_intensities.shape) != 3:
raise ValueError(
'Light_intensities must have shape [batch_size, light_count, 3].')
if len(diffuse_colors.shape) != 3:
raise ValueError(
'vertex_diffuse_colors must have shape [batch_size, vertex_count, 3].')
if (ambient_color is not None and
ambient_color.get_shape().as_list() != [batch_size, 3]):
raise ValueError('Ambient_color must have shape [batch_size, 3].')
if camera_position.get_shape().as_list() == [3]:
camera_position = tf.tile(
tf.expand_dims(camera_position, axis=0), [batch_size, 1])
elif camera_position.get_shape().as_list() != [batch_size, 3]:
raise ValueError('Camera_position must have shape [batch_size, 3]')
if camera_lookat.get_shape().as_list() == [3]:
camera_lookat = tf.tile(
tf.expand_dims(camera_lookat, axis=0), [batch_size, 1])
elif camera_lookat.get_shape().as_list() != [batch_size, 3]:
raise ValueError('Camera_lookat must have shape [batch_size, 3]')
if camera_up.get_shape().as_list() == [3]:
camera_up = tf.tile(tf.expand_dims(camera_up, axis=0), [batch_size, 1])
elif camera_up.get_shape().as_list() != [batch_size, 3]:
raise ValueError('Camera_up must have shape [batch_size, 3]')
if isinstance(fov_y, float):
fov_y = tf.constant(batch_size * [fov_y], dtype=tf.float32)
elif not fov_y.get_shape().as_list():
fov_y = tf.tile(tf.expand_dims(fov_y, 0), [batch_size])
elif fov_y.get_shape().as_list() != [batch_size]:
raise ValueError('Fov_y must be a float, a 0D tensor, or a 1D tensor with '
'shape [batch_size]')
if isinstance(near_clip, float):
near_clip = tf.constant(batch_size * [near_clip], dtype=tf.float32)
elif not near_clip.get_shape().as_list():
near_clip = tf.tile(tf.expand_dims(near_clip, 0), [batch_size])
elif near_clip.get_shape().as_list() != [batch_size]:
raise ValueError('Near_clip must be a float, a 0D tensor, or a 1D tensor '
'with shape [batch_size]')
if isinstance(far_clip, float):
far_clip = tf.constant(batch_size * [far_clip], dtype=tf.float32)
elif not far_clip.get_shape().as_list():
far_clip = tf.tile(tf.expand_dims(far_clip, 0), [batch_size])
elif far_clip.get_shape().as_list() != [batch_size]:
raise ValueError('Far_clip must be a float, a 0D tensor, or a 1D tensor '
'with shape [batch_size]')
if specular_colors is not None and shininess_coefficients is None:
raise ValueError(
'Specular colors were supplied without shininess coefficients.')
if shininess_coefficients is not None and specular_colors is None:
raise ValueError(
'Shininess coefficients were supplied without specular colors.')
if specular_colors is not None:
# Since a 0-D float32 tensor is accepted, also accept a float.
if isinstance(shininess_coefficients, float):
shininess_coefficients = tf.constant(
shininess_coefficients, dtype=tf.float32)
if len(specular_colors.shape) != 3:
raise ValueError('The specular colors must have shape [batch_size, '
'vertex_count, 3].')
if len(shininess_coefficients.shape) > 2:
raise ValueError('The shininess coefficients must have shape at most '
'[batch_size, vertex_count].')
# If we don't have per-vertex coefficients, we can just reshape the
# input shininess to broadcast later, rather than interpolating an
# additional vertex attribute:
if len(shininess_coefficients.shape) < 2:
vertex_attributes = tf.concat(
[normals, vertices, diffuse_colors, specular_colors], axis=2)
else:
vertex_attributes = tf.concat(
[
normals, vertices, diffuse_colors, specular_colors,
tf.expand_dims(shininess_coefficients, axis=2)
],
axis=2)
else:
vertex_attributes = tf.concat([normals, vertices, diffuse_colors], axis=2)
camera_matrices = camera_utils.look_at(camera_position, camera_lookat,
camera_up)
perspective_transforms = camera_utils.perspective(image_width / image_height,
fov_y, near_clip, far_clip)
clip_space_transforms = tf.matmul(perspective_transforms, camera_matrices)
pixel_attributes,alphas = rasterize_triangles.rasterize_triangles(
vertices, vertex_attributes, triangles, clip_space_transforms,
image_width, image_height, [-1] * vertex_attributes.shape[2].value)
# Extract the interpolated vertex attributes from the pixel buffer and
# supply them to the shader:
pixel_normals = tf.nn.l2_normalize(pixel_attributes[:, :, :, 0:3], dim=3)
pixel_positions = pixel_attributes[:, :, :, 3:6]
diffuse_colors = pixel_attributes[:, :, :, 6:9]
if specular_colors is not None:
specular_colors = pixel_attributes[:, :, :, 9:12]
# Retrieve the interpolated shininess coefficients if necessary, or just
# reshape our input for broadcasting:
if len(shininess_coefficients.shape) == 2:
shininess_coefficients = pixel_attributes[:, :, :, 12]
else:
shininess_coefficients = tf.reshape(shininess_coefficients, [-1, 1, 1])
# pixel_mask = tf.cast(tf.reduce_any(diffuse_colors >= 0, axis=3), tf.float32)
renders = phong_shader(
normals=pixel_normals,
alphas=alphas,
pixel_positions=pixel_positions,
light_positions=light_positions,
light_intensities=light_intensities,
diffuse_colors=diffuse_colors,
camera_position=camera_position if specular_colors is not None else None,
specular_colors=specular_colors,
shininess_coefficients=shininess_coefficients,
ambient_color=ambient_color)
return renders
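For orientation (not part of the commit), a minimal usage sketch of mesh_renderer and tone_mapper follows. It assumes a TF 1.x session, that the renderer package (camera_utils, rasterize_triangles and the compiled kernel .so) is importable, and uses a made-up single-triangle scene; every value below is illustrative only.

import tensorflow as tf

# One triangle, batch size 1; winding [0, 2, 1] chosen so the normal faces the
# camera per the convention described in the docstring above.
vertices = tf.constant([[[-1.0, -1.0, 0.0], [1.0, -1.0, 0.0], [0.0, 1.0, 0.0]]], dtype=tf.float32)
triangles = tf.constant([[0, 2, 1]], dtype=tf.int32)
normals = tf.constant([[[0.0, 0.0, 1.0]] * 3], dtype=tf.float32)
diffuse_colors = tf.constant([[[0.8, 0.2, 0.2]] * 3], dtype=tf.float32)
renders = mesh_renderer(
    vertices, triangles, normals, diffuse_colors,
    camera_position=tf.constant([0.0, 0.0, 3.0]),
    camera_lookat=tf.constant([0.0, 0.0, 0.0]),
    camera_up=tf.constant([0.0, 1.0, 0.0]),
    light_positions=tf.constant([[[0.0, 0.0, 3.0]]]),
    light_intensities=tf.constant([[[1.0, 1.0, 1.0]]]),
    image_width=256, image_height=256)
display = tone_mapper(renders[..., 0:3], 0.7)  # gamma-compress the RGB channels for viewing
with tf.Session() as sess:
    rgba, viewable = sess.run([renders, display])  # rgba has shape [1, 256, 256, 4]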

View file

@@ -0,0 +1,190 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Differentiable triangle rasterizer."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
# rasterize_triangles_module = tf.load_op_library(
# os.path.join(os.environ['TEST_SRCDIR'],
# 'tf_mesh_renderer/mesh_renderer/kernels/rasterize_triangles_kernel.so'))
rasterize_triangles_module = tf.load_op_library('./renderer/rasterize_triangles_kernel_1.so')
# This epsilon should be smaller than any valid barycentric reweighting factor
# (i.e. the per-pixel reweighting factor used to correct for the effects of
# perspective-incorrect barycentric interpolation). It is necessary primarily
# because the reweighting factor will be 0 for factors outside the mesh, and we
# need to ensure the image color and gradient outside the region of the mesh are
# 0.
_MINIMUM_REWEIGHTING_THRESHOLD = 1e-6
# This epsilon is the minimum absolute value of a homogeneous coordinate before
# it is clipped. It should be sufficiently large such that the output of
# the perspective divide step with this denominator still has good working
# precision with 32 bit arithmetic, and sufficiently small so that in practice
# vertices are almost never close enough to a clipping plane to be thresholded.
_MINIMUM_PERSPECTIVE_DIVIDE_THRESHOLD = 1e-6
def rasterize_triangles(vertices, attributes, triangles, projection_matrices,
image_width, image_height, background_value):
"""Rasterizes the input scene and computes interpolated vertex attributes.
NOTE: the rasterizer does no triangle clipping. Triangles that lie outside the
viewing frustum (esp. behind the camera) may be drawn incorrectly.
Args:
vertices: 3-D float32 tensor with shape [batch_size, vertex_count, 3]. Each
triplet is an xyz position in model space.
attributes: 3-D float32 tensor with shape [batch_size, vertex_count,
attribute_count]. Each vertex attribute is interpolated
across the triangle using barycentric interpolation.
triangles: 2-D int32 tensor with shape [triangle_count, 3]. Each triplet
should contain vertex indices describing a triangle such that the
triangle's normal points toward the viewer if the forward order of the
triplet defines a clockwise winding of the vertices. Gradients with
respect to this tensor are not available.
projection_matrices: 3-D float tensor with shape [batch_size, 4, 4]
containing model-view-perspective projection matrices.
image_width: int specifying desired output image width in pixels.
image_height: int specifying desired output image height in pixels.
background_value: a 1-D float32 tensor with shape [attribute_count]. Pixels
that lie outside all triangles take this value.
Returns:
A two-element tuple (attribute_images, alphas):
attribute_images: 4-D float32 tensor with shape [batch_size, image_height,
image_width, attribute_count], containing the interpolated vertex attributes at
each pixel, composited over background_value outside the rasterized geometry.
alphas: 4-D float32 tensor with shape [batch_size, image_height, image_width, 1],
approximately one where geometry was rasterized and exactly zero elsewhere.
Raises:
ValueError: An invalid argument to the method is detected.
"""
if not image_width > 0:
raise ValueError('Image width must be > 0.')
if not image_height > 0:
raise ValueError('Image height must be > 0.')
if len(vertices.shape) != 3:
raise ValueError('The vertex buffer must be 3D.')
batch_size = vertices.shape[0].value
vertex_count = vertices.shape[1].value
# We map the coordinates to normalized device coordinates before passing
# the scene to the rendering kernel to keep as many ops in tensorflow as
# possible.
homogeneous_coord = tf.ones([batch_size, vertex_count, 1], dtype=tf.float32)
vertices_homogeneous = tf.concat([vertices, homogeneous_coord], 2)
# Vertices are given in row-major order, but the transformation pipeline is
# column major:
clip_space_points = tf.matmul(
vertices_homogeneous, projection_matrices, transpose_b=True)
# Perspective divide, first thresholding the homogeneous coordinate to avoid
# the possibility of NaNs:
clip_space_points_w = tf.maximum(
tf.abs(clip_space_points[:, :, 3:4]),
_MINIMUM_PERSPECTIVE_DIVIDE_THRESHOLD) * tf.sign(
clip_space_points[:, :, 3:4])
normalized_device_coordinates = (
clip_space_points[:, :, 0:3] / clip_space_points_w)
per_image_uncorrected_barycentric_coordinates = []
per_image_vertex_ids = []
for im in range(vertices.shape[0]):
barycentric_coords, triangle_ids, _ = (
rasterize_triangles_module.rasterize_triangles(
normalized_device_coordinates[im, :, :], triangles, image_width,
image_height))
per_image_uncorrected_barycentric_coordinates.append(
tf.reshape(barycentric_coords, [-1, 3]))
# Gathers the vertex indices now because the indices don't contain a batch
# identifier, and reindexes the vertex ids so they point into the flattened
# (batch * vertex_count) attribute array built below:
vertex_ids = tf.gather(triangles, tf.reshape(triangle_ids, [-1]))
reindexed_ids = tf.add(vertex_ids, im * vertices.shape[1].value)
per_image_vertex_ids.append(reindexed_ids)
uncorrected_barycentric_coordinates = tf.concat(
per_image_uncorrected_barycentric_coordinates, axis=0)
vertex_ids = tf.concat(per_image_vertex_ids, axis=0)
# Indexes with each pixel's clip-space triangle's extrema (the pixel's
# 'corner points') ids to get the relevant properties for deferred shading.
flattened_vertex_attributes = tf.reshape(attributes,
[batch_size * vertex_count, -1])
corner_attributes = tf.gather(flattened_vertex_attributes, vertex_ids)
# Barycentric interpolation is linear in the reciprocal of the homogeneous
# W coordinate, so we use these weights to correct for the effects of
# perspective distortion after rasterization.
perspective_distortion_weights = tf.reciprocal(
tf.reshape(clip_space_points_w, [-1]))
corner_distortion_weights = tf.gather(perspective_distortion_weights,
vertex_ids)
# Apply perspective correction to the barycentric coordinates. This step is
# required since the rasterizer receives normalized-device coordinates (i.e.,
# after perspective division), so it can't apply perspective correction to the
# interpolated values.
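# Written out, the correction below computes, for screen-space barycentric
# weights b_i and clip-space w_i of the three corner vertices,
#   b_i' = (b_i / w_i) / sum_j (b_j / w_j),
# which is what the multiply / reduce_sum / divide ops implement.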
weighted_barycentric_coordinates = tf.multiply(
uncorrected_barycentric_coordinates, corner_distortion_weights)
barycentric_reweighting_factor = tf.reduce_sum(
weighted_barycentric_coordinates, axis=1)
corrected_barycentric_coordinates = tf.divide(
weighted_barycentric_coordinates,
tf.expand_dims(
tf.maximum(barycentric_reweighting_factor,
_MINIMUM_REWEIGHTING_THRESHOLD),
axis=1))
# Computes the pixel attributes by interpolating the known attributes at the
# corner points of the triangle interpolated with the barycentric coordinates.
weighted_vertex_attributes = tf.multiply(
corner_attributes,
tf.expand_dims(corrected_barycentric_coordinates, axis=2))
summed_attributes = tf.reduce_sum(weighted_vertex_attributes, axis=1)
attribute_images = tf.reshape(summed_attributes,
[batch_size, image_height, image_width, -1])
# Barycentric coordinates should approximately sum to one where there is
# rendered geometry, but be exactly zero where there is not.
alphas = tf.clip_by_value(
tf.reduce_sum(2.0 * corrected_barycentric_coordinates, axis=1), 0.0, 1.0)
alphas = tf.reshape(alphas, [batch_size, image_height, image_width, 1])
attributes_with_background = (
alphas * attribute_images + (1.0 - alphas) * background_value)
return attributes_with_background,alphas
@tf.RegisterGradient('RasterizeTriangles')
def _rasterize_triangles_grad(op, df_dbarys, df_dids, df_dz):
# Gradients are only supported for barycentric coordinates. Gradients for the
# z-buffer are possible as well but not currently implemented.
del df_dids, df_dz
return rasterize_triangles_module.rasterize_triangles_grad(
op.inputs[0], op.inputs[1], op.outputs[0], op.outputs[1], df_dbarys,
op.get_attr('image_width'), op.get_attr('image_height')), None
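As a quick illustration (again not part of the commit), the rasterizer can be exercised on its own with an identity projection, so the vertices below are read directly as normalized device coordinates. The geometry and attribute values are made up, and the compiled kernel .so referenced above must be importable.

import tensorflow as tf

vertices = tf.constant([[[-0.5, -0.5, 0.0], [0.5, -0.5, 0.0], [0.0, 0.5, 0.0]]], dtype=tf.float32)
attributes = tf.constant([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]], dtype=tf.float32)
triangles = tf.constant([[0, 1, 2]], dtype=tf.int32)
projection = tf.eye(4, batch_shape=[1])  # identity: vertices are already in clip space
images, alphas = rasterize_triangles(
    vertices, attributes, triangles, projection,
    image_width=64, image_height=64, background_value=[0.0, 0.0, 0.0])
with tf.Session() as sess:
    rgb, a = sess.run([images, alphas])  # [1, 64, 64, 3] and [1, 64, 64, 1]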

103
skin.py Normal file
View file

@@ -0,0 +1,103 @@
import math
import numpy as np
class GMM:
def __init__(self, dim, num, w, mu, cov, cov_det, cov_inv):
self.dim = dim # feature dimension
self.num = num # number of Gaussian components
self.w = w # weights of Gaussian components (a list of scalars)
self.mu= mu # mean of Gaussian components (a list of 1xdim vectors)
self.cov = cov # covariance matrix of Gaussian components (a list of dimxdim matrices)
self.cov_det = cov_det # pre-computed determinants of the covariance matrices (a list of scalars)
self.cov_inv = cov_inv # pre-computed inverse covariance matrices (a list of dimxdim matrices)
self.factor = [0]*num
for i in range(self.num):
self.factor[i] = (2*math.pi)**(self.dim/2) * self.cov_det[i]**0.5
def likelihood(self, data):
assert(data.shape[1] == self.dim)
N = data.shape[0]
lh = np.zeros(N)
for i in range(self.num):
data_ = data - self.mu[i]
tmp = np.matmul(data_,self.cov_inv[i]) * data_
tmp = np.sum(tmp,axis=1)
power = -0.5 * tmp
p = np.exp(power) / self.factor[i] # vectorized over the N samples; equivalent to the per-element math.exp loop
lh += p*self.w[i]
return lh
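# In formula form, likelihood() returns, for each row x of data,
#   p(x) = sum_i w[i] * exp(-0.5 * (x - mu[i])^T cov_inv[i] (x - mu[i]))
#                     / ((2*pi)**(dim/2) * sqrt(cov_det[i])),
# i.e. the density of the Gaussian mixture with the precomputed parameters.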
def _rgb2ycbcr(rgb):
m = np.array([[65.481, 128.553, 24.966],
[-37.797, -74.203, 112],
[112, -93.786, -18.214]])
shape = rgb.shape
rgb = rgb.reshape((shape[0] * shape[1], 3))
ycbcr = np.dot(rgb, m.transpose() / 255.)
ycbcr[:, 0] += 16.
ycbcr[:, 1:] += 128.
return ycbcr.reshape(shape)
def _bgr2ycbcr(bgr):
rgb = bgr[..., ::-1]
return _rgb2ycbcr(rgb)
gmm_skin_w = [0.24063933, 0.16365987, 0.26034665, 0.33535415]
gmm_skin_mu = [np.array([113.71862, 103.39613, 164.08226]),
np.array([150.19858, 105.18467, 155.51428]),
np.array([183.92976, 107.62468, 152.71820]),
np.array([114.90524, 113.59782, 151.38217])]
gmm_skin_cov_det = [5692842.5, 5851930.5, 2329131., 1585971.]
gmm_skin_cov_inv = [np.array([[0.0019472069, 0.0020450759, -0.00060243998],[0.0020450759, 0.017700525, 0.0051420014],[-0.00060243998, 0.0051420014, 0.0081308950]]),
np.array([[0.0027110141, 0.0011036990, 0.0023122299],[0.0011036990, 0.010707724, 0.010742856],[0.0023122299, 0.010742856, 0.017481629]]),
np.array([[0.0048026871, 0.00022935172, 0.0077668377],[0.00022935172, 0.011729696, 0.0081661865],[0.0077668377, 0.0081661865, 0.025374353]]),
np.array([[0.0011989699, 0.0022453172, -0.0010748957],[0.0022453172, 0.047758564, 0.020332102],[-0.0010748957, 0.020332102, 0.024502251]])]
gmm_skin = GMM(3, 4, gmm_skin_w, gmm_skin_mu, [], gmm_skin_cov_det, gmm_skin_cov_inv)
gmm_nonskin_w = [0.12791070, 0.31130761, 0.34245777, 0.21832393]
gmm_nonskin_mu = [np.array([99.200851, 112.07533, 140.20602]),
np.array([110.91392, 125.52969, 130.19237]),
np.array([129.75864, 129.96107, 126.96808]),
np.array([112.29587, 128.85121, 129.05431])]
gmm_nonskin_cov_det = [458703648., 6466488., 90611376., 133097.63]
gmm_nonskin_cov_inv = [np.array([[0.00085371657, 0.00071197288, 0.00023958916],[0.00071197288, 0.0025935620, 0.00076557708],[0.00023958916, 0.00076557708, 0.0015042332]]),
np.array([[0.00024650150, 0.00045542428, 0.00015019422],[0.00045542428, 0.026412144, 0.018419769],[0.00015019422, 0.018419769, 0.037497383]]),
np.array([[0.00037054974, 0.00038146760, 0.00040408765],[0.00038146760, 0.0085505722, 0.0079136286],[0.00040408765, 0.0079136286, 0.010982352]]),
np.array([[0.00013709733, 0.00051228428, 0.00012777430],[0.00051228428, 0.28237113, 0.10528370],[0.00012777430, 0.10528370, 0.23468947]])]
gmm_nonskin = GMM(3, 4, gmm_nonskin_w, gmm_nonskin_mu, [], gmm_nonskin_cov_det, gmm_nonskin_cov_inv)
prior_skin = 0.8
prior_nonskin = 1 - prior_skin
# calculate skin attention mask
def skinmask(imbgr):
im = _bgr2ycbcr(imbgr)
data = im.reshape((-1,3))
lh_skin = gmm_skin.likelihood(data)
lh_nonskin = gmm_nonskin.likelihood(data)
tmp1 = prior_skin * lh_skin
tmp2 = prior_nonskin * lh_nonskin
post_skin = tmp1 / (tmp1+tmp2) # posterior probability
post_skin = post_skin.reshape((im.shape[0],im.shape[1]))
post_skin = np.round(post_skin*255)
post_skin = post_skin.astype(np.uint8)
post_skin = np.tile(np.expand_dims(post_skin,2),[1,1,3]) # tile the single-channel mask to H x W x 3
return post_skin
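A minimal usage sketch (not part of the commit); the file names are hypothetical, and skinmask expects a BGR uint8 image as produced by cv2.imread.

import cv2

img_bgr = cv2.imread('example_face.png')        # H x W x 3, uint8, BGR order
mask = skinmask(img_bgr)                        # H x W x 3, uint8, higher values = more likely skin
cv2.imwrite('example_face_skin_mask.png', mask)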

140
train.py Normal file
View file

@@ -0,0 +1,140 @@
import tensorflow as tf
import numpy as np
import os
from options import Option
from reconstruction_model import *
from data_loader import *
from utils import *
import argparse
###############################################################################################
# training stage
###############################################################################################
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# training data and validation data
def parse_args():
desc = "Data preprocessing for Deep3DRecon."
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('--data_path', type=str, default='./processed_data', help='training data folder')
parser.add_argument('--val_data_path', type=str, default='./processed_data', help='validation data folder')
return parser.parse_args()
# initialize weights for resnet and facenet
def restore_weights_and_initialize(opt):
var_list = tf.trainable_variables()
g_list = tf.global_variables()
# add batch normalization params into trainable variables
bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
var_list +=bn_moving_vars
# create saver to save and restore weights
resnet_vars = [v for v in var_list if 'resnet_v1_50' in v.name]
facenet_vars = [v for v in var_list if 'InceptionResnetV1' in v.name]
saver_resnet = tf.train.Saver(var_list = resnet_vars)
saver_facenet = tf.train.Saver(var_list = facenet_vars)
saver = tf.train.Saver(var_list = resnet_vars + [v for v in var_list if 'fc-' in v.name],max_to_keep = 50)
# create session
sess = tf.InteractiveSession(config = opt.config)
# create summary op
train_writer = tf.summary.FileWriter(opt.train_summary_path, sess.graph)
val_writer = tf.summary.FileWriter(opt.val_summary_path, sess.graph)
# initialization
tf.global_variables_initializer().run()
tf.local_variables_initializer().run()
saver_resnet.restore(sess,opt.R_net_weights)
saver_facenet.restore(sess,opt.Perceptual_net_weights)
return saver, train_writer,val_writer, sess
# main function for training
def train():
# read BFM face model
# transfer original BFM model to our model
if not os.path.isfile('./BFM/BFM_model_front.mat'):
transferBFM09()
with tf.Graph().as_default() as graph:
# training options
args = parse_args()
opt = Option()
opt.data_path = [args.data_path]
opt.val_data_path = [args.val_data_path]
# load training data into queue
train_iterator = load_dataset(opt)
# create reconstruction model
model = Reconstruction_model(opt)
# send training data to the model
model.set_input(train_iterator)
# update model variables with training data
model.step(is_train = True)
# summarize training statistics
model.summarize()
# several training statistics to be saved
train_stat = model.summary_stat
train_img_stat = model.summary_img
train_op = model.train_op
photo_error = model.photo_loss
lm_error = model.landmark_loss
id_error = model.perceptual_loss
# load validation data into queue
val_iterator = load_dataset(opt,train=False)
# send validation data to the model
model.set_input(val_iterator)
# only do a forward pass without updating model variables
model.step(is_train = False)
# summarize validation statistics
model.summarize()
val_stat = model.summary_stat
val_img_stat = model.summary_img
# initialization
saver, train_writer,val_writer, sess = restore_weights_and_initialize(opt)
# freeze the graph to ensure no new op will be added during training
sess.graph.finalize()
# training loop
for i in range(opt.train_maxiter):
_,ph_loss,lm_loss,id_loss = sess.run([train_op,photo_error,lm_error,id_error])
print('Iter: %d; lm_loss: %f ; photo_loss: %f; id_loss: %f\n'%(i,np.sqrt(lm_loss),ph_loss,id_loss))
# summarize training stats every <train_summary_iter> iterations
if np.mod(i,opt.train_summary_iter) == 0:
train_summary = sess.run(train_stat)
train_writer.add_summary(train_summary,i)
# summarize image stats every <image_summary_iter> iterations
if np.mod(i,opt.image_summary_iter) == 0:
train_img_summary = sess.run(train_img_stat)
train_writer.add_summary(train_img_summary,i)
# summarize validation stats every <val_summary_iter> iterations
if np.mod(i,opt.val_summary_iter) == 0:
val_summary,val_img_summary = sess.run([val_stat,val_img_stat])
val_writer.add_summary(val_summary,i)
val_writer.add_summary(val_img_summary,i)
# save model variables every <save_iter> iterations
if np.mod(i,opt.save_iter) == 0:
saver.save(sess,os.path.join(opt.model_save_path,'iter_%d.ckpt'%i))
if __name__ == '__main__':
train()
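Assuming processed images, landmarks and skin masks have been prepared under the folder passed via --data_path (both flags default to ./processed_data per parse_args above), and that the BFM model plus the pretrained R-Net and FaceNet weights referenced in Option are in place, training would presumably be launched simply as: python train.py --data_path <train_folder> --val_data_path <val_folder>.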

View file

@@ -1,4 +1,5 @@
import numpy as np
import tensorflow as tf
from PIL import Image
from scipy.io import loadmat,savemat
from array import array
@@ -125,4 +126,12 @@ def save_obj(path,v,f,c):
for i in range(len(f)):
file.write('f %d %d %d\n'%(f[i,0],f[i,1],f[i,2]))
file.close()
file.close()
# load .pb file into tensorflow graph
def load_graph(graph_filename):
with tf.gfile.GFile(graph_filename,'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
return graph_def
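# Usage sketch (hypothetical path): the returned GraphDef can be imported into a
# graph with tf.import_graph_def, e.g.
#   graph_def = load_graph('./model.pb')
#   with tf.Graph().as_default():
#       tf.import_graph_def(graph_def, name='')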