first commit
This commit is contained in:
Parent
a8a8eefebd
Commit
1068687f83
|
@ -0,0 +1,11 @@
|
|||
Thanks a lot for your interest in our work.
|
||||
I quickly wrapped up a multi-GPU version of the code. (Note that in the submission phase we used a single-GPU version of the code. Please let me know if there is anything we can improve.)
|
||||
|
||||
Training demo code: example.sh
|
||||
|
||||
Inference demo code: batch_test_script_mainbody.sh
|
||||
|
||||
The data and the checkpoint are available at:
|
||||
https://www.dropbox.com/sh/fpnvtcmyj4mul2s/AAB4wvsxoS8pf7ExnZYe4VV1a?dl=0
|
||||
|
||||
You need to download them and put them in the working directory. An example is given in ``example.sh''.
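For reference, the demo scripts assume a working-directory layout roughly like the following (folder names are taken from the scripts; the exact contents depend on what the Dropbox folder provides):

./example.sh
./cifar10_data/    # passed as --data_dir to monitor.py and the test scripts
./checkpoints/     # model_dir holding the params_<epoch>uidx<step>.ckpt.* files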
|
|
@ -0,0 +1,40 @@
|
|||
export PATH=/usr/anaconda2/bin:$PATH
|
||||
#export LD_LIBRARY_PATH=~/Downloads/cuda/lib64:"$LD_LIBRARY_PATH"
|
||||
export CUDA_VISIBLE_DEVICES=6
|
||||
|
||||
model_dir=checkpoints
|
||||
|
||||
for (( e=345;e<=345;e+=2 ));do
|
||||
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
|
||||
filename=${filename:0:-6}
|
||||
|
||||
python monitor_checkleft.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=I2L --useSoftLabel=0
|
||||
|
||||
done
|
||||
|
||||
for (( e=345;e<=345;e+=2 ));do
|
||||
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
|
||||
filename=${filename:0:-6}
|
||||
|
||||
python monitor_checkleft.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=L2I --useSoftLabel=0
|
||||
|
||||
done
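# Note: the loops above rely on TensorFlow writing each checkpoint as a
# params_<epoch>uidx<step>.ckpt.index / .ckpt.data-* pair. grep picks out the
# .index file and ${filename:0:-6} strips the six-character ".index" suffix so
# that --load_params receives the bare checkpoint prefix. A minimal sketch
# (checkpoint name taken from example.sh; adjust to your own checkpoint dir):
#   f=$(ls checkpoints | grep -o 'params_345uidx[^\.]*\.ckpt\.index')
#   echo "${f:0:-6}"    # -> params_345uidx480248.ckpt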
|
||||
|
||||
|
||||
|
||||
|
||||
: <<'VIRTUAL_ENV'
|
||||
source ~/virtual_py/bin/activate
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
model_dir=debug_room
|
||||
|
||||
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=params_9uidx13880.ckpt --mode=L2I --useSoftLabel=0
|
||||
|
||||
deactivate
|
||||
|
||||
VIRTUAL_ENV
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
export PATH=/usr/anaconda2/bin:$PATH
|
||||
#export LD_LIBRARY_PATH=~/Downloads/cuda/lib64:"$LD_LIBRARY_PATH"
|
||||
export CUDA_VISIBLE_DEVICES=6
|
||||
|
||||
model_dir=checkpoints
|
||||
|
||||
for (( e=345;e<=345;e+=2 ));do
|
||||
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
|
||||
filename=${filename:0:-6}
|
||||
|
||||
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=I2L --useSoftLabel=0
|
||||
|
||||
done
|
||||
|
||||
for (( e=345;e<=345;e+=2 ));do
|
||||
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
|
||||
filename=${filename:0:-6}
|
||||
|
||||
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=L2I --useSoftLabel=0
|
||||
|
||||
done
|
||||
|
||||
|
||||
# When using "--oneside" in training mode, you should also add the
|
||||
# corresponding "--oneside" in the inference phase.
|
||||
|
||||
|
||||
: <<'VIRTUAL_ENV'
|
||||
source ~/virtual_py/bin/activate
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
model_dir=debug_room
|
||||
|
||||
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=params_9uidx13880.ckpt --mode=L2I --useSoftLabel=0
|
||||
|
||||
deactivate
|
||||
|
||||
VIRTUAL_ENV
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""CIFAR dataset input module.
|
||||
"""
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def build_input(dataset, data_path, batch_size, mode):
|
||||
"""Build CIFAR image and labels.
|
||||
|
||||
Args:
|
||||
dataset: Either 'cifar10' or 'cifar100'.
|
||||
data_path: Filename for data.
|
||||
batch_size: Input batch size.
|
||||
mode: Either 'train' or 'eval'.
|
||||
Returns:
|
||||
images: Batches of images. [batch_size, image_size, image_size, 3]
|
||||
labels: Batches of labels. [batch_size, num_classes]
|
||||
Raises:
|
||||
ValueError: when the specified dataset is not supported.
|
||||
"""
|
||||
image_size = 32
|
||||
if dataset == 'cifar10':
|
||||
label_bytes = 1
|
||||
label_offset = 0
|
||||
num_classes = 10
|
||||
elif dataset == 'cifar100':
|
||||
label_bytes = 1
|
||||
label_offset = 1
|
||||
num_classes = 100
|
||||
else:
|
||||
raise ValueError('Unsupported dataset %s' % dataset)
|
||||
|
||||
depth = 3
|
||||
image_bytes = image_size * image_size * depth
|
||||
record_bytes = label_bytes + label_offset + image_bytes
|
||||
|
||||
data_files = tf.gfile.Glob(data_path)
|
||||
file_queue = tf.train.string_input_producer(data_files, shuffle=True)
|
||||
# Read examples from files in the filename queue.
|
||||
reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
|
||||
_, value = reader.read(file_queue)
|
||||
|
||||
# Convert these examples to dense labels and processed images.
|
||||
record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
|
||||
|
||||
label = tf.cast(tf.slice(record, [label_offset], [label_bytes]), tf.int32)
|
||||
# Convert from string to [depth * height * width] to [depth, height, width].
|
||||
depth_major = tf.reshape(tf.slice(record, [label_bytes], [image_bytes]),
|
||||
[depth, image_size, image_size])
|
||||
# Convert from [depth, height, width] to [height, width, depth].
|
||||
image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
|
||||
|
||||
if mode == 'train':
|
||||
image = tf.image.resize_image_with_crop_or_pad(
|
||||
image, image_size+4, image_size+4)
|
||||
image = tf.random_crop(image, [image_size, image_size, 3])
|
||||
image = tf.image.random_flip_left_right(image)
|
||||
# Brightness/saturation/contrast jittering provides small gains of 0.2%-0.5% on CIFAR.
|
||||
# image = tf.image.random_brightness(image, max_delta=63. / 255.)
|
||||
# image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
|
||||
# image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
|
||||
image = tf.image.per_image_standardization(image)
|
||||
|
||||
example_queue = tf.RandomShuffleQueue(
|
||||
capacity=16 * batch_size,
|
||||
min_after_dequeue=8 * batch_size,
|
||||
dtypes=[tf.float32, tf.int32],
|
||||
shapes=[[image_size, image_size, depth], [1]])
|
||||
num_threads = 16
|
||||
else:
|
||||
image = tf.image.resize_image_with_crop_or_pad(
|
||||
image, image_size, image_size)
|
||||
image = tf.image.per_image_standardization(image)  # same op as in the 'train' branch above
|
||||
|
||||
example_queue = tf.FIFOQueue(
|
||||
3 * batch_size,
|
||||
dtypes=[tf.float32, tf.int32],
|
||||
shapes=[[image_size, image_size, depth], [1]])
|
||||
num_threads = 1
|
||||
|
||||
example_enqueue_op = example_queue.enqueue([image, label])
|
||||
tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
|
||||
example_queue, [example_enqueue_op] * num_threads))
|
||||
|
||||
# Read 'batch' labels + images from the example queue.
|
||||
images, labels = example_queue.dequeue_many(batch_size)
|
||||
labels = tf.reshape(labels, [batch_size, 1])
|
||||
indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
|
||||
labels = tf.sparse_to_dense(
|
||||
tf.concat(1, [indices, labels]),
|
||||
[batch_size, num_classes], 1.0, 0.0)
|
||||
|
||||
assert len(images.get_shape()) == 4
|
||||
assert images.get_shape()[0] == batch_size
|
||||
assert images.get_shape()[-1] == 3
|
||||
assert len(labels.get_shape()) == 2
|
||||
assert labels.get_shape()[0] == batch_size
|
||||
assert labels.get_shape()[1] == num_classes
|
||||
|
||||
# Display the training images in the visualizer.
|
||||
tf.image_summary('images', images)
|
||||
return images, labels
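# Minimal usage sketch of the pipeline above; the data path is an assumption --
# point it at the CIFAR-10 binary batches (e.g. ./cifar10_data/cifar-10-batches-bin/data_batch*).
if __name__ == '__main__':
  images, labels = build_input('cifar10',
                               './cifar10_data/cifar-10-batches-bin/data_batch*',
                               batch_size=12, mode='train')
  with tf.Session() as sess:
    # the queue runners registered by build_input must be started explicitly
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    x, y = sess.run([images, labels])
    print(x.shape, y.shape)  # (12, 32, 32, 3) and (12, 10) one-hot labels
    coord.request_stop()
    coord.join(threads)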
|
|
@ -0,0 +1,129 @@
|
|||
"""
|
||||
Utilities for downloading and unpacking the CIFAR-10 dataset, originally published
|
||||
by Krizhevsky et al. and hosted here: https://www.cs.toronto.edu/~kriz/cifar.html
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
import numpy as np
|
||||
|
||||
def maybe_download_and_extract(data_dir, url='http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'):
|
||||
if not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py')):
|
||||
if not os.path.exists(data_dir):
|
||||
os.makedirs(data_dir)
|
||||
filename = url.split('/')[-1]
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
if not os.path.exists(filepath):
|
||||
def _progress(count, block_size, total_size):
|
||||
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
|
||||
float(count * block_size) / float(total_size) * 100.0))
|
||||
sys.stdout.flush()
|
||||
filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
|
||||
print()
|
||||
statinfo = os.stat(filepath)
|
||||
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
|
||||
tarfile.open(filepath, 'r:gz').extractall(data_dir)
|
||||
|
||||
def unpickle(file):
|
||||
fo = open(file, 'rb')
|
||||
if (sys.version_info >= (3, 0)):
|
||||
import pickle
|
||||
d = pickle.load(fo, encoding='latin1')
|
||||
else:
|
||||
import cPickle
|
||||
d = cPickle.load(fo)
|
||||
fo.close()
|
||||
return {'x': d['data'].reshape((10000,3,32,32)), 'y': np.array(d['labels']).astype(np.uint8)}
|
||||
|
||||
def load(data_dir, subset='train'):
|
||||
maybe_download_and_extract(data_dir)
|
||||
if subset=='train':
|
||||
train_data = [unpickle(os.path.join(data_dir,'cifar-10-batches-py','data_batch_' + str(i))) for i in range(1,6)]
|
||||
trainx = np.concatenate([d['x'] for d in train_data],axis=0)
|
||||
trainy = np.concatenate([d['y'] for d in train_data],axis=0)
|
||||
return trainx, trainy
|
||||
elif subset=='test':
|
||||
test_data = unpickle(os.path.join(data_dir,'cifar-10-batches-py','test_batch'))
|
||||
testx = test_data['x']
|
||||
testy = test_data['y']
|
||||
return testx, testy
|
||||
else:
|
||||
raise NotImplementedError('subset should be either train or test')
|
||||
|
||||
class DataLoader(object):
|
||||
""" an object that generates batches of CIFAR-10 data for training """
|
||||
|
||||
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False, return_labels=False, filter_labels=None):
|
||||
"""
|
||||
- data_dir is location where to store files
|
||||
- subset is train|test
|
||||
- batch_size is int, of #examples to load at once
|
||||
- rng is np.random.RandomState object for reproducibility
|
||||
"""
|
||||
|
||||
self.data_dir = data_dir
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
self.return_labels = return_labels
|
||||
|
||||
# create temporary storage for the data, if not yet created
|
||||
if not os.path.exists(data_dir):
|
||||
print('creating folder', data_dir)
|
||||
os.makedirs(data_dir)
|
||||
|
||||
# load CIFAR-10 training data to RAM
|
||||
self.data, self.labels = load(os.path.join(data_dir,'cifar-10-python'), subset=subset)
|
||||
|
||||
if filter_labels is not None:
|
||||
selected_idx = self.labels == filter_labels
|
||||
self.data = self.data[selected_idx]
|
||||
self.labels = self.labels[selected_idx]
|
||||
print('There are %d samples left' % self.labels.size)
|
||||
|
||||
self.data = np.transpose(self.data, (0,2,3,1)) # (N,3,32,32) -> (N,32,32,3)
|
||||
|
||||
self.p = 0 # pointer to where we are in iteration
|
||||
self.rng = np.random.RandomState(1) if rng is None else rng
|
||||
|
||||
def get_observation_size(self):
|
||||
return self.data.shape[1:]
|
||||
|
||||
def get_num_labels(self):
|
||||
return np.amax(self.labels) + 1
|
||||
|
||||
def reset(self):
|
||||
self.p = 0
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self, n=None):
|
||||
""" n is the number of examples to fetch """
|
||||
if n is None: n = self.batch_size
|
||||
|
||||
# on first iteration lazily permute all data
|
||||
if self.p == 0 and self.shuffle:
|
||||
inds = self.rng.permutation(self.data.shape[0])
|
||||
self.data = self.data[inds]
|
||||
self.labels = self.labels[inds]
|
||||
|
||||
# on last iteration reset the counter and raise StopIteration
|
||||
if self.p + n > self.data.shape[0]:
|
||||
self.reset() # reset for next time we get called
|
||||
raise StopIteration
|
||||
|
||||
# on intermediate iterations fetch the next batch
|
||||
x = self.data[self.p : self.p + n]
|
||||
y = self.labels[self.p : self.p + n]
|
||||
self.p += self.batch_size
|
||||
|
||||
if self.return_labels:
|
||||
return x,y
|
||||
else:
|
||||
return x
|
||||
|
||||
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
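# Minimal usage sketch: fetch one training batch. Run from this file's directory
# (elsewhere the module is imported as data.cifar10_data); the shapes follow from
# the (N,3,32,32) -> (N,32,32,3) transpose above.
if __name__ == '__main__':
    loader = DataLoader('./cifar10_data', 'train', batch_size=12,
                        rng=np.random.RandomState(1), shuffle=True, return_labels=True)
    x, y = next(iter(loader))
    print(x.shape, x.dtype, y.shape)  # (12, 32, 32, 3) uint8 (12,)
    loader.reset()  # rewind the pointer before reusing the loader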
|
||||
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
import cifar10_data
|
||||
import argparse
|
||||
import plotting
|
||||
import numpy as np
|
||||
|
||||
data_dir = '/home/tim/data'
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--save_dir', type=str, default='./log')
|
||||
parser.add_argument('--data_dir', type=str, default='/home/tim/data')
|
||||
parser.add_argument('--plot_title', type=str, default=None)
|
||||
args = parser.parse_args()
|
||||
print(args)
|
||||
|
||||
data_dir = args.data_dir
|
||||
|
||||
trainx, trainy = cifar10_data.load(data_dir)
|
||||
|
||||
ids = [[] for i in range(10)]
|
||||
for i in range(trainx.shape[0]):
|
||||
if len(ids[trainy[i]]) < 10:
|
||||
ids[trainy[i]].append(i)
|
||||
if np.alltrue(np.asarray([len(_ids) >= 10 for _ids in ids])):
|
||||
break
|
||||
|
||||
images = np.zeros((10*10,32,32,3),dtype='uint8')
|
||||
for i in range(len(ids)):
|
||||
for j in range(len(ids[i])):
|
||||
images[10*j+i] = trainx[ids[i][j]].transpose([1,2,0])
|
||||
print(ids)
|
||||
|
||||
img_tile = plotting.img_tile(images, aspect_ratio=1.0, border_color=1.0, stretch=True)
|
||||
img = plotting.plot_img(img_tile, title=args.plot_title if args.plot_title != 'None' else None)
|
||||
plotting.plt.savefig(args.save_dir + '/cifar10_orig_images.png')
|
||||
plotting.plt.close('all')
|
||||
|
|
@ -0,0 +1,137 @@
|
|||
"""
|
||||
Utilities for loading the small ImageNet dataset used in Oord et al.
|
||||
use scripts/png_to_npz.py to create the npz files
|
||||
|
||||
The code here currently assumes that the preprocessing was done manually.
|
||||
TODO: make automatic and painless
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
|
||||
import numpy as np
|
||||
from scipy.misc import imread
|
||||
|
||||
def fetch(url, filepath):
|
||||
filename = url.split('/')[-1]
|
||||
def _progress(count, block_size, total_size):
|
||||
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
|
||||
float(count * block_size) / float(total_size) * 100.0))
|
||||
sys.stdout.flush()
|
||||
print(url)
|
||||
filepath, headers = urllib.request.urlretrieve(url, filepath, _progress)
|
||||
print()
|
||||
statinfo = os.stat(filepath)
|
||||
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
|
||||
|
||||
def maybe_download_and_extract(data_dir):
|
||||
# more info on the dataset at http://image-net.org/small/download.php
|
||||
# downloads and extracts the two tar files for train/val
|
||||
|
||||
train_dir = os.path.join(data_dir, 'train_32x32')
|
||||
if not os.path.exists(train_dir):
|
||||
train_url = 'http://image-net.org/small/train_32x32.tar' # 4GB
|
||||
filepath = os.path.join(data_dir, 'train_32x32.tar')
|
||||
fetch(train_url, filepath)
|
||||
print('unpacking the tar file', filepath)
|
||||
tarfile.open(filepath, 'r').extractall(data_dir) # creates the train_32x32 folder
|
||||
|
||||
test_dir = os.path.join(data_dir, 'valid_32x32')
|
||||
if not os.path.exists(test_dir):
|
||||
test_url = 'http://image-net.org/small/valid_32x32.tar' # 154MB
|
||||
filepath = os.path.join(data_dir, 'valid_32x32.tar')
|
||||
fetch(test_url, filepath)
|
||||
print('unpacking the tar file', filepath)
|
||||
tarfile.open(filepath, 'r').extractall(data_dir) # creates the valid_32x32 folder
|
||||
|
||||
def maybe_preprocess(data_dir):
|
||||
|
||||
npz_file = os.path.join(data_dir, 'imgnet_32x32.npz')
|
||||
if os.path.exists(npz_file):
|
||||
return # all good
|
||||
|
||||
trainx = []
|
||||
train_dir = os.path.join(data_dir, 'train_32x32')
|
||||
for f in os.listdir(train_dir):
|
||||
if f.endswith('.png'):
|
||||
print('reading', f)
|
||||
filepath = os.path.join(train_dir, f)
|
||||
trainx.append(imread(filepath).reshape((1,32,32,3)))
|
||||
trainx = np.concatenate(trainx, axis=0)
|
||||
|
||||
testx = []
|
||||
test_dir = os.path.join(data_dir, 'valid_32x32')
|
||||
for f in os.listdir(test_dir):
|
||||
if f.endswith('.png'):
|
||||
print('reading', f)
|
||||
filepath = os.path.join(test_dir, f)
|
||||
testx.append(imread(filepath).reshape((1,32,32,3)))
|
||||
testx = np.concatenate(testx, axis=0)
|
||||
|
||||
np.savez(npz_file, trainx=trainx, testx=testx)
|
||||
|
||||
|
||||
def load(data_dir, subset='train'):
|
||||
if not os.path.exists(data_dir):
|
||||
print('creating folder', data_dir)
|
||||
os.makedirs(data_dir)
|
||||
maybe_download_and_extract(data_dir)
|
||||
maybe_preprocess(data_dir)
|
||||
imagenet_data = np.load(os.path.join(data_dir,'imgnet_32x32.npz'))
|
||||
return imagenet_data['trainx'] if subset == 'train' else imagenet_data['testx']
|
||||
|
||||
|
||||
|
||||
class DataLoader(object):
|
||||
""" an object that generates batches of CIFAR-10 data for training """
|
||||
|
||||
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False):
|
||||
"""
|
||||
- data_dir is location where the files are stored
|
||||
- subset is train|test
|
||||
- batch_size is int, of #examples to load at once
|
||||
- rng is np.random.RandomState object for reproducibility
|
||||
"""
|
||||
|
||||
self.data_dir = data_dir
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
|
||||
self.data = load(os.path.join(data_dir,'small_imagenet'), subset=subset)
|
||||
|
||||
self.p = 0 # pointer to where we are in iteration
|
||||
self.rng = np.random.RandomState(1) if rng is None else rng
|
||||
|
||||
def get_observation_size(self):
|
||||
return self.data.shape[1:]
|
||||
|
||||
def reset(self):
|
||||
self.p = 0
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self, n=None):
|
||||
""" n is the number of examples to fetch """
|
||||
if n is None: n = self.batch_size
|
||||
|
||||
# on first iteration lazily permute all data
|
||||
if self.p == 0 and self.shuffle:
|
||||
inds = self.rng.permutation(self.data.shape[0])
|
||||
self.data = self.data[inds]
|
||||
|
||||
# on last iteration reset the counter and raise StopIteration
|
||||
if self.p + n > self.data.shape[0]:
|
||||
self.reset() # reset for next time we get called
|
||||
raise StopIteration
|
||||
|
||||
# on intermediate iterations fetch the next batch
|
||||
x = self.data[self.p : self.p + n]
|
||||
self.p += self.batch_size
|
||||
|
||||
return x
|
||||
|
||||
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
|
||||
|
Binary file not shown.
After | Width: | Height: | Size: 577 KiB |
|
@ -0,0 +1,131 @@
|
|||
"""
|
||||
Utilities for downloading and unpacking the CIFAR-10 dataset, originally published
|
||||
by Krizhevsky et al. and hosted here: https://www.cs.toronto.edu/~kriz/cifar.html
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
import numpy as np
|
||||
|
||||
def maybe_download_and_extract(data_dir, url='http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'):
|
||||
if not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py')):
|
||||
if not os.path.exists(data_dir):
|
||||
os.makedirs(data_dir)
|
||||
filename = url.split('/')[-1]
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
if not os.path.exists(filepath):
|
||||
def _progress(count, block_size, total_size):
|
||||
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
|
||||
float(count * block_size) / float(total_size) * 100.0))
|
||||
sys.stdout.flush()
|
||||
filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
|
||||
print()
|
||||
statinfo = os.stat(filepath)
|
||||
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
|
||||
tarfile.open(filepath, 'r:gz').extractall(data_dir)
|
||||
|
||||
def unpickle(file):
|
||||
fo = open(file, 'rb')
|
||||
if (sys.version_info >= (3, 0)):
|
||||
import pickle
|
||||
d = pickle.load(fo, encoding='latin1')
|
||||
else:
|
||||
import cPickle
|
||||
d = cPickle.load(fo)
|
||||
fo.close()
|
||||
return {'x': d['data'].reshape((10000,3,32,32)), 'y': np.array(d['labels']).astype(np.uint8)}
|
||||
|
||||
def load(data_dir, subset='train'):
|
||||
maybe_download_and_extract(data_dir)
|
||||
if subset=='train':
|
||||
train_data = [unpickle(os.path.join(data_dir,'cifar-10-batches-py','data_batch_' + str(i))) for i in range(1,6)]
|
||||
trainx = np.concatenate([d['x'] for d in train_data],axis=0)
|
||||
trainy = np.concatenate([d['y'] for d in train_data],axis=0)
|
||||
return trainx, trainy
|
||||
elif subset=='test':
|
||||
test_data = unpickle(os.path.join(data_dir,'cifar-10-batches-py','test_batch'))
|
||||
testx = test_data['x']
|
||||
testy = test_data['y']
|
||||
return testx, testy
|
||||
else:
|
||||
raise NotImplementedError('subset should be either train or test')
|
||||
|
||||
class DataLoader(object):
|
||||
""" an object that generates batches of CIFAR-10 data for training """
|
||||
|
||||
def __init__(self, data_dir, subset, batch_size, LMscore=None, rng=None, shuffle=False, return_labels=False):
|
||||
"""
|
||||
- data_dir is location where to store files
|
||||
- subset is train|test
|
||||
- batch_size is int, of #examples to load at once
|
||||
- rng is np.random.RandomState object for reproducibility
|
||||
"""
|
||||
|
||||
self.data_dir = data_dir
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
self.return_labels = return_labels
|
||||
|
||||
# create temporary storage for the data, if not yet created
|
||||
if not os.path.exists(data_dir):
|
||||
print('creating folder', data_dir)
|
||||
os.makedirs(data_dir)
|
||||
|
||||
# load CIFAR-10 training data to RAM
|
||||
self.data, self.labels = load(os.path.join(data_dir,'cifar-10-python'), subset=subset)
|
||||
self.data = np.transpose(self.data, (0,2,3,1)) # (N,3,32,32) -> (N,32,32,3)
|
||||
|
||||
if subset == 'train':
|
||||
self.LM = np.load(LMscore + '.train.npz')['arr_0']
|
||||
elif subset == 'test':
|
||||
self.LM = np.load(LMscore + '.test.npz')['arr_0']  # index 'arr_0' to match the train branch above
|
||||
else:
|
||||
raise ValueError('No proper LMscore file found for subset %r' % subset)
|
||||
|
||||
self.p = 0 # pointer to where we are in iteration
|
||||
self.rng = np.random.RandomState(1) if rng is None else rng
|
||||
|
||||
def get_observation_size(self):
|
||||
return self.data.shape[1:]
|
||||
|
||||
def get_num_labels(self):
|
||||
return np.amax(self.labels) + 1
|
||||
|
||||
def reset(self):
|
||||
self.p = 0
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self, n=None):
|
||||
""" n is the number of examples to fetch """
|
||||
if n is None: n = self.batch_size
|
||||
|
||||
# on first iteration lazily permute all data
|
||||
if self.p == 0 and self.shuffle:
|
||||
inds = self.rng.permutation(self.data.shape[0])
|
||||
self.data = self.data[inds]
|
||||
self.labels = self.labels[inds]
|
||||
self.LM = self.LM[inds]
|
||||
|
||||
# on last iteration reset the counter and raise StopIteration
|
||||
if self.p + n > self.data.shape[0]:
|
||||
self.reset() # reset for next time we get called
|
||||
raise StopIteration
|
||||
|
||||
# on intermediate iterations fetch the next batch
|
||||
x = self.data[self.p : self.p + n]
|
||||
y = self.labels[self.p : self.p + n]
|
||||
lmscore = self.LM[self.p : self.p + n]
|
||||
self.p += self.batch_size
|
||||
|
||||
if self.return_labels:
|
||||
return x,y, lmscore
|
||||
else:
|
||||
return x, lmscore
|
||||
|
||||
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
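# Note (interpretation): this loader expects a precomputed per-image score file
# '<LMscore>.train.npz' holding one float per training image under numpy's default
# key 'arr_0', in the same order as the CIFAR-10 training set (monitor.py passes
# LMscore='./cifar10_data/cifar10-LMscore').
if __name__ == '__main__':
    # write a placeholder file with the expected layout (dummy zeros, not real scores;
    # the file name is hypothetical so as not to clobber a real score file)
    np.savez('./cifar10_data/dummy-LMscore.train.npz', np.zeros(50000, dtype=np.float32))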
|
||||
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
import cifar10_data
|
||||
import argparse
|
||||
import plotting
|
||||
import numpy as np
|
||||
|
||||
data_dir = '/home/tim/data'
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--save_dir', type=str, default='./log')
|
||||
parser.add_argument('--data_dir', type=str, default='/home/tim/data')
|
||||
parser.add_argument('--plot_title', type=str, default=None)
|
||||
args = parser.parse_args()
|
||||
print(args)
|
||||
|
||||
data_dir = args.data_dir
|
||||
|
||||
trainx, trainy = cifar10_data.load(data_dir)
|
||||
|
||||
ids = [[] for i in range(10)]
|
||||
for i in range(trainx.shape[0]):
|
||||
if len(ids[trainy[i]]) < 10:
|
||||
ids[trainy[i]].append(i)
|
||||
if np.alltrue(np.asarray([len(_ids) >= 10 for _ids in ids])):
|
||||
break
|
||||
|
||||
images = np.zeros((10*10,32,32,3),dtype='uint8')
|
||||
for i in range(len(ids)):
|
||||
for j in range(len(ids[i])):
|
||||
images[10*j+i] = trainx[ids[i][j]].transpose([1,2,0])
|
||||
print(ids)
|
||||
|
||||
img_tile = plotting.img_tile(images, aspect_ratio=1.0, border_color=1.0, stretch=True)
|
||||
img = plotting.plot_img(img_tile, title=args.plot_title if args.plot_title != 'None' else None)
|
||||
plotting.plt.savefig(args.save_dir + '/cifar10_orig_images.png')
|
||||
plotting.plt.close('all')
|
||||
|
|
@ -0,0 +1,137 @@
|
|||
"""
|
||||
Utilities for loading the small ImageNet dataset used in Oord et al.
|
||||
use scripts/png_to_npz.py to create the npz files
|
||||
|
||||
The code here currently assumes that the preprocessing was done manually.
|
||||
TODO: make automatic and painless
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
|
||||
import numpy as np
|
||||
from scipy.misc import imread
|
||||
|
||||
def fetch(url, filepath):
|
||||
filename = url.split('/')[-1]
|
||||
def _progress(count, block_size, total_size):
|
||||
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
|
||||
float(count * block_size) / float(total_size) * 100.0))
|
||||
sys.stdout.flush()
|
||||
print(url)
|
||||
filepath, headers = urllib.request.urlretrieve(url, filepath, _progress)
|
||||
print()
|
||||
statinfo = os.stat(filepath)
|
||||
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
|
||||
|
||||
def maybe_download_and_extract(data_dir):
|
||||
# more info on the dataset at http://image-net.org/small/download.php
|
||||
# downloads and extracts the two tar files for train/val
|
||||
|
||||
train_dir = os.path.join(data_dir, 'train_32x32')
|
||||
if not os.path.exists(train_dir):
|
||||
train_url = 'http://image-net.org/small/train_32x32.tar' # 4GB
|
||||
filepath = os.path.join(data_dir, 'train_32x32.tar')
|
||||
fetch(train_url, filepath)
|
||||
print('unpacking the tar file', filepath)
|
||||
tarfile.open(filepath, 'r').extractall(data_dir) # creates the train_32x32 folder
|
||||
|
||||
test_dir = os.path.join(data_dir, 'valid_32x32')
|
||||
if not os.path.exists(test_dir):
|
||||
test_url = 'http://image-net.org/small/valid_32x32.tar' # 154MB
|
||||
filepath = os.path.join(data_dir, 'valid_32x32.tar')
|
||||
fetch(test_url, filepath)
|
||||
print('unpacking the tar file', filepath)
|
||||
tarfile.open(filepath, 'r').extractall(data_dir) # creates the valid_32x32 folder
|
||||
|
||||
def maybe_preprocess(data_dir):
|
||||
|
||||
npz_file = os.path.join(data_dir, 'imgnet_32x32.npz')
|
||||
if os.path.exists(npz_file):
|
||||
return # all good
|
||||
|
||||
trainx = []
|
||||
train_dir = os.path.join(data_dir, 'train_32x32')
|
||||
for f in os.listdir(train_dir):
|
||||
if f.endswith('.png'):
|
||||
print('reading', f)
|
||||
filepath = os.path.join(train_dir, f)
|
||||
trainx.append(imread(filepath).reshape((1,32,32,3)))
|
||||
trainx = np.concatenate(trainx, axis=0)
|
||||
|
||||
testx = []
|
||||
test_dir = os.path.join(data_dir, 'valid_32x32')
|
||||
for f in os.listdir(test_dir):
|
||||
if f.endswith('.png'):
|
||||
print('reading', f)
|
||||
filepath = os.path.join(test_dir, f)
|
||||
testx.append(imread(filepath).reshape((1,32,32,3)))
|
||||
testx = np.concatenate(testx, axis=0)
|
||||
|
||||
np.savez(npz_file, trainx=trainx, testx=testx)
|
||||
|
||||
|
||||
def load(data_dir, subset='train'):
|
||||
if not os.path.exists(data_dir):
|
||||
print('creating folder', data_dir)
|
||||
os.makedirs(data_dir)
|
||||
maybe_download_and_extract(data_dir)
|
||||
maybe_preprocess(data_dir)
|
||||
imagenet_data = np.load(os.path.join(data_dir,'imgnet_32x32.npz'))
|
||||
return imagenet_data['trainx'] if subset == 'train' else imagenet_data['testx']
|
||||
|
||||
|
||||
|
||||
class DataLoader(object):
|
||||
""" an object that generates batches of CIFAR-10 data for training """
|
||||
|
||||
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False):
|
||||
"""
|
||||
- data_dir is location where the files are stored
|
||||
- subset is train|test
|
||||
- batch_size is int, of #examples to load at once
|
||||
- rng is np.random.RandomState object for reproducibility
|
||||
"""
|
||||
|
||||
self.data_dir = data_dir
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
|
||||
self.data = load(os.path.join(data_dir,'small_imagenet'), subset=subset)
|
||||
|
||||
self.p = 0 # pointer to where we are in iteration
|
||||
self.rng = np.random.RandomState(1) if rng is None else rng
|
||||
|
||||
def get_observation_size(self):
|
||||
return self.data.shape[1:]
|
||||
|
||||
def reset(self):
|
||||
self.p = 0
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self, n=None):
|
||||
""" n is the number of examples to fetch """
|
||||
if n is None: n = self.batch_size
|
||||
|
||||
# on first iteration lazily permute all data
|
||||
if self.p == 0 and self.shuffle:
|
||||
inds = self.rng.permutation(self.data.shape[0])
|
||||
self.data = self.data[inds]
|
||||
|
||||
# on last iteration reset the counter and raise StopIteration
|
||||
if self.p + n > self.data.shape[0]:
|
||||
self.reset() # reset for next time we get called
|
||||
raise StopIteration
|
||||
|
||||
# on intermediate iterations fetch the next batch
|
||||
x = self.data[self.p : self.p + n]
|
||||
self.p += self.batch_size
|
||||
|
||||
return x
|
||||
|
||||
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
|
||||
|
Binary file not shown.
After | Width: | Height: | Size: 577 KiB |
|
@ -0,0 +1,133 @@
|
|||
"""
|
||||
Utilities for downloading and unpacking the CIFAR-10 dataset, originally published
|
||||
by Krizhevsky et al. and hosted here: https://www.cs.toronto.edu/~kriz/cifar.html
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
import numpy as np
|
||||
|
||||
def maybe_download_and_extract(data_dir, url='http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'):
|
||||
if not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py')):
|
||||
if not os.path.exists(data_dir):
|
||||
os.makedirs(data_dir)
|
||||
filename = url.split('/')[-1]
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
if not os.path.exists(filepath):
|
||||
def _progress(count, block_size, total_size):
|
||||
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
|
||||
float(count * block_size) / float(total_size) * 100.0))
|
||||
sys.stdout.flush()
|
||||
filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
|
||||
print()
|
||||
statinfo = os.stat(filepath)
|
||||
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
|
||||
tarfile.open(filepath, 'r:gz').extractall(data_dir)
|
||||
|
||||
def unpickle(file):
|
||||
fo = open(file, 'rb')
|
||||
if (sys.version_info >= (3, 0)):
|
||||
import pickle
|
||||
d = pickle.load(fo, encoding='latin1')
|
||||
else:
|
||||
import cPickle
|
||||
d = cPickle.load(fo)
|
||||
fo.close()
|
||||
return {'x': d['data'].reshape((10000,3,32,32)), 'y': np.array(d['labels']).astype(np.uint8)}
|
||||
|
||||
def load(data_dir, subset='train'):
|
||||
maybe_download_and_extract(data_dir)
|
||||
if subset=='train':
|
||||
train_data = [unpickle(os.path.join(data_dir,'cifar-10-batches-py','data_batch_' + str(i))) for i in range(1,6)]
|
||||
trainx = np.concatenate([d['x'] for d in train_data],axis=0)
|
||||
trainy = np.concatenate([d['y'] for d in train_data],axis=0)
|
||||
return trainx, trainy
|
||||
elif subset=='test':
|
||||
test_data = unpickle(os.path.join(data_dir,'cifar-10-batches-py','test_batch'))
|
||||
testx = test_data['x']
|
||||
testy = test_data['y']
|
||||
return testx, testy
|
||||
else:
|
||||
raise NotImplementedError('subset should be either train or test')
|
||||
|
||||
class DataLoader(object):
|
||||
""" an object that generates batches of CIFAR-10 data for training """
|
||||
|
||||
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False, return_labels=False, filter_labels=None,final=8):
|
||||
"""
|
||||
- data_dir is location where to store files
|
||||
- subset is train|test
|
||||
- batch_size is int, of #examples to load at once
|
||||
- rng is np.random.RandomState object for reproducibility
|
||||
"""
|
||||
|
||||
self.data_dir = data_dir
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
self.return_labels = return_labels
|
||||
|
||||
# create temporary storage for the data, if not yet created
|
||||
if not os.path.exists(data_dir):
|
||||
print('creating folder', data_dir)
|
||||
os.makedirs(data_dir)
|
||||
|
||||
# load CIFAR-10 training data to RAM
|
||||
self.data, self.labels = load(os.path.join(data_dir,'cifar-10-python'), subset=subset)
|
||||
if final > 0:
|
||||
self.data = np.tile(self.data[-final:],[3,1,1,1])
|
||||
self.labels = np.tile(self.labels[-final:],[3])
|
||||
|
||||
|
||||
if filter_labels is not None:
|
||||
selected_idx = self.labels == filter_labels
|
||||
self.data = self.data[selected_idx]
|
||||
self.labels = self.labels[selected_idx]
|
||||
print('There are %d samples left' % self.labels.size)
|
||||
|
||||
self.data = np.transpose(self.data, (0,2,3,1)) # (N,3,32,32) -> (N,32,32,3)
|
||||
|
||||
self.p = 0 # pointer to where we are in iteration
|
||||
self.rng = np.random.RandomState(1) if rng is None else rng
|
||||
|
||||
def get_observation_size(self):
|
||||
return self.data.shape[1:]
|
||||
|
||||
def get_num_labels(self):
|
||||
return np.amax(self.labels) + 1
|
||||
|
||||
def reset(self):
|
||||
self.p = 0
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self, n=None):
|
||||
""" n is the number of examples to fetch """
|
||||
if n is None: n = self.batch_size
|
||||
|
||||
# on first iteration lazily permute all data
|
||||
if self.p == 0 and self.shuffle:
|
||||
inds = self.rng.permutation(self.data.shape[0])
|
||||
self.data = self.data[inds]
|
||||
self.labels = self.labels[inds]
|
||||
|
||||
# on last iteration reset the counter and raise StopIteration
|
||||
if self.p + n > self.data.shape[0]:
|
||||
self.reset() # reset for next time we get called
|
||||
raise StopIteration
|
||||
|
||||
# on intermediate iterations fetch the next batch
|
||||
x = self.data[self.p : self.p + n]
|
||||
y = self.labels[self.p : self.p + n]
|
||||
self.p += self.batch_size
|
||||
|
||||
if self.return_labels:
|
||||
return x,y
|
||||
else:
|
||||
return x
|
||||
|
||||
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
|
||||
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
import cifar10_data
|
||||
import argparse
|
||||
import plotting
|
||||
import numpy as np
|
||||
|
||||
data_dir = '/home/tim/data'
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--save_dir', type=str, default='./log')
|
||||
parser.add_argument('--data_dir', type=str, default='/home/tim/data')
|
||||
parser.add_argument('--plot_title', type=str, default=None)
|
||||
args = parser.parse_args()
|
||||
print(args)
|
||||
|
||||
data_dir = args.data_dir
|
||||
|
||||
trainx, trainy = cifar10_data.load(data_dir)
|
||||
|
||||
ids = [[] for i in range(10)]
|
||||
for i in range(trainx.shape[0]):
|
||||
if len(ids[trainy[i]]) < 10:
|
||||
ids[trainy[i]].append(i)
|
||||
if np.alltrue(np.asarray([len(_ids) >= 10 for _ids in ids])):
|
||||
break
|
||||
|
||||
images = np.zeros((10*10,32,32,3),dtype='uint8')
|
||||
for i in range(len(ids)):
|
||||
for j in range(len(ids[i])):
|
||||
images[10*j+i] = trainx[ids[i][j]].transpose([1,2,0])
|
||||
print(ids)
|
||||
|
||||
img_tile = plotting.img_tile(images, aspect_ratio=1.0, border_color=1.0, stretch=True)
|
||||
img = plotting.plot_img(img_tile, title=args.plot_title if args.plot_title != 'None' else None)
|
||||
plotting.plt.savefig(args.save_dir + '/cifar10_orig_images.png')
|
||||
plotting.plt.close('all')
|
||||
|
|
@ -0,0 +1,137 @@
|
|||
"""
|
||||
Utilities for loading the small ImageNet dataset used in Oord et al.
|
||||
use scripts/png_to_npz.py to create the npz files
|
||||
|
||||
The code here currently assumes that the preprocessing was done manually.
|
||||
TODO: make automatic and painless
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
from six.moves import urllib
|
||||
|
||||
import numpy as np
|
||||
from scipy.misc import imread
|
||||
|
||||
def fetch(url, filepath):
|
||||
filename = url.split('/')[-1]
|
||||
def _progress(count, block_size, total_size):
|
||||
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
|
||||
float(count * block_size) / float(total_size) * 100.0))
|
||||
sys.stdout.flush()
|
||||
print(url)
|
||||
filepath, headers = urllib.request.urlretrieve(url, filepath, _progress)
|
||||
print()
|
||||
statinfo = os.stat(filepath)
|
||||
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
|
||||
|
||||
def maybe_download_and_extract(data_dir):
|
||||
# more info on the dataset at http://image-net.org/small/download.php
|
||||
# downloads and extracts the two tar files for train/val
|
||||
|
||||
train_dir = os.path.join(data_dir, 'train_32x32')
|
||||
if not os.path.exists(train_dir):
|
||||
train_url = 'http://image-net.org/small/train_32x32.tar' # 4GB
|
||||
filepath = os.path.join(data_dir, 'train_32x32.tar')
|
||||
fetch(train_url, filepath)
|
||||
print('unpacking the tar file', filepath)
|
||||
tarfile.open(filepath, 'r').extractall(data_dir) # creates the train_32x32 folder
|
||||
|
||||
test_dir = os.path.join(data_dir, 'valid_32x32')
|
||||
if not os.path.exists(test_dir):
|
||||
test_url = 'http://image-net.org/small/valid_32x32.tar' # 154MB
|
||||
filepath = os.path.join(data_dir, 'valid_32x32.tar')
|
||||
fetch(test_url, filepath)
|
||||
print('unpacking the tar file', filepath)
|
||||
tarfile.open(filepath, 'r').extractall(data_dir) # creates the valid_32x32 folder
|
||||
|
||||
def maybe_preprocess(data_dir):
|
||||
|
||||
npz_file = os.path.join(data_dir, 'imgnet_32x32.npz')
|
||||
if os.path.exists(npz_file):
|
||||
return # all good
|
||||
|
||||
trainx = []
|
||||
train_dir = os.path.join(data_dir, 'train_32x32')
|
||||
for f in os.listdir(train_dir):
|
||||
if f.endswith('.png'):
|
||||
print('reading', f)
|
||||
filepath = os.path.join(train_dir, f)
|
||||
trainx.append(imread(filepath).reshape((1,32,32,3)))
|
||||
trainx = np.concatenate(trainx, axis=0)
|
||||
|
||||
testx = []
|
||||
test_dir = os.path.join(data_dir, 'valid_32x32')
|
||||
for f in os.listdir(test_dir):
|
||||
if f.endswith('.png'):
|
||||
print('reading', f)
|
||||
filepath = os.path.join(test_dir, f)
|
||||
testx.append(imread(filepath).reshape((1,32,32,3)))
|
||||
testx = np.concatenate(testx, axis=0)
|
||||
|
||||
np.savez(npz_file, trainx=trainx, testx=testx)
|
||||
|
||||
|
||||
def load(data_dir, subset='train'):
|
||||
if not os.path.exists(data_dir):
|
||||
print('creating folder', data_dir)
|
||||
os.makedirs(data_dir)
|
||||
maybe_download_and_extract(data_dir)
|
||||
maybe_preprocess(data_dir)
|
||||
imagenet_data = np.load(os.path.join(data_dir,'imgnet_32x32.npz'))
|
||||
return imagenet_data['trainx'] if subset == 'train' else imagenet_data['testx']
|
||||
|
||||
|
||||
|
||||
class DataLoader(object):
|
||||
""" an object that generates batches of CIFAR-10 data for training """
|
||||
|
||||
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False):
|
||||
"""
|
||||
- data_dir is location where the files are stored
|
||||
- subset is train|test
|
||||
- batch_size is int, of #examples to load at once
|
||||
- rng is np.random.RandomState object for reproducibility
|
||||
"""
|
||||
|
||||
self.data_dir = data_dir
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
|
||||
self.data = load(os.path.join(data_dir,'small_imagenet'), subset=subset)
|
||||
|
||||
self.p = 0 # pointer to where we are in iteration
|
||||
self.rng = np.random.RandomState(1) if rng is None else rng
|
||||
|
||||
def get_observation_size(self):
|
||||
return self.data.shape[1:]
|
||||
|
||||
def reset(self):
|
||||
self.p = 0
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self, n=None):
|
||||
""" n is the number of examples to fetch """
|
||||
if n is None: n = self.batch_size
|
||||
|
||||
# on first iteration lazily permute all data
|
||||
if self.p == 0 and self.shuffle:
|
||||
inds = self.rng.permutation(self.data.shape[0])
|
||||
self.data = self.data[inds]
|
||||
|
||||
# on last iteration reset the counter and raise StopIteration
|
||||
if self.p + n > self.data.shape[0]:
|
||||
self.reset() # reset for next time we get called
|
||||
raise StopIteration
|
||||
|
||||
# on intermediate iterations fetch the next batch
|
||||
x = self.data[self.p : self.p + n]
|
||||
self.p += self.batch_size
|
||||
|
||||
return x
|
||||
|
||||
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
export PATH=/usr/anaconda2/bin:$PATH
|
||||
#export LD_LIBRARY_PATH=~/Downloads/cuda/lib64:"$LD_LIBRARY_PATH"
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3
|
||||
|
||||
# train both models jointly (tested with 4 GPUs)
|
||||
python monitor.py --data_dir=./cifar10_data --save_dir=./checkpoints_All --batch_size=12 --show_interval=10 --learning_rate=1e-4 --load_params=params_345uidx480248.ckpt --learning_rate_I2L=2e-4 --trade_off_I2L=30. --trade_off_L2I=1.5 --save_interval=1 --bias=0.02 --valid_interval=8 --lr_decay=1. --nr_gpu=4
|
||||
|
||||
# train the image classifier only (tested with a single GPU)
|
||||
# python monitor.py --data_dir=./cifar10_data --save_dir=./checkpoints_I2L --batch_size=12 --show_interval=10 --learning_rate=1e-4 --load_params=params_345uidx480248.ckpt --learning_rate_I2L=2e-4 --trade_off_I2L=30. --trade_off_L2I=1.5 --save_interval=1 --bias=0.02 --valid_interval=8 --lr_decay=1. --nr_gpu=1 --oneside=I2L
|
||||
|
||||
# train the image generator only (tested with 2 GPUs)
|
||||
# python monitor.py --data_dir=./cifar10_data --save_dir=./checkpoints_L2I --batch_size=12 --show_interval=10 --learning_rate=1e-4 --load_params=params_345uidx480248.ckpt --learning_rate_I2L=2e-4 --trade_off_I2L=30. --trade_off_L2I=1.5 --save_interval=1 --bias=0.02 --valid_interval=8 --lr_decay=1. --nr_gpu=2 --oneside=L2I
|
|
@ -0,0 +1,462 @@
|
|||
import time
|
||||
import sys
|
||||
import os
|
||||
|
||||
import cifar_input
|
||||
import numpy as np
|
||||
import resnet_model_basic as resnet_model
|
||||
import tensorflow as tf
|
||||
import data.cifar10_data as cifar10_data
|
||||
import data2.cifar10_data as cifar_10data2
|
||||
|
||||
import json
|
||||
|
||||
from worker_I2L import worker_I2L, lr_I2L
|
||||
from worker_L2I import worker_L2I
|
||||
import argparse
|
||||
import time
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
parser = argparse.ArgumentParser()
|
||||
# data I/O
|
||||
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
|
||||
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
|
||||
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
|
||||
parser.add_argument('-t', '--save_interval', type=int, default=2, help='Every how many epochs to write checkpoint/samples?')
|
||||
parser.add_argument('--valid_interval', type=int, default=1, help='Every how many epochs to valid?')
|
||||
parser.add_argument('-r', '--load_params', type=str, default=None, help='The detailed model name')
|
||||
|
||||
|
||||
# model
|
||||
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
|
||||
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
|
||||
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
|
||||
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
|
||||
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
|
||||
parser.add_argument('--trade_off_I2L', type=float, default=5e-3, help='the consistency trade-off weight on the I2L side')
|
||||
parser.add_argument('--trade_off_L2I', type=float, default=0.3, help='the consistency trade-off weight on the L2I side')
|
||||
parser.add_argument('-w', '--use_wide_resnet', dest='use_wide_resnet', action='store_true', help='Use a wide ResNet for the image classifier?')
|
||||
parser.add_argument('--show_interval', type=int, default=100, help='How many training iterations between progress printouts')
|
||||
parser.add_argument('--steal_params_L2I', type=str, default=None, help='Provide the file, which stores the warm values of L2I')
|
||||
parser.add_argument('--steal_params_I2L', type=str, default=None, help='Provide the file, which stores the warm values of I2L')
|
||||
parser.add_argument('--oneside', dest='oneside', type=str, default=None, help='None | I2L | L2I')
|
||||
|
||||
# optimization
|
||||
parser.add_argument('--learning_rate_I2L', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
|
||||
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
|
||||
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
|
||||
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
|
||||
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
|
||||
parser.add_argument('-g', '--nr_gpu', type=int, default=1, help='How many GPUs to distribute the training across?')
|
||||
parser.add_argument('--num_classes', type=int, default=10, help='number of classes')
|
||||
parser.add_argument('--L2I_normalization', dest='L2I_normalization', action='store_true', help='Use L2I normalization')
|
||||
parser.add_argument('--L2IuseSGD', dest='L2IuseSGD', action='store_true', help='Whether to use pure SGD to tune L2I')
|
||||
parser.add_argument('--useSoftLabel', type=int, default=0, help='0: no use | 1: use | 2: -0.1')
|
||||
# Activate "useSoftLabel" or not does not make significant differences. So my suggestion is that we do not need it. Also, I did not test useSoftLabel under multiple GPU settings
|
||||
parser.add_argument('--bias', type=float, default=0.0, help='introduce the bias')
|
||||
|
||||
|
||||
# evaluation
|
||||
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
|
||||
parser.add_argument('--mode', type=str, default='train', help='train | I2L | L2I | ImgGen' )
|
||||
|
||||
# reproducibility
|
||||
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
|
||||
args = parser.parse_args()
|
||||
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
|
||||
|
||||
DataLoader = cifar10_data.DataLoader
|
||||
DataLoader_train = cifar_10data2.DataLoader
|
||||
rng = np.random.RandomState(args.seed)
|
||||
train_data_iterator = DataLoader_train(args.data_dir, 'train', args.batch_size * args.nr_gpu,
|
||||
'./cifar10_data/cifar10-LMscore',
|
||||
rng=rng, shuffle=True, return_labels=True)
|
||||
test_data_iterator = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=True)
|
||||
|
||||
|
||||
class monitor(object):
|
||||
def __init__(self):
|
||||
if not os.path.exists(args.save_dir):
|
||||
os.makedirs(args.save_dir)
|
||||
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
|
||||
self.Worker_I2L = worker_I2L(args)
|
||||
|
||||
self.image_LM = [tf.placeholder(tf.float32, shape=(args.batch_size,)) for _ in range(args.nr_gpu)]
|
||||
self.trade_off_I2L = tf.placeholder(tf.float32, shape=())
|
||||
self.trade_off_L2I = tf.placeholder(tf.float32, shape=())
|
||||
|
||||
self.I2L_grads = []
|
||||
self.train_uidx = 0
|
||||
self._build_onestep(oneside=args.oneside)
|
||||
|
||||
self.lr_l2i = self.Worker_L2I.args.learning_rate
|
||||
self.current_epoch = 0
|
||||
|
||||
self.assign_op = lambda ref_, val_: tf.assign(ref_, val_)
|
||||
|
||||
def get_I2L_lr(self):
|
||||
if args.use_wide_resnet:
|
||||
step_wise = [60, 120, 160]
|
||||
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
|
||||
if self.current_epoch < step_wise[0]:
|
||||
return args.learning_rate_I2L
|
||||
elif self.current_epoch < step_wise[1]:
|
||||
return args.learning_rate_I2L * 0.2
|
||||
elif self.current_epoch < step_wise[2]:
|
||||
return args.learning_rate_I2L * 0.04
|
||||
else:
|
||||
return args.learning_rate_I2L * 0.008
|
||||
else:
|
||||
step_wise = [102, 153, 204]
|
||||
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
|
||||
if self.current_epoch < step_wise[0]:
|
||||
return args.learning_rate_I2L
|
||||
elif self.current_epoch < step_wise[1]:
|
||||
return args.learning_rate_I2L * 0.1
|
||||
elif self.current_epoch < step_wise[2]:
|
||||
return args.learning_rate_I2L * 0.01
|
||||
else:
|
||||
return args.learning_rate_I2L * 0.001
|
||||
|
||||
def get_L2I_lr(self):
|
||||
self.lr_l2i *= self.Worker_L2I.args.lr_decay
|
||||
return self.lr_l2i
|
||||
|
||||
def __del__(self):
|
||||
self.sess.close()
|
||||
|
||||
def _build_onestep(self, oneside=None):
|
||||
# Calculate all the costs and gradients
|
||||
# Let us NOT use weight decay, since we already have a regularization term
|
||||
# self.weightDecay_I2L = self.Worker_I2L.model.GetWeightDecay()
|
||||
self.nlls_I2L = self.Worker_I2L.model.nlls
|
||||
self.soft_labels = self.Worker_I2L.model.predictions # this is the soft labels [optional, may not use it]
|
||||
nlls_L2I, loss_gen_test = self.Worker_L2I.GetLoss()
|
||||
|
||||
nlls_L2I_train_bpd_list, nlls_L2I_test_bpd_list, consistent_loss_list = \
|
||||
[None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)]
|
||||
overall_cost_I2L_list, overall_cost_L2I_list, nlls_I2L_batchMean_list = \
|
||||
[None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)]
|
||||
grads_I2L_list, grads_L2I_list = [None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)]
|
||||
|
||||
for i in range(args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
nlls_L2I_train_bpd = tf.reduce_mean(nlls_L2I[i]) / (np.log(2.) * 32 * 32 * 3 )
|
||||
nlls_L2I_test_bpd = tf.reduce_mean(loss_gen_test[i]) / (np.log(2.) * 32 * 32 * 3 * args.batch_size)
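# Interpretation note: image_LM is a precomputed marginal image likelihood in bits
# per dimension (hence the *log(2) conversion to nats), tf.log(0.1) acts as a uniform
# label prior log p(y) over the 10 classes, and 3072 = 32*32*3 rescales the per-image
# terms to per-dimension. The squared residual below therefore penalizes violations
# of the product-rule identity
#   log p(x) + log p(y|x) = log p(y) + log p(x|y),
# keeping the classifier (I2L) and the conditional image model (L2I) probabilistically consistent.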
|
||||
if args.L2I_normalization:
|
||||
consistent_loss = tf.reduce_mean(
|
||||
(self.image_LM[i] * np.log(2.) + self.nlls_I2L[i] + tf.log(0.1) - nlls_L2I[i] / (32. * 32 * 3)) ** 2.)
|
||||
else:
|
||||
consistent_loss = tf.reduce_mean(
|
||||
(self.image_LM[i] * np.log(2.) + (self.nlls_I2L[i] + tf.log(0.1) - nlls_L2I[i]) / 3072. + args.bias) ** 2.)
|
||||
|
||||
nlls_L2I_train_bpd_list[i] = nlls_L2I_train_bpd
|
||||
nlls_L2I_test_bpd_list[i] = nlls_L2I_test_bpd
|
||||
consistent_loss_list[i] = consistent_loss
|
||||
nlls_I2L_batchMean = tf.reduce_mean(self.nlls_I2L[i])
|
||||
overall_cost_I2L = nlls_I2L_batchMean + (self.trade_off_I2L ** 2.) * consistent_loss
|
||||
overall_cost_L2I = nlls_L2I_train_bpd + (self.trade_off_L2I ** 2.) * consistent_loss
|
||||
nlls_I2L_batchMean_list[i] = nlls_I2L_batchMean
|
||||
overall_cost_I2L_list[i] = overall_cost_I2L
|
||||
overall_cost_L2I_list[i] = overall_cost_L2I
|
||||
|
||||
if oneside is None:
|
||||
grads_I2L_list[i] = tf.gradients(overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
|
||||
grads_L2I_list[i] = tf.gradients(overall_cost_L2I, self.Worker_L2I.all_params)
|
||||
elif oneside == 'I2L':
|
||||
grads_I2L_list[i] = tf.gradients(overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
|
||||
elif oneside == 'L2I':
|
||||
grads_L2I_list[i] = tf.gradients(overall_cost_L2I, self.Worker_L2I.all_params)
|
||||
|
||||
with tf.device('/gpu:0'):
|
||||
for i in range(1, args.nr_gpu):
|
||||
nlls_L2I_train_bpd_list[0] += nlls_L2I_train_bpd_list[i]
|
||||
nlls_L2I_test_bpd_list[0] += nlls_L2I_test_bpd_list[i]
|
||||
consistent_loss_list[0] += consistent_loss_list[i]
|
||||
overall_cost_I2L_list[0] += overall_cost_I2L_list[i]
|
||||
overall_cost_L2I_list[0] += overall_cost_L2I_list[i]
|
||||
nlls_I2L_batchMean_list[0] += nlls_I2L_batchMean_list[i]
|
||||
|
||||
if oneside != 'L2I':
|
||||
for j in range(len(grads_I2L_list[0])):
|
||||
grads_I2L_list[0][j] += grads_I2L_list[i][j]
|
||||
if oneside != 'I2L':
|
||||
for j in range(len(grads_L2I_list[0])):
|
||||
grads_L2I_list[0][j] += grads_L2I_list[i][j]
|
||||
|
||||
if oneside != 'L2I':
|
||||
for j in range(len(grads_I2L_list[0])):
|
||||
grads_I2L_list[0][j] /= (args.nr_gpu * 1.)
|
||||
|
||||
if oneside != 'I2L':
|
||||
for j in range(len(grads_L2I_list[0])):
|
||||
grads_L2I_list[0][j] /= (args.nr_gpu * 1.)
|
||||
|
||||
|
||||
if oneside is None:
|
||||
self.Worker_I2L.model.Update(grads_I2L_list[0])
|
||||
self.Worker_L2I.Update(grads_L2I_list[0])
|
||||
elif oneside == 'I2L':
|
||||
self.Worker_I2L.model.Update(grads_I2L_list[0])
|
||||
elif oneside == 'L2I':
|
||||
self.Worker_L2I.Update(grads_L2I_list[0])
|
||||
|
||||
self.nlls_L2I_train_bpd = nlls_L2I_train_bpd_list[0] / args.nr_gpu
|
||||
self.nlls_L2I_test_bpd = nlls_L2I_test_bpd_list[0] / args.nr_gpu
|
||||
self.consistent_loss = consistent_loss_list[0] /args.nr_gpu
|
||||
self.nlls_I2L_batchMean = nlls_I2L_batchMean_list[0] / args.nr_gpu
|
||||
self.overall_cost_I2L = overall_cost_I2L_list[0] / args.nr_gpu
|
||||
self.overall_cost_L2I = overall_cost_L2I_list[0] / args.nr_gpu
|
||||
|
||||
# Build the sampler
|
||||
self.Worker_L2I.build_sample_from_model()
|
||||
|
||||
def step(self, images, labels, LMscores, currEpoch, use_soft_label=0):
|
||||
fetches = [self.nlls_I2L_batchMean, self.nlls_L2I_train_bpd, self.nlls_L2I_test_bpd, self.consistent_loss,
|
||||
self.overall_cost_I2L, self.overall_cost_L2I]
|
||||
if args.oneside is None:
|
||||
fetches.append(self.Worker_I2L.model.update_ops)
|
||||
fetches.append(self.Worker_L2I.update_ops)
|
||||
elif args.oneside == 'I2L':
|
||||
fetches.append(self.Worker_I2L.model.update_ops)
|
||||
elif args.oneside == 'L2I':
|
||||
fetches.append(self.Worker_L2I.update_ops)
|
||||
else:
|
||||
raise Exception('Currently, only None | I2L | L2I are supported')
|
||||
|
||||
feed_dict={
|
||||
# self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
# self.Worker_I2L.model.input_label: labels[:,None],
|
||||
self.Worker_I2L.model.lrn_rate: self.get_I2L_lr(),
|
||||
self.Worker_I2L.model.needImgAug: True,
|
||||
self.Worker_L2I.tf_lr: self.get_L2I_lr(),
|
||||
# self.image_LM: LMscores,
|
||||
self.trade_off_I2L: args.trade_off_I2L,
|
||||
self.trade_off_L2I: args.trade_off_L2I
|
||||
}
|
||||
|
||||
splitted_image = np.split(images.astype('float32'), args.nr_gpu)
|
||||
splitted_label = np.split(labels, args.nr_gpu)
|
||||
splitted_LM = np.split(LMscores, args.nr_gpu)
|
||||
|
||||
feed_dict.update({self.image_LM[i]: splitted_LM[i] for i in range(args.nr_gpu)})
|
||||
feed_dict.update({self.Worker_I2L.model.input_image[i]: splitted_image[i] for i in range(args.nr_gpu)})
|
||||
feed_dict.update({self.Worker_I2L.model.input_label[i]: splitted_label[i][:,None] for i in range(args.nr_gpu)})
|
||||
# Deal with xs and ys:
|
||||
x = np.cast[np.float32]((images - 127.5) / 127.5)
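# (images - 127.5) / 127.5 rescales uint8 pixels from [0, 255] to [-1, 1], the range the
# PixelCNN++ discretized-logistic loss expects.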
|
||||
x = np.split(x, self.Worker_L2I.args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.xs[i] : x[i] for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
if (use_soft_label == 2) or (use_soft_label == 1 and np.random.rand() < 0.8):
|
||||
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.needImgAug: True
|
||||
})
|
||||
if use_soft_label == 2:
|
||||
soft_labels_ -= 0.1
|
||||
feed_dict.update({self.Worker_L2I.hs[i]: soft_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
else:
|
||||
y = np.split(labels, self.Worker_L2I.args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.ys[i] : y[i] for i in range(self.Worker_L2I.args.nr_gpu)})
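# use_soft_label modes, as read from the branches above: 0 always feeds one-hot labels to the
# generator; 1 feeds the classifier's soft predictions with probability 0.8 (one-hot otherwise);
# 2 always feeds the soft predictions shifted down by 0.1.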
|
||||
|
||||
if args.oneside is None:
|
||||
nlls_I2L_mean, nlls_L2I_mean, nlls_L2I_mean_test, consistent_loss, overall_cost_I2L, overall_cost_L2I, _, _ = \
|
||||
self.sess.run(fetches, feed_dict)
|
||||
else:
|
||||
nlls_I2L_mean, nlls_L2I_mean, nlls_L2I_mean_test, consistent_loss, overall_cost_I2L, overall_cost_L2I, _, = \
|
||||
self.sess.run(fetches, feed_dict)
|
||||
if self.train_uidx % args.show_interval == (args.show_interval - 1):
|
||||
print('iter={}, I2L={}, L2I={}/{}, Consistent={}, Overall_I2L={}, Overall_L2I={}'.format(
|
||||
self.train_uidx, '{0:.4f}'.format(nlls_I2L_mean), '{0:.4f}'.format(nlls_L2I_mean),
|
||||
'{0:.4f}'.format(nlls_L2I_mean_test), '{0:.4f}'.format(consistent_loss), '{0:.4f}'.format(overall_cost_I2L),
|
||||
'{0:.4f}'.format(overall_cost_L2I)
|
||||
))
|
||||
self.train_uidx += 1
|
||||
|
||||
def data_dependent_init(self):
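# One-off data-dependent initialization: all variables are initialized while the
# weight-normalized PixelCNN++ layers see a single init batch (presumably the init pass of
# Salimans & Kingma, 2016); the training iterator is then reset.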
|
||||
global_init = tf.global_variables_initializer()
|
||||
_images, _labels, _ = train_data_iterator.next(self.Worker_L2I.args.init_batch_size)
|
||||
initializer_dict = {
|
||||
self.Worker_L2I.x_init: (np.cast[np.float32](_images) - 127.5)/127.5,
|
||||
self.Worker_L2I.y_init: _labels
|
||||
}
|
||||
train_data_iterator.reset()
|
||||
self.sess.run(global_init, initializer_dict)
|
||||
|
||||
def L2I_TestNll(self, alpha_=1.):
|
||||
all_testnll = []
|
||||
for images, labels in test_data_iterator:
|
||||
feed_dict = {}
|
||||
x = np.cast[np.float32]((images - 127.5) / 127.5)
|
||||
x = np.split(x, args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.xs[i]: x[i] for i in range(args.nr_gpu)})
|
||||
if args.useSoftLabel == 1:
|
||||
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.needImgAug: False
|
||||
})
|
||||
one_hot_labels_ = np.zeros((args.batch_size, 10), dtype=np.float32)
|
||||
one_hot_labels_[np.arange(args.batch_size), labels] = 1.
|
||||
feed_dict.update({self.Worker_L2I.hs[i]: (1. - alpha_)*soft_labels_+alpha_*one_hot_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
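# alpha_ interpolates the label conditioning fed to the generator: alpha_=1 uses only the
# ground-truth one-hot labels, alpha_=0 uses only the classifier's soft predictions.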
|
||||
else:
|
||||
y = np.split(labels, args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.ys[i]: y[i] for i in range(args.nr_gpu)})
|
||||
all_testnll.append(self.sess.run([self.nlls_L2I_test_bpd], feed_dict))
|
||||
avg_testnll = np.mean(all_testnll)
|
||||
print('testnll=%f' % avg_testnll)
|
||||
|
||||
def build_saver(self):
|
||||
self.saver = tf.train.Saver(max_to_keep=None)
|
||||
#tf.reset_default_graph()
|
||||
if args.load_params is not None:
|
||||
print('Reload from ', args.save_dir)
|
||||
self.saver.restore(self.sess, args.save_dir + '/' + args.load_params)
|
||||
print('Done')
|
||||
else:
|
||||
print('Start to initialize the two models')
|
||||
self.data_dependent_init()
|
||||
print('Done')
|
||||
|
||||
def _steal_L2I(self):
|
||||
if args.steal_params_L2I is not None:
|
||||
# try to retrieve parameters that are not under the I2L/ scope from a well-trained model
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_L2I, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(tf.global_variables()):
|
||||
if v.name in old_model and not v.name.startswith('I2L/'):
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(tf.global_variables()))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
|
||||
|
||||
'''
|
||||
# this version can only reload "trainable vars"
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_L2I, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(self.Worker_L2I.all_params):
|
||||
if v.name in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(self.Worker_L2I.all_params))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
|
||||
'''
|
||||
|
||||
def _steal_I2L(self):
|
||||
if args.steal_params_I2L is not None:
|
||||
# try to retrieve parameters from a well-trained model
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_I2L, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
|
||||
if v.name[4:] in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name[4:]][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(self.Worker_I2L.model.all_variables))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_I2L))
|
||||
|
||||
def _reload_from_pkl(self, filename):
|
||||
success_ = 0
|
||||
import pickle
|
||||
|
||||
with open(filename, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
|
||||
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
|
||||
if v.name in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(self.Worker_I2L.model.all_variables))
|
||||
|
||||
for vidx, v in enumerate(self.Worker_L2I.all_params):
|
||||
if v.name in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(tf.global_variables()))
|
||||
|
||||
print('Retrieve %d / %d parameters from %s' % (success_, len(old_model), filename))
|
||||
|
||||
def train(self):
|
||||
# do not delete the following three lines
|
||||
# self._reload_from_pkl('warm_values')
|
||||
# self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
|
||||
# return
|
||||
if args.load_params is None:
|
||||
self._steal_L2I()
|
||||
self._steal_I2L()
|
||||
self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
|
||||
for epoch in range(args.max_epochs):
|
||||
self.current_epoch = epoch
|
||||
for images, labels, LMscores in train_data_iterator:
|
||||
self.step(images, labels, LMscores, epoch, args.useSoftLabel)
|
||||
|
||||
# if epoch % args.valid_interval == (args.valid_interval - 1):
|
||||
# self.Worker_I2L.Valid(test_data_iterator, self.sess)
|
||||
# self.L2I_TestNll()
|
||||
|
||||
if epoch % args.save_interval == (args.save_interval - 1):
|
||||
self.saver.save(self.sess, args.save_dir + '/params_' + str(epoch) + 'uidx' + str(self.train_uidx) + '.ckpt')
|
||||
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
|
||||
|
||||
def valid_I2L(self):
|
||||
self.Worker_I2L.Valid(test_data_iterator, self.sess)
|
||||
|
||||
def valid_L2I(self):
|
||||
self.L2I_TestNll()
|
||||
'''
|
||||
for alpha_ in range(11):
|
||||
print('alpha=%f' % (alpha_ * 0.1))
|
||||
self.L2I_TestNll(alpha_ * 0.1)
|
||||
'''
|
||||
|
||||
def valid_ImgGen(self):
|
||||
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
|
||||
|
||||
def dump_model_to_pkl(self):
|
||||
warm_models = {}
|
||||
print('Classifier')
|
||||
classifier_size = len(self.Worker_I2L.model.all_variables)
|
||||
for idx, v in enumerate(self.Worker_I2L.model.all_variables):
|
||||
vv = self.sess.run([v])
|
||||
warm_models[v.name] = vv
|
||||
if idx % 10 == 0:
|
||||
print('{}-{}'.format(idx, classifier_size))
|
||||
|
||||
print('Generator')
|
||||
generator_size = len(self.Worker_L2I.all_params)
|
||||
for idx, v in enumerate(self.Worker_L2I.all_params):
|
||||
vv = self.sess.run([v])
|
||||
warm_models[v.name] = vv
|
||||
if idx % 10 == 0:
|
||||
print('{}-{}'.format(idx, generator_size))
|
||||
|
||||
import pickle
|
||||
with open('warm_values', 'wb') as f:
|
||||
pickle.dump(warm_models, f, protocol=2)
|
||||
|
||||
|
||||
|
||||
def main(_):
|
||||
#L2Ipath='./pxpp_c_2.95/params_cifar.ckpt'
|
||||
monitor_ = monitor()
|
||||
monitor_.build_saver()
|
||||
|
||||
if args.mode == 'train':
|
||||
monitor_.train()
|
||||
elif args.mode == 'I2L':
|
||||
monitor_.valid_I2L()
|
||||
elif args.mode == 'L2I':
|
||||
monitor_.valid_L2I()
|
||||
elif args.mode == 'ImgGen':
|
||||
monitor_.valid_ImgGen()
|
||||
else:
|
||||
print('Unsupported mode: ' + args.mode)
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.app.run()
|
|
@ -0,0 +1,332 @@
|
|||
import time
|
||||
import sys
|
||||
import os
|
||||
|
||||
import cifar_input
|
||||
import numpy as np
|
||||
import resnet_model_basic as resnet_model
|
||||
import tensorflow as tf
|
||||
import data.cifar10_data as cifar10_data
|
||||
import data2.cifar10_data as cifar_10data2
|
||||
|
||||
import json
|
||||
|
||||
from worker_I2L import worker_I2L, lr_I2L
|
||||
from worker_L2I import worker_L2I
|
||||
import argparse
|
||||
import time
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
parser = argparse.ArgumentParser()
|
||||
# data I/O
|
||||
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
|
||||
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
|
||||
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
|
||||
parser.add_argument('-t', '--save_interval', type=int, default=2, help='Every how many epochs to write checkpoint/samples?')
|
||||
parser.add_argument('--valid_interval', type=int, default=1, help='Every how many epochs to validate?')
|
||||
parser.add_argument('-r', '--load_params', type=str, default=None, help='The detailed model name')
|
||||
|
||||
|
||||
# model
|
||||
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
|
||||
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
|
||||
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
|
||||
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
|
||||
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
|
||||
parser.add_argument('--trade_off_I2L', type=float, default=5e-3, help='the consistency trade-off')
|
||||
parser.add_argument('--trade_off_L2I', type=float, default=0.3, help='the consistency trade-off')
|
||||
parser.add_argument('-w', '--use_wide_resnet', dest='use_wide_resnet', action='store_true', help='Use the wide ResNet classifier?')
|
||||
parser.add_argument('--show_interval', type=int, default=100, help='How often (in iterations) to print training statistics')
|
||||
parser.add_argument('--steal_params_L2I', type=str, default=None, help='Provide the file, which stores the warm values of L2I')
|
||||
parser.add_argument('--steal_params_I2L', type=str, default=None, help='Provide the file, which stores the warm values of I2L')
|
||||
parser.add_argument('--freezeL2I', dest='freezeL2I', action='store_true', help='Freeze L2I to quickly train L2I')
|
||||
|
||||
# optimization
|
||||
parser.add_argument('--learning_rate_I2L', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
|
||||
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
|
||||
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
|
||||
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
|
||||
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
|
||||
parser.add_argument('-g', '--nr_gpu', type=int, default=1, help='How many GPUs to distribute the training across?')
|
||||
parser.add_argument('--num_classes', type=int, default=10, help='number of classes')
|
||||
parser.add_argument('--L2I_normalization', dest='L2I_normalization', action='store_true', help='Use L2I normalization')
|
||||
parser.add_argument('--L2IuseSGD', dest='L2IuseSGD', action='store_true', help='Whether to use pure SGD to tune L2I')
|
||||
parser.add_argument('--useSoftLabel', type=int, default=0, help='0: no use | 1: use | 2: -0.1')
|
||||
parser.add_argument('--bias', type=float, default=0.0, help='introduce the bias')
|
||||
|
||||
|
||||
# evaluation
|
||||
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
|
||||
parser.add_argument('--mode', type=str, default='train', help='train | I2L | L2I | ImgGen' )
|
||||
|
||||
# reproducibility
|
||||
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
|
||||
args = parser.parse_args()
|
||||
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
|
||||
|
||||
DataLoader = cifar10_data.DataLoader
|
||||
DataLoader_train = cifar_10data2.DataLoader
|
||||
rng = np.random.RandomState(args.seed)
|
||||
train_data_iterator = DataLoader_train(args.data_dir, 'train', args.batch_size * args.nr_gpu, './cifar10_data/cifar10-LMscore', './cifar10_data/cifar10-CLMscore', rng=rng, shuffle=True, return_labels=True)
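# Assumption (not documented here): the 'cifar10-LMscore' / 'cifar10-CLMscore' directories hold
# precomputed per-image language-model scores (unconditional and class-conditional) that the
# iterator returns alongside each batch and that feed the image_LM placeholder.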
|
||||
test_data_iterator = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=True)
|
||||
|
||||
|
||||
class monitor(object):
|
||||
def __init__(self):
|
||||
if not os.path.exists(args.save_dir):
|
||||
os.makedirs(args.save_dir)
|
||||
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
|
||||
self.Worker_I2L = worker_I2L(args)
|
||||
|
||||
self.image_LM = tf.placeholder(tf.float32, shape=(args.batch_size,))
|
||||
self.trade_off_I2L = tf.placeholder(tf.float32, shape=())
|
||||
self.trade_off_L2I = tf.placeholder(tf.float32, shape=())
|
||||
|
||||
self.I2L_grads = []
|
||||
self.train_uidx = 0
|
||||
self._build_onestep()
|
||||
|
||||
self.lr_l2i = self.Worker_L2I.args.learning_rate
|
||||
self.current_epoch = 0
|
||||
|
||||
self.assign_op = lambda ref_, val_: tf.assign(ref_, val_)
|
||||
|
||||
|
||||
def get_I2L_lr(self):
|
||||
if args.use_wide_resnet:
|
||||
step_wise = [60, 120, 160]
|
||||
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
|
||||
if self.current_epoch < step_wise[0]:
|
||||
return args.learning_rate_I2L
|
||||
elif self.current_epoch < step_wise[1]:
|
||||
return args.learning_rate_I2L * 0.2
|
||||
elif self.current_epoch < step_wise[2]:
|
||||
return args.learning_rate_I2L * 0.04
|
||||
else:
|
||||
return args.learning_rate_I2L * 0.008
|
||||
else:
|
||||
step_wise = [102, 153, 204]
|
||||
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
|
||||
if self.current_epoch < step_wise[0]:
|
||||
return args.learning_rate_I2L
|
||||
elif self.current_epoch < step_wise[1]:
|
||||
return args.learning_rate_I2L * 0.1
|
||||
elif self.current_epoch < step_wise[2]:
|
||||
return args.learning_rate_I2L * 0.01
|
||||
else:
|
||||
return args.learning_rate_I2L * 0.001
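# Step-wise classifier LR schedule: the base rate drops at epochs 60/120/160 (x0.2, x0.04,
# x0.008) for the wide ResNet, and at epochs 102/153/204 (x0.1, x0.01, x0.001) otherwise.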
|
||||
|
||||
def get_L2I_lr(self):
|
||||
self.lr_l2i *= self.Worker_L2I.args.lr_decay
|
||||
return self.lr_l2i
|
||||
|
||||
|
||||
def __del__(self):
|
||||
self.sess.close()
|
||||
|
||||
def _build_onestep(self):
|
||||
# Calculate all the costs and gradients
|
||||
# Let us NOT use weight decay, since we already have a regularization term
|
||||
# self.weightDecay_I2L = self.Worker_I2L.model.GetWeightDecay()
|
||||
self.nlls_I2L = self.Worker_I2L.model.nlls
|
||||
self.soft_labels = self.Worker_I2L.model.predictions # this is the soft labels [optional, may not use it]
|
||||
nlls_L2I, loss_gen_test = self.Worker_L2I.GetLoss()
|
||||
self.nlls_L2I_train_bpd = tf.reduce_mean(nlls_L2I) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu )
|
||||
self.nlls_L2I_test_bpd = tf.reduce_mean(loss_gen_test) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu * args.batch_size)
|
||||
if args.L2I_normalization:
|
||||
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + self.nlls_I2L + tf.log(0.1) - nlls_L2I / (32. * 32 * 3)) ** 2.)
|
||||
else:
|
||||
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + (self.nlls_I2L + tf.log(0.1))/3072. + args.bias) ** 2.)
|
||||
self.nlls_I2L_batchMean = tf.reduce_mean(self.nlls_I2L)
|
||||
|
||||
self.overall_cost_I2L = self.nlls_I2L_batchMean + (self.trade_off_I2L ** 2.) * self.consistent_loss
|
||||
self.overall_cost_L2I = self.nlls_L2I_train_bpd + (self.trade_off_L2I ** 2.) * self.consistent_loss
|
||||
#self.overall_cost_L2I = self.nlls_I2L_batchMean + self.nlls_L2I_train_bpd + self.weightDecay_I2L + self.trade_off * self.consistent_loss
|
||||
|
||||
grads_I2L = tf.gradients(self.overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
|
||||
grads_L2I = tf.gradients(self.overall_cost_L2I, self.Worker_L2I.all_params)
|
||||
|
||||
# Update the parameters
|
||||
self.Worker_I2L.model.Update(grads_I2L)
|
||||
self.Worker_L2I.Update(grads_L2I, args.L2IuseSGD)
|
||||
|
||||
# Build the sampler
|
||||
self.Worker_L2I.build_sample_from_model()
|
||||
|
||||
def step(self, images, labels, LMscores, currEpoch, use_soft_label=0):
|
||||
fetches = [self.nlls_I2L_batchMean, self.consistent_loss,
|
||||
self.overall_cost_I2L,
|
||||
self.Worker_I2L.model.update_ops]
|
||||
|
||||
feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.input_label: labels[:,None],
|
||||
self.Worker_I2L.model.lrn_rate: self.get_I2L_lr(),
|
||||
self.Worker_I2L.model.needImgAug: True,
|
||||
self.Worker_L2I.tf_lr: self.get_L2I_lr(),
|
||||
self.image_LM: LMscores,
|
||||
self.trade_off_I2L: args.trade_off_I2L,
|
||||
self.trade_off_L2I: args.trade_off_L2I
|
||||
}
|
||||
# Deal with xs and ys:
|
||||
x = np.cast[np.float32]((images - 127.5) / 127.5)
|
||||
x = np.split(x, self.Worker_L2I.args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.xs[i] : x[i] for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
if (use_soft_label == 2) or (use_soft_label == 1 and np.random.rand() < 0.8):
|
||||
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.needImgAug: True
|
||||
})
|
||||
if use_soft_label == 2:
|
||||
soft_labels_ -= 0.1
|
||||
feed_dict.update({self.Worker_L2I.hs[i]: soft_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
else:
|
||||
y = np.split(labels, self.Worker_L2I.args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.ys[i] : y[i] for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
|
||||
|
||||
nlls_I2L_mean, consistent_loss, overall_cost_I2L, _ = \
|
||||
self.sess.run(fetches, feed_dict)
|
||||
|
||||
if self.train_uidx % args.show_interval == (args.show_interval - 1):
|
||||
print('iter={}, I2L={}, Consistent={}, Overall_I2L={}'.format(
|
||||
self.train_uidx, '{0:.6f}'.format(nlls_I2L_mean), '{0:.6f}'.format(consistent_loss), '{0:.6f}'.format(overall_cost_I2L),
|
||||
))
|
||||
self.train_uidx += 1
|
||||
|
||||
def data_dependent_init(self):
|
||||
global_init = tf.global_variables_initializer()
|
||||
_images, _labels, _ = train_data_iterator.next(self.Worker_L2I.args.init_batch_size)
|
||||
initializer_dict = {
|
||||
self.Worker_L2I.x_init: (np.cast[np.float32](_images) - 127.5)/127.5,
|
||||
self.Worker_L2I.y_init: _labels
|
||||
}
|
||||
train_data_iterator.reset()
|
||||
self.sess.run(global_init, initializer_dict)
|
||||
|
||||
def L2I_TestNll(self, alpha_=1.):
|
||||
all_testnll = []
|
||||
for images, labels in test_data_iterator:
|
||||
feed_dict = {}
|
||||
x = np.cast[np.float32]((images - 127.5) / 127.5)
|
||||
x = np.split(x, args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.xs[i]: x[i] for i in range(args.nr_gpu)})
|
||||
if args.useSoftLabel == 1:
|
||||
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.needImgAug: False
|
||||
})
|
||||
one_hot_labels_ = np.zeros((args.batch_size, 10), dtype=np.float32)
|
||||
one_hot_labels_[np.arange(args.batch_size), labels] = 1.
|
||||
feed_dict.update({self.Worker_L2I.hs[i]: (1. - alpha_)*soft_labels_+alpha_*one_hot_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
else:
|
||||
y = np.split(labels, args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.ys[i]: y[i] for i in range(args.nr_gpu)})
|
||||
all_testnll.append(self.sess.run([self.nlls_L2I_test_bpd], feed_dict))
|
||||
avg_testnll = np.mean(all_testnll)
|
||||
print('[L2I], testnll={0:.6f}'.format(avg_testnll))
|
||||
|
||||
def build_saver(self):
|
||||
self.saver = tf.train.Saver(max_to_keep=None)
|
||||
if args.load_params is not None:
|
||||
print('Reload from ', args.save_dir)
|
||||
self.saver.restore(self.sess, args.save_dir + '/' + args.load_params)
|
||||
print('Done')
|
||||
else:
|
||||
print('Start to initialize the two models')
|
||||
self.data_dependent_init()
|
||||
print('Done')
|
||||
|
||||
def _steal_L2I(self):
|
||||
if args.steal_params_L2I is not None:
|
||||
# try to retrieve parameters that are not under the I2L/ scope from a well-trained model
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_L2I, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(tf.global_variables()):
|
||||
if v.name in old_model and not v.name.startswith('I2L/'):
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(tf.global_variables()))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
|
||||
|
||||
'''
|
||||
# this version can only reload "trainable vars"
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_L2I, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(self.Worker_L2I.all_params):
|
||||
if v.name in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(self.Worker_L2I.all_params))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
|
||||
'''
|
||||
|
||||
def _steal_I2L(self):
|
||||
if args.steal_params_I2L is not None:
|
||||
# try to retrieve parameters from a well-trained model
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_I2L, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
|
||||
if v.name[4:] in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name[4:]][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(self.Worker_I2L.model.all_variables))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_I2L))
|
||||
|
||||
def train(self):
|
||||
if args.load_params is None:
|
||||
self._steal_L2I()
|
||||
self._steal_I2L()
|
||||
self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
|
||||
for epoch in range(args.max_epochs):
|
||||
self.current_epoch = epoch
|
||||
for images, labels, LMscores, CLMscores in train_data_iterator:
|
||||
self.step(images, labels, LMscores - CLMscores, epoch, args.useSoftLabel)
|
||||
|
||||
#if epoch % args.valid_interval == (args.valid_interval - 1):
|
||||
#self.Worker_I2L.Valid(test_data_iterator, self.sess)
|
||||
#self.L2I_TestNll()
|
||||
|
||||
if epoch % args.save_interval == (args.save_interval - 1):
|
||||
self.saver.save(self.sess, args.save_dir + '/params_' + str(epoch) + 'uidx' + str(self.train_uidx) + '.ckpt')
|
||||
#self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
|
||||
|
||||
def valid_I2L(self):
|
||||
self.Worker_I2L.Valid(test_data_iterator, self.sess)
|
||||
|
||||
def valid_L2I(self):
|
||||
self.L2I_TestNll()
|
||||
'''
|
||||
for alpha_ in range(11):
|
||||
print('alpha=%f' % (alpha_ * 0.1))
|
||||
self.L2I_TestNll(alpha_ * 0.1)
|
||||
'''
|
||||
def valid_ImgGen(self):
|
||||
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
|
||||
|
||||
def main(_):
|
||||
#L2Ipath='./pxpp_c_2.95/params_cifar.ckpt'
|
||||
monitor_ = monitor()
|
||||
monitor_.build_saver()
|
||||
if args.mode == 'train':
|
||||
monitor_.train()
|
||||
elif args.mode == 'I2L':
|
||||
monitor_.valid_I2L()
|
||||
elif args.mode == 'L2I':
|
||||
monitor_.valid_L2I()
|
||||
elif args.mode == 'ImgGen':
|
||||
monitor_.valid_ImgGen()
|
||||
else:
|
||||
print('Unsupported mode: ' + args.mode)
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.app.run()
|
|
@ -0,0 +1,358 @@
|
|||
import time
|
||||
import sys
|
||||
import os
|
||||
|
||||
import cifar_input
|
||||
import numpy as np
|
||||
import resnet_model_basic as resnet_model
|
||||
import tensorflow as tf
|
||||
import data.cifar10_data as cifar10_data
|
||||
import data2.cifar10_data as cifar_10data2
|
||||
import data4.cifar10_data as cifar_10data3
|
||||
import json
|
||||
|
||||
|
||||
from worker_I2L import worker_I2L, lr_I2L
|
||||
from worker_L2I import worker_L2I
|
||||
import argparse
|
||||
import time
|
||||
|
||||
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
parser = argparse.ArgumentParser()
|
||||
# data I/O
|
||||
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
|
||||
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
|
||||
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
|
||||
parser.add_argument('-t', '--save_interval', type=int, default=2, help='Every how many epochs to write checkpoint/samples?')
|
||||
parser.add_argument('--valid_interval', type=int, default=1, help='Every how many epochs to validate?')
|
||||
parser.add_argument('-r', '--load_params', type=str, default=None, help='The detailed model name')
|
||||
|
||||
|
||||
# model
|
||||
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
|
||||
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
|
||||
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
|
||||
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
|
||||
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
|
||||
parser.add_argument('--trade_off_I2L', type=float, default=5e-3, help='the consistency trade-off')
|
||||
parser.add_argument('--trade_off_L2I', type=float, default=0.3, help='the consistency trade-off')
|
||||
parser.add_argument('-w', '--use_wide_resnet', dest='use_wide_resnet', action='store_true', help='Use the wide ResNet classifier?')
|
||||
parser.add_argument('--show_interval', type=int, default=100, help='How often (in iterations) to print training statistics')
|
||||
parser.add_argument('--steal_params_L2I', type=str, default=None, help='Provide the file, which stores the warm values of L2I')
|
||||
parser.add_argument('--steal_params_I2L', type=str, default=None, help='Provide the file, which stores the warm values of I2L')
|
||||
|
||||
|
||||
# optimization
|
||||
parser.add_argument('--learning_rate_I2L', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
|
||||
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
|
||||
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
|
||||
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
|
||||
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
|
||||
parser.add_argument('-g', '--nr_gpu', type=int, default=1, help='How many GPUs to distribute the training across?')
|
||||
parser.add_argument('--num_classes', type=int, default=10, help='number of classes')
|
||||
parser.add_argument('--L2I_normalization', dest='L2I_normalization', action='store_true', help='Use L2I normalization')
|
||||
parser.add_argument('--L2IuseSGD', dest='L2IuseSGD', action='store_true', help='Whether to use pure SGD to tune L2I')
|
||||
parser.add_argument('--useSoftLabel', type=int, default=0, help='0: no use | 1: use | 2: -0.1')
|
||||
parser.add_argument('--bias', type=float, default=0.0, help='introduce the bias')
|
||||
|
||||
|
||||
# evaluation
|
||||
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
|
||||
parser.add_argument('--mode', type=str, default='train', help='train | I2L | L2I | ImgGen' )
|
||||
|
||||
# reproducibility
|
||||
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
|
||||
args = parser.parse_args()
|
||||
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
|
||||
|
||||
DataLoader = cifar_10data3.DataLoader
|
||||
DataLoader_train = cifar_10data2.DataLoader
|
||||
rng = np.random.RandomState(args.seed)
|
||||
train_data_iterator = DataLoader_train(args.data_dir, 'train', args.batch_size * args.nr_gpu, './cifar10_data/cifar10-LMscore', rng=rng, shuffle=True, return_labels=True)
|
||||
test_data_iterator = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=True,final=4)
|
||||
|
||||
|
||||
class monitor(object):
|
||||
def __init__(self):
|
||||
if not os.path.exists(args.save_dir):
|
||||
os.makedirs(args.save_dir)
|
||||
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
|
||||
self.Worker_I2L = worker_I2L(args)
|
||||
|
||||
self.image_LM = tf.placeholder(tf.float32, shape=(args.batch_size,))
|
||||
self.trade_off_I2L = tf.placeholder(tf.float32, shape=())
|
||||
self.trade_off_L2I = tf.placeholder(tf.float32, shape=())
|
||||
|
||||
self.I2L_grads = []
|
||||
self.train_uidx = 0
|
||||
self._build_onestep()
|
||||
|
||||
self.lr_l2i = self.Worker_L2I.args.learning_rate
|
||||
self.current_epoch = 0
|
||||
|
||||
self.assign_op = lambda ref_, val_: tf.assign(ref_, val_)
|
||||
'''
|
||||
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
|
||||
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
|
||||
self.saver = tf.train.Saver()
|
||||
if load_warm_start_models is None:
|
||||
print('Start to retrieve the (warm) initial L2I model')
|
||||
self.saver.restore(self.sess, L2Ipath)
|
||||
print('Done')
|
||||
self.Worker_I2L = worker_I2L(args)
|
||||
if load_warm_start_models is None:
|
||||
print('Start to initialize I2L model')
|
||||
self.sess.run(tf.variables_initializer(self.Worker_I2L.model.all_variables, name='coldInit_I2L_model'))
|
||||
print('Done')
|
||||
|
||||
if load_warm_start_models:
|
||||
self.saver.restore(self.sess, load_warm_start_models)
|
||||
|
||||
self.image_LM = tf.placeholder(tf.float32, shape=(args.batch_size,))
|
||||
self.trade_off = tf.placeholder(tf.float32, shape=())
|
||||
|
||||
self.I2L_grads = []
|
||||
self.train_uidx = 0
|
||||
'''
|
||||
|
||||
|
||||
def get_I2L_lr(self):
|
||||
if args.use_wide_resnet:
|
||||
step_wise = [60, 120, 160]
|
||||
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
|
||||
if self.current_epoch < step_wise[0]:
|
||||
return args.learning_rate_I2L
|
||||
elif self.current_epoch < step_wise[1]:
|
||||
return args.learning_rate_I2L * 0.2
|
||||
elif self.current_epoch < step_wise[2]:
|
||||
return args.learning_rate_I2L * 0.04
|
||||
else:
|
||||
return args.learning_rate_I2L * 0.008
|
||||
else:
|
||||
step_wise = [102, 153, 204]
|
||||
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
|
||||
if self.current_epoch < step_wise[0]:
|
||||
return args.learning_rate_I2L
|
||||
elif self.current_epoch < step_wise[1]:
|
||||
return args.learning_rate_I2L * 0.1
|
||||
elif self.current_epoch < step_wise[2]:
|
||||
return args.learning_rate_I2L * 0.01
|
||||
else:
|
||||
return args.learning_rate_I2L * 0.001
|
||||
|
||||
def get_L2I_lr(self):
|
||||
self.lr_l2i *= self.Worker_L2I.args.lr_decay
|
||||
return self.lr_l2i
|
||||
|
||||
|
||||
def __del__(self):
|
||||
self.sess.close()
|
||||
|
||||
def _build_onestep(self):
|
||||
# Calculate all the costs and gradients
|
||||
# Let us NOT use weight decay, since we already have a regularization term
|
||||
# self.weightDecay_I2L = self.Worker_I2L.model.GetWeightDecay()
|
||||
self.nlls_I2L = self.Worker_I2L.model.nlls
|
||||
self.soft_labels = self.Worker_I2L.model.predictions # this is the soft labels [optional, may not use it]
|
||||
nlls_L2I, loss_gen_test = self.Worker_L2I.GetLoss()
|
||||
self.nlls_L2I_train_bpd = tf.reduce_mean(nlls_L2I) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu )
|
||||
self.nlls_L2I_test_bpd = tf.reduce_mean(loss_gen_test) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu * args.batch_size)
|
||||
if args.L2I_normalization:
|
||||
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + self.nlls_I2L + tf.log(0.1) - nlls_L2I / (32. * 32 * 3)) ** 2.)
|
||||
else:
|
||||
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + (self.nlls_I2L + tf.log(0.1) - nlls_L2I)/3072. + args.bias) ** 2.)
|
||||
self.nlls_I2L_batchMean = tf.reduce_mean(self.nlls_I2L)
|
||||
|
||||
self.overall_cost_I2L = self.nlls_I2L_batchMean + (self.trade_off_I2L ** 2.) * self.consistent_loss
|
||||
self.overall_cost_L2I = self.nlls_L2I_train_bpd + (self.trade_off_L2I ** 2.) * self.consistent_loss
|
||||
#self.overall_cost_L2I = self.nlls_I2L_batchMean + self.nlls_L2I_train_bpd + self.weightDecay_I2L + self.trade_off * self.consistent_loss
|
||||
|
||||
grads_I2L = tf.gradients(self.overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
|
||||
grads_L2I = tf.gradients(self.overall_cost_L2I, self.Worker_L2I.all_params)
|
||||
|
||||
# Update the parameters
|
||||
self.Worker_I2L.model.Update(grads_I2L)
|
||||
self.Worker_L2I.Update(grads_L2I, args.L2IuseSGD)
|
||||
|
||||
# Build the sampler
|
||||
self.Worker_L2I.build_sample_from_model()
|
||||
|
||||
def step(self, images, labels, LMscores, currEpoch, use_soft_label=0):
|
||||
fetches = [self.nlls_I2L_batchMean, self.nlls_L2I_train_bpd, self.nlls_L2I_test_bpd, self.consistent_loss,
|
||||
self.overall_cost_I2L, self.overall_cost_L2I,
|
||||
self.Worker_I2L.model.update_ops, self.Worker_L2I.update_ops]
|
||||
feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.input_label: labels[:,None],
|
||||
self.Worker_I2L.model.lrn_rate: self.get_I2L_lr(),
|
||||
self.Worker_I2L.model.needImgAug: True,
|
||||
self.Worker_L2I.tf_lr: self.get_L2I_lr(),
|
||||
self.image_LM: LMscores,
|
||||
self.trade_off_I2L: args.trade_off_I2L if currEpoch>3 else 0.,
|
||||
self.trade_off_L2I: args.trade_off_L2I if currEpoch>3 else 0.
|
||||
}
|
||||
# Deal with xs and ys:
|
||||
x = np.cast[np.float32]((images - 127.5) / 127.5)
|
||||
x = np.split(x, self.Worker_L2I.args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.xs[i] : x[i] for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
if (use_soft_label == 2) or (use_soft_label == 1 and np.random.rand() < 0.8):
|
||||
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.needImgAug: True
|
||||
})
|
||||
if use_soft_label == 2:
|
||||
soft_labels_ -= 0.1
|
||||
feed_dict.update({self.Worker_L2I.hs[i]: soft_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
else:
|
||||
y = np.split(labels, self.Worker_L2I.args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.ys[i] : y[i] for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
nlls_I2L_mean, nlls_L2I_mean, nlls_L2I_mean_test, consistent_loss, overall_cost_I2L, overall_cost_L2I, _, _ = \
|
||||
self.sess.run(fetches, feed_dict)
|
||||
if self.train_uidx % args.show_interval == (args.show_interval - 1):
|
||||
print('iter={}, I2L={}, L2I={}/{}, Consistent={}, Overall_I2L={}, Overall_L2I={}'.format(
|
||||
self.train_uidx, '{0:.4f}'.format(nlls_I2L_mean), '{0:.4f}'.format(nlls_L2I_mean),
|
||||
'{0:.4f}'.format(nlls_L2I_mean_test), '{0:.4f}'.format(consistent_loss), '{0:.4f}'.format(overall_cost_I2L),
|
||||
'{0:.4f}'.format(overall_cost_L2I)
|
||||
))
|
||||
self.train_uidx += 1
|
||||
|
||||
def data_dependent_init(self):
|
||||
global_init = tf.global_variables_initializer()
|
||||
_images, _labels, _ = train_data_iterator.next(self.Worker_L2I.args.init_batch_size)
|
||||
initializer_dict = {
|
||||
self.Worker_L2I.x_init: (np.cast[np.float32](_images) - 127.5)/127.5,
|
||||
self.Worker_L2I.y_init: _labels
|
||||
}
|
||||
train_data_iterator.reset()
|
||||
self.sess.run(global_init, initializer_dict)
|
||||
|
||||
def L2I_TestNll(self, alpha_=1.):
|
||||
all_testnll = []
|
||||
for images, labels in test_data_iterator:
|
||||
feed_dict = {}
|
||||
x = np.cast[np.float32]((images - 127.5) / 127.5)
|
||||
x = np.split(x, args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.xs[i]: x[i] for i in range(args.nr_gpu)})
|
||||
if args.useSoftLabel == 1:
|
||||
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
|
||||
self.Worker_I2L.model.input_image: images.astype('float32'),
|
||||
self.Worker_I2L.model.needImgAug: False
|
||||
})
|
||||
one_hot_labels_ = np.zeros((args.batch_size, 10), dtype=np.float32)
|
||||
one_hot_labels_[np.arange(args.batch_size), labels] = 1.
|
||||
feed_dict.update({self.Worker_L2I.hs[i]: (1. - alpha_)*soft_labels_+alpha_*one_hot_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
|
||||
else:
|
||||
y = np.split(labels, args.nr_gpu)
|
||||
feed_dict.update({self.Worker_L2I.ys[i]: y[i] for i in range(args.nr_gpu)})
|
||||
all_testnll.append(self.sess.run([self.nlls_L2I_test_bpd], feed_dict))
|
||||
avg_testnll = np.mean(all_testnll)
|
||||
print('[L2I], testnll={0:.6f}'.format(avg_testnll))
|
||||
|
||||
def build_saver(self):
|
||||
self.saver = tf.train.Saver(max_to_keep=None)
|
||||
if args.load_params is not None:
|
||||
print('Reload from ', args.save_dir)
|
||||
self.saver.restore(self.sess, args.save_dir + '/' + args.load_params)
|
||||
print('Done')
|
||||
else:
|
||||
print('Start to initialize the two models')
|
||||
self.data_dependent_init()
|
||||
print('Done')
|
||||
|
||||
|
||||
def _steal_L2I(self):
|
||||
if args.steal_params_L2I is not None:
|
||||
# try to retrieve every matching parameter from a well-trained model
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_L2I, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(tf.global_variables()):
|
||||
if v.name in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(tf.global_variables()))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
|
||||
|
||||
'''
|
||||
# this version can only reload "trainable vars"
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_L2I, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(self.Worker_L2I.all_params):
|
||||
if v.name in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(self.Worker_L2I.all_params))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
|
||||
'''
|
||||
|
||||
def _steal_I2L(self):
|
||||
if args.steal_params_I2L is not None:
|
||||
# try to retrieve parameters from a well-trained model
|
||||
success_ = 0
|
||||
import pickle
|
||||
with open(args.steal_params_I2L, 'rb') as f:
|
||||
old_model = pickle.load(f)
|
||||
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
|
||||
if v.name[4:] in old_model:
|
||||
self.sess.run(self.assign_op(v, old_model[v.name[4:]][0]))
|
||||
success_ += 1
|
||||
print(vidx, len(self.Worker_I2L.model.all_variables))
|
||||
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_I2L))
|
||||
|
||||
def train(self):
|
||||
if args.load_params is None:
|
||||
self._steal_L2I()
|
||||
self._steal_I2L()
|
||||
self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
|
||||
for epoch in range(args.max_epochs):
|
||||
self.current_epoch = epoch
|
||||
for images, labels, LMscores in train_data_iterator:
|
||||
self.step(images, labels, LMscores, epoch, args.useSoftLabel)
|
||||
|
||||
if epoch % args.valid_interval == (args.valid_interval - 1):
|
||||
self.Worker_I2L.Valid(test_data_iterator, self.sess)
|
||||
self.L2I_TestNll()
|
||||
|
||||
if epoch % args.save_interval == (args.save_interval - 1):
|
||||
self.saver.save(self.sess, args.save_dir + '/params_' + str(epoch) + 'uidx' + str(self.train_uidx) + '.ckpt')
|
||||
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
|
||||
|
||||
def valid_I2L(self):
|
||||
self.Worker_I2L.Valid(test_data_iterator, self.sess)
|
||||
|
||||
def valid_L2I(self):
|
||||
self.L2I_TestNll()
|
||||
'''
|
||||
for alpha_ in range(11):
|
||||
print('alpha=%f' % (alpha_ * 0.1))
|
||||
self.L2I_TestNll(alpha_ * 0.1)
|
||||
'''
|
||||
def valid_ImgGen(self):
|
||||
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
|
||||
|
||||
def main(_):
|
||||
#L2Ipath='./pxpp_c_2.95/params_cifar.ckpt'
|
||||
monitor_ = monitor()
|
||||
monitor_.build_saver()
|
||||
if args.mode == 'train':
|
||||
monitor_.train()
|
||||
elif args.mode == 'I2L':
|
||||
monitor_.valid_I2L()
|
||||
elif args.mode == 'L2I':
|
||||
monitor_.valid_L2I()
|
||||
elif args.mode == 'ImgGen':
|
||||
monitor_.valid_ImgGen()
|
||||
else:
|
||||
print('Unsupported mode: ' + args.mode)
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.app.run()
|
|
@ -0,0 +1,85 @@
|
|||
"""
|
||||
The core Pixel-CNN model
|
||||
"""
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.framework.python.ops import arg_scope
|
||||
import pixel_cnn_pp.nn as nn
|
||||
|
||||
def model_spec(x, h=None, init=False, ema=None, dropout_p=0.5, nr_resnet=5, nr_filters=160, nr_logistic_mix=10, resnet_nonlinearity='concat_elu'):
|
||||
"""
|
||||
We receive a Tensor x of shape (N,H,W,D1) (e.g. (12,32,32,3)) and produce
|
||||
a Tensor x_out of shape (N,H,W,D2) (e.g. (12,32,32,100)), where each fiber
|
||||
of the x_out tensor describes the predictive distribution for the RGB at
|
||||
that position.
|
||||
'h' is an optional N x K matrix of values to condition our generative model on
|
||||
"""
|
||||
|
||||
counters = {}
|
||||
with arg_scope([nn.conv2d, nn.deconv2d, nn.gated_resnet, nn.dense], counters=counters, init=init, ema=ema, dropout_p=dropout_p):
|
||||
|
||||
# parse resnet nonlinearity argument
|
||||
if resnet_nonlinearity == 'concat_elu':
|
||||
resnet_nonlinearity = nn.concat_elu
|
||||
elif resnet_nonlinearity == 'elu':
|
||||
resnet_nonlinearity = tf.nn.elu
|
||||
elif resnet_nonlinearity == 'relu':
|
||||
resnet_nonlinearity = tf.nn.relu
|
||||
else:
|
||||
raise ValueError('resnet nonlinearity ' + resnet_nonlinearity + ' is not supported')
|
||||
|
||||
with arg_scope([nn.gated_resnet], nonlinearity=resnet_nonlinearity, h=h):
|
||||
|
||||
# ////////// up pass through pixelCNN ////////
|
||||
xs = nn.int_shape(x)
|
||||
x_pad = tf.concat([x,tf.ones(xs[:-1]+[1])],3) # add channel of ones to distinguish image from padding later on
|
||||
u_list = [nn.down_shift(nn.down_shifted_conv2d(x_pad, num_filters=nr_filters, filter_size=[2, 3]))] # stream for pixels above
|
||||
ul_list = [nn.down_shift(nn.down_shifted_conv2d(x_pad, num_filters=nr_filters, filter_size=[1,3])) + \
|
||||
nn.right_shift(nn.down_right_shifted_conv2d(x_pad, num_filters=nr_filters, filter_size=[2,1]))] # stream for up and to the left
|
||||
|
||||
for rep in range(nr_resnet):
|
||||
u_list.append(nn.gated_resnet(u_list[-1], conv=nn.down_shifted_conv2d))
|
||||
ul_list.append(nn.gated_resnet(ul_list[-1], u_list[-1], conv=nn.down_right_shifted_conv2d))
|
||||
|
||||
u_list.append(nn.down_shifted_conv2d(u_list[-1], num_filters=nr_filters, stride=[2, 2]))
|
||||
ul_list.append(nn.down_right_shifted_conv2d(ul_list[-1], num_filters=nr_filters, stride=[2, 2]))
|
||||
|
||||
for rep in range(nr_resnet):
|
||||
u_list.append(nn.gated_resnet(u_list[-1], conv=nn.down_shifted_conv2d))
|
||||
ul_list.append(nn.gated_resnet(ul_list[-1], u_list[-1], conv=nn.down_right_shifted_conv2d))
|
||||
|
||||
u_list.append(nn.down_shifted_conv2d(u_list[-1], num_filters=nr_filters, stride=[2, 2]))
|
||||
ul_list.append(nn.down_right_shifted_conv2d(ul_list[-1], num_filters=nr_filters, stride=[2, 2]))
|
||||
|
||||
for rep in range(nr_resnet):
|
||||
u_list.append(nn.gated_resnet(u_list[-1], conv=nn.down_shifted_conv2d))
|
||||
ul_list.append(nn.gated_resnet(ul_list[-1], u_list[-1], conv=nn.down_right_shifted_conv2d))
|
||||
|
||||
# /////// down pass ////////
|
||||
u = u_list.pop()
|
||||
ul = ul_list.pop()
|
||||
for rep in range(nr_resnet):
|
||||
u = nn.gated_resnet(u, u_list.pop(), conv=nn.down_shifted_conv2d)
|
||||
ul = nn.gated_resnet(ul, tf.concat([u, ul_list.pop()],3), conv=nn.down_right_shifted_conv2d)
|
||||
|
||||
u = nn.down_shifted_deconv2d(u, num_filters=nr_filters, stride=[2, 2])
|
||||
ul = nn.down_right_shifted_deconv2d(ul, num_filters=nr_filters, stride=[2, 2])
|
||||
|
||||
for rep in range(nr_resnet+1):
|
||||
u = nn.gated_resnet(u, u_list.pop(), conv=nn.down_shifted_conv2d)
|
||||
ul = nn.gated_resnet(ul, tf.concat([u, ul_list.pop()],3), conv=nn.down_right_shifted_conv2d)
|
||||
|
||||
u = nn.down_shifted_deconv2d(u, num_filters=nr_filters, stride=[2, 2])
|
||||
ul = nn.down_right_shifted_deconv2d(ul, num_filters=nr_filters, stride=[2, 2])
|
||||
|
||||
for rep in range(nr_resnet+1):
|
||||
u = nn.gated_resnet(u, u_list.pop(), conv=nn.down_shifted_conv2d)
|
||||
ul = nn.gated_resnet(ul, tf.concat([u, ul_list.pop()],3), conv=nn.down_right_shifted_conv2d)
|
||||
|
||||
x_out = nn.nin(tf.nn.elu(ul),10*nr_logistic_mix)
|
||||
|
||||
assert len(u_list) == 0
|
||||
assert len(ul_list) == 0
|
||||
|
||||
return x_out
|
||||
|
|
@ -0,0 +1,319 @@
|
|||
"""
|
||||
Various tensorflow utilities
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.framework.python.ops import add_arg_scope
|
||||
|
||||
def int_shape(x):
|
||||
return list(map(int, x.get_shape()))
|
||||
|
||||
def concat_elu(x):
|
||||
""" like concatenated ReLU (http://arxiv.org/abs/1603.05201), but then with ELU """
|
||||
axis = len(x.get_shape())-1
|
||||
return tf.nn.elu(tf.concat([x, -x],axis))
|
||||
|
||||
def log_sum_exp(x):
|
||||
""" numerically stable log_sum_exp implementation that prevents overflow """
|
||||
axis = len(x.get_shape())-1
|
||||
m = tf.reduce_max(x, axis)
|
||||
m2 = tf.reduce_max(x, axis, keep_dims=True)
|
||||
return m + tf.log(tf.reduce_sum(tf.exp(x-m2), axis))
|
||||
|
||||
def log_prob_from_logits(x):
|
||||
""" numerically stable log_softmax implementation that prevents overflow """
|
||||
axis = len(x.get_shape())-1
|
||||
m = tf.reduce_max(x, axis, keep_dims=True)
|
||||
return x - m - tf.log(tf.reduce_sum(tf.exp(x-m), axis, keep_dims=True))
|
||||
|
||||
def discretized_mix_logistic_loss(x,l,sum_all=True):
|
||||
""" log-likelihood for mixture of discretized logistics, assumes the data has been rescaled to [-1,1] interval """
|
||||
xs = int_shape(x) # true image (i.e. labels) to regress to, e.g. (B,32,32,3)
|
||||
ls = int_shape(l) # predicted distribution, e.g. (B,32,32,100)
|
||||
nr_mix = int(ls[-1] / 10) # here and below: unpacking the params of the mixture of logistics
|
||||
logit_probs = l[:,:,:,:nr_mix]
|
||||
l = tf.reshape(l[:,:,:,nr_mix:], xs + [nr_mix*3])
|
||||
means = l[:,:,:,:,:nr_mix]
|
||||
log_scales = tf.maximum(l[:,:,:,:,nr_mix:2*nr_mix], -7.)
|
||||
coeffs = tf.nn.tanh(l[:,:,:,:,2*nr_mix:3*nr_mix])
|
||||
x = tf.reshape(x, xs + [1]) + tf.zeros(xs + [nr_mix]) # here and below: getting the means and adjusting them based on preceding sub-pixels
|
||||
m2 = tf.reshape(means[:,:,:,1,:] + coeffs[:, :, :, 0, :] * x[:, :, :, 0, :], [xs[0],xs[1],xs[2],1,nr_mix])
|
||||
m3 = tf.reshape(means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x[:, :, :, 0, :] + coeffs[:, :, :, 2, :] * x[:, :, :, 1, :], [xs[0],xs[1],xs[2],1,nr_mix])
|
||||
means = tf.concat([tf.reshape(means[:,:,:,0,:], [xs[0],xs[1],xs[2],1,nr_mix]), m2, m3],3)
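# The G and B means are shifted by learned linear coefficients times the preceding sub-pixel
# values (R for G; R and G for B), giving the autoregressive coupling across channels used by
# PixelCNN++.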
|
||||
centered_x = x - means
|
||||
inv_stdv = tf.exp(-log_scales)
|
||||
plus_in = inv_stdv * (centered_x + 1./255.)
|
||||
cdf_plus = tf.nn.sigmoid(plus_in)
|
||||
min_in = inv_stdv * (centered_x - 1./255.)
|
||||
cdf_min = tf.nn.sigmoid(min_in)
|
||||
log_cdf_plus = plus_in - tf.nn.softplus(plus_in) # log probability for edge case of 0 (before scaling)
|
||||
log_one_minus_cdf_min = -tf.nn.softplus(min_in) # log probability for edge case of 255 (before scaling)
|
||||
cdf_delta = cdf_plus - cdf_min # probability for all other cases
|
||||
mid_in = inv_stdv * centered_x
|
||||
log_pdf_mid = mid_in - log_scales - 2.*tf.nn.softplus(mid_in) # log probability in the center of the bin, to be used in extreme cases (not actually used in our code)
|
||||
|
||||
# now select the right output: left edge case, right edge case, normal case, extremely low prob case (doesn't actually happen for us)
|
||||
|
||||
# this is what we are really doing, but using the robust version below for extreme cases in other applications and to avoid NaN issue with tf.select()
|
||||
# log_probs = tf.select(x < -0.999, log_cdf_plus, tf.select(x > 0.999, log_one_minus_cdf_min, tf.log(cdf_delta)))
|
||||
|
||||
# robust version, that still works if probabilities are below 1e-5 (which never happens in our code)
|
||||
# tensorflow backpropagates through tf.select() by multiplying with zero instead of selecting: this requires us to use some ugly tricks to avoid potential NaNs
|
||||
# the 1e-12 in tf.maximum(cdf_delta, 1e-12) is never actually used as output, it's purely there to get around the tf.select() gradient issue
|
||||
# if the probability on a sub-pixel is below 1e-5, we use an approximation based on the assumption that the log-density is constant in the bin of the observed sub-pixel value
|
||||
log_probs = tf.where(x < -0.999, log_cdf_plus, tf.where(x > 0.999, log_one_minus_cdf_min, tf.where(cdf_delta > 1e-5, tf.log(tf.maximum(cdf_delta, 1e-12)), log_pdf_mid - np.log(127.5))))
|
||||
|
||||
log_probs = tf.reduce_sum(log_probs,3) + log_prob_from_logits(logit_probs)
|
||||
if sum_all:
|
||||
return -tf.reduce_sum(log_sum_exp(log_probs))
|
||||
else:
|
||||
return -tf.reduce_sum(log_sum_exp(log_probs),[1,2])
|
||||
|
||||
def sample_from_discretized_mix_logistic(l,nr_mix):
|
||||
ls = int_shape(l)
|
||||
xs = ls[:-1] + [3]
|
||||
# unpack parameters
|
||||
logit_probs = l[:, :, :, :nr_mix]
|
||||
l = tf.reshape(l[:, :, :, nr_mix:], xs + [nr_mix*3])
|
||||
# sample mixture indicator from softmax
|
||||
sel = tf.one_hot(tf.argmax(logit_probs - tf.log(-tf.log(tf.random_uniform(logit_probs.get_shape(), minval=1e-5, maxval=1. - 1e-5))), 3), depth=nr_mix, dtype=tf.float32)
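# Mixture-component selection via the Gumbel-max trick: argmax(logits + Gumbel noise) draws a
# sample from the categorical distribution defined by logit_probs.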
|
||||
sel = tf.reshape(sel, xs[:-1] + [1,nr_mix])
|
||||
# select logistic parameters
|
||||
means = tf.reduce_sum(l[:,:,:,:,:nr_mix]*sel,4)
|
||||
log_scales = tf.maximum(tf.reduce_sum(l[:,:,:,:,nr_mix:2*nr_mix]*sel,4), -7.)
|
||||
coeffs = tf.reduce_sum(tf.nn.tanh(l[:,:,:,:,2*nr_mix:3*nr_mix])*sel,4)
|
||||
# sample from logistic & clip to interval
|
||||
# we don't actually round to the nearest 8bit value when sampling
|
||||
u = tf.random_uniform(means.get_shape(), minval=1e-5, maxval=1. - 1e-5)
|
||||
x = means + tf.exp(log_scales)*(tf.log(u) - tf.log(1. - u))
|
||||
x0 = tf.minimum(tf.maximum(x[:,:,:,0], -1.), 1.)
|
||||
x1 = tf.minimum(tf.maximum(x[:,:,:,1] + coeffs[:,:,:,0]*x0, -1.), 1.)
|
||||
x2 = tf.minimum(tf.maximum(x[:,:,:,2] + coeffs[:,:,:,1]*x0 + coeffs[:,:,:,2]*x1, -1.), 1.)
|
||||
return tf.concat([tf.reshape(x0,xs[:-1]+[1]), tf.reshape(x1,xs[:-1]+[1]), tf.reshape(x2,xs[:-1]+[1])],3)
|
||||
|
||||
def get_var_maybe_avg(var_name, ema, **kwargs):
|
||||
''' utility for retrieving polyak averaged params '''
|
||||
v = tf.get_variable(var_name, **kwargs)
|
||||
if ema is not None:
|
||||
v = ema.average(v)
|
||||
return v
|
||||
|
||||
def get_vars_maybe_avg(var_names, ema, **kwargs):
|
||||
''' utility for retrieving polyak averaged params '''
|
||||
vars = []
|
||||
for vn in var_names:
|
||||
vars.append(get_var_maybe_avg(vn, ema, **kwargs))
|
||||
return vars
|
||||
|
||||
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
|
||||
''' Adam optimizer '''
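# A hand-rolled Adam: v/v_hat track the (bias-corrected) first moment, mg/mg_hat the
# (bias-corrected) second moment, and t is the shared step counter incremented once per update.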
|
||||
updates = []
|
||||
if type(cost_or_grads) is not list:
|
||||
grads = tf.gradients(cost_or_grads, params)
|
||||
else:
|
||||
grads = cost_or_grads
|
||||
t = tf.Variable(1., 'adam_t')
|
||||
for p, g in zip(params, grads):
|
||||
mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
|
||||
if mom1>0:
|
||||
v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
|
||||
v_t = mom1*v + (1. - mom1)*g
|
||||
v_hat = v_t / (1. - tf.pow(mom1,t))
|
||||
updates.append(v.assign(v_t))
|
||||
else:
|
||||
v_hat = g
|
||||
mg_t = mom2*mg + (1. - mom2)*tf.square(g)
|
||||
mg_hat = mg_t / (1. - tf.pow(mom2,t))
|
||||
g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
|
||||
p_t = p - lr * g_t
|
||||
updates.append(mg.assign(mg_t))
|
||||
updates.append(p.assign(p_t))
|
||||
updates.append(t.assign_add(1))
|
||||
return tf.group(*updates)
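# Minimal usage sketch (hypothetical `loss`; not part of this file): the helper accepts either
# a scalar cost, in which case gradients are taken internally, or a precomputed list of
# gradients, and returns one grouped op that applies the bias-corrected Adam step and
# advances the step counter t.
#   params = tf.trainable_variables()
#   train_op = adam_updates(params, loss, lr=3e-4)
#   sess.run(train_op, feed_dict)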
|
||||
|
||||
def get_name(layer_name, counters):
|
||||
''' utility for keeping track of layer names '''
|
||||
if not layer_name in counters:
|
||||
counters[layer_name] = 0
|
||||
name = layer_name + '_' + str(counters[layer_name])
|
||||
counters[layer_name] += 1
|
||||
return name
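# Example: with a shared `counters` dict the same layer type gets a fresh scope name each call,
#   counters = {}
#   get_name('dense', counters)   # -> 'dense_0'
#   get_name('dense', counters)   # -> 'dense_1'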
|
||||
|
||||
@add_arg_scope
|
||||
def dense(x, num_units, nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
|
||||
''' fully connected layer '''
|
||||
name = get_name('dense', counters)
|
||||
with tf.variable_scope(name):
|
||||
if init:
|
||||
# data based initialization of parameters
|
||||
V = tf.get_variable('V', [int(x.get_shape()[1]),num_units], tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
|
||||
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0])
|
||||
x_init = tf.matmul(x, V_norm)
|
||||
m_init, v_init = tf.nn.moments(x_init, [0])
|
||||
scale_init = init_scale/tf.sqrt(v_init + 1e-10)
|
||||
g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
|
||||
b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init*scale_init, trainable=True)
|
||||
x_init = tf.reshape(scale_init,[1,num_units])*(x_init-tf.reshape(m_init,[1,num_units]))
|
||||
if nonlinearity is not None:
|
||||
x_init = nonlinearity(x_init)
|
||||
return x_init
|
||||
|
||||
else:
|
||||
V,g,b = get_vars_maybe_avg(['V','g','b'], ema)
|
||||
# According to the comments at
|
||||
# https://github.com/openai/pixel-cnn/issues/17,
|
||||
# I simply commented out the following line
|
||||
# tf.assert_variables_initialized([V,g,b])
|
||||
|
||||
# use weight normalization (Salimans & Kingma, 2016)
|
||||
x = tf.matmul(x, V)
|
||||
scaler = g/tf.sqrt(tf.reduce_sum(tf.square(V),[0]))
|
||||
x = tf.reshape(scaler,[1,num_units])*x + tf.reshape(b,[1,num_units])
|
||||
|
||||
# apply nonlinearity
|
||||
if nonlinearity is not None:
|
||||
x = nonlinearity(x)
|
||||
return x
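# The weight-normalization reparameterization used here (and in conv2d/deconv2d below),
# written out as a sketch of the math rather than extra functionality:
#   W = g * V / ||V||_2   (norm over every axis except the output-channel axis)
#   y = x @ W + b
# so V carries only the direction of each unit's weight vector while the scalar g carries
# its scale; the data-dependent init above sets g = init_scale/std and b = -mean*g from the
# first batch so that the initial pre-activations are roughly zero-mean, unit-variance.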
|
||||
|
||||
@add_arg_scope
|
||||
def conv2d(x, num_filters, filter_size=[3,3], stride=[1,1], pad='SAME', nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
|
||||
''' convolutional layer '''
|
||||
name = get_name('conv2d', counters)
|
||||
with tf.variable_scope(name):
|
||||
if init:
|
||||
# data based initialization of parameters
|
||||
V = tf.get_variable('V', filter_size+[int(x.get_shape()[-1]),num_filters], tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
|
||||
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0,1,2])
|
||||
x_init = tf.nn.conv2d(x, V_norm, [1]+stride+[1], pad)
|
||||
m_init, v_init = tf.nn.moments(x_init, [0,1,2])
|
||||
scale_init = init_scale/tf.sqrt(v_init + 1e-8)
|
||||
g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
|
||||
b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init*scale_init, trainable=True)
|
||||
x_init = tf.reshape(scale_init,[1,1,1,num_filters])*(x_init-tf.reshape(m_init,[1,1,1,num_filters]))
|
||||
if nonlinearity is not None:
|
||||
x_init = nonlinearity(x_init)
|
||||
return x_init
|
||||
|
||||
else:
|
||||
V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
|
||||
# tf.assert_variables_initialized([V,g,b])
|
||||
|
||||
# use weight normalization (Salimans & Kingma, 2016)
|
||||
W = tf.reshape(g,[1,1,1,num_filters])*tf.nn.l2_normalize(V,[0,1,2])
|
||||
|
||||
# calculate convolutional layer output
|
||||
x = tf.nn.bias_add(tf.nn.conv2d(x, W, [1]+stride+[1], pad), b)
|
||||
|
||||
# apply nonlinearity
|
||||
if nonlinearity is not None:
|
||||
x = nonlinearity(x)
|
||||
return x
|
||||
|
||||
@add_arg_scope
|
||||
def deconv2d(x, num_filters, filter_size=[3,3], stride=[1,1], pad='SAME', nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
|
||||
''' transposed convolutional layer '''
|
||||
name = get_name('deconv2d', counters)
|
||||
xs = int_shape(x)
|
||||
if pad=='SAME':
|
||||
target_shape = [xs[0], xs[1]*stride[0], xs[2]*stride[1], num_filters]
|
||||
else:
|
||||
target_shape = [xs[0], xs[1]*stride[0] + filter_size[0]-1, xs[2]*stride[1] + filter_size[1]-1, num_filters]
|
||||
with tf.variable_scope(name):
|
||||
if init:
|
||||
# data based initialization of parameters
|
||||
V = tf.get_variable('V', filter_size+[num_filters,int(x.get_shape()[-1])], tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
|
||||
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0,1,3])
|
||||
x_init = tf.nn.conv2d_transpose(x, V_norm, target_shape, [1]+stride+[1], padding=pad)
|
||||
m_init, v_init = tf.nn.moments(x_init, [0,1,2])
|
||||
scale_init = init_scale/tf.sqrt(v_init + 1e-8)
|
||||
g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
|
||||
b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init*scale_init, trainable=True)
|
||||
x_init = tf.reshape(scale_init,[1,1,1,num_filters])*(x_init-tf.reshape(m_init,[1,1,1,num_filters]))
|
||||
if nonlinearity is not None:
|
||||
x_init = nonlinearity(x_init)
|
||||
return x_init
|
||||
|
||||
else:
|
||||
V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
|
||||
# tf.assert_variables_initialized([V,g,b])
|
||||
|
||||
# use weight normalization (Salimans & Kingma, 2016)
|
||||
W = tf.reshape(g,[1,1,num_filters,1])*tf.nn.l2_normalize(V,[0,1,3])
|
||||
|
||||
# calculate convolutional layer output
|
||||
x = tf.nn.conv2d_transpose(x, W, target_shape, [1]+stride+[1], padding=pad)
|
||||
x = tf.nn.bias_add(x, b)
|
||||
|
||||
# apply nonlinearity
|
||||
if nonlinearity is not None:
|
||||
x = nonlinearity(x)
|
||||
return x
|
||||
|
||||
@add_arg_scope
|
||||
def nin(x, num_units, **kwargs):
|
||||
""" a network in network layer (1x1 CONV) """
|
||||
s = int_shape(x)
|
||||
x = tf.reshape(x, [np.prod(s[:-1]),s[-1]])
|
||||
x = dense(x, num_units, **kwargs)
|
||||
return tf.reshape(x, s[:-1]+[num_units])
|
||||
|
||||
''' meta-layer consisting of multiple base layers '''
|
||||
|
||||
@add_arg_scope
|
||||
def gated_resnet(x, a=None, h=None, nonlinearity=concat_elu, conv=conv2d, init=False, counters={}, ema=None, dropout_p=0., **kwargs):
|
||||
xs = int_shape(x)
|
||||
num_filters = xs[-1]
|
||||
|
||||
c1 = conv(nonlinearity(x), num_filters)
|
||||
if a is not None: # add short-cut connection if auxiliary input 'a' is given
|
||||
c1 += nin(nonlinearity(a), num_filters)
|
||||
c1 = nonlinearity(c1)
|
||||
if dropout_p > 0:
|
||||
c1 = tf.nn.dropout(c1, keep_prob=1. - dropout_p)
|
||||
c2 = conv(c1, num_filters * 2, init_scale=0.1)
|
||||
|
||||
# add projection of h vector if included: conditional generation
|
||||
if h is not None:
|
||||
with tf.variable_scope(get_name('conditional_weights', counters)):
|
||||
hw = get_var_maybe_avg('hw', ema, shape=[int_shape(h)[-1], 2 * num_filters], dtype=tf.float32,
|
||||
initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
|
||||
if init:
|
||||
hw = hw.initialized_value()
|
||||
c2 += tf.reshape(tf.matmul(h, hw), [xs[0], 1, 1, 2 * num_filters])
|
||||
|
||||
a, b = tf.split(c2, 2, 3)
|
||||
c3 = a * tf.nn.sigmoid(b)
|
||||
return x + c3
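# In short, this is a gated residual unit: the second conv produces 2*num_filters channels,
# split into (a, b) and combined as a * sigmoid(b) (a learned per-channel gate), optionally
# shifted by a projection of the conditioning vector h, and added back onto the input x, so
# spatial size and channel count are unchanged.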
|
||||
|
||||
''' utilities for shifting the image around, efficient alternative to masking convolutions '''
|
||||
|
||||
def down_shift(x):
|
||||
xs = int_shape(x)
|
||||
return tf.concat([tf.zeros([xs[0],1,xs[2],xs[3]]), x[:,:xs[1]-1,:,:]],1)
|
||||
|
||||
def right_shift(x):
|
||||
xs = int_shape(x)
|
||||
return tf.concat([tf.zeros([xs[0],xs[1],1,xs[3]]), x[:,:,:xs[2]-1,:]],2)
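# Tiny worked example of the shifts (batch and channel axes omitted): down_shift moves every
# row one step down and zero-fills the top row, so output row i depends only on input rows < i;
# right_shift does the same along the width axis.
#   [[1, 2],      down_shift      [[0, 0],
#    [3, 4]]     ----------->      [1, 2]]
# Combined with the down_shifted / down_right_shifted convolutions below, this keeps each
# output pixel's receptive field strictly above / to the left of the pixel itself.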
|
||||
|
||||
@add_arg_scope
|
||||
def down_shifted_conv2d(x, num_filters, filter_size=[2,3], stride=[1,1], **kwargs):
|
||||
x = tf.pad(x, [[0,0],[filter_size[0]-1,0], [int((filter_size[1]-1)/2),int((filter_size[1]-1)/2)],[0,0]])
|
||||
return conv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
|
||||
|
||||
@add_arg_scope
|
||||
def down_shifted_deconv2d(x, num_filters, filter_size=[2,3], stride=[1,1], **kwargs):
|
||||
x = deconv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
|
||||
xs = int_shape(x)
|
||||
return x[:,:(xs[1]-filter_size[0]+1),int((filter_size[1]-1)/2):(xs[2]-int((filter_size[1]-1)/2)),:]
|
||||
|
||||
@add_arg_scope
|
||||
def down_right_shifted_conv2d(x, num_filters, filter_size=[2,2], stride=[1,1], **kwargs):
|
||||
x = tf.pad(x, [[0,0],[filter_size[0]-1, 0], [filter_size[1]-1, 0],[0,0]])
|
||||
return conv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
|
||||
|
||||
@add_arg_scope
|
||||
def down_right_shifted_deconv2d(x, num_filters, filter_size=[2,2], stride=[1,1], **kwargs):
|
||||
x = deconv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
|
||||
xs = int_shape(x)
|
||||
return x[:,:(xs[1]-filter_size[0]+1):,:(xs[2]-filter_size[1]+1),:]
|
|
@ -0,0 +1,194 @@
|
|||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
# Plot image examples.
|
||||
def plot_img(img, title=None):
|
||||
plt.figure()
|
||||
plt.imshow(img, interpolation='nearest')
|
||||
if title is not None:
|
||||
plt.title(title)
|
||||
plt.axis('off')
|
||||
plt.tight_layout()
|
||||
|
||||
def img_stretch(img):
|
||||
img = img.astype(float)
|
||||
img -= np.min(img)
|
||||
img /= np.max(img)+1e-12
|
||||
return img
|
||||
|
||||
def img_tile(imgs, aspect_ratio=1.0, tile_shape=None, border=1,
|
||||
border_color=0, stretch=False):
|
||||
''' Tile images in a grid.
|
||||
If tile_shape is provided only as many images as specified in tile_shape
|
||||
will be included in the output.
|
||||
'''
|
||||
|
||||
# Prepare images
|
||||
if stretch:
|
||||
imgs = img_stretch(imgs)
|
||||
imgs = np.array(imgs)
|
||||
if imgs.ndim != 3 and imgs.ndim != 4:
|
||||
raise ValueError('imgs has wrong number of dimensions.')
|
||||
n_imgs = imgs.shape[0]
|
||||
|
||||
# Grid shape
|
||||
img_shape = np.array(imgs.shape[1:3])
|
||||
if tile_shape is None:
|
||||
img_aspect_ratio = img_shape[1] / float(img_shape[0])
|
||||
aspect_ratio *= img_aspect_ratio
|
||||
tile_height = int(np.ceil(np.sqrt(n_imgs * aspect_ratio)))
|
||||
tile_width = int(np.ceil(np.sqrt(n_imgs / aspect_ratio)))
|
||||
grid_shape = np.array((tile_height, tile_width))
|
||||
else:
|
||||
assert len(tile_shape) == 2
|
||||
grid_shape = np.array(tile_shape)
|
||||
|
||||
# Tile image shape
|
||||
tile_img_shape = np.array(imgs.shape[1:])
|
||||
tile_img_shape[:2] = (img_shape[:2] + border) * grid_shape[:2] - border
|
||||
|
||||
# Assemble tile image
|
||||
tile_img = np.empty(tile_img_shape)
|
||||
tile_img[:] = border_color
|
||||
for i in range(grid_shape[0]):
|
||||
for j in range(grid_shape[1]):
|
||||
img_idx = j + i*grid_shape[1]
|
||||
if img_idx >= n_imgs:
|
||||
# No more images - stop filling out the grid.
|
||||
break
|
||||
img = imgs[img_idx]
|
||||
yoff = (img_shape[0] + border) * i
|
||||
xoff = (img_shape[1] + border) * j
|
||||
tile_img[yoff:yoff+img_shape[0], xoff:xoff+img_shape[1], ...] = img
|
||||
|
||||
return tile_img
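# Minimal usage sketch (hypothetical `samples` array):
#   samples = np.random.rand(16, 32, 32, 3)              # 16 RGB images with values in [0, 1]
#   tiled = img_tile(samples, border=1, stretch=True)    # one (131, 131, 3) array, a 4x4 grid
#   plot_img(tiled, title='samples')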
|
||||
|
||||
def conv_filter_tile(filters):
|
||||
n_filters, n_channels, height, width = filters.shape
|
||||
tile_shape = None
|
||||
if n_channels == 3:
|
||||
# Interpret 3 color channels as RGB
|
||||
filters = np.transpose(filters, (0, 2, 3, 1))
|
||||
else:
|
||||
# Organize tile such that each row corresponds to a filter and the
|
||||
# columns are the filter channels
|
||||
tile_shape = (n_channels, n_filters)
|
||||
filters = np.transpose(filters, (1, 0, 2, 3))
|
||||
filters = np.resize(filters, (n_filters*n_channels, height, width))
|
||||
filters = img_stretch(filters)
|
||||
return img_tile(filters, tile_shape=tile_shape)
|
||||
|
||||
def scale_to_unit_interval(ndar, eps=1e-8):
|
||||
""" Scales all values in the ndarray ndar to be between 0 and 1 """
|
||||
ndar = ndar.copy()
|
||||
ndar -= ndar.min()
|
||||
ndar *= 1.0 / (ndar.max() + eps)
|
||||
return ndar
|
||||
|
||||
|
||||
def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
|
||||
scale_rows_to_unit_interval=True,
|
||||
output_pixel_vals=True):
|
||||
"""
|
||||
Transform an array with one flattened image per row, into an array in
|
||||
which images are reshaped and laid out like tiles on a floor.
|
||||
|
||||
This function is useful for visualizing datasets whose rows are images,
|
||||
and also columns of matrices for transforming those rows
|
||||
(such as the first layer of a neural net).
|
||||
|
||||
:type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
|
||||
be 2-D ndarrays or None;
|
||||
:param X: a 2-D array in which every row is a flattened image.
|
||||
|
||||
:type img_shape: tuple; (height, width)
|
||||
:param img_shape: the original shape of each image
|
||||
|
||||
:type tile_shape: tuple; (rows, cols)
|
||||
:param tile_shape: the number of images to tile (rows, cols)
|
||||
|
||||
:param output_pixel_vals: if output should be pixel values (i.e. uint8
|
||||
values) or floats
|
||||
|
||||
:param scale_rows_to_unit_interval: if the values need to be scaled before
|
||||
being plotted to [0,1] or not
|
||||
|
||||
|
||||
:returns: array suitable for viewing as an image.
|
||||
(See:`PIL.Image.fromarray`.)
|
||||
:rtype: a 2-d array with same dtype as X.
|
||||
|
||||
"""
|
||||
|
||||
assert len(img_shape) == 2
|
||||
assert len(tile_shape) == 2
|
||||
assert len(tile_spacing) == 2
|
||||
|
||||
# The expression below can be re-written in a more C style as
|
||||
# follows :
|
||||
#
|
||||
# out_shape = [0,0]
|
||||
# out_shape[0] = (img_shape[0] + tile_spacing[0]) * tile_shape[0] -
|
||||
# tile_spacing[0]
|
||||
# out_shape[1] = (img_shape[1] + tile_spacing[1]) * tile_shape[1] -
|
||||
# tile_spacing[1]
|
||||
out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
|
||||
in zip(img_shape, tile_shape, tile_spacing)]
|
||||
|
||||
if isinstance(X, tuple):
|
||||
assert len(X) == 4
|
||||
# Create an output numpy ndarray to store the image
|
||||
if output_pixel_vals:
|
||||
out_array = np.zeros((out_shape[0], out_shape[1], 4), dtype='uint8')
|
||||
else:
|
||||
out_array = np.zeros((out_shape[0], out_shape[1], 4), dtype=X.dtype)
|
||||
|
||||
#colors default to 0, alpha defaults to 1 (opaque)
|
||||
if output_pixel_vals:
|
||||
channel_defaults = [0, 0, 0, 255]
|
||||
else:
|
||||
channel_defaults = [0., 0., 0., 1.]
|
||||
|
||||
for i in range(4):
|
||||
if X[i] is None:
|
||||
# if channel is None, fill it with zeros of the correct
|
||||
# dtype
|
||||
out_array[:, :, i] = np.zeros(out_shape,
|
||||
dtype='uint8' if output_pixel_vals else out_array.dtype
|
||||
) + channel_defaults[i]
|
||||
else:
|
||||
# use a recurrent call to compute the channel and store it
|
||||
# in the output
|
||||
out_array[:, :, i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
|
||||
return out_array
|
||||
|
||||
else:
|
||||
# if we are dealing with only one channel
|
||||
H, W = img_shape
|
||||
Hs, Ws = tile_spacing
|
||||
|
||||
# generate a matrix to store the output
|
||||
out_array = np.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
|
||||
|
||||
|
||||
for tile_row in range(tile_shape[0]):
|
||||
for tile_col in range(tile_shape[1]):
|
||||
if tile_row * tile_shape[1] + tile_col < X.shape[0]:
|
||||
if scale_rows_to_unit_interval:
|
||||
# if we should scale values to be between 0 and 1
|
||||
# do this by calling the `scale_to_unit_interval`
|
||||
# function
|
||||
this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
|
||||
else:
|
||||
this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
|
||||
# add the slice to the corresponding position in the
|
||||
# output array
|
||||
out_array[
|
||||
tile_row * (H+Hs): tile_row * (H + Hs) + H,
|
||||
tile_col * (W+Ws): tile_col * (W + Ws) + W
|
||||
] \
|
||||
= this_img * (255 if output_pixel_vals else 1)
|
||||
return out_array
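# Minimal usage sketch (hypothetical weight matrix `W`): each row of X must be one flattened
# image, e.g. the first-layer weights of a net with 100 hidden units on 28x28 inputs,
# transposed so that every row is one filter:
#   W = np.random.randn(784, 100)
#   img = tile_raster_images(W.T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))
#   # img is a uint8 array ready for PIL.Image.fromarray(img)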
|
||||
|
|
@ -0,0 +1,436 @@
|
|||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""ResNet model.
|
||||
|
||||
Related papers:
|
||||
https://arxiv.org/pdf/1603.05027v2.pdf
|
||||
https://arxiv.org/pdf/1512.03385v1.pdf
|
||||
https://arxiv.org/pdf/1605.07146v1.pdf
|
||||
"""
|
||||
from collections import namedtuple
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from tensorflow.python.training import moving_averages
|
||||
|
||||
|
||||
HParams = namedtuple('HParams',
|
||||
'batch_size, num_classes, min_lrn_rate, lrn_rate, '
|
||||
'num_residual_units, use_bottleneck, weight_decay_rate, '
|
||||
'relu_leakiness, optimizer')
|
||||
|
||||
|
||||
class ResNet(object):
|
||||
"""ResNet model."""
|
||||
|
||||
def __init__(self, hps, mode, image_size=32, use_wide_resnet=False, nr_gpu=1):
|
||||
self.hps = hps
|
||||
self.batch_size = self.hps.batch_size
|
||||
self.input_image = [tf.placeholder(tf.float32, shape=(self.batch_size,image_size,image_size,3)) for _ in range(nr_gpu)]
|
||||
self.input_label = [tf.placeholder(tf.int32, shape=(self.batch_size,1)) for _ in range(nr_gpu)]
|
||||
self.mode = mode
|
||||
self.needImgAug = tf.placeholder(tf.bool, shape=())
|
||||
self.image_size = image_size
|
||||
self.nr_gpu = nr_gpu
|
||||
|
||||
self._extra_train_ops = []
|
||||
self.lrn_rate = tf.placeholder(tf.float32, shape=())
|
||||
self.use_wide_resnet = use_wide_resnet
|
||||
|
||||
def build_graph(self):
|
||||
"""Build a whole graph for the model."""
|
||||
with tf.variable_scope('I2L'):
|
||||
self.global_step = tf.contrib.framework.get_or_create_global_step()
|
||||
self._build_model()
|
||||
self.trainable_variables = [v for v in tf.trainable_variables() if v.name.startswith('I2L/')]
|
||||
self.all_variables = [v for v in tf.global_variables() if v.name.startswith('I2L/')]
|
||||
#if self.mode == 'train':
|
||||
# self._build_train_op()
|
||||
|
||||
def _stride_arr(self, stride):
|
||||
"""Map a stride scalar to the stride array for tf.nn.conv2d."""
|
||||
return [1, stride, stride, 1]
|
||||
|
||||
def _PreprocessImages(self):
|
||||
def _aug_one_img(img):
|
||||
img = tf.image.resize_image_with_crop_or_pad(img, self.image_size+4, self.image_size+4)
|
||||
img = tf.random_crop(img, [self.image_size, self.image_size, 3])
|
||||
img = tf.image.random_flip_left_right(img)
|
||||
return img
|
||||
def _deal_one_img(img):
|
||||
img = tf.cond(self.needImgAug, lambda: _aug_one_img(img), lambda: img)
|
||||
img = tf.image.per_image_standardization(img)
|
||||
return img
|
||||
#images = tf.map_fn(lambda img: _deal_one_img(img), self.input_image)
|
||||
#self.image = images
|
||||
self.image = [tf.map_fn(lambda img: _deal_one_img(img), X) for X in self.input_image]
|
||||
|
||||
def _make_1hot_labels(self):
|
||||
self.labels = []
|
||||
for L in self.input_label:
|
||||
labels = tf.reshape(L, [self.batch_size, 1])
|
||||
indices = tf.reshape(tf.range(0, self.batch_size, 1), [self.batch_size, 1])
|
||||
labels = tf.sparse_to_dense(
|
||||
tf.concat([indices, labels],1),
|
||||
[self.batch_size, self.hps.num_classes], 1.0, 0.0)
|
||||
self.labels.append(labels)
|
||||
|
||||
def _build_basic_structure(self, x, y):
|
||||
with tf.variable_scope('init'):
|
||||
x = self._conv('init_conv', x, 3, 3, 16, self._stride_arr(1))
|
||||
|
||||
strides = [1, 2, 2]
|
||||
activate_before_residual = [True, False, False]
|
||||
if self.hps.use_bottleneck:
|
||||
res_func = self._bottleneck_residual
|
||||
filters = [16, 64, 128, 256]
|
||||
else:
|
||||
res_func = self._residual
|
||||
if self.use_wide_resnet:
|
||||
filters = [16, 160, 320, 640]
|
||||
else:
|
||||
filters = [16, 16, 32, 64]
|
||||
# Uncomment the following code to use the w28-10 wide residual network.
|
||||
# It is more memory efficient than a very deep residual network and has
|
||||
# comparably good performance.
|
||||
# https://arxiv.org/pdf/1605.07146v1.pdf
|
||||
# filters = [16, 160, 320, 640]
|
||||
# Update hps.num_residual_units to 9
|
||||
|
||||
with tf.variable_scope('unit_1_0'):
|
||||
x = res_func(x, filters[0], filters[1], self._stride_arr(strides[0]),
|
||||
activate_before_residual[0])
|
||||
for i in range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_1_%d' % i):
|
||||
x = res_func(x, filters[1], filters[1], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_2_0'):
|
||||
x = res_func(x, filters[1], filters[2], self._stride_arr(strides[1]),
|
||||
activate_before_residual[1])
|
||||
for i in range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_2_%d' % i):
|
||||
x = res_func(x, filters[2], filters[2], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_3_0'):
|
||||
x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]),
|
||||
activate_before_residual[2])
|
||||
for i in range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_3_%d' % i):
|
||||
x = res_func(x, filters[3], filters[3], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_last'):
|
||||
x = self._batch_norm('final_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._global_avg_pool(x)
|
||||
|
||||
with tf.variable_scope('logit'):
|
||||
logits = self._fully_connected(x, self.hps.num_classes)
|
||||
predictions_ = tf.nn.softmax(logits)
|
||||
|
||||
with tf.variable_scope('costs'):
|
||||
xent = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
|
||||
nlls_ = xent
|
||||
cost_ = tf.reduce_mean(xent, name='xent')
|
||||
cost_ += self._decay()
|
||||
|
||||
return nlls_, cost_, predictions_
|
||||
|
||||
#tf.scalar_summary('cost', self.cost)
|
||||
|
||||
def _build_model(self):
|
||||
"""Build the core model within the graph."""
|
||||
# Preprocess
|
||||
self._PreprocessImages()
|
||||
self._make_1hot_labels()
|
||||
self.nlls = [None for _ in range(self.nr_gpu)]
|
||||
self.cost = [None for _ in range(self.nr_gpu)]
|
||||
self.predictions = [None for _ in range(self.nr_gpu)]
|
||||
|
||||
for i in range(self.nr_gpu):
|
||||
with tf.variable_scope('I2L', reuse=True if i >= 1 else None):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
nll_, cost_, predicted_ = self._build_basic_structure(self.image[i], self.labels[i])
|
||||
self.nlls[i] = nll_
|
||||
self.cost[i] = cost_
|
||||
self.predictions[i] = predicted_
|
||||
|
||||
'''
|
||||
def _build_model(self):
|
||||
"""Build the core model within the graph."""
|
||||
# Preprocess
|
||||
self._PreprocessImages()
|
||||
self._make_1hot_labels()
|
||||
|
||||
with tf.variable_scope('init'):
|
||||
x = self.image
|
||||
x = self._conv('init_conv', x, 3, 3, 16, self._stride_arr(1))
|
||||
|
||||
strides = [1, 2, 2]
|
||||
activate_before_residual = [True, False, False]
|
||||
if self.hps.use_bottleneck:
|
||||
res_func = self._bottleneck_residual
|
||||
filters = [16, 64, 128, 256]
|
||||
else:
|
||||
res_func = self._residual
|
||||
if self.use_wide_resnet:
|
||||
filters = [16, 160, 320, 640]
|
||||
else:
|
||||
filters = [16, 16, 32, 64]
|
||||
# Uncomment the following codes to use w28-10 wide residual network.
|
||||
# It is more memory efficient than very deep residual network and has
|
||||
# comparably good performance.
|
||||
# https://arxiv.org/pdf/1605.07146v1.pdf
|
||||
# filters = [16, 160, 320, 640]
|
||||
# Update hps.num_residual_units to 9
|
||||
|
||||
with tf.variable_scope('unit_1_0'):
|
||||
x = res_func(x, filters[0], filters[1], self._stride_arr(strides[0]),
|
||||
activate_before_residual[0])
|
||||
for i in range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_1_%d' % i):
|
||||
x = res_func(x, filters[1], filters[1], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_2_0'):
|
||||
x = res_func(x, filters[1], filters[2], self._stride_arr(strides[1]),
|
||||
activate_before_residual[1])
|
||||
for i in range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_2_%d' % i):
|
||||
x = res_func(x, filters[2], filters[2], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_3_0'):
|
||||
x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]),
|
||||
activate_before_residual[2])
|
||||
for i in range(1, self.hps.num_residual_units):
|
||||
with tf.variable_scope('unit_3_%d' % i):
|
||||
x = res_func(x, filters[3], filters[3], self._stride_arr(1), False)
|
||||
|
||||
with tf.variable_scope('unit_last'):
|
||||
x = self._batch_norm('final_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._global_avg_pool(x)
|
||||
|
||||
with tf.variable_scope('logit'):
|
||||
logits = self._fully_connected(x, self.hps.num_classes)
|
||||
self.predictions = tf.nn.softmax(logits)
|
||||
|
||||
with tf.variable_scope('costs'):
|
||||
xent = tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=logits)
|
||||
self.nlls = xent
|
||||
self.cost = tf.reduce_mean(xent, name='xent')
|
||||
self.cost += self._decay()
|
||||
|
||||
#tf.scalar_summary('cost', self.cost)
|
||||
'''
|
||||
|
||||
def _build_train_op(self):
|
||||
"""Build training specific ops for the graph."""
|
||||
self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
|
||||
#tf.scalar_summary('learning rate', self.lrn_rate)
|
||||
|
||||
trainable_variables = tf.trainable_variables()
|
||||
#self.trainable_variables = [v for v in tf.trainable_variables() if v.name.startswith('LM/')]
|
||||
|
||||
grads = tf.gradients(self.cost, trainable_variables)
|
||||
|
||||
if self.hps.optimizer == 'sgd':
|
||||
optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
|
||||
elif self.hps.optimizer == 'mom':
|
||||
optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
|
||||
|
||||
apply_op = optimizer.apply_gradients(
|
||||
zip(grads, trainable_variables),
|
||||
global_step=self.global_step, name='train_step')
|
||||
|
||||
train_ops = [apply_op] + self._extra_train_ops
|
||||
self.train_op = tf.group(*train_ops)
|
||||
|
||||
|
||||
def Update(self, grads):
|
||||
"""Build training specific ops for the graph."""
|
||||
if self.hps.optimizer == 'sgd':
|
||||
optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
|
||||
elif self.hps.optimizer == 'mom':
|
||||
optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
|
||||
|
||||
apply_op = optimizer.apply_gradients(
|
||||
zip(grads, self.trainable_variables),
|
||||
global_step=self.global_step, name='train_step')
|
||||
|
||||
train_ops = [apply_op] + self._extra_train_ops
|
||||
self.update_ops = tf.group(*train_ops)
|
||||
|
||||
# TODO(xpan): Consider batch_norm in contrib/layers/python/layers/layers.py
|
||||
def _batch_norm(self, name, x):
|
||||
"""Batch normalization."""
|
||||
with tf.variable_scope(name):
|
||||
params_shape = [x.get_shape()[-1]]
|
||||
|
||||
beta = tf.get_variable(
|
||||
'beta', params_shape, tf.float32,
|
||||
initializer=tf.constant_initializer(0.0, tf.float32))
|
||||
gamma = tf.get_variable(
|
||||
'gamma', params_shape, tf.float32,
|
||||
initializer=tf.constant_initializer(1.0, tf.float32))
|
||||
|
||||
if self.mode == 'train':
|
||||
mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
|
||||
|
||||
moving_mean = tf.get_variable(
|
||||
'moving_mean', params_shape, tf.float32,
|
||||
initializer=tf.constant_initializer(0.0, tf.float32),
|
||||
trainable=False)
|
||||
moving_variance = tf.get_variable(
|
||||
'moving_variance', params_shape, tf.float32,
|
||||
initializer=tf.constant_initializer(1.0, tf.float32),
|
||||
trainable=False)
|
||||
|
||||
self._extra_train_ops.append(moving_averages.assign_moving_average(
|
||||
moving_mean, mean, 0.9))
|
||||
self._extra_train_ops.append(moving_averages.assign_moving_average(
|
||||
moving_variance, variance, 0.9))
|
||||
else:
|
||||
mean = tf.get_variable(
|
||||
'moving_mean', params_shape, tf.float32,
|
||||
initializer=tf.constant_initializer(0.0, tf.float32),
|
||||
trainable=False)
|
||||
variance = tf.get_variable(
|
||||
'moving_variance', params_shape, tf.float32,
|
||||
initializer=tf.constant_initializer(1.0, tf.float32),
|
||||
trainable=False)
|
||||
#tf.histogram_summary(mean.op.name, mean)
|
||||
#tf.histogram_summary(variance.op.name, variance)
|
||||
# epsilon used to be 1e-5. Maybe 0.001 solves the NaN problem in deeper nets.
|
||||
y = tf.nn.batch_normalization(
|
||||
x, mean, variance, beta, gamma, 0.001)
|
||||
y.set_shape(x.get_shape())
|
||||
return y
|
||||
|
||||
def _residual(self, x, in_filter, out_filter, stride,
|
||||
activate_before_residual=False):
|
||||
"""Residual unit with 2 sub layers."""
|
||||
if activate_before_residual:
|
||||
with tf.variable_scope('shared_activation'):
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
orig_x = x
|
||||
else:
|
||||
with tf.variable_scope('residual_only_activation'):
|
||||
orig_x = x
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
|
||||
with tf.variable_scope('sub1'):
|
||||
x = self._conv('conv1', x, 3, in_filter, out_filter, stride)
|
||||
|
||||
with tf.variable_scope('sub2'):
|
||||
x = self._batch_norm('bn2', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._conv('conv2', x, 3, out_filter, out_filter, [1, 1, 1, 1])
|
||||
|
||||
with tf.variable_scope('sub_add'):
|
||||
if in_filter != out_filter:
|
||||
orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID')
|
||||
orig_x = tf.pad(
|
||||
orig_x, [[0, 0], [0, 0], [0, 0],
|
||||
[(out_filter-in_filter)//2, (out_filter-in_filter)//2]])
|
||||
x += orig_x
|
||||
|
||||
tf.logging.info('image after unit %s', x.get_shape())
|
||||
return x
|
||||
|
||||
def _bottleneck_residual(self, x, in_filter, out_filter, stride,
|
||||
activate_before_residual=False):
|
||||
"""Bottleneck resisual unit with 3 sub layers."""
|
||||
if activate_before_residual:
|
||||
with tf.variable_scope('common_bn_relu'):
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
orig_x = x
|
||||
else:
|
||||
with tf.variable_scope('residual_bn_relu'):
|
||||
orig_x = x
|
||||
x = self._batch_norm('init_bn', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
|
||||
with tf.variable_scope('sub1'):
|
||||
x = self._conv('conv1', x, 1, in_filter, out_filter/4, stride)
|
||||
|
||||
with tf.variable_scope('sub2'):
|
||||
x = self._batch_norm('bn2', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._conv('conv2', x, 3, out_filter/4, out_filter/4, [1, 1, 1, 1])
|
||||
|
||||
with tf.variable_scope('sub3'):
|
||||
x = self._batch_norm('bn3', x)
|
||||
x = self._relu(x, self.hps.relu_leakiness)
|
||||
x = self._conv('conv3', x, 1, out_filter/4, out_filter, [1, 1, 1, 1])
|
||||
|
||||
with tf.variable_scope('sub_add'):
|
||||
if in_filter != out_filter:
|
||||
orig_x = self._conv('project', orig_x, 1, in_filter, out_filter, stride)
|
||||
x += orig_x
|
||||
|
||||
tf.logging.info('image after unit %s', x.get_shape())
|
||||
return x
|
||||
|
||||
def _decay(self):
|
||||
"""L2 weight decay loss."""
|
||||
costs = []
|
||||
for var in tf.trainable_variables():
|
||||
if var.op.name.find(r'DW') > 0:
|
||||
costs.append(tf.nn.l2_loss(var))
|
||||
# tf.histogram_summary(var.op.name, var)
|
||||
|
||||
return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))
|
||||
|
||||
def GetWeightDecay(self):
|
||||
"""L2 weight decay loss."""
|
||||
costs = []
|
||||
for var in self.trainable_variables:
|
||||
if var.op.name.find(r'DW') > 0:
|
||||
costs.append(tf.nn.l2_loss(var))
|
||||
# tf.histogram_summary(var.op.name, var)
|
||||
|
||||
return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))
|
||||
|
||||
def _conv(self, name, x, filter_size, in_filters, out_filters, strides):
|
||||
"""Convolution."""
|
||||
with tf.variable_scope(name):
|
||||
n = filter_size * filter_size * out_filters
|
||||
kernel = tf.get_variable(
|
||||
'DW', [filter_size, filter_size, in_filters, out_filters],
|
||||
tf.float32, initializer=tf.random_normal_initializer(
|
||||
stddev=np.sqrt(2.0/n)))
|
||||
return tf.nn.conv2d(x, kernel, strides, padding='SAME')
|
||||
|
||||
def _relu(self, x, leakiness=0.0):
|
||||
"""Relu, with optional leaky support."""
|
||||
return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
|
||||
|
||||
def _fully_connected(self, x, out_dim):
|
||||
"""FullyConnected layer for final output."""
|
||||
x = tf.reshape(x, [self.batch_size, -1])
|
||||
w = tf.get_variable(
|
||||
'DW', [x.get_shape()[1], out_dim],
|
||||
initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
|
||||
b = tf.get_variable('biases', [out_dim],
|
||||
initializer=tf.constant_initializer())
|
||||
return tf.nn.xw_plus_b(x, w, b)
|
||||
|
||||
def _global_avg_pool(self, x):
|
||||
assert x.get_shape().ndims == 4
|
||||
return tf.reduce_mean(x, [1, 2])
|
|
@ -0,0 +1,196 @@
|
|||
"""
|
||||
Evaluates a trained Pixel-CNN++ generative model on CIFAR-10 or Tiny ImageNet data.
|
||||
Uses multiple GPUs, indicated by the flag --nr-gpu
|
||||
|
||||
Example usage:
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3 python train_double_cnn.py --nr_gpu 4
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
import pixel_cnn_pp.nn as nn
|
||||
import pixel_cnn_pp.plotting as plotting
|
||||
from pixel_cnn_pp.model import model_spec
|
||||
import data.cifar10_data as cifar10_data
|
||||
import data.imagenet_data as imagenet_data
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
parser = argparse.ArgumentParser()
|
||||
# data I/O
|
||||
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
|
||||
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
|
||||
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
|
||||
parser.add_argument('-t', '--save_interval', type=int, default=20, help='Every how many epochs to write checkpoint/samples?')
|
||||
parser.add_argument('-r', '--load_params', dest='load_params', action='store_true', help='Restore training from previous model checkpoint?')
|
||||
# model
|
||||
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
|
||||
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
|
||||
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
|
||||
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
|
||||
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
|
||||
# optimization
|
||||
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
|
||||
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
|
||||
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
|
||||
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
|
||||
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
|
||||
parser.add_argument('-g', '--nr_gpu', type=int, default=8, help='How many GPUs to distribute the training across?')
|
||||
# evaluation
|
||||
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
|
||||
# reproducibility
|
||||
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
|
||||
args = parser.parse_args()
|
||||
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# fix random seed for reproducibility
|
||||
rng = np.random.RandomState(args.seed)
|
||||
tf.set_random_seed(args.seed)
|
||||
|
||||
# initialize data loaders for train/test splits
|
||||
if args.data_set == 'imagenet' and args.class_conditional:
|
||||
raise("We currently don't have labels for the small imagenet data set")
|
||||
DataLoader = {'cifar':cifar10_data.DataLoader, 'imagenet':imagenet_data.DataLoader}[args.data_set]
|
||||
train_data = DataLoader(args.data_dir, 'train', args.batch_size * args.nr_gpu, rng=rng, shuffle=False, return_labels=args.class_conditional)
|
||||
test_data = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=args.class_conditional)
|
||||
obs_shape = train_data.get_observation_size() # e.g. a tuple (32,32,3)
|
||||
assert len(obs_shape) == 3, 'assumed right now'
|
||||
|
||||
# data place holders
|
||||
x_init = tf.placeholder(tf.float32, shape=(args.init_batch_size,) + obs_shape)
|
||||
xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + obs_shape) for i in range(args.nr_gpu)]
|
||||
|
||||
# if the model is class-conditional we'll set up label placeholders + one-hot encodings 'h' to condition on
|
||||
if args.class_conditional:
|
||||
num_labels = train_data.get_num_labels()
|
||||
y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
|
||||
h_init = tf.one_hot(y_init, num_labels)
|
||||
y_sample = np.split(np.mod(np.arange(args.batch_size*args.nr_gpu), num_labels), args.nr_gpu)
|
||||
h_sample = [tf.one_hot(tf.Variable(y_sample[i], trainable=False), num_labels) for i in range(args.nr_gpu)]
|
||||
ys = [tf.placeholder(tf.int32, shape=(args.batch_size,)) for i in range(args.nr_gpu)]
|
||||
hs = [tf.one_hot(ys[i], num_labels) for i in range(args.nr_gpu)]
|
||||
else:
|
||||
h_init = None
|
||||
h_sample = [None] * args.nr_gpu
|
||||
hs = h_sample
|
||||
|
||||
# create the model
|
||||
model_opt = { 'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters, 'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity }
|
||||
model = tf.make_template('model', model_spec)
|
||||
|
||||
# run once for data dependent initialization of parameters
|
||||
gen_par = model(x_init, h_init, init=True, dropout_p=args.dropout_p, **model_opt)
|
||||
|
||||
# keep track of moving average
|
||||
all_params = tf.trainable_variables()
|
||||
ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
|
||||
maintain_averages_op = tf.group(ema.apply(all_params))
|
||||
|
||||
# get loss gradients over multiple GPUs
|
||||
grads = []
|
||||
loss_gen = []
|
||||
loss_gen_test = []
|
||||
for i in range(args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
# train
|
||||
gen_par = model(xs[i], hs[i], ema=None, dropout_p=args.dropout_p, **model_opt)
|
||||
loss_gen.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
|
||||
# gradients
|
||||
grads.append(tf.gradients(loss_gen[i], all_params))
|
||||
# test
|
||||
gen_par = model(xs[i], hs[i], ema=ema, dropout_p=0., **model_opt)
|
||||
loss_gen_test.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
|
||||
|
||||
# add losses and gradients together and get training updates
|
||||
tf_lr = tf.placeholder(tf.float32, shape=[])
|
||||
with tf.device('/gpu:0'):
|
||||
for i in range(1,args.nr_gpu):
|
||||
loss_gen[0] += loss_gen[i]
|
||||
loss_gen_test[0] += loss_gen_test[i]
|
||||
for j in range(len(grads[0])):
|
||||
grads[0][j] += grads[i][j]
|
||||
# training op
|
||||
optimizer = tf.group(nn.adam_updates(all_params, grads[0], lr=tf_lr, mom1=0.95, mom2=0.9995), maintain_averages_op)
|
||||
|
||||
# convert loss to bits/dim
|
||||
bits_per_dim = loss_gen[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
|
||||
bits_per_dim_test = loss_gen_test[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
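# Spelled out: loss_gen[0] is (after the summation above) the total negative log-likelihood in
# nats over all nr_gpu * batch_size images, so dividing by (#images * prod(obs_shape)) gives
# nats per sub-pixel and the extra 1/log(2) converts nats to bits:
#   bits/dim = NLL_nats / (nr_gpu * batch_size * prod(obs_shape) * ln 2)
# For CIFAR-10, prod(obs_shape) = 32*32*3 = 3072.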
|
||||
|
||||
# sample from the model
|
||||
new_x_gen = []
|
||||
for i in range(args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
gen_par = model(xs[i], h_sample[i], ema=ema, dropout_p=0, **model_opt)
|
||||
new_x_gen.append(nn.sample_from_discretized_mix_logistic(gen_par, args.nr_logistic_mix))
|
||||
def sample_from_model(sess):
|
||||
x_gen = [np.zeros((args.batch_size,) + obs_shape, dtype=np.float32) for i in range(args.nr_gpu)]
|
||||
for yi in range(obs_shape[0]):
|
||||
for xi in range(obs_shape[1]):
|
||||
new_x_gen_np = sess.run(new_x_gen, {xs[i]: x_gen[i] for i in range(args.nr_gpu)})
|
||||
for i in range(args.nr_gpu):
|
||||
x_gen[i][:,yi,xi,:] = new_x_gen_np[i][:,yi,xi,:]
|
||||
return np.concatenate(x_gen, axis=0)
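# Note on the loop above: the model is autoregressive over pixel positions, so the sampling
# graph is re-run once per (row, column) position and only the newly generated position is
# copied into x_gen; for 32x32 images that is 1024 forward passes per batch of samples.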
|
||||
|
||||
# init & save
|
||||
initializer = tf.initialize_all_variables()
|
||||
saver = tf.train.Saver()
|
||||
|
||||
# turn numpy inputs into feed_dict for use with tensorflow
|
||||
def make_feed_dict(data, init=False):
|
||||
if type(data) is tuple:
|
||||
x,y = data
|
||||
else:
|
||||
x = data
|
||||
y = None
|
||||
x = np.cast[np.float32]((x - 127.5) / 127.5) # input to pixelCNN is scaled from uint8 [0,255] to float in range [-1,1]
|
||||
if init:
|
||||
feed_dict = {x_init: x}
|
||||
if y is not None:
|
||||
feed_dict.update({y_init: y})
|
||||
else:
|
||||
x = np.split(x, args.nr_gpu)
|
||||
feed_dict = {xs[i]: x[i] for i in range(args.nr_gpu)}
|
||||
if y is not None:
|
||||
y = np.split(y, args.nr_gpu)
|
||||
feed_dict.update({ys[i]: y[i] for i in range(args.nr_gpu)})
|
||||
return feed_dict
|
||||
|
||||
# //////////// perform testing //////////////
|
||||
|
||||
print('starting testing')
|
||||
test_bpd = []
|
||||
lr = args.learning_rate
|
||||
|
||||
with tf.Session() as sess:
|
||||
# compute likelihood over test data
|
||||
ckpt_file = args.save_dir + '/params_' + args.data_set + '.ckpt'
|
||||
print('restoring parameters from', ckpt_file)
|
||||
saver.restore(sess, ckpt_file)
|
||||
|
||||
test_losses = []
|
||||
uidx = 0
|
||||
for d in train_data:
|
||||
feed_dict = make_feed_dict(d)
|
||||
l = sess.run(bits_per_dim_test, feed_dict)
|
||||
test_losses.append(l)
|
||||
uidx += 1
|
||||
if uidx % 100 == 0:
|
||||
print(uidx, l)
|
||||
test_loss_gen = np.mean(test_losses)
|
||||
print(uidx, ' -- ', test_loss_gen)
|
||||
test_bpd.append(test_loss_gen)
|
||||
print('Test nll=%.2f' % test_loss_gen)
|
||||
|
||||
np.savez('./TMD', np.array(test_losses))
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,222 @@
|
|||
"""
|
||||
Trains a Pixel-CNN++ generative model on CIFAR-10 or Tiny ImageNet data.
|
||||
Uses multiple GPUs, indicated by the flag --nr-gpu
|
||||
|
||||
Example usage:
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3 python train_double_cnn.py --nr_gpu 4
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
import pixel_cnn_pp.nn as nn
|
||||
import pixel_cnn_pp.plotting as plotting
|
||||
from pixel_cnn_pp.model import model_spec
|
||||
import data.cifar10_data as cifar10_data
|
||||
import data.imagenet_data as imagenet_data
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
parser = argparse.ArgumentParser()
|
||||
# data I/O
|
||||
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
|
||||
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
|
||||
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
|
||||
parser.add_argument('-t', '--save_interval', type=int, default=20, help='Every how many epochs to write checkpoint/samples?')
|
||||
parser.add_argument('-r', '--load_params', dest='load_params', action='store_true', help='Restore training from previous model checkpoint?')
|
||||
# model
|
||||
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
|
||||
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
|
||||
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
|
||||
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
|
||||
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
|
||||
# optimization
|
||||
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
|
||||
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
|
||||
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
|
||||
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
|
||||
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
|
||||
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
|
||||
parser.add_argument('-g', '--nr_gpu', type=int, default=8, help='How many GPUs to distribute the training across?')
|
||||
# evaluation
|
||||
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
|
||||
# reproducibility
|
||||
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
|
||||
args = parser.parse_args()
|
||||
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# fix random seed for reproducibility
|
||||
rng = np.random.RandomState(args.seed)
|
||||
tf.set_random_seed(args.seed)
|
||||
|
||||
# initialize data loaders for train/test splits
|
||||
if args.data_set == 'imagenet' and args.class_conditional:
|
||||
raise("We currently don't have labels for the small imagenet data set")
|
||||
DataLoader = {'cifar':cifar10_data.DataLoader, 'imagenet':imagenet_data.DataLoader}[args.data_set]
|
||||
train_data = DataLoader(args.data_dir, 'train', args.batch_size * args.nr_gpu, rng=rng, shuffle=True, return_labels=args.class_conditional)
|
||||
test_data = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=args.class_conditional)
|
||||
obs_shape = train_data.get_observation_size() # e.g. a tuple (32,32,3)
|
||||
assert len(obs_shape) == 3, 'assumed right now'
|
||||
|
||||
# data place holders
|
||||
x_init = tf.placeholder(tf.float32, shape=(args.init_batch_size,) + obs_shape)
|
||||
xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + obs_shape) for i in range(args.nr_gpu)]
|
||||
|
||||
# if the model is class-conditional we'll set up label placeholders + one-hot encodings 'h' to condition on
|
||||
if args.class_conditional:
|
||||
num_labels = train_data.get_num_labels()
|
||||
y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
|
||||
h_init = tf.one_hot(y_init, num_labels)
|
||||
y_sample = np.split(np.mod(np.arange(args.batch_size*args.nr_gpu), num_labels), args.nr_gpu)
|
||||
h_sample = [tf.one_hot(tf.Variable(y_sample[i], trainable=False), num_labels) for i in range(args.nr_gpu)]
|
||||
ys = [tf.placeholder(tf.int32, shape=(args.batch_size,)) for i in range(args.nr_gpu)]
|
||||
hs = [tf.one_hot(ys[i], num_labels) for i in range(args.nr_gpu)]
|
||||
else:
|
||||
h_init = None
|
||||
h_sample = [None] * args.nr_gpu
|
||||
hs = h_sample
|
||||
|
||||
# create the model
|
||||
model_opt = { 'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters, 'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity }
|
||||
model = tf.make_template('model', model_spec)
|
||||
|
||||
# run once for data dependent initialization of parameters
|
||||
gen_par = model(x_init, h_init, init=True, dropout_p=args.dropout_p, **model_opt)
|
||||
|
||||
# keep track of moving average
|
||||
all_params = tf.trainable_variables()
|
||||
ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
|
||||
maintain_averages_op = tf.group(ema.apply(all_params))
|
||||
|
||||
# get loss gradients over multiple GPUs
|
||||
grads = []
|
||||
loss_gen = []
|
||||
loss_gen_test = []
|
||||
for i in range(args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
# train
|
||||
gen_par = model(xs[i], hs[i], ema=None, dropout_p=args.dropout_p, **model_opt)
|
||||
loss_gen.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
|
||||
# gradients
|
||||
grads.append(tf.gradients(loss_gen[i], all_params))
|
||||
# test
|
||||
gen_par = model(xs[i], hs[i], ema=ema, dropout_p=0., **model_opt)
|
||||
loss_gen_test.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
|
||||
|
||||
# add losses and gradients together and get training updates
|
||||
tf_lr = tf.placeholder(tf.float32, shape=[])
|
||||
with tf.device('/gpu:0'):
|
||||
for i in range(1,args.nr_gpu):
|
||||
loss_gen[0] += loss_gen[i]
|
||||
loss_gen_test[0] += loss_gen_test[i]
|
||||
for j in range(len(grads[0])):
|
||||
grads[0][j] += grads[i][j]
|
||||
# training op
|
||||
optimizer = tf.group(nn.adam_updates(all_params, grads[0], lr=tf_lr, mom1=0.95, mom2=0.9995), maintain_averages_op)
|
||||
|
||||
# convert loss to bits/dim
|
||||
bits_per_dim = loss_gen[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
|
||||
bits_per_dim_test = loss_gen_test[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
|
||||
|
||||
# sample from the model
|
||||
new_x_gen = []
|
||||
for i in range(args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
gen_par = model(xs[i], h_sample[i], ema=ema, dropout_p=0, **model_opt)
|
||||
new_x_gen.append(nn.sample_from_discretized_mix_logistic(gen_par, args.nr_logistic_mix))
|
||||
def sample_from_model(sess):
|
||||
x_gen = [np.zeros((args.batch_size,) + obs_shape, dtype=np.float32) for i in range(args.nr_gpu)]
|
||||
for yi in range(obs_shape[0]):
|
||||
for xi in range(obs_shape[1]):
|
||||
new_x_gen_np = sess.run(new_x_gen, {xs[i]: x_gen[i] for i in range(args.nr_gpu)})
|
||||
for i in range(args.nr_gpu):
|
||||
x_gen[i][:,yi,xi,:] = new_x_gen_np[i][:,yi,xi,:]
|
||||
return np.concatenate(x_gen, axis=0)
|
||||
|
||||
# init & save
|
||||
initializer = tf.initialize_all_variables()
|
||||
saver = tf.train.Saver()
|
||||
|
||||
# turn numpy inputs into feed_dict for use with tensorflow
|
||||
def make_feed_dict(data, init=False):
|
||||
if type(data) is tuple:
|
||||
x,y = data
|
||||
else:
|
||||
x = data
|
||||
y = None
|
||||
x = np.cast[np.float32]((x - 127.5) / 127.5) # input to pixelCNN is scaled from uint8 [0,255] to float in range [-1,1]
|
||||
if init:
|
||||
feed_dict = {x_init: x}
|
||||
if y is not None:
|
||||
feed_dict.update({y_init: y})
|
||||
else:
|
||||
x = np.split(x, args.nr_gpu)
|
||||
feed_dict = {xs[i]: x[i] for i in range(args.nr_gpu)}
|
||||
if y is not None:
|
||||
y = np.split(y, args.nr_gpu)
|
||||
feed_dict.update({ys[i]: y[i] for i in range(args.nr_gpu)})
|
||||
return feed_dict
|
||||
|
||||
# //////////// perform training //////////////
|
||||
if not os.path.exists(args.save_dir):
|
||||
os.makedirs(args.save_dir)
|
||||
print('starting training')
|
||||
test_bpd = []
|
||||
lr = args.learning_rate
|
||||
with tf.Session() as sess:
|
||||
for epoch in range(args.max_epochs):
|
||||
begin = time.time()
|
||||
|
||||
# init
|
||||
if epoch == 0:
|
||||
feed_dict = make_feed_dict(train_data.next(args.init_batch_size), init=True) # manually retrieve exactly init_batch_size examples
|
||||
train_data.reset() # rewind the iterator back to 0 to do one full epoch
|
||||
sess.run(initializer, feed_dict)
|
||||
print('initializing the model...')
|
||||
if args.load_params:
|
||||
ckpt_file = args.save_dir + '/params_' + args.data_set + '.ckpt'
|
||||
print('restoring parameters from', ckpt_file)
|
||||
saver.restore(sess, ckpt_file)
|
||||
|
||||
# train for one epoch
|
||||
train_losses = []
|
||||
for d in train_data:
|
||||
feed_dict = make_feed_dict(d)
|
||||
# forward/backward/update model on each gpu
|
||||
lr *= args.lr_decay
|
||||
feed_dict.update({ tf_lr: lr })
|
||||
l,_ = sess.run([bits_per_dim, optimizer], feed_dict)
|
||||
train_losses.append(l)
|
||||
train_loss_gen = np.mean(train_losses)
|
||||
|
||||
# compute likelihood over test data
|
||||
test_losses = []
|
||||
for d in test_data:
|
||||
feed_dict = make_feed_dict(d)
|
||||
l = sess.run(bits_per_dim_test, feed_dict)
|
||||
test_losses.append(l)
|
||||
test_loss_gen = np.mean(test_losses)
|
||||
test_bpd.append(test_loss_gen)
|
||||
|
||||
# log progress to console
|
||||
print("Iteration %d, time = %ds, train bits_per_dim = %.4f, test bits_per_dim = %.4f" % (epoch, time.time()-begin, train_loss_gen, test_loss_gen))
|
||||
sys.stdout.flush()
|
||||
|
||||
if epoch % args.save_interval == 0:
|
||||
|
||||
# generate samples from the model
|
||||
sample_x = sample_from_model(sess)
|
||||
img_tile = plotting.img_tile(sample_x[:int(np.floor(np.sqrt(args.batch_size*args.nr_gpu))**2)], aspect_ratio=1.0, border_color=1.0, stretch=True)
|
||||
img = plotting.plot_img(img_tile, title=args.data_set + ' samples')
|
||||
plotting.plt.savefig(os.path.join(args.save_dir,'%s_sample%d.png' % (args.data_set, epoch)))
|
||||
plotting.plt.close('all')
|
||||
|
||||
# save params
|
||||
saver.save(sess, args.save_dir + '/params_' + args.data_set + '.ckpt')
|
||||
np.savez(args.save_dir + '/test_bpd_' + args.data_set + '.npz', test_bpd=np.array(test_bpd))
|
|
@ -0,0 +1,91 @@
|
|||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""ResNet Train/Eval module.
|
||||
"""
|
||||
import time
|
||||
import sys
|
||||
import os
|
||||
|
||||
import cifar_input
|
||||
import numpy as np
|
||||
import resnet_model_basic as resnet_model
|
||||
import tensorflow as tf
|
||||
import data.cifar10_data as cifar10_data
|
||||
|
||||
|
||||
|
||||
def lr_I2L(train_step):
|
||||
#step_wise = [40000,60000,80000] # this is the schedule used in the original setting
|
||||
step_wise = [51000,76000,102000]
|
||||
if train_step < step_wise[0]:
|
||||
return 0.1
|
||||
elif train_step < step_wise[1]:
|
||||
return 0.01
|
||||
elif train_step < step_wise[2]:
|
||||
return 0.001
|
||||
else:
|
||||
return 0.0001
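# The resulting piecewise-constant schedule, as a quick reference (step -> learning rate):
#   step <  51000: 0.1
#   step <  76000: 0.01
#   step < 102000: 0.001
#   otherwise    : 0.0001
# e.g. lr_I2L(60000) == 0.01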
|
||||
|
||||
class worker_I2L(object):
|
||||
def __init__(self, args):
|
||||
|
||||
hps = resnet_model.HParams(batch_size=args.batch_size,
|
||||
num_classes=10,
|
||||
min_lrn_rate=0.0001,
|
||||
lrn_rate=0.1,
|
||||
num_residual_units=18,
|
||||
use_bottleneck=False,
|
||||
weight_decay_rate=0.0002,
|
||||
relu_leakiness=0.1,
|
||||
optimizer='mom')
|
||||
self.args = args
|
||||
self.model = resnet_model.ResNet(hps, args.mode, use_wide_resnet=args.use_wide_resnet, nr_gpu=args.nr_gpu)
|
||||
self.model.build_graph()
|
||||
|
||||
truth = tf.argmax(tf.concat(self.model.labels, axis=0), axis=1)
|
||||
predictions = tf.argmax(tf.concat(self.model.predictions,axis=0), axis=1)
|
||||
self.right_decision = tf.reduce_sum(tf.to_float(tf.equal(predictions, truth)))
|
||||
|
||||
def GetLoss(self):
|
||||
return self.model.nlls, self.model.GetWeightDecay()
|
||||
|
||||
def Valid(self, test_data, sess):
|
||||
with tf.device('/gpu:0'):
|
||||
cost_all = self.model.nlls[0]
|
||||
for i in range(1, self.args.nr_gpu):
|
||||
cost_all += self.model.nlls[i]
|
||||
|
||||
m_sample = 0
|
||||
m_correct = 0.
|
||||
costs = 0.
|
||||
for test_image, test_label in test_data:
|
||||
m_sample += test_image.shape[0]
|
||||
|
||||
splitted_image = np.split(test_image.astype('float32'), self.args.nr_gpu)
|
||||
splitted_label = np.split(test_label, self.args.nr_gpu)
|
||||
|
||||
feed_dict = {self.model.needImgAug: False}
|
||||
feed_dict.update({self.model.input_image[i]: splitted_image[i] for i in range(self.args.nr_gpu)})
|
||||
feed_dict.update({self.model.input_label[i]: splitted_label[i][:, None] for i in range(self.args.nr_gpu)})
|
||||
|
||||
_cost, _right_decision = sess.run([cost_all, self.right_decision], feed_dict)
|
||||
costs += np.sum(_cost)
|
||||
m_correct += _right_decision
|
||||
test_loss = costs / m_sample
|
||||
test_acc = m_correct * 1. / m_sample
|
||||
print('[I2L] test_nll={},test_acc={}'.format(
|
||||
'{0:.4f}'.format(test_loss), '{0:.6f}'.format(test_acc) )
|
||||
)
|
|
@ -0,0 +1,134 @@
|
|||
"""
|
||||
Trains a Pixel-CNN++ generative model on CIFAR-10 or Tiny ImageNet data.
|
||||
Uses multiple GPUs, indicated by the flag --nr-gpu
|
||||
|
||||
Example usage:
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3 python train_double_cnn.py --nr_gpu 4
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
import pixel_cnn_pp.nn as nn
|
||||
import pixel_cnn_pp.plotting as plotting
|
||||
from pixel_cnn_pp.model import model_spec
|
||||
import data.cifar10_data as cifar10_data
|
||||
|
||||
class worker_L2I(object):
|
||||
def __init__(self, args, num_labels, image_shape):
|
||||
# Default parameters
|
||||
self.num_labels = num_labels
|
||||
self.image_shape=image_shape
|
||||
self.args = args
|
||||
|
||||
# Data used for data-dependent parameter initialization
|
||||
self.x_init = tf.placeholder(tf.float32, shape=(args.init_batch_size,) + self.image_shape)
|
||||
self.xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + self.image_shape) for _ in range(args.nr_gpu)]
|
||||
self.y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
|
||||
self.h_init = tf.one_hot(self.y_init, self.num_labels)
|
||||
|
||||
# parameters used for sampling
|
||||
self.y_sample = np.split(np.mod(np.arange(args.batch_size*args.nr_gpu), self.num_labels), args.nr_gpu)
|
||||
# self.h_sample = [tf.one_hot(tf.Variable(self.y_sample[i], trainable=False), self.num_labels) for i in range(args.nr_gpu)]
|
||||
# the line above is the version used for the ICML paper; it is revised as follows
|
||||
self.h_sample = [tf.one_hot(self.y_sample[i], self.num_labels) for i in range(args.nr_gpu)]
|
||||
self.ys = [tf.placeholder(tf.int32, shape=(args.batch_size,)) for i in range(args.nr_gpu)]
|
||||
self.hs = [tf.one_hot(self.ys[i], self.num_labels) for i in range(args.nr_gpu)]
|
||||
# create the model
|
||||
self.model_opt = { 'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters, 'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity }
|
||||
self.model = tf.make_template('model', model_spec)
|
||||
|
||||
# run once for data dependent initialization of parameters
|
||||
# in the original code this is "gen_par = self.model(...)"; with init=True it runs the data-dependent initialization automatically
|
||||
self.model(self.x_init, self.h_init, init=True, dropout_p=args.dropout_p, **self.model_opt)
|
||||
|
||||
# keep track of moving average
|
||||
self.all_params = tf.trainable_variables()
|
||||
self.ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
|
||||
self.maintain_averages_op = tf.group(self.ema.apply(self.all_params))
|
||||
|
||||
# parameters for optimization
|
||||
self.tf_lr = tf.placeholder(tf.float32, shape=())
|
||||
|
||||
def GetLoss(self):
|
||||
# get loss gradients over multiple GPUs
|
||||
loss_gen = []
|
||||
loss_gen_test = []
|
||||
for i in range(self.args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
# train
|
||||
gen_par = self.model(self.xs[i], self.hs[i], ema=None, dropout_p=self.args.dropout_p, **self.model_opt)
|
||||
loss_gen.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par, sum_all=False))
|
||||
|
||||
# test
|
||||
gen_par = self.model(self.xs[i], self.hs[i], ema=self.ema, dropout_p=0., **self.model_opt)
|
||||
loss_gen_test.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par))
|
||||
|
||||
return loss_gen, loss_gen_test
|
||||
|
||||
def GetOverallLoss(self):
|
||||
# get loss gradients over multiple GPUs
|
||||
loss_gen = []
|
||||
loss_gen_test = []
|
||||
for i in range(self.args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
# train
|
||||
gen_par = self.model(self.xs[i], self.hs[i], ema=None, dropout_p=self.args.dropout_p, **self.model_opt)
|
||||
loss_gen.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par, sum_all=False))
|
||||
|
||||
# test
|
||||
gen_par = self.model(self.xs[i], self.hs[i], ema=self.ema, dropout_p=0., **self.model_opt)
|
||||
loss_gen_test.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par))
|
||||
|
||||
# accumulate the per-GPU losses on /gpu:0
|
||||
with tf.device('/gpu:0'):
|
||||
for i in range(1,self.args.nr_gpu):
|
||||
loss_gen[0] += loss_gen[i]
|
||||
loss_gen_test[0] += loss_gen_test[i]
|
||||
|
||||
# training op
|
||||
#optimizer = tf.group(nn.adam_updates(self.all_params, grads[0], lr=self.tf_lr, mom1=0.95, mom2=0.9995), self.maintain_averages_op)
|
||||
|
||||
# convert loss to bits/dim
|
||||
self.bits_per_dim = loss_gen[0]/(self.args.nr_gpu*np.log(2.)*np.prod(self.image_shape)*self.args.batch_size)
|
||||
self.bits_per_dim_test = loss_gen_test[0]/(self.args.nr_gpu*np.log(2.)*np.prod(self.image_shape)*self.args.batch_size)
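# loss_gen is a summed negative log-likelihood in nats; dividing by log(2)
# converts nats to bits, and dividing by nr_gpu * batch_size * prod(image_shape)
# (e.g. 32*32*3 for CIFAR-10) normalizes it to bits per sub-pixel, the usual
# PixelCNN++ metric.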
|
||||
|
||||
def Update(self, grads, useSGD=False):
|
||||
if useSGD:
|
||||
print('Use pure SGD for Label-->Image tasks')
|
||||
optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.tf_lr)
|
||||
apply_op = optimizer.apply_gradients(zip(grads, self.all_params))
|
||||
self.update_ops = tf.group(apply_op)
|
||||
else:
|
||||
self.update_ops = tf.group(nn.adam_updates(self.all_params, grads, lr=self.tf_lr, mom1=0.95, mom2=0.9995), self.maintain_averages_op)
|
||||
|
||||
def build_sample_from_model(self):
|
||||
# sample from the model
|
||||
self.new_x_gen = []
|
||||
for i in range(self.args.nr_gpu):
|
||||
with tf.device('/gpu:%d' % i):
|
||||
gen_par = self.model(self.xs[i], self.h_sample[i], ema=self.ema, dropout_p=0, **self.model_opt)
|
||||
self.new_x_gen.append(nn.sample_from_discretized_mix_logistic(gen_par, self.args.nr_logistic_mix))
|
||||
|
||||
def _sample_from_model(self, sess):
|
||||
x_gen = [np.zeros((self.args.batch_size,) + self.image_shape, dtype=np.float32) for _ in range(self.args.nr_gpu)]
|
||||
for yi in range(self.image_shape[0]):
|
||||
for xi in range(self.image_shape[1]):
|
||||
new_x_gen_np = sess.run(self.new_x_gen, {self.xs[i]: x_gen[i] for i in range(self.args.nr_gpu)})
|
||||
for i in range(self.args.nr_gpu):
|
||||
x_gen[i][:,yi,xi,:] = new_x_gen_np[i][:,yi,xi,:]
|
||||
return np.concatenate(x_gen, axis=0)
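# Sampling is autoregressive: each sess.run above re-evaluates the network on
# the partially generated canvas, and only the newly sampled position (yi, xi)
# is written back, so a full sample takes image_height * image_width forward
# passes.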
|
||||
|
||||
|
||||
def Gen_Images(self, sess, epoch):
|
||||
sample_x = self._sample_from_model(sess)
|
||||
img_tile = plotting.img_tile(sample_x[:int(np.floor(np.sqrt(self.args.batch_size*self.args.nr_gpu))**2)], aspect_ratio=1.0, border_color=1.0, stretch=True)
|
||||
img = plotting.plot_img(img_tile, title=self.args.data_set + ' samples')
|
||||
plotting.plt.savefig(os.path.join(self.args.save_dir,'%s_sample%d.png' % (self.args.data_set, epoch)))
|
||||
plotting.plt.close('all')
|
|
@ -0,0 +1,40 @@
|
|||
import sys
|
||||
|
||||
mapper_machine_freecard = {}
|
||||
mapper_machine_rank = {}
|
||||
|
||||
def MapIDs(m_machine):
|
||||
for i in range(m_machine):
|
||||
fo = open('record' + str(i))
|
||||
id = 0
|
||||
m_line = 0
|
||||
machine_name = ''
|
||||
for line in fo:
|
||||
if id == 0:
|
||||
machine_name = line[:-1]
|
||||
mapper_machine_freecard[machine_name] = []
|
||||
if mapper_machine_rank.has_key(machine_name):
|
||||
mapper_machine_rank[machine_name].append(i)
|
||||
else:
|
||||
mapper_machine_rank[machine_name] = [i]
|
||||
elif id > 1:
|
||||
mapper_machine_freecard[machine_name].append(int(line))
|
||||
id = id + 1
|
||||
fo.close()
|
||||
|
||||
def Map_Rank_Card(m_machine):
|
||||
MapIDs(m_machine)
|
||||
allocations = range(m_machine)
|
||||
for k in mapper_machine_rank.keys():
|
||||
ranks = mapper_machine_rank[k]
|
||||
cards = mapper_machine_freecard[k]
|
||||
#if len(ranks) == len(cards):
|
||||
for i in range(len(ranks)):
|
||||
allocations[ranks[i]] = cards[i]
|
||||
|
||||
for l in allocations:
|
||||
print l
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
Map_Rank_Card(int(sys.argv[1]))
|
|
@ -0,0 +1,40 @@
|
|||
import sys
|
||||
|
||||
mapper_machine_freecard = {}
|
||||
mapper_machine_rank = {}
|
||||
|
||||
def MapIDs(m_machine):
|
||||
for i in range(m_machine):
|
||||
fo = open('record' + str(i))
|
||||
id = 0
|
||||
m_line = 0
|
||||
machine_name = ''
|
||||
for line in fo:
|
||||
if id == 0:
|
||||
machine_name = line[:-1]
|
||||
mapper_machine_freecard[machine_name] = []
|
||||
if mapper_machine_rank.has_key(machine_name):
|
||||
mapper_machine_rank[machine_name].append(i)
|
||||
else:
|
||||
mapper_machine_rank[machine_name] = [i]
|
||||
elif id > 1:
|
||||
mapper_machine_freecard[machine_name].append(int(line))
|
||||
id = id + 1
|
||||
fo.close()
|
||||
|
||||
def Map_Rank_Card(m_machine):
|
||||
MapIDs(m_machine)
|
||||
allocations = range(m_machine)
|
||||
for k in mapper_machine_rank.keys():
|
||||
ranks = mapper_machine_rank[k]
|
||||
cards = mapper_machine_freecard[k]
|
||||
#if len(ranks) == len(cards):
|
||||
for i in range(len(ranks)):
|
||||
allocations[ranks[i]] = cards[i]
|
||||
|
||||
for l in allocations:
|
||||
print l
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
Map_Rank_Card(int(sys.argv[1]))
|
|
@ -0,0 +1,455 @@
|
|||
from nmt_base import *
|
||||
from Data import *
|
||||
|
||||
def _p(pp, name):
|
||||
return '%s_%s' % (pp, name)
|
||||
|
||||
class CLM_worker(object):
|
||||
def __init__(self,
|
||||
round = 0,
|
||||
dim_word=500, # word vector dimensionality
|
||||
dim_proj=1024, # the number of GRU units
|
||||
encoder='lstm',
|
||||
patience=10, # early stopping patience
|
||||
max_epochs=5000,
|
||||
finish_after=10000000000000, # finish after this many updates
|
||||
decay_c=-1., # L2 weight decay penalty
|
||||
clip_c=5.,
|
||||
lrate=1.,
|
||||
n_words=10000, # vocabulary size
|
||||
maxlen=None, # maximum length of the description
|
||||
minlen=1,
|
||||
start_iter=0,
|
||||
start_epoch=0,
|
||||
optimizer='adadelta',
|
||||
batch_size=16,
|
||||
valid_batch_size=16,
|
||||
saveto='model.npz',
|
||||
validFreq=2000,
|
||||
dispFreq=100,
|
||||
saveFreq=100000, # save the parameters after every saveFreq updates
|
||||
newDumpFreq=10000,
|
||||
syncFreq = 500000000000,
|
||||
sampleFreq=10000000000, # generate some samples after every sampleFreq
|
||||
valid_dataset=None,
|
||||
test_dataset=None,
|
||||
dictionary=None,
|
||||
sampleFileName="sampleFile.txt",
|
||||
embedding=None,
|
||||
dropout_input=None,
|
||||
dropout_output=None,
|
||||
reload_model=None,
|
||||
reload_option=None,
|
||||
log=None,
|
||||
monitor_grad=False,
|
||||
pad_sos=False):
|
||||
# Model options
|
||||
if pad_sos:
|
||||
n_words += 1
|
||||
self.options = locals().copy()
|
||||
|
||||
print('log = ', log)
|
||||
F_log = open(log, "a")
|
||||
|
||||
voc_size = n_words - 1 if pad_sos else n_words
|
||||
|
||||
# reload options
|
||||
if reload_option is not None and os.path.exists(reload_option):
|
||||
print "Reloading model options...",
|
||||
with open('%s' % reload_option, 'rb') as f:
|
||||
model_options = pkl.load(f)
|
||||
print "Done"
|
||||
|
||||
# init parameters
|
||||
print 'Initializing model parameters...',
|
||||
params = init_lm_params(self.options)
|
||||
print 'Done'
|
||||
|
||||
# load pre-trained word embedding
|
||||
if embedding is not None and os.path.exists(embedding):
|
||||
print 'Load Embedding from ', embedding
|
||||
Wemb = numpy.array(numpy.load(open(embedding, "rb")))
|
||||
assert Wemb.shape[0] == self.options['n_words']
|
||||
assert Wemb.shape[1] == self.options['dim_word']
|
||||
print 'Using pre-trained word embedding...',
|
||||
params['Wemb'] = Wemb.astype(numpy.float32)
|
||||
print 'vocab size', params['Wemb'].shape[0], ', dim', params['Wemb'].shape[1]
|
||||
|
||||
# reload parameters
|
||||
if reload_model is not None and os.path.exists(reload_model):
|
||||
print "Reloading model parameters...",
|
||||
params = load_params(reload_model, params)
|
||||
print "Done"
|
||||
|
||||
# create shared variables for parameters
|
||||
self.tparams = init_tparams(params)
|
||||
|
||||
# build the symbolic computational graph
|
||||
print 'Building model...'
|
||||
self.trng = RandomStreams(1234)
|
||||
self.use_noise = theano.shared(numpy.float32(0.))
|
||||
|
||||
def GetNll(self):
|
||||
srcx, srcx_mask, ctx_, cost, sentenceLen = self.build_lm_model()
|
||||
print 'Done'
|
||||
|
||||
print 'Building f_log_probs',
|
||||
self.f_log_probs = theano.function([srcx, srcx_mask, ctx_], cost, profile = profile)
|
||||
print 'Done'
|
||||
return srcx, srcx_mask, ctx_, cost, sentenceLen
|
||||
|
||||
# build a training model
|
||||
def build_lm_model(self):
|
||||
srcx = tensor.matrix('x', dtype='int64')
|
||||
srcx_mask = tensor.matrix('x_mask', dtype='float32')
|
||||
ctx_ = tensor.vector('ctx_', dtype='int64')
|
||||
x = srcx[:-1, :]
|
||||
y = srcx[1:,:]
|
||||
|
||||
n_timesteps = x.shape[0]
|
||||
n_samples = x.shape[1]
|
||||
print('check init ok')
|
||||
emb = self.tparams['Wemb'][x.flatten()]
|
||||
emb = emb.reshape([n_timesteps, n_samples, self.options['dim_word']])
|
||||
emb_ctx = self.tparams['Wemb_ctx'][ctx_].reshape([n_samples, self.options['dim_word']])
|
||||
print('check embed ok')
|
||||
# input
|
||||
|
||||
if self.options['dropout_input'] is not None and self.options['dropout_input'] > 0:
|
||||
print 'Applying drop-out on input embedding (dropout_input:', self.options['dropout_input'], ")"
|
||||
emb = dropout_layer(emb, self.use_noise, self.trng, self.options['dropout_input'])
|
||||
emb_ctx = dropout_layer(emb_ctx, self.use_noise, self.trng, self.options['dropout_input'])
|
||||
|
||||
init_state = tensor.alloc(0., n_samples, self.options['dim_proj'])
|
||||
init_cell = tensor.alloc(0., n_samples, self.options['dim_proj'])
|
||||
|
||||
# pass through gru layer, recurrence here
|
||||
print 'Using', self.options['encoder'], 'unit for encoder'
|
||||
print 'Training with successive sentences'
|
||||
init_states = [init_state, init_cell]
|
||||
proj = lstm_layer(self.tparams, emb, emb_ctx, self.options,
|
||||
prefix='encoder',
|
||||
init_state=init_state,
|
||||
cell_state=init_cell,
|
||||
mask = srcx_mask[:-1,:])
|
||||
|
||||
|
||||
proj_h = proj[0] # all hidden states
|
||||
|
||||
next_states = [st[-1] for st in proj]  # [last hidden state, last cell state]
|
||||
|
||||
if self.options['dropout_output'] is not None and self.options['dropout_output'] > 0:
|
||||
print 'Applying drop-out on hidden states (dropout_proj:', self.options['dropout_output'], ")"
|
||||
proj_h = dropout_layer(proj_h, self.use_noise, self.trng, self.options['dropout_output'])
|
||||
|
||||
|
||||
# compute word probabilities
|
||||
def _prob(proj_h):
|
||||
logit_lstm = get_layer('ff')[1](self.tparams, proj_h, self.options, prefix='ff_logit_lstm', activ='linear')
|
||||
logit_prev = get_layer('ff')[1](self.tparams, emb, self.options, prefix='ff_logit_prev', activ='linear')
|
||||
logit_label = get_layer('ff')[1](self.tparams, emb_ctx, self.options, prefix='ff_logit_label', activ='linear')
|
||||
logit = tensor.tanh(logit_lstm + logit_prev + logit_label)
|
||||
|
||||
#logit = tensor.tanh(logit_lstm)
|
||||
# split to calculate
|
||||
logit = get_layer('ff')[1](self.tparams, logit, self.options, prefix='ff_logit', activ='linear')
|
||||
logit_shp = logit.shape # n_timesteps * n_samples * n_words
|
||||
probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))
|
||||
return probs
|
||||
|
||||
probs = _prob(proj_h)
|
||||
|
||||
# cost
|
||||
y_flat = y.flatten()
|
||||
y_flat_idx = tensor.arange(y_flat.shape[0]) * self.options['n_words'] + y_flat
|
||||
|
||||
# probs has shape (seq_len * batch, n_words); y_flat_idx below selects the probability of each target word
|
||||
# y:(seq_len, batch_size)
|
||||
def _cost(probs):
|
||||
cost = -tensor.log(probs.flatten()[y_flat_idx] + 1e-10)
|
||||
cost = cost.reshape([y.shape[0], y.shape[1]])
|
||||
sentenceLen = srcx_mask[1:,:].sum(axis=0)
|
||||
cost = (cost * srcx_mask[1:, :]).sum(axis=0) / sentenceLen
|
||||
return cost, sentenceLen
|
||||
|
||||
cost, sentenceLen = _cost(probs)
|
||||
|
||||
return srcx, srcx_mask, ctx_, cost, sentenceLen #(seq, batch, worddim)
|
||||
|
||||
# calculate the log probabilities on a given corpus using the language model
|
||||
def pred_probs(self, valid_Data, valid_batch_size):
|
||||
self.use_noise.set_value(0.)
|
||||
nlls = []
|
||||
dataLen = []
|
||||
valid_x, valid_y = valid_Data[0], valid_Data[1]
|
||||
|
||||
for idx in xrange((len(valid_x) + valid_batch_size - 1) // valid_batch_size ):
|
||||
data = valid_x[idx * valid_batch_size : (idx + 1) * valid_batch_size]
|
||||
label = valid_y[idx * valid_batch_size : (idx + 1) * valid_batch_size]
|
||||
dataLen += [len(tt) for tt in data]
|
||||
x, x_mask = prepare_data_x(data, pad_sos=self.options['pad_sos'], n_word=self.options['n_words'])
|
||||
cost = self.f_log_probs(x, x_mask, numpy.array(label).astype('int64'))
|
||||
nlls += cost.tolist()
|
||||
|
||||
nlls = numpy.array(nlls).astype('float32')
|
||||
dataLen = numpy.array(dataLen).astype('float32')
|
||||
return numpy.exp((nlls * dataLen).sum() / dataLen.sum())
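# nlls holds the per-sentence mean negative log-likelihood, so the value
# returned above is exp(sum_i nll_i * len_i / sum_i len_i), i.e. corpus
# perplexity as the exponential of the length-weighted average NLL per token.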
|
||||
|
||||
def evaluate(self, validSet, testSet):
|
||||
valid_ppl = self.pred_probs(validSet, 32)
|
||||
test_ppl = self.pred_probs(testSet, 32)
|
||||
return valid_ppl, test_ppl
|
||||
|
||||
|
||||
'''
|
||||
def train(round = 0,
|
||||
dim_word=1000, # word vector dimensionality
|
||||
dim_proj=1000, # the number of GRU units
|
||||
encoder='lstm',
|
||||
patience=10, # early stopping patience
|
||||
max_epochs=5000,
|
||||
finish_after=10000000000000, # finish after this many updates
|
||||
decay_c=0., # L2 weight decay penalty
|
||||
clip_c=5.,
|
||||
lrate=1.,
|
||||
n_words = 10000, # vocabulary size
|
||||
maxlen=None, # maximum length of the description
|
||||
minlen=1,
|
||||
start_iter=0,
|
||||
start_epoch=0,
|
||||
optimizer='adadelta',
|
||||
batch_size=32,
|
||||
valid_batch_size=20,
|
||||
saveto='model.npz',
|
||||
validFreq=1000,
|
||||
dispFreq=100,
|
||||
saveFreq=1000, # save the parameters after every saveFreq updates
|
||||
newDumpFreq=10000,
|
||||
syncFreq = 50,
|
||||
sampleFreq=100, # generate some samples after every sampleFreq
|
||||
sampleNum = 50, # generate sampleNum sentences
|
||||
dataset=None,
|
||||
valid_dataset=None,
|
||||
test_dataset=None,
|
||||
dictionary=None,
|
||||
sampleFileName="sampleFile.txt",
|
||||
embedding=None,
|
||||
dropout_input=None,
|
||||
dropout_output=None,
|
||||
reload_model=None,
|
||||
reload_option=None,
|
||||
log=None,
|
||||
monitor_grad=False,
|
||||
pad_sos=False):
|
||||
|
||||
# Model options
|
||||
if pad_sos:
|
||||
n_words += 1
|
||||
model_options = locals().copy()
|
||||
print "model options:"
|
||||
for kk, vv in model_options.iteritems():
|
||||
print "\t"+kk+":\t"+str(vv)
|
||||
|
||||
print('log = ', log)
|
||||
F_log = open(log, "a")
|
||||
|
||||
if start_iter == 0:
|
||||
F_log.write("model options:\n")
|
||||
for kk, vv in model_options.iteritems():
|
||||
F_log.write("\t"+kk+":\t"+str(vv)+"\n")
|
||||
F_log.write("-----------------------------------------\n\n")
|
||||
|
||||
|
||||
print 'Loading training dataset...'
|
||||
|
||||
voc_size = n_words - 1 if pad_sos else n_words
|
||||
|
||||
trainSet, validSet, testSet = load_data(path=dataset, n_words=n_words, maxlen=maxlen, sort_by_len=True, fixed_valid=True)
|
||||
|
||||
# reload options
|
||||
if reload_option is not None and os.path.exists(reload_option):
|
||||
print "Reloading model options...",
|
||||
with open('%s' % reload_option, 'rb') as f:
|
||||
model_options = pkl.load(f)
|
||||
print "Done"
|
||||
|
||||
# init parameters
|
||||
print 'Initializing model parameters...',
|
||||
params = init_lm_params(model_options)
|
||||
print 'Done'
|
||||
|
||||
# load pre-trained word embedding
|
||||
if embedding is not None and os.path.exists(embedding):
|
||||
print 'Load Embedding from ', embedding
|
||||
Wemb = numpy.array(numpy.load(open(embedding, "rb")))
|
||||
if Wemb.shape[0] == model_options['n_words'] and Wemb.shape[1] == model_options['dim_word']:
|
||||
print 'Using pre-trained word embedding...',
|
||||
params['Wemb'] = Wemb.astype(numpy.float32)
|
||||
print 'vocab size', params['Wemb'].shape[0], ', dim', params['Wemb'].shape[1]
|
||||
|
||||
# reload parameters
|
||||
if reload_model is not None and os.path.exists(reload_model):
|
||||
print "Reloading model parameters...",
|
||||
params = load_params(reload_model, params)
|
||||
print "Done"
|
||||
|
||||
# create shared variables for parameters
|
||||
tparams = init_tparams(params)
|
||||
|
||||
# build the symbolic computational graph
|
||||
print 'Building model...'
|
||||
trng, use_noise, srcx, srcx_mask, ctx_, cost = build_lm_model(tparams, model_options)
|
||||
|
||||
print 'Building f_log_probs',
|
||||
f_log_probs = theano.function([srcx, srcx_mask, ctx_], cost, profile = profile)
|
||||
print 'Done'
|
||||
cost = cost.mean(axis=0)
|
||||
# apply L2 regularization on weights
|
||||
if decay_c > 0.:
|
||||
print "Applying L2 regularization (decay_c: "+str(decay_c)+')...',
|
||||
cost = l2_regularization(tparams, cost, decay_c)
|
||||
print "Done"
|
||||
|
||||
# after any regularizer - compile the computational graph for cost
|
||||
print 'Building f_cost',
|
||||
f_cost = theano.function([srcx, srcx_mask, ctx_], cost, profile = profile)
|
||||
print 'Done'
|
||||
|
||||
print 'Computing gradient',
|
||||
grads = tensor.grad(cost, wrt=itemlist(tparams))
|
||||
print 'Done'
|
||||
|
||||
# apply gradient clipping here
|
||||
if clip_c > 0.:
|
||||
print 'Applying gradient clipping (clip_c:'+str(clip_c)+')...',
|
||||
grads = grad_clipping(grads, clip_c)
|
||||
print 'Done'
|
||||
|
||||
# compile the optimizer, the actual computational graph is compiled here
|
||||
print 'Building optimizers...',
|
||||
lr = tensor.scalar(name='lr')
|
||||
f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, [srcx, srcx_mask, ctx_], cost)
|
||||
#f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
|
||||
print 'Done'
|
||||
|
||||
sys.stdout.flush()
|
||||
|
||||
history_errs = []
|
||||
# reload history
|
||||
if reload_model is not None and os.path.exists(reload_model):
|
||||
history_errs = list(numpy.load(reload_model)['history_errs'])
|
||||
best_p = None
|
||||
bad_count = 0
|
||||
|
||||
# Training loop
|
||||
bad_counter = 0
|
||||
uidx = start_iter
|
||||
estop = False
|
||||
start_time = time.time()
|
||||
n_samples = 0
|
||||
cost_accu = 0
|
||||
|
||||
for eidx in xrange(start_epoch, max_epochs):
|
||||
epoch_start_time = time.time()
|
||||
print "Start epoch ", eidx
|
||||
n_samples = 0
|
||||
|
||||
|
||||
kf_train = get_minibatches_idx(len(trainSet[0]), batch_size, shuffle=True)
|
||||
|
||||
for _, train_index in kf_train:
|
||||
uidx += 1
|
||||
x = [trainSet[0][t] for t in train_index]
|
||||
y = [trainSet[1][t] for t in train_index]
|
||||
n_samples += len(x)
|
||||
use_noise.set_value(1.) #training mode
|
||||
|
||||
# pad batch and create mask
|
||||
x, x_mask = prepare_data_x(x, pad_eos=True,pad_sos=model_options['pad_sos'],n_word=model_options['n_words'])
|
||||
|
||||
if x is None:
|
||||
print 'Minibatch with zero sample under length ', maxlen
|
||||
uidx -= 1
|
||||
continue
|
||||
|
||||
ud_start = time.time()
|
||||
|
||||
# compute cost, grads and copy grads to shared variables
|
||||
cost = f_grad_shared(x, x_mask, y) # input argument issue fixed
|
||||
|
||||
# do the update on parameters
|
||||
f_update(lrate)
|
||||
|
||||
ud = time.time() - ud_start
|
||||
|
||||
# check for bad numbers
|
||||
if numpy.isnan(cost) or numpy.isinf(cost):
|
||||
print 'NaN detected'
|
||||
F_log.write("=========================================\nNaN detected\n")
|
||||
F_log.write('Epoch'+str(eidx)+'\tIter '+str(uidx)+'\tBatch Length '+str(x.shape[0])+'\n')
|
||||
return 1.
|
||||
|
||||
cost_accu += cost
|
||||
if numpy.mod(uidx, dispFreq) == 0:
|
||||
print 'Epoch ', eidx, '\tIter ', uidx, '\tLoss ', cost_accu/float(dispFreq), '\tUD ', ud,
|
||||
print '\tLength', x.shape[0], '\tSize ', x.shape[1]
|
||||
F_log.write('Epoch '+str(eidx)+'\tIter '+str(uidx)+'\tLoss '+str(cost_accu/float(dispFreq))
|
||||
+'\tUD '+str(ud)+'\tLength '+str(x.shape[0])+'\tSize '+str(x.shape[1])+'\n')
|
||||
cost_accu = 0
|
||||
sys.stdout.flush()
|
||||
|
||||
# validate model on validation set and early stop if necessary
|
||||
if numpy.mod(uidx, validFreq) == 0:
|
||||
print "Validating...",
|
||||
use_noise.set_value(0.)
|
||||
# fixed for successive mode
|
||||
valid_ppl = pred_probs(f_log_probs, prepare_data_x, model_options, validSet, batch_size)
|
||||
history_errs.append(valid_ppl)
|
||||
print "Done"
|
||||
|
||||
if uidx == 0 or valid_ppl <= numpy.array(history_errs).min():
|
||||
best_p = unzip(tparams)
|
||||
bad_counter = 0
|
||||
if len(history_errs) > patience and valid_ppl >= numpy.array(history_errs)[:-patience].min():
|
||||
bad_counter += 1
|
||||
if bad_counter > patience:
|
||||
print 'Early Stop!'
|
||||
F_log.write('##############\nEarly Stop!\n##############\n')
|
||||
estop = True
|
||||
break
|
||||
|
||||
# perplexity
|
||||
|
||||
test_ppl = pred_probs(f_log_probs, prepare_data_x, model_options, testSet, batch_size)
|
||||
|
||||
print 'Perplexity: { Valid', valid_ppl, ', Test', test_ppl, '}'
|
||||
F_log.write('Perplexity: Valid '+str(valid_ppl)+'\tTest '+str(test_ppl)+'\n')
|
||||
F_log.write('====================================\n')
|
||||
sys.stdout.flush()
|
||||
|
||||
# save the current models
|
||||
savefile = saveto + "_e" + str(eidx) + "_i" + str(uidx) + "_valid_" + str(valid_ppl) + '_test_' + str(test_ppl)
|
||||
numpy.savez(savefile, history_errs=history_errs, **unzip(tparams))
|
||||
pkl.dump(model_options, open('%s.option.pkl' % saveto, 'wb'))
|
||||
|
||||
# finish after this many updates
|
||||
if uidx >= finish_after:
|
||||
print 'Finishing after %d iterations!' % uidx
|
||||
F_log.write('##############\nFinishing after '+str(uidx)+' iterations!\n##############\n')
|
||||
estop = True
|
||||
break
|
||||
|
||||
epoch_end_time = time.time()
|
||||
print 'Epoch', eidx, 'completed, Seen', n_samples, 'samples, Time', epoch_end_time-epoch_start_time
|
||||
F_log.write("-----------------------------------------------------------\n")
|
||||
F_log.write("Epoch "+str(eidx)+" completed, Seen "+str(n_samples)+" samples, Time "+str(epoch_end_time-epoch_start_time)+"\n")
|
||||
F_log.write("------------------------------------------------------------\n")
|
||||
|
||||
if estop:
|
||||
break
|
||||
|
||||
end_time = time.time()
|
||||
'''
|
|
@ -0,0 +1,369 @@
|
|||
"""
|
||||
data loading and minibatch generation
|
||||
"""
|
||||
__author__ = 'v-yirwan'
|
||||
|
||||
import cPickle as pkl
|
||||
import gzip
|
||||
import os
|
||||
import numpy
|
||||
from theano import config
|
||||
|
||||
def get_dataset_file(dataset, default_dataset, origin):
|
||||
'''
|
||||
Look for it as if it were a full path; if not found, try a local file,
|
||||
then try the data directory.
|
||||
|
||||
Download dataset if it is not present
|
||||
'''
|
||||
data_dir, data_file = os.path.split(dataset)
|
||||
if data_dir == "" and not os.path.isfile(dataset):
|
||||
# Check if dataset is in the data directory.
|
||||
new_path = os.path.join(
|
||||
os.path.split(__file__)[0],
|
||||
"..",
|
||||
"data",
|
||||
dataset
|
||||
)
|
||||
if os.path.isfile(new_path) or data_file == default_dataset:
|
||||
dataset = new_path
|
||||
|
||||
if (not os.path.isfile(dataset)) and data_file == default_dataset:
|
||||
from six.moves import urllib
|
||||
print('Downloading data from %s' % origin)
|
||||
urllib.request.urlretrieve(origin, dataset)
|
||||
|
||||
return dataset
|
||||
|
||||
def load_data(path="imdb.pkl", n_words=100000, maxlen=None,
|
||||
sort_by_len=True, fixed_valid=True, valid_portion=0.1):
|
||||
'''
|
||||
Loads the dataset
|
||||
:type path: String
|
||||
:param path: The path to the dataset (here IMDB)
|
||||
:type n_words: int
|
||||
:param n_words: The number of word to keep in the vocabulary.
|
||||
All extra words are set to unknown (1).
|
||||
:type maxlen: None or positive int
|
||||
:param maxlen: the max sequence length we use in the train/valid set.
|
||||
:type sort_by_len: bool
|
||||
:param sort_by_len: Sort by the sequence length for the train,
|
||||
valid and test set. This allows faster execution as it causes
|
||||
less padding per minibatch. Another mechanism must be used to
|
||||
shuffle the train set at each epoch.
|
||||
:type fixed_valid: bool
|
||||
:param fixed_valid: load fixed validation set from the corpus file,
|
||||
which would otherwise be picked randomly from the training set with
|
||||
proportion [valid_portion]
|
||||
:type valid_portion: float
|
||||
:param valid_portion: The proportion of the full train set used for
|
||||
the validation set.
|
||||
|
||||
'''
|
||||
|
||||
# Load the dataset
|
||||
path = get_dataset_file(
|
||||
path, "imdb.pkl",
|
||||
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
|
||||
if path.endswith(".gz"):
|
||||
f = gzip.open(path, 'rb')
|
||||
else:
|
||||
f = open(path, 'rb')
|
||||
|
||||
train_set = pkl.load(f)
|
||||
if fixed_valid:
|
||||
valid_set = pkl.load(f)
|
||||
test_set = pkl.load(f)
|
||||
f.close()
|
||||
|
||||
def _truncate_data(train_set):
|
||||
'''
|
||||
truncate sequences whose lengths exceed the max-len threshold
|
||||
:param train_set: a list of sequences list and corresponding labels list
|
||||
:return: truncated train_set
|
||||
'''
|
||||
new_train_set_x = []
|
||||
new_train_set_y = []
|
||||
for x, y in zip(train_set[0], train_set[1]):
|
||||
if len(x) < maxlen:
|
||||
new_train_set_x.append(x)
|
||||
new_train_set_y.append(y)
|
||||
train_set = (new_train_set_x, new_train_set_y)
|
||||
del new_train_set_x, new_train_set_y
|
||||
return train_set
|
||||
|
||||
def _set_valid(train_set, valid_portion):
|
||||
'''
|
||||
set validation with [valid_portion] proportion of training set
|
||||
'''
|
||||
train_set_x, train_set_y = train_set
|
||||
n_samples = len(train_set_x)
|
||||
sidx = numpy.random.permutation(n_samples) # shuffle data
|
||||
n_train = int(numpy.round(n_samples * (1. - valid_portion)))
|
||||
valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
|
||||
valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
|
||||
train_set_x = [train_set_x[s] for s in sidx[:n_train]]
|
||||
train_set_y = [train_set_y[s] for s in sidx[:n_train]]
|
||||
train_set = (train_set_x, train_set_y)
|
||||
valid_set = (valid_set_x, valid_set_y)
|
||||
del train_set_x, train_set_y, valid_set_x, valid_set_y
|
||||
return train_set, valid_set
|
||||
|
||||
if maxlen:
|
||||
train_set = _truncate_data(train_set)
|
||||
if fixed_valid:
|
||||
print 'Loading with fixed validation set...',
|
||||
valid_set = _truncate_data(valid_set)
|
||||
else:
|
||||
print 'Setting validation set with proportion:', valid_portion, '...',
|
||||
train_set, valid_set = _set_valid(train_set, valid_portion)
|
||||
test_set = _truncate_data(test_set)
|
||||
|
||||
if maxlen is None and not fixed_valid:
|
||||
train_set, valid_set = _set_valid(train_set, valid_portion)
|
||||
|
||||
def remove_unk(x):
|
||||
return [[1 if w >= n_words else w for w in sen] for sen in x]
|
||||
|
||||
test_set_x, test_set_y = test_set
|
||||
valid_set_x, valid_set_y = valid_set
|
||||
train_set_x, train_set_y = train_set
|
||||
|
||||
# remove unk from dataset
|
||||
train_set_x = remove_unk(train_set_x) # use 1 if unk
|
||||
valid_set_x = remove_unk(valid_set_x)
|
||||
test_set_x = remove_unk(test_set_x)
|
||||
|
||||
def len_argsort(seq):
|
||||
return sorted(range(len(seq)), key=lambda x: len(seq[x]))
|
||||
|
||||
if sort_by_len:
|
||||
sorted_index = len_argsort(test_set_x)
|
||||
# ranked from shortest to longest
|
||||
test_set_x = [test_set_x[i] for i in sorted_index]
|
||||
test_set_y = [test_set_y[i] for i in sorted_index]
|
||||
|
||||
sorted_index = len_argsort(valid_set_x)
|
||||
valid_set_x = [valid_set_x[i] for i in sorted_index]
|
||||
valid_set_y = [valid_set_y[i] for i in sorted_index]
|
||||
|
||||
sorted_index = len_argsort(train_set_x)
|
||||
train_set_x = [train_set_x[i] for i in sorted_index]
|
||||
train_set_y = [train_set_y[i] for i in sorted_index]
|
||||
|
||||
train = (train_set_x, train_set_y)
|
||||
valid = (valid_set_x, valid_set_y)
|
||||
test = (test_set_x, test_set_y)
|
||||
|
||||
return train, valid, test
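# Illustrative usage (a sketch, assuming an IMDB-style pickle with
# train/valid/test splits; the path is a placeholder):
#
#   train, valid, test = load_data(path='imdb.pkl', n_words=10000,
#                                  sort_by_len=True, fixed_valid=True)
#   train_x, train_y = train   # lists of word-id sequences and their labels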
|
||||
|
||||
def load_mnist(path='mnist.pkl', fixed_permute=True, rand_permute=False):
|
||||
f = open(path, 'rb')
|
||||
train = pkl.load(f)
|
||||
valid = pkl.load(f)
|
||||
test = pkl.load(f)
|
||||
f.close()
|
||||
|
||||
def _permute(data, perm):
|
||||
x, y = data
|
||||
x_new = []
|
||||
for xx in x:
|
||||
xx_new = [xx[pp] for pp in perm]
|
||||
x_new.append(xx_new)
|
||||
return (x_new, y)
|
||||
|
||||
def _trans2list(data):
|
||||
x, y = data
|
||||
x = [list(xx) for xx in x]
|
||||
return (x, y)
|
||||
|
||||
if rand_permute:
|
||||
print 'Using a fixed random permutation of pixels...',
|
||||
perm = numpy.random.permutation(range(784))
|
||||
train = _permute(train, perm)
|
||||
valid = _permute(valid, perm)
|
||||
test = _permute(test, perm)
|
||||
elif fixed_permute:
|
||||
print 'Using permuted dataset...',
|
||||
|
||||
train = _trans2list(train)
|
||||
valid = _trans2list(valid)
|
||||
test = _trans2list(test)
|
||||
|
||||
return train, valid, test
|
||||
|
||||
def get_minibatches_idx(n, minibatch_size, shuffle=False):
|
||||
"""
|
||||
Used to shuffle the dataset at each iteration.
|
||||
"""
|
||||
|
||||
idx_list = numpy.arange(n, dtype="int32")
|
||||
|
||||
if shuffle:
|
||||
numpy.random.shuffle(idx_list)
|
||||
|
||||
minibatches = []
|
||||
minibatch_start = 0
|
||||
for i in range(n // minibatch_size):
|
||||
minibatches.append(idx_list[minibatch_start:
|
||||
minibatch_start + minibatch_size])
|
||||
minibatch_start += minibatch_size
|
||||
|
||||
if (minibatch_start != n):
|
||||
# Make a minibatch out of what is left
|
||||
minibatches.append(idx_list[minibatch_start:])
|
||||
|
||||
return zip(range(len(minibatches)), minibatches)
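# Illustrative usage (a sketch; train_x/train_y are assumed to be the lists
# returned by load_data):
#
#   kf = get_minibatches_idx(len(train_x), 32, shuffle=True)
#   for batch_id, idx in kf:
#       x = [train_x[t] for t in idx]
#       y = [train_y[t] for t in idx]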
|
||||
|
||||
def get_minibatches_idx_bucket(dataset, minibatch_size, shuffle=False):
|
||||
"""
|
||||
divide into different buckets according to sequence lengths
|
||||
dynamic batch size
|
||||
"""
|
||||
# divide into buckets
|
||||
slen = [len(ss) for ss in dataset]
|
||||
bucket1000 = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 0 and slen[sidx] <= 1000]
|
||||
bucket3000 = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 1000 and slen[sidx] <= 3000]
|
||||
bucket_long = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 3000]
|
||||
|
||||
# shuffle each bucket
|
||||
if shuffle:
|
||||
numpy.random.shuffle(bucket1000)
|
||||
numpy.random.shuffle(bucket3000)
|
||||
numpy.random.shuffle(bucket_long)
|
||||
|
||||
# make minibatches
|
||||
def _make_batch(minibatches, bucket, minibatch_size):
|
||||
minibatch_start = 0
|
||||
n = len(bucket)
|
||||
for i in range(n // minibatch_size):
|
||||
minibatches.append(bucket[minibatch_start : minibatch_start + minibatch_size])
|
||||
minibatch_start += minibatch_size
|
||||
if (minibatch_start != n):
|
||||
# Make a minibatch out of what is left
|
||||
minibatches.append(bucket[minibatch_start:])
|
||||
return minibatches
|
||||
|
||||
minibatches = []
|
||||
_make_batch(minibatches, bucket1000, minibatch_size=minibatch_size)
|
||||
_make_batch(minibatches, bucket3000, minibatch_size=minibatch_size//2)
|
||||
_make_batch(minibatches, bucket_long, minibatch_size=minibatch_size//8)
|
||||
|
||||
# shuffle minibatches
|
||||
numpy.random.shuffle(minibatches)
|
||||
|
||||
return zip(range(len(minibatches)), minibatches)
|
||||
|
||||
def prepare_data(seqs, labels, maxlen=None, dataset='text'):
|
||||
"""Create the matrices from the datasets.
|
||||
|
||||
This pads each sequence to the same length: the length of the
|
||||
longest sequence or maxlen.
|
||||
|
||||
if maxlen is set, we will cut all sequences to this maximum
|
||||
length.
|
||||
|
||||
This swaps the axes!
|
||||
"""
|
||||
# x: a list of sentences
|
||||
lengths = [len(s) for s in seqs]
|
||||
|
||||
if maxlen is not None:
|
||||
new_seqs = []
|
||||
new_labels = []
|
||||
new_lengths = []
|
||||
for l, s, y in zip(lengths, seqs, labels):
|
||||
if l < maxlen:
|
||||
new_seqs.append(s)
|
||||
new_labels.append(y)
|
||||
new_lengths.append(l)
|
||||
lengths = new_lengths
|
||||
labels = new_labels
|
||||
seqs = new_seqs
|
||||
|
||||
if len(lengths) < 1:
|
||||
return None, None, None
|
||||
|
||||
n_samples = len(seqs)
|
||||
maxlen = numpy.max(lengths)
|
||||
|
||||
if dataset == 'mnist':
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('float32')
|
||||
else:
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('int64')
|
||||
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
|
||||
for idx, s in enumerate(seqs):
|
||||
x[:lengths[idx], idx] = s
|
||||
x_mask[:lengths[idx], idx] = 1.
|
||||
|
||||
return x, x_mask, labels
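# Shape sketch: given seqs = [[3, 5, 7], [2, 4]] and labels = [1, 0],
# prepare_data returns x of shape (3, 2) (time-major, zero-padded), x_mask of
# the same shape with 1. marking real tokens, and the (possibly filtered)
# labels.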
|
||||
|
||||
def prepare_data_hier(seqs, labels, hier_len, maxlen=None, dataset='text'):
|
||||
'''
|
||||
prepare minibatch for hierarchical model
|
||||
'''
|
||||
# sort (long->short)
|
||||
sorted_idx = sorted(range(len(seqs)), key=lambda x: len(seqs[x]), reverse=True)
|
||||
seqs = [seqs[i] for i in sorted_idx]
|
||||
labels = [labels[i] for i in sorted_idx]
|
||||
|
||||
# truncate data
|
||||
lengths = [len(s) for s in seqs]
|
||||
if maxlen is not None:
|
||||
new_seqs = []
|
||||
new_labels = []
|
||||
new_lengths = []
|
||||
for l, s, y in zip(lengths, seqs, labels):
|
||||
if l < maxlen:
|
||||
new_seqs.append(s)
|
||||
new_labels.append(y)
|
||||
new_lengths.append(l)
|
||||
lengths = new_lengths
|
||||
labels = new_labels
|
||||
seqs = new_seqs
|
||||
if len(lengths) < 1:
|
||||
return None, None, None
|
||||
|
||||
# set batch size
|
||||
n_samples = len(seqs)
|
||||
maxlen = numpy.max(lengths)
|
||||
if maxlen % hier_len == 0:
|
||||
n_batch = maxlen/hier_len
|
||||
else:
|
||||
n_batch = maxlen//hier_len + 1
|
||||
maxlen = n_batch * hier_len
|
||||
|
||||
# padding whole batch
|
||||
if dataset == 'mnist':
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('float32')
|
||||
else:
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('int64')
|
||||
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
|
||||
for idx, s in enumerate(seqs):
|
||||
x[:lengths[idx], idx] = s
|
||||
x_mask[:lengths[idx], idx] = 1
|
||||
|
||||
# slice to mini-batches
|
||||
x_batch = [x[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
|
||||
if dataset == 'mnist':
|
||||
x_batch = numpy.array(x_batch).astype('float32')
|
||||
else:
|
||||
x_batch = numpy.array(x_batch).astype('int64')
|
||||
mask_batch = [x_mask[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
|
||||
mask_batch = numpy.array(mask_batch).astype(config.floatX)
|
||||
|
||||
# mask for hier-level
|
||||
mask_hier = numpy.ones((n_batch, n_samples)).astype(config.floatX)
|
||||
for idx in range(n_samples):
|
||||
mpos = numpy.where(x_mask[:, idx]==0)[0]
|
||||
if len(mpos) == 0:
|
||||
continue
|
||||
bidx = min(mpos[0]//hier_len+1, n_batch)
|
||||
if mpos[0] % hier_len == 0:
|
||||
bidx -= 1 # bug fixed TODO: more elegant solution?
|
||||
mask_hier[bidx:, idx] = 0
|
||||
|
||||
return x_batch, mask_batch, mask_hier, labels
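# Shape sketch: with n_samples sequences padded up to a multiple of hier_len,
# x_batch and mask_batch have shape (n_batch, hier_len, n_samples), and
# mask_hier has shape (n_batch, n_samples) with 1. for every hier_len-sized
# chunk of a sequence that still contains real tokens.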
|
|
@ -0,0 +1,38 @@
|
|||
import re
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
|
||||
filename = r'.\gpu_usage_draft_'
|
||||
default_gpu = 58 + 30
|
||||
|
||||
|
||||
|
||||
def GrabGPU(rank):
|
||||
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename + rank
|
||||
os.system(cmdstr)
|
||||
|
||||
def GetGPUUSage(rank):
|
||||
pattern = re.compile(r'(?P<num>[0-9]{1,5})MiB[\s]+/')
|
||||
id = 0
|
||||
GPUs = []
|
||||
fo = open(filename + rank, 'r')
|
||||
for line in fo:
|
||||
result = pattern.search(line)
|
||||
if result:
|
||||
if int(result.group("num")) < default_gpu:
|
||||
GPUs.append(id)
|
||||
id = id + 1
|
||||
fo.close()
|
||||
|
||||
print len(GPUs)
|
||||
for gpu in GPUs:
|
||||
print gpu
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
rank = sys.argv[1]
|
||||
GrabGPU(rank)
|
||||
print socket.gethostname()
|
||||
GetGPUUSage(rank)
|
||||
#os.system('del /q ' + filename + rank)
|
|
@ -0,0 +1,17 @@
|
|||
import os
|
||||
def MapDeviceIds(comm):
|
||||
rank = comm.Get_rank()
|
||||
num_machine = comm.Get_size()
|
||||
os.system('python GPU_Usage.py ' + str(rank) + ' > record' + str(rank))
|
||||
comm.Barrier()
|
||||
if rank == 0:
|
||||
os.system('python AllocateGPU.py ' + str(num_machine) + ' > DirtyRecord')
|
||||
comm.Barrier()
|
||||
cardid = str(0)
|
||||
with open('DirtyRecord', 'r') as f:
|
||||
for idx, line in enumerate(f):
|
||||
if idx == rank:
|
||||
cardid = line.strip()
|
||||
break
|
||||
|
||||
return cardid
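# Illustrative usage (a sketch; assumes mpi4py and that GPU_Usage.py and
# AllocateGPU.py are in the working directory -- how the returned id is
# consumed is an assumption, not part of this file):
#
#   from mpi4py import MPI
#   cardid = MapDeviceIds(MPI.COMM_WORLD)
#   os.environ['THEANO_FLAGS'] = 'device=gpu' + cardid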
|
|
@ -0,0 +1,32 @@
|
|||
import sys
|
||||
import codecs
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
raise Exception('Not enough argv')
|
||||
|
||||
theano_rc = r"""
|
||||
[global]
|
||||
mode = FAST_RUN
|
||||
device = gpu
|
||||
floatX = float32
|
||||
on_unused_input = warn
|
||||
optimizer = fast_run
|
||||
#allow_gc=False
|
||||
cuda.disable_gcc_cudnn_check=True
|
||||
|
||||
[lib]
|
||||
cnmem = 0.75
|
||||
|
||||
[nvcc]
|
||||
flags=-L{0}\libs
|
||||
root=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
fast_math = True
|
||||
|
||||
"""
|
||||
|
||||
theano_rc = theano_rc.format(sys.argv[1])
|
||||
|
||||
print(theano_rc)
|
||||
|
||||
with codecs.open(sys.argv[2], 'w', 'utf-8') as f:
|
||||
f.write(theano_rc)
|
|
@ -0,0 +1,24 @@
|
|||
# Copyright (c) 2007, 2010, 2011, 2012 Godefroid Chapelle
|
||||
#
|
||||
# This file is part of ipdb.
|
||||
# GNU package is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free
|
||||
# Software Foundation, either version 2 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU package is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
|
||||
|
||||
from __main__ import set_trace, post_mortem, pm, run, runcall, runeval, launch_ipdb_on_exception
|
||||
|
||||
pm # please pyflakes
|
||||
post_mortem # please pyflakes
|
||||
run # please pyflakes
|
||||
runcall # please pyflakes
|
||||
runeval # please pyflakes
|
||||
set_trace # please pyflakes
|
||||
launch_ipdb_on_exception # please pyflakes
|
|
@ -0,0 +1,184 @@
|
|||
# Copyright (c) 2011, 2012 Godefroid Chapelle
|
||||
#
|
||||
# This file is part of ipdb.
|
||||
# GNU package is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free
|
||||
# Software Foundation, either version 2 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU package is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# for more details.
|
||||
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
try:
|
||||
from pdb import Restart
|
||||
except ImportError:
|
||||
class Restart(Exception):
|
||||
pass
|
||||
|
||||
import IPython
|
||||
|
||||
if IPython.__version__ > '0.10.2':
|
||||
from IPython.core.debugger import Pdb, BdbQuit_excepthook
|
||||
try:
|
||||
get_ipython
|
||||
except NameError:
|
||||
# Make it more resilient to different versions of IPython and try to
|
||||
# find a module.
|
||||
possible_modules = ['IPython.terminal.embed', # Newer IPython
|
||||
'IPython.frontend.terminal.embed'] # Older IPython
|
||||
|
||||
count = len(possible_modules)
|
||||
for module in possible_modules:
|
||||
try:
|
||||
embed = __import__(module, fromlist=["InteractiveShellEmbed"])
|
||||
InteractiveShellEmbed = embed.InteractiveShellEmbed
|
||||
except ImportError:
|
||||
count -= 1
|
||||
if count == 0:
|
||||
raise
|
||||
else:
|
||||
break
|
||||
|
||||
ipshell = InteractiveShellEmbed()
|
||||
def_colors = ipshell.colors
|
||||
else:
|
||||
def_colors = get_ipython.im_self.colors
|
||||
|
||||
from IPython.utils import io
|
||||
|
||||
if 'nose' in sys.modules.keys():
|
||||
def update_stdout():
|
||||
# setup stdout to ensure output is available with nose
|
||||
io.stdout = sys.stdout = sys.__stdout__
|
||||
else:
|
||||
def update_stdout():
|
||||
pass
|
||||
else:
|
||||
from IPython.Debugger import Pdb, BdbQuit_excepthook
|
||||
from IPython.Shell import IPShell
|
||||
from IPython import ipapi
|
||||
|
||||
ip = ipapi.get()
|
||||
if ip is None:
|
||||
IPShell(argv=[''])
|
||||
ip = ipapi.get()
|
||||
def_colors = ip.options.colors
|
||||
|
||||
from IPython.Shell import Term
|
||||
|
||||
if 'nose' in sys.modules.keys():
|
||||
def update_stdout():
|
||||
# setup stdout to ensure output is available with nose
|
||||
Term.cout = sys.stdout = sys.__stdout__
|
||||
else:
|
||||
def update_stdout():
|
||||
pass
|
||||
|
||||
|
||||
def wrap_sys_excepthook():
|
||||
# make sure we wrap it only once or we would end up with a cycle
|
||||
# BdbQuit_excepthook.excepthook_ori == BdbQuit_excepthook
|
||||
if sys.excepthook != BdbQuit_excepthook:
|
||||
BdbQuit_excepthook.excepthook_ori = sys.excepthook
|
||||
sys.excepthook = BdbQuit_excepthook
|
||||
|
||||
|
||||
def set_trace(frame=None):
|
||||
update_stdout()
|
||||
wrap_sys_excepthook()
|
||||
if frame is None:
|
||||
frame = sys._getframe().f_back
|
||||
Pdb(def_colors).set_trace(frame)
|
||||
|
||||
|
||||
def post_mortem(tb):
|
||||
update_stdout()
|
||||
wrap_sys_excepthook()
|
||||
p = Pdb(def_colors)
|
||||
p.reset()
|
||||
if tb is None:
|
||||
return
|
||||
p.interaction(None, tb)
|
||||
|
||||
|
||||
def pm():
|
||||
post_mortem(sys.last_traceback)
|
||||
|
||||
|
||||
def run(statement, globals=None, locals=None):
|
||||
Pdb(def_colors).run(statement, globals, locals)
|
||||
|
||||
|
||||
def runcall(*args, **kwargs):
|
||||
return Pdb(def_colors).runcall(*args, **kwargs)
|
||||
|
||||
|
||||
def runeval(expression, globals=None, locals=None):
|
||||
return Pdb(def_colors).runeval(expression, globals, locals)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def launch_ipdb_on_exception():
|
||||
try:
|
||||
yield
|
||||
except Exception:
|
||||
e, m, tb = sys.exc_info()
|
||||
print(m.__repr__(), file=sys.stderr)
|
||||
post_mortem(tb)
|
||||
finally:
|
||||
pass
|
||||
|
||||
|
||||
def main():
|
||||
if not sys.argv[1:] or sys.argv[1] in ("--help", "-h"):
|
||||
print("usage: ipdb.py scriptfile [arg] ...")
|
||||
sys.exit(2)
|
||||
|
||||
mainpyfile = sys.argv[1] # Get script filename
|
||||
if not os.path.exists(mainpyfile):
|
||||
print('Error:', mainpyfile, 'does not exist')
|
||||
sys.exit(1)
|
||||
|
||||
del sys.argv[0] # Hide "pdb.py" from argument list
|
||||
|
||||
# Replace pdb's dir with script's dir in front of module search path.
|
||||
sys.path[0] = os.path.dirname(mainpyfile)
|
||||
|
||||
# Note on saving/restoring sys.argv: it's a good idea when sys.argv was
|
||||
# modified by the script being debugged. It's a bad idea when it was
|
||||
# changed by the user from the command line. There is a "restart" command
|
||||
# which allows explicit specification of command line arguments.
|
||||
pdb = Pdb(def_colors)
|
||||
while 1:
|
||||
try:
|
||||
pdb._runscript(mainpyfile)
|
||||
if pdb._user_requested_quit:
|
||||
break
|
||||
print("The program finished and will be restarted")
|
||||
except Restart:
|
||||
print("Restarting", mainpyfile, "with arguments:")
|
||||
print("\t" + " ".join(sys.argv[1:]))
|
||||
except SystemExit:
|
||||
# In most cases SystemExit does not warrant a post-mortem session.
|
||||
print("The program exited via sys.exit(). Exit status: ", end='')
|
||||
print(sys.exc_info()[1])
|
||||
except:
|
||||
traceback.print_exc()
|
||||
print("Uncaught exception. Entering post mortem debugging")
|
||||
print("Running 'cont' or 'step' will restart the program")
|
||||
t = sys.exc_info()[2]
|
||||
pdb.interaction(None, t)
|
||||
print("Post mortem debugger finished. The " + mainpyfile +
|
||||
" will be restarted")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Binary file not shown.
|
@ -0,0 +1,5 @@
|
|||
#!/usr/bin/env python
|
||||
# coding:utf8
|
||||
|
||||
from api import init, shutdown, barrier, workers_num, worker_id, server_id, is_master_worker
|
||||
from tables import ArrayTableHandler, MatrixTableHandler
|
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env python
|
||||
# coding:utf8
|
||||
|
||||
import ctypes
|
||||
from utils import Loader
|
||||
import numpy as np
|
||||
|
||||
|
||||
mv_lib = Loader.get_lib()
|
||||
|
||||
|
||||
def init(sync=False):
|
||||
'''Initialize multiverso.
|
||||
|
||||
This should be called only once before training at the beginning of the
|
||||
whole project.
|
||||
If sync is True, a sync server will be created. Otherwise an async server
|
||||
will be created.
|
||||
'''
|
||||
args = [""] # the first argument will be ignored. So we put a placeholder here
|
||||
if sync:
|
||||
args.append("-sync=true")
|
||||
n = len(args)
|
||||
args_type = ctypes.c_char_p * n
|
||||
mv_lib.MV_Init(ctypes.pointer(ctypes.c_int(n)), args_type(*[ctypes.c_char_p(arg) for arg in args]))
|
||||
|
||||
|
||||
def shutdown():
|
||||
'''Shutdown multiverso.
|
||||
|
||||
This should be called only once after finishing training, at the end of the whole project.
|
||||
'''
|
||||
mv_lib.MV_ShutDown()
|
||||
|
||||
|
||||
def barrier():
|
||||
'''Set a barrier for all workers to wait.
|
||||
|
||||
Workers will wait until all workers reach a specific barrier.
|
||||
'''
|
||||
mv_lib.MV_Barrier()
|
||||
|
||||
|
||||
def workers_num():
|
||||
'''Return the total number of workers.'''
|
||||
return mv_lib.MV_NumWorkers()
|
||||
|
||||
|
||||
def worker_id():
|
||||
'''Return the id (zero-based index) for current worker.'''
|
||||
return mv_lib.MV_WorkerId()
|
||||
|
||||
|
||||
def server_id():
|
||||
return mv_lib.MV_ServerId()
|
||||
|
||||
|
||||
def is_master_worker():
|
||||
'''Return True if this worker is the master worker.
|
||||
|
||||
Some things only need one worker process, such as validation, outputting the
|
||||
result, initializing the parameters and so on. So we mark the worker 0 as
|
||||
the master worker to finish these things.
|
||||
'''
|
||||
return worker_id() == 0
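# A minimal end-to-end sketch of this API:
#
#   import multiverso as mv
#   mv.init(sync=True)            # start with a sync server
#   rank = mv.worker_id()         # zero-based index of this worker
#   total = mv.workers_num()
#   mv.barrier()                  # wait for all workers
#   if mv.is_master_worker():
#       pass                      # e.g. run validation only on worker 0
#   mv.shutdown()                 # call once at the end of training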
|
|
@ -0,0 +1,163 @@
|
|||
#!/usr/bin/env python
|
||||
# coding:utf8
|
||||
|
||||
import ctypes
|
||||
from utils import Loader
|
||||
from utils import convert_data
|
||||
import numpy as np
|
||||
import api
|
||||
|
||||
|
||||
mv_lib = Loader.get_lib()
|
||||
|
||||
|
||||
class TableHandler(object):
|
||||
'''`TableHandler` is an interface to sync different kinds of values.
|
||||
|
||||
If you are not writing python code based on theano or lasagne, you are
|
||||
supposed to sync models (for initialization) and gradients (during
|
||||
training) so as to let multiverso help you manage the models in distributed
|
||||
environments.
|
||||
Otherwise, you'd better use the classes in `multiverso.theano_ext` or
|
||||
`multiverso.theano_ext.lasagne_ext`
|
||||
'''
|
||||
def __init__(self, size, init_value=None):
|
||||
raise NotImplementedError("You must implement the __init__ method.")
|
||||
|
||||
def get(self, size):
|
||||
raise NotImplementedError("You must implement the get method.")
|
||||
|
||||
def add(self, data, sync=False):
|
||||
raise NotImplementedError("You must implement the add method.")
|
||||
|
||||
|
||||
# types
|
||||
C_FLOAT_P = ctypes.POINTER(ctypes.c_float)
|
||||
|
||||
|
||||
class ArrayTableHandler(TableHandler):
|
||||
'''`ArrayTableHandler` is used to sync array-like (one-dimensional) value.'''
|
||||
def __init__(self, size, init_value=None):
|
||||
'''Constructor for syncing array-like (one-dimensional) value.
|
||||
|
||||
The `size` should be an int equal to the size of the value we want to sync.
|
||||
If init_value is None, zeros will be used to initialize the tables,
|
||||
otherwise the table will be initialized as the init_value.
|
||||
Notice: if the init_value is different in different processes, the
|
||||
average of them will be used.
|
||||
'''
|
||||
self._handler = ctypes.c_void_p()
|
||||
self._size = size
|
||||
mv_lib.MV_NewArrayTable(size, ctypes.byref(self._handler))
|
||||
if init_value is not None:
|
||||
init_value = convert_data(init_value)
|
||||
# sync add is used because we want to make sure that the initial
|
||||
# value has taken effect when the call returns.
|
||||
self.add(init_value / api.workers_num(), sync=True)
|
||||
|
||||
def get(self):
|
||||
'''get the latest value from multiverso ArrayTable
|
||||
|
||||
The return value is a one-dimensional numpy.ndarray.
|
||||
'''
|
||||
data = np.zeros((self._size, ), dtype=np.dtype("float32"))
|
||||
mv_lib.MV_GetArrayTable(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
|
||||
return data
|
||||
|
||||
def add(self, data, sync=False):
|
||||
'''add the data to the multiverso ArrayTable
|
||||
|
||||
`data` should be a one-dimensional numpy.ndarray.
|
||||
|
||||
If sync is True, this call will be blocked by IO until the update finishes.
|
||||
Otherwise it will return immediately
|
||||
'''
|
||||
data = convert_data(data)
|
||||
assert(data.size == self._size)
|
||||
if sync:
|
||||
mv_lib.MV_AddArrayTable(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
|
||||
else:
|
||||
mv_lib.MV_AddAsyncArrayTable(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
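# Illustrative usage of ArrayTableHandler (a sketch; assumes api.init() has
# already been called):
#
#   tbh = ArrayTableHandler(5, init_value=np.zeros(5, dtype='float32'))
#   api.barrier()
#   tbh.add(np.ones(5, dtype='float32'))   # push a local update
#   latest = tbh.get()                     # pull the aggregated value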
|
||||
|
||||
|
||||
class MatrixTableHandler(TableHandler):
|
||||
def __init__(self, num_row, num_col, init_value=None):
|
||||
'''Constructor for syncing matrix-like (two-dimensional) value.
|
||||
|
||||
The `num_row` should be the number of rows and the `num_col` should be
|
||||
the number of columns.
|
||||
|
||||
If init_value is None, zeros will be used to initialize the tables,
|
||||
otherwise the table will be initialized as the init_value.
|
||||
Notice: if the init_value is different in different processes, the
|
||||
average of them will be used.
|
||||
'''
|
||||
self._handler = ctypes.c_void_p()
|
||||
self._num_row = num_row
|
||||
self._num_col = num_col
|
||||
self._size = num_col * num_row
|
||||
mv_lib.MV_NewMatrixTable(num_row, num_col, ctypes.byref(self._handler))
|
||||
if init_value is not None:
|
||||
init_value = convert_data(init_value)
|
||||
# sync add is used because we want to make sure that the initial
|
||||
# value has taken effect when the call returns.
|
||||
self.add(init_value / api.workers_num(), sync=True)
|
||||
|
||||
def get(self, row_ids=None):
|
||||
'''get the latest value from multiverso MatrixTable
|
||||
|
||||
If row_ids is None, we will return all rows as a numpy.ndarray, e.g.
|
||||
array([[1, 3], [3, 4]]).
|
||||
Otherwise we will return the data according to row_ids (e.g. you can
|
||||
pass [1] as row_ids to get a single row; it will return a
|
||||
two-dimensional numpy.ndarray with one row)
|
||||
|
||||
The return value is a two-dimensional numpy.ndarray.
|
||||
'''
|
||||
if row_ids is None:
|
||||
data = np.zeros((self._num_row, self._num_col), dtype=np.dtype("float32"))
|
||||
mv_lib.MV_GetMatrixTableAll(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
|
||||
return data
|
||||
else:
|
||||
row_ids_n = len(row_ids)
|
||||
int_array_type = ctypes.c_int * row_ids_n
|
||||
data = np.zeros((row_ids_n, self._num_col), dtype=np.dtype("float32"))
|
||||
mv_lib.MV_GetMatrixTableByRows(self._handler, data.ctypes.data_as(C_FLOAT_P),
|
||||
row_ids_n * self._num_col,
|
||||
int_array_type(*row_ids), row_ids_n)
|
||||
return data
|
||||
|
||||
def add(self, data=None, row_ids=None, sync=False):
|
||||
'''add the data to the multiverso MatrixTable
|
||||
|
||||
If row_ids is None, we will add the data to the whole table, and the data
|
||||
should cover every element of the table (data.size must equal num_row * num_col), e.g. [1, 2, 3, ...]
|
||||
|
||||
Otherwise we will add the data according to the row_ids
|
||||
|
||||
`data` should be a two-dimensional numpy.ndarray (or convertible to one).
|
||||
|
||||
If sync is True, this call will block until the add operation finishes.
|
||||
Otherwise it will return immediately
|
||||
'''
|
||||
assert(data is not None)
|
||||
data = convert_data(data)
|
||||
|
||||
if row_ids is None:
|
||||
assert(data.size == self._size)
|
||||
if sync:
|
||||
mv_lib.MV_AddMatrixTableAll(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
|
||||
else:
|
||||
mv_lib.MV_AddAsyncMatrixTableAll(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
|
||||
else:
|
||||
row_ids_n = len(row_ids)
|
||||
assert(data.size == row_ids_n * self._num_col)
|
||||
int_array_type = ctypes.c_int * row_ids_n
|
||||
if sync:
|
||||
mv_lib.MV_AddMatrixTableByRows(self._handler, data.ctypes.data_as(C_FLOAT_P),
|
||||
row_ids_n * self._num_col,
|
||||
int_array_type(*row_ids), row_ids_n)
|
||||
else:
|
||||
mv_lib.MV_AddAsyncMatrixTableByRows(self._handler, data.ctypes.data_as(C_FLOAT_P),
|
||||
row_ids_n * self._num_col,
|
||||
int_array_type(*row_ids), row_ids_n)
|
|
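The two handlers above are the raw building blocks for parameter synchronization. A minimal usage sketch (assuming multiverso has been installed and the script is launched once per worker process; the sizes and values below are made up for illustration):

    import multiverso as mv

    mv.init()                               # start the multiverso worker
    tbh = mv.ArrayTableHandler(5, init_value=[0, 0, 0, 0, 0])
    mv.barrier()                            # make sure every worker has created the table

    tbh.add([1, 2, 3, 4, 5], sync=True)     # each worker pushes its local delta
    mv.barrier()
    print tbh.get()                         # aggregated value across mv.workers_num() workers

    mtbh = mv.MatrixTableHandler(2, 3)
    mv.barrier()
    mtbh.add([[1, 2, 3], [4, 5, 6]])        # update the whole 2x3 table
    print mtbh.get(row_ids=[1])             # fetch a single row as a 1x3 ndarray

    mv.shutdown()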
@ -0,0 +1,111 @@
|
|||
#!/usr/bin/env python
|
||||
# coding:utf8
|
||||
import multiverso as mv
|
||||
import unittest
|
||||
import numpy as np
|
||||
import theano
|
||||
from multiverso.theano_ext import sharedvar
|
||||
|
||||
|
||||
def setUpModule():
|
||||
mv.init()
|
||||
|
||||
|
||||
def tearDownModule():
|
||||
mv.shutdown()
|
||||
|
||||
|
||||
class TestMultiversoTables(unittest.TestCase):
|
||||
'''
|
||||
Use the command below to run the tests
|
||||
$ nosetests
|
||||
'''
|
||||
|
||||
def _test_array(self, size):
|
||||
tbh = mv.ArrayTableHandler(size)
|
||||
mv.barrier()
|
||||
|
||||
for i in xrange(100):
|
||||
tbh.add(range(1, size + 1))
|
||||
tbh.add(range(1, size + 1))
|
||||
mv.barrier()
|
||||
for j, actual in enumerate(tbh.get()):
|
||||
self.assertEqual((j + 1) * (i + 1) * 2 * mv.workers_num(), actual)
|
||||
mv.barrier()
|
||||
|
||||
def test_small_array(self):
|
||||
# TODO : this is not supported by multiverso because of the size
|
||||
# limit. Waiting for the solution of this issue
|
||||
# https://github.com/Microsoft/multiverso/issues/69
|
||||
|
||||
# self._test_array(1)
|
||||
pass
|
||||
|
||||
def test_array(self):
|
||||
self._test_array(10000)
|
||||
|
||||
def test_matrix(self):
|
||||
num_row = 11
|
||||
num_col = 10
|
||||
size = num_col * num_row
|
||||
workers_num = mv.workers_num()
|
||||
tbh = mv.MatrixTableHandler(num_row, num_col)
|
||||
mv.barrier()
|
||||
for count in xrange(1, 21):
|
||||
row_ids = [0, 1, 5, 10]
|
||||
tbh.add(range(size))
|
||||
tbh.add([range(rid * num_col, (1 + rid) * num_col) for rid in row_ids], row_ids)
|
||||
mv.barrier()
|
||||
data = tbh.get()
|
||||
mv.barrier()
|
||||
for i, row in enumerate(data):
|
||||
for j, actual in enumerate(row):
|
||||
expected = (i * num_col + j) * count * workers_num
|
||||
if i in row_ids:
|
||||
expected += (i * num_col + j) * count * workers_num
|
||||
self.assertEqual(expected, actual)
|
||||
data = tbh.get(row_ids)
|
||||
mv.barrier()
|
||||
for i, row in enumerate(data):
|
||||
for j, actual in enumerate(row):
|
||||
expected = (row_ids[i] * num_col + j) * count * workers_num * 2
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
|
||||
class TestMultiversoSharedVariable(unittest.TestCase):
|
||||
'''
|
||||
Use the command below to run the tests
|
||||
$ nosetests
|
||||
'''
|
||||
|
||||
def _test_sharedvar(self, row, col):
|
||||
W = sharedvar.mv_shared(
|
||||
value=np.zeros(
|
||||
(row, col),
|
||||
dtype=theano.config.floatX
|
||||
),
|
||||
name='W',
|
||||
borrow=True
|
||||
)
|
||||
delta = np.array(range(1, row * col + 1),
|
||||
dtype=theano.config.floatX).reshape((row, col))
|
||||
train_model = theano.function([], updates=[(W, W + delta)])
|
||||
mv.barrier()
|
||||
|
||||
for i in xrange(100):
|
||||
train_model()
|
||||
train_model()
|
||||
sharedvar.sync_all_mv_shared_vars()
|
||||
mv.barrier()
|
||||
# to get the newest value, we must sync again
|
||||
sharedvar.sync_all_mv_shared_vars()
|
||||
for j, actual in enumerate(W.get_value().reshape(-1)):
|
||||
self.assertEqual((j + 1) * (i + 1) * 2 * mv.workers_num(), actual)
|
||||
mv.barrier()
|
||||
|
||||
def test_sharedvar(self):
|
||||
self._test_sharedvar(200, 200)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -0,0 +1,64 @@
|
|||
#!/usr/bin/env python
|
||||
# coding:utf8
|
||||
|
||||
import lasagne
|
||||
import numpy as np
|
||||
import multiverso as mv
|
||||
|
||||
|
||||
class MVNetParamManager(object):
|
||||
'''
|
||||
MVNetParamManager is a manager that makes managing and synchronizing the
|
||||
variables in lasagne easier
|
||||
'''
|
||||
def __init__(self, network):
|
||||
''' The constructor of MVNetParamManager
|
||||
|
||||
The constructor will associate the parameter with multiverso array
|
||||
table. The initial value of the ArrayTableHandler will be the same as the
|
||||
parameters of the network. If different parameters are used in different
|
||||
processes, the average of them will be used as the initial value
|
||||
'''
|
||||
self.shapes = []
|
||||
self.dtypes = []
|
||||
self.sizes = []
|
||||
self.all_param_list = []
|
||||
self.network = network
|
||||
for arr in lasagne.layers.get_all_param_values(self.network):
|
||||
self.shapes.append(arr.shape)
|
||||
# TODO: Now only float32 is supported in multiverso. So I store all
|
||||
# the parameters in a float32 array. This place needs modification
|
||||
# after other types are supported
|
||||
assert(np.dtype("float32") == arr.dtype)
|
||||
self.dtypes.append(arr.dtype)
|
||||
self.sizes.append(arr.size)
|
||||
self.all_param_list.extend([i for i in np.nditer(arr)])
|
||||
self.all_param_list = np.array(self.all_param_list)
|
||||
|
||||
self.tbh = mv.ArrayTableHandler(len(self.all_param_list), init_value=self.all_param_list)
|
||||
mv.barrier() # add barrier to make sure the initial values have taken effect
|
||||
self.all_param_list = self.tbh.get()
|
||||
self._set_all_param_to_net()
|
||||
|
||||
def _set_all_param_to_net(self):
|
||||
n = 0
|
||||
params = []
|
||||
for i, size in enumerate(self.sizes):
|
||||
params.append(self.all_param_list[n:n + size].reshape(self.shapes[i]))
|
||||
n += size
|
||||
lasagne.layers.set_all_param_values(self.network, params)
|
||||
|
||||
def sync_all_param(self):
|
||||
'''sync all parameters with multiverso server
|
||||
|
||||
This function will
|
||||
1) calculate the delta of all params in the network and add the delta to the multiverso server
|
||||
2) get the latest value from the multiverso server
|
||||
'''
|
||||
cur_network_params = np.concatenate([
|
||||
arr.reshape(-1) for arr in lasagne.layers.get_all_param_values(self.network)])
|
||||
|
||||
params_delta = cur_network_params - self.all_param_list
|
||||
self.tbh.add(params_delta)
|
||||
self.all_param_list = self.tbh.get()
|
||||
self._set_all_param_to_net()
|
|
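A rough sketch of how MVNetParamManager is meant to sit inside a lasagne training loop. The module path (multiverso.theano_ext.lasagne_ext.param_manager), the network, the compiled train_fn and the minibatch iterator are assumptions made for illustration, not part of this file:

    import multiverso as mv
    from multiverso.theano_ext.lasagne_ext import param_manager  # assumed module name

    mv.init()
    manager = param_manager.MVNetParamManager(network)  # network: a lasagne output layer built elsewhere

    for epoch in xrange(n_epochs):
        for x_batch, y_batch in iterate_minibatches(train_data):  # hypothetical iterator
            train_fn(x_batch, y_batch)        # local gradient step on this worker
        manager.sync_all_param()              # add parameter deltas, pull the averaged parameters

    mv.shutdown()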
@ -0,0 +1,100 @@
|
|||
#!/usr/bin/env python
|
||||
# coding:utf8
|
||||
|
||||
from theano.tensor.basic import TensorType, _tensor_py_operators
|
||||
from theano.compile import SharedVariable
|
||||
from theano.compile.sharedvalue import shared
|
||||
from theano.gof import Variable, utils
|
||||
import numpy
|
||||
import multiverso as mv
|
||||
|
||||
|
||||
class MVSharedVariable(object):
|
||||
'''MVSharedVariable is a wrapper of SharedVariable
|
||||
|
||||
It will act the same as SharedVariable. The only difference is that a multiverso
|
||||
ArrayTable is added to make it easier to sync values.
|
||||
'''
|
||||
def __init__(self, svobj):
|
||||
'''Constructor of the MVSharedVariable
|
||||
|
||||
The constructor will create ArrayTableHandler and associate the shared
|
||||
variable with it. The initial value of the ArrayTableHandler will be the same
|
||||
as the value of the SharedVariable. If different initial values are used in
|
||||
different processes, the average of them will be used as the initial
|
||||
value
|
||||
'''
|
||||
assert(isinstance(svobj, SharedVariable))
|
||||
self._svobj = svobj
|
||||
self._mv_array = mv.ArrayTableHandler(self._svobj.get_value().size,
|
||||
init_value=self._svobj.get_value().reshape((-1,)))
|
||||
|
||||
mv.barrier() # add barrier to make sure the initial values have taken effect
|
||||
# _last_mv_data stores a copy of the value. It will be used to calculate
|
||||
# the update for multiverso when calling mv_sync
|
||||
self._last_mv_data = self._mv_array.get().reshape(self._svobj.get_value().shape)
|
||||
self._svobj.set_value(self._last_mv_data, borrow=False)
|
||||
|
||||
def mv_sync(self):
|
||||
''' sync values with multiverso server
|
||||
|
||||
mv_sync will add the delta of SharedVariable, which is usually the
|
||||
gradients in typical examples, to the parameter server and then get the
|
||||
latest value in multiverso.
|
||||
'''
|
||||
# because multiverso always uses the add method to sync values, the delta
|
||||
# will be the difference between the current value and the last synced value
|
||||
self._mv_array.add(self._svobj.get_value() - self._last_mv_data)
|
||||
|
||||
self._svobj.set_value(self._mv_array.get().reshape(self._svobj.get_value().shape))
|
||||
self._last_mv_data = self._svobj.get_value(borrow=False)
|
||||
|
||||
def __getstate__(self):
|
||||
'''This is for cPickle to store state.
|
||||
|
||||
It is usually called when you want to dump the model to file with
|
||||
cPickle
|
||||
'''
|
||||
odict = self.__dict__.copy() # copy the dict since we change it
|
||||
del odict['_mv_array'] # remove mv_array, because we can't pickle it
|
||||
return odict
|
||||
|
||||
def __getattribute__(self, attr):
|
||||
'''This function makes MVSharedVariable act the same as SharedVariable'''
|
||||
if attr in ['_svobj', '_mv_array', '_last_mv_data']:
|
||||
# If getting an attribute of self, use the parent __getattribute__ to get
|
||||
# the attribute from the object, otherwise it will fall into an infinite
|
||||
# loop
|
||||
return object.__getattribute__(self, attr)
|
||||
elif attr in ['mv_sync', "__getstate__"]:
|
||||
# If calling a method of MVSharedVariable, call the method directly
|
||||
# and bind the method to the self object
|
||||
return getattr(MVSharedVariable, attr).__get__(self)
|
||||
else:
|
||||
# Otherwise get the attribute from the wrapped object
|
||||
return getattr(self._svobj, attr)
|
||||
|
||||
|
||||
def mv_shared(*args, **kwargs):
|
||||
'''mv_shared works the same as `theano.shared`
|
||||
|
||||
It calls `theano.shared` to create the SharedVariable and uses
|
||||
MVSharedVariable to wrap it.
|
||||
'''
|
||||
var = shared(*args, **kwargs)
|
||||
mv_shared.shared_vars.append(MVSharedVariable(var))
|
||||
return var
|
||||
|
||||
|
||||
mv_shared.shared_vars = [] # all shared_vars in multiverso will be recorded here
|
||||
|
||||
|
||||
def sync_all_mv_shared_vars():
|
||||
'''Sync shared values created by `mv_shared` with multiverso
|
||||
|
||||
It is often used when you are training a model, and it will add the gradients
|
||||
(delta value) to the server and update the latest value from the server.
|
||||
Notice: It will **only** sync shared values created by `mv_shared`
|
||||
'''
|
||||
for sv in mv_shared.shared_vars:
|
||||
sv.mv_sync()
|
|
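A compact sketch of the intended workflow (essentially what the unit test earlier in this commit does): wrap shared variables with mv_shared, run local updates, then call sync_all_mv_shared_vars() to exchange deltas with the server. The toy update rule below is only for illustration:

    import numpy as np
    import theano
    import theano.tensor as T
    import multiverso as mv
    from multiverso.theano_ext import sharedvar

    mv.init()
    W = sharedvar.mv_shared(value=np.zeros((3, 3), dtype=theano.config.floatX),
                            name='W', borrow=True)
    x = T.matrix('x')
    train = theano.function([x], updates=[(W, W + x)])   # toy "training" step

    train(np.ones((3, 3), dtype=theano.config.floatX))   # local update
    sharedvar.sync_all_mv_shared_vars()                   # push delta, pull latest value
    mv.barrier()
    mv.shutdown()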
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env python
|
||||
# coding:utf8
|
||||
|
||||
import ctypes
|
||||
import os
|
||||
import platform
|
||||
from ctypes.util import find_library
|
||||
import numpy as np
|
||||
|
||||
PACKAGE_PATH = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
class Loader(object):
|
||||
'''
|
||||
This loader is responsible for loading multiverso dynamic library in both
|
||||
*nix and Windows
|
||||
'''
|
||||
|
||||
LIB = None
|
||||
|
||||
@classmethod
|
||||
def _find_mv_path(cls):
|
||||
if platform.system() == "Windows":
|
||||
mv_lib_path = find_library("Multiverso")
|
||||
if mv_lib_path is None:
|
||||
print "* Fail to load Multiverso.dll from the windows $PATH."\
|
||||
"Because Multiverso.dll can not be found in the $PATH "\
|
||||
"directories. Go on loading Multiverso from the package."
|
||||
else:
|
||||
return mv_lib_path
|
||||
|
||||
mv_lib_path = os.path.join(PACKAGE_PATH, "Multiverso.dll")
|
||||
if not os.path.exists(mv_lib_path):
|
||||
print "* Fail to load Multiverso.dll from the package. Because"\
|
||||
" the file " + mv_lib_path + " can not be found."
|
||||
else:
|
||||
return mv_lib_path
|
||||
else:
|
||||
mv_lib_path = find_library("multiverso")
|
||||
if mv_lib_path is None:
|
||||
print "* Fail to load libmultiverso.so from the system"\
|
||||
"libraries. Because libmultiverso.so can't be found in"\
|
||||
"library paths. Go on loading Multiverso from the package."
|
||||
else:
|
||||
return mv_lib_path
|
||||
|
||||
mv_lib_path = os.path.join(PACKAGE_PATH, "libmultiverso.so")
|
||||
if not os.path.exists(mv_lib_path):
|
||||
print "* Fail to load libmultiverso.so from the package. Because"\
|
||||
" the file " + mv_lib_path + " can not be found."
|
||||
else:
|
||||
return mv_lib_path
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def load_lib(cls):
|
||||
mv_lib_path = cls._find_mv_path()
|
||||
if mv_lib_path is None:
|
||||
print "Fail to load the multiverso library. Please make sure you"\
|
||||
" have installed multiverso successfully"
|
||||
else:
|
||||
print "Find the multiverso library successfully(%s)" % mv_lib_path
|
||||
return ctypes.cdll.LoadLibrary(mv_lib_path)
|
||||
|
||||
@classmethod
|
||||
def get_lib(cls):
|
||||
if not cls.LIB:
|
||||
cls.LIB = cls.load_lib()
|
||||
cls.LIB.MV_NumWorkers.restype = ctypes.c_int
|
||||
return cls.LIB
|
||||
|
||||
|
||||
def convert_data(data):
|
||||
'''convert the data to float32 ndarray'''
|
||||
if not isinstance(data, np.ndarray):
|
||||
data = np.array(data)
|
||||
return data.astype(np.float32)
|
The diff for this file is not shown because it is too large.
|
@ -0,0 +1,58 @@
|
|||
from CLM import train
|
||||
|
||||
def log_with_print(log, context):
|
||||
print >>log, context
|
||||
print context
|
||||
|
||||
|
||||
logfile = __file__ + 'log'
|
||||
log = open(logfile, 'w')
|
||||
|
||||
round = 0
|
||||
log_with_print(log, 'round ' + str(round) + ' begin ------------------------------- !!')
|
||||
# change some for round
|
||||
|
||||
|
||||
max_epochs = 100000
|
||||
|
||||
obj_directory = r'..\Sentiment_CLM_WithDropout'
|
||||
reload_model = obj_directory + r'\T.npz'
|
||||
|
||||
|
||||
train(round = round,
|
||||
saveto = obj_directory + '\\round%d_model_lstm.npz'%(round),
|
||||
reload_model = reload_model,
|
||||
reload_option = reload_model + '.pkl',
|
||||
dataset = r'../data/imdb.pkl', #%(work_id + 1),
|
||||
encoder = 'lstm',
|
||||
dropout_input = 0.5,
|
||||
dropout_output= 0.5,
|
||||
clip_c = 5.,
|
||||
dim_word = 500,
|
||||
dim_proj = 1024,
|
||||
n_words = 10000,
|
||||
#n_words_sqrt = n_words_sqrt,
|
||||
optimizer = 'adadelta',
|
||||
lrate = 0.5,
|
||||
maxlen = None,
|
||||
minlen = 1,
|
||||
start_iter = 0,
|
||||
start_epoch = 0,
|
||||
max_epochs = max_epochs, #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
batch_size = 16,
|
||||
patience = 100,
|
||||
validFreq = 5000,
|
||||
saveFreq = 50000000,
|
||||
dispFreq = 1,
|
||||
sampleFreq = 20000000,
|
||||
newDumpFreq = 20000,
|
||||
syncFreq = 5000000000,
|
||||
sampleNum = 25,
|
||||
decay_c = 0.,
|
||||
log = logfile,
|
||||
monitor_grad = False,
|
||||
sampleFileName= obj_directory + '\\round%d_sample.txt'%(round),
|
||||
pad_sos = False,
|
||||
embedding = '../data/embedding500.npz'
|
||||
)
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
from CLM import train
|
||||
|
||||
def log_with_print(log, context):
|
||||
print >>log, context
|
||||
print context
|
||||
|
||||
|
||||
logfile = __file__ + 'log'
|
||||
log = open(logfile, 'w')
|
||||
|
||||
round = 0
|
||||
log_with_print(log, 'round ' + str(round) + ' begin ------------------------------- !!')
|
||||
# change some for round
|
||||
|
||||
|
||||
max_epochs = 100000
|
||||
|
||||
obj_directory = r'..\Sentiment_CLM_nodrop'
|
||||
reload_model = obj_directory + r'\de.npz'
|
||||
|
||||
|
||||
train(round = round,
|
||||
saveto = obj_directory + '\\round%d_model_lstm.npz'%(round),
|
||||
reload_model = None, #reload_model,
|
||||
reload_option = None, #reload_model + '.pkl',
|
||||
dataset = r'../data/imdb.pkl', #%(work_id + 1),
|
||||
encoder = 'lstm',
|
||||
dropout_input = None,
|
||||
dropout_output= None,
|
||||
clip_c = 5.,
|
||||
dim_word = 500,
|
||||
dim_proj = 1024,
|
||||
n_words = 10000,
|
||||
#n_words_sqrt = n_words_sqrt,
|
||||
optimizer = 'adadelta',
|
||||
lrate = 1.0,
|
||||
maxlen = None,
|
||||
minlen = 1,
|
||||
start_iter = 0,
|
||||
start_epoch = 0,
|
||||
max_epochs = max_epochs, #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
batch_size = 16,
|
||||
patience = 100,
|
||||
validFreq = 10000,
|
||||
saveFreq = 50000000,
|
||||
dispFreq = 1,
|
||||
sampleFreq = 20000000,
|
||||
newDumpFreq = 20000,
|
||||
syncFreq = 5000000000,
|
||||
sampleNum = 25,
|
||||
decay_c = 0.,
|
||||
log = logfile,
|
||||
monitor_grad = False,
|
||||
sampleFileName= obj_directory + '\\round%d_sample.txt'%(round),
|
||||
pad_sos = False,
|
||||
embedding = '../data/embedding500.npz'
|
||||
)
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
from CLM import train
|
||||
|
||||
def log_with_print(log, context):
|
||||
print >>log, context
|
||||
print context
|
||||
|
||||
|
||||
logfile = __file__ + 'log'
|
||||
log = open(logfile, 'w')
|
||||
|
||||
round = 0
|
||||
log_with_print(log, 'round ' + str(round) + ' begin ------------------------------- !!')
|
||||
# change some for round
|
||||
|
||||
|
||||
max_epochs = 100000
|
||||
|
||||
obj_directory = r'..\Sentiment_CLM_nodrop_lr0.5'
|
||||
reload_model = obj_directory + r'\T.npz'
|
||||
|
||||
|
||||
train(round = round,
|
||||
saveto = obj_directory + '\\round%d_model_lstm.npz'%(round),
|
||||
reload_model = reload_model,
|
||||
reload_option = reload_model + '.pkl',
|
||||
dataset = r'../data/imdb.pkl', #%(work_id + 1),
|
||||
encoder = 'lstm',
|
||||
dropout_input = None,
|
||||
dropout_output= None,
|
||||
clip_c = 5.,
|
||||
dim_word = 500,
|
||||
dim_proj = 1024,
|
||||
n_words = 10000,
|
||||
#n_words_sqrt = n_words_sqrt,
|
||||
optimizer = 'adadelta',
|
||||
lrate = 0.5,
|
||||
maxlen = None,
|
||||
minlen = 1,
|
||||
start_iter = 0,
|
||||
start_epoch = 0,
|
||||
max_epochs = max_epochs, #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
batch_size = 16,
|
||||
patience = 100,
|
||||
validFreq = 5000,
|
||||
saveFreq = 50000000,
|
||||
dispFreq = 1,
|
||||
sampleFreq = 20000000,
|
||||
newDumpFreq = 20000,
|
||||
syncFreq = 5000000000,
|
||||
sampleNum = 25,
|
||||
decay_c = 0.,
|
||||
log = logfile,
|
||||
monitor_grad = False,
|
||||
sampleFileName= obj_directory + '\\round%d_sample.txt'%(round),
|
||||
pad_sos = False,
|
||||
embedding = '../data/embedding500.npz'
|
||||
)
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
set THEANO_FLAGS=device=gpu1
|
||||
python train_clm_WithDropout_lr0.5.py
|
|
@ -0,0 +1,148 @@
|
|||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem Windows batch file to use Theano on GCR
|
||||
@rem
|
||||
@rem Updated: April 7, 2016
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|
||||
@rem set the PATH system variable
|
||||
@rem Start from the 26th letter
|
||||
set working_sub_dir=%cd:~26%
|
||||
|
||||
set PATH=^
|
||||
C:\Windows\system32;^
|
||||
C:\Windows\System32\Wbem;^
|
||||
C:\Windows\System32\WindowsPowerShell\v1.0\;^
|
||||
C:\Windows;^
|
||||
C:\Program Files\Microsoft HPC Pack 2012\Bin\;^
|
||||
C:\Program Files\Microsoft MPI\Bin\;^
|
||||
C:\Program Files (x86)\Windows Kits\8.1\Windows Performance Toolkit\
|
||||
|
||||
pushd \\gcr\Scratch\RR1\v-yixia\Theano
|
||||
set ToolkitFolderDriver=%cd%
|
||||
|
||||
@rem set the environment variable for the CUDA 7.5 Toolkit
|
||||
rem set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 rem the old version
|
||||
set CUDA_HOME=%ToolkitFolderDriver%\CUDA\v7.0+cudnn4008
|
||||
set CUDA_BIN=%CUDA_HOME%\bin
|
||||
set CUDA_INCLUDE=%CUDA_HOME%\include
|
||||
set CUDA_LIB=%CUDA_HOME%\lib\x64
|
||||
set CUDA_LIBNVVP=%CUDA_HOME%\libnvvp
|
||||
|
||||
@rem add all CUDA Toolkit folders to the PATH system variable
|
||||
set PATH=^
|
||||
%CUDA_HOME%;^
|
||||
%CUDA_BIN%;^
|
||||
%CUDA_INCLUDE%;^
|
||||
%CUDA_LIB%;^
|
||||
%CUDA_LIBNVVP%;^
|
||||
%PATH%
|
||||
|
||||
@echo %PATH%
|
||||
|
||||
@rem setting up the VC compiler
|
||||
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem connect to the shared toolkit folder \\gcr\Tools\Shared_Toolkits\Theano
|
||||
rem pushd \\gcr\Tools\Shared_Toolkits\Theano
|
||||
|
||||
|
||||
|
||||
@rem unset these variables
|
||||
@set Framework40Version=
|
||||
@set FrameworkDIR32=
|
||||
@set FrameworkVersion32=
|
||||
@set FSHARPINSTALLDIR=
|
||||
@set VSINSTALLDIR=
|
||||
@set WindowsSDK_ExecutablePath_x64=
|
||||
@set WindowsSDK_ExecutablePath_x86=
|
||||
|
||||
@set VCINSTALLDIR=%ToolkitFolderDriver%\VS_portable\Microsoft Visual Studio 12.0v2\VC\
|
||||
@set WindowsSdkDir=%ToolkitFolderDriver%\Windows Kits\8.1v2\
|
||||
|
||||
:amd64
|
||||
|
||||
@rem set Windows SDK include/lib path
|
||||
@rem --------------------------------------------------
|
||||
if not "%WindowsSdkDir%" == "" @set PATH=%WindowsSdkDir%bin\x64;%WindowsSdkDir%bin\x86;%PATH%
|
||||
if not "%WindowsSdkDir%" == "" @set INCLUDE=%WindowsSdkDir%include\shared;%WindowsSdkDir%include\um;%WindowsSdkDir%include\winrt;%INCLUDE%
|
||||
if not "%WindowsSdkDir%" == "" @set LIB=%WindowsSdkDir%lib\winv6.3\um\x64;%LIB%
|
||||
if not "%WindowsSdkDir%" == "" @set LIBPATH=%WindowsLibPath%;%ExtensionSDKDir%\Microsoft.VCLibs\14.0\References\CommonConfiguration\neutral;%LIBPATH%
|
||||
|
||||
@rem set the environment variables for Microsoft Visual Studio
|
||||
@rem --------------------------------------------------
|
||||
@rem PATH
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH%
|
||||
if exist "%VCINSTALLDIR%BIN\amd64" set PATH=%VCINSTALLDIR%BIN\amd64;%PATH%
|
||||
@rem --------------------------------------------------
|
||||
@rem INCLUDE
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE%
|
||||
if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE%
|
||||
@rem --------------------------------------------------
|
||||
@rem LIB
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIB=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIB%
|
||||
if exist "%VCINSTALLDIR%LIB\amd64" set LIB=%VCINSTALLDIR%LIB\amd64;%LIB%
|
||||
@rem --------------------------------------------------
|
||||
@rem LIBPATH
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIBPATH%
|
||||
if exist "%VCINSTALLDIR%LIB\amd64" set LIBPATH=%VCINSTALLDIR%LIB\amd64;%LIBPATH%
|
||||
|
||||
@rem set the environment variables for the cuDNN v4 (Feb 10, 2016) for CUDA 7.0 and later.
|
||||
rem set CUDNN_PATH=%ToolkitFolderDriver%\Shared_Toolkits\Theano\CUDA\cudnn-4.0.7\cuda
|
||||
rem set INCLUDE=%CUDNN_PATH%\include;%INCLUDE%
|
||||
rem set LIB=%CUDNN_PATH%\lib\x64;%LIB%
|
||||
rem set PATH=%CUDNN_PATH%\bin;%PATH%
|
||||
|
||||
set Platform=X64
|
||||
set CommandPromptType=Native
|
||||
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem connect to your scratch storage \\gcr\Scratch\<location>\<alias>
|
||||
@rem Note: Please copy ANACONDA2 to \\gcr\Scratch\<location>\<alias>\Anaconda2
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
pushd \\gcr\scratch\RR1\v-yixia
|
||||
set CONDANETDRIVE=%cd:~0,2%
|
||||
|
||||
@rem set the environment variable for the Anaconda2
|
||||
set ANACONDA2=%CONDANETDRIVE%\RR1\taoqin\Anaconda-gpu002
|
||||
set ANACONDA2_SCRIPTS=%ANACONDA2%\Scripts
|
||||
set ANACONDA2_BIN=%ANACONDA2%\Library\bin
|
||||
|
||||
@rem add Anaconda2 folders to the PATH system variable
|
||||
set PATH=^
|
||||
%ANACONDA2%;^
|
||||
%ANACONDA2_BIN%;^
|
||||
%ANACONDA2_SCRIPTS%;^
|
||||
%PATH%
|
||||
|
||||
@echo %PATH%
|
||||
|
||||
@rem example files from DeepLearningTutorials are available at \\gcr\Tools\Shared_Toolkits\Theano\Examples
|
||||
set PROJDRIVE=%CONDANETDRIVE%
|
||||
set MYHOME=%PROJDRIVE%\RR1\v-yixia
|
||||
set PROJHOME=%MYHOME%\%working_sub_dir%
|
||||
|
||||
%PROJDRIVE%
|
||||
|
||||
cd %PROJHOME%
|
||||
|
||||
@rem setup theano env (generate .theanorc.txt)
|
||||
call python gen_theanorc.py %ANACONDA2% .theanorc.txt
|
||||
del %userprofile%\.theanorc.txt /Q /F
|
||||
copy .theanorc.txt %userprofile% /Y
|
||||
|
||||
call python write_script.py %*
|
||||
|
||||
call worker.bat
|
||||
|
||||
@echo delete theano env
|
||||
del %userprofile%\.theanorc.txt /Q /F
|
||||
|
||||
popd
|
||||
|
||||
popd
|
||||
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem Windows batch file to use Theano on GCR
|
||||
@rem
|
||||
@rem Updated: April 7, 2016
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|
||||
@rem set the PATH system variable
|
||||
@rem Start from the 26th letter
|
||||
set working_sub_dir=%cd:~26%
|
||||
|
||||
set PATH=^
|
||||
C:\Windows\system32;^
|
||||
C:\Windows\System32\Wbem;^
|
||||
C:\Windows\System32\WindowsPowerShell\v1.0\;^
|
||||
C:\Windows;^
|
||||
C:\Program Files\Microsoft HPC Pack 2012\Bin\;^
|
||||
C:\Program Files\Microsoft MPI\Bin\;^
|
||||
C:\Program Files (x86)\Windows Kits\8.1\Windows Performance Toolkit\
|
||||
|
||||
pushd \\gcr\Scratch\RR1\v-yixia\Theano
|
||||
set ToolkitFolderDriver=%cd%
|
||||
|
||||
@rem set the environment variable for the CUDA 7.5 Toolkit
|
||||
rem set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 rem the old version
|
||||
set CUDA_HOME=%ToolkitFolderDriver%\CUDA\v7.0
|
||||
set CUDA_BIN=%CUDA_HOME%\bin
|
||||
set CUDA_INCLUDE=%CUDA_HOME%\include
|
||||
set CUDA_LIB=%CUDA_HOME%\lib\x64
|
||||
set CUDA_LIBNVVP=%CUDA_HOME%\libnvvp
|
||||
|
||||
@rem add all CUDA Toolkit folders to the PATH system variable
|
||||
set PATH=^
|
||||
%CUDA_HOME%;^
|
||||
%CUDA_BIN%;^
|
||||
%CUDA_INCLUDE%;^
|
||||
%CUDA_LIB%;^
|
||||
%CUDA_LIBNVVP%;^
|
||||
%PATH%
|
||||
|
||||
@echo %PATH%
|
||||
|
||||
@rem setting up the VC compiler
|
||||
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem connect to the shared toolkit folder \\gcr\Tools\Shared_Toolkits\Theano
|
||||
rem pushd \\gcr\Tools\Shared_Toolkits\Theano
|
||||
|
||||
|
||||
|
||||
@rem unset these variables
|
||||
@set Framework40Version=
|
||||
@set FrameworkDIR32=
|
||||
@set FrameworkVersion32=
|
||||
@set FSHARPINSTALLDIR=
|
||||
@set VSINSTALLDIR=
|
||||
@set WindowsSDK_ExecutablePath_x64=
|
||||
@set WindowsSDK_ExecutablePath_x86=
|
||||
|
||||
@set VCINSTALLDIR=%ToolkitFolderDriver%\VS_portable\Microsoft Visual Studio 12.0\VC\
|
||||
@set WindowsSdkDir=%ToolkitFolderDriver%\Windows Kits\8.1\
|
||||
|
||||
:amd64
|
||||
|
||||
@rem set Windows SDK include/lib path
|
||||
@rem --------------------------------------------------
|
||||
if not "%WindowsSdkDir%" == "" @set PATH=%WindowsSdkDir%bin\x64;%WindowsSdkDir%bin\x86;%PATH%
|
||||
if not "%WindowsSdkDir%" == "" @set INCLUDE=%WindowsSdkDir%include\%WindowsSDKVersion%shared;%WindowsSdkDir%include\%WindowsSDKVersion%um;%WindowsSdkDir%include\%WindowsSDKVersion%winrt;%INCLUDE%
|
||||
if not "%WindowsSdkDir%" == "" @set LIB=%WindowsSdkDir%lib\%WindowsSDKLibVersion%um\x64;%LIB%
|
||||
if not "%WindowsSdkDir%" == "" @set LIBPATH=%WindowsLibPath%;%ExtensionSDKDir%\Microsoft.VCLibs\14.0\References\CommonConfiguration\neutral;%LIBPATH%
|
||||
|
||||
@rem set the environment variables for Microsoft Visual Studio
|
||||
@rem --------------------------------------------------
|
||||
@rem PATH
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH%
|
||||
if exist "%VCINSTALLDIR%BIN\amd64" set PATH=%VCINSTALLDIR%BIN\amd64;%PATH%
|
||||
@rem --------------------------------------------------
|
||||
@rem INCLUDE
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE%
|
||||
if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE%
|
||||
@rem --------------------------------------------------
|
||||
@rem LIB
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIB=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIB%
|
||||
if exist "%VCINSTALLDIR%LIB\amd64" set LIB=%VCINSTALLDIR%LIB\amd64;%LIB%
|
||||
@rem --------------------------------------------------
|
||||
@rem LIBPATH
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIBPATH%
|
||||
if exist "%VCINSTALLDIR%LIB\amd64" set LIBPATH=%VCINSTALLDIR%LIB\amd64;%LIBPATH%
|
||||
|
||||
@rem set the environment variables for the cuDNN v4 (Feb 10, 2016) for CUDA 7.0 and later.
|
||||
set CUDNN_PATH=%ToolkitFolderDriver%\Shared_Toolkits\Theano\CUDA\cudnn-4.0.7\cuda
|
||||
set INCLUDE=%CUDNN_PATH%\include;%INCLUDE%
|
||||
set LIB=%CUDNN_PATH%\lib\x64;%LIB%
|
||||
set PATH=%CUDNN_PATH%\bin;%PATH%
|
||||
|
||||
set Platform=X64
|
||||
set CommandPromptType=Native
|
||||
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem connect to your scratch storage \\gcr\Scratch\<location>\<alias>
|
||||
@rem Note: Please copy ANACONDA2 to \\gcr\Scratch\<location>\<alias>\Anaconda2
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
pushd \\gcr\scratch\RR1\v-yixia
|
||||
set CONDANETDRIVE=%cd:~0,2%
|
||||
|
||||
@rem set the environment variable for the Anaconda2
|
||||
set ANACONDA2=%CONDANETDRIVE%\RR1\v-yirwan\Anaconda2
|
||||
set ANACONDA2_SCRIPTS=%ANACONDA2%\Scripts
|
||||
set ANACONDA2_BIN=%ANACONDA2%\Library\bin
|
||||
|
||||
@rem add Anaconda2 folders to the PATH system variable
|
||||
set PATH=^
|
||||
%ANACONDA2%;^
|
||||
%ANACONDA2_BIN%;^
|
||||
%ANACONDA2_SCRIPTS%;^
|
||||
%PATH%
|
||||
|
||||
@echo %PATH%
|
||||
|
||||
@rem example files from DeepLearningTutorials are available at \\gcr\Tools\Shared_Toolkits\Theano\Examples
|
||||
set PROJDRIVE=%CONDANETDRIVE%
|
||||
set MYHOME=%PROJDRIVE%\RR1\v-yixia
|
||||
set PROJHOME=%MYHOME%\%working_sub_dir%
|
||||
|
||||
%PROJDRIVE%
|
||||
|
||||
cd %PROJHOME%
|
||||
|
||||
@rem setup theano env (generate .theanorc.txt)
|
||||
call python gen_theanorc.py %ANACONDA2% .theanorc.txt
|
||||
del %userprofile%\.theanorc.txt /Q /F
|
||||
copy .theanorc.txt %userprofile% /Y
|
||||
|
||||
call python write_script.py %1
|
||||
|
||||
call worker.bat
|
||||
|
||||
@echo delete theano env
|
||||
del %userprofile%\.theanorc.txt /Q /F
|
||||
|
||||
popd
|
||||
|
||||
popd
|
||||
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
import re, os, numpy, sys
|
||||
|
||||
|
||||
filename = r'.\gpu_usage_draft'
|
||||
|
||||
|
||||
|
||||
def GrabGPU():
|
||||
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename
|
||||
os.system(cmdstr)
|
||||
|
||||
def GetGPUUSage():
|
||||
pattern = re.compile(r'(?P<num>[0-9]+)MiB[\s]+/')
|
||||
mem = []
|
||||
fo = open(filename, 'r')
|
||||
for line in fo:
|
||||
result = pattern.search(line)
|
||||
if result:
|
||||
mem.append(int(result.group('num')))
|
||||
fo.close()
|
||||
|
||||
return numpy.array(mem).argsort()[0]
|
||||
|
||||
def print_script(cmd):
|
||||
GrabGPU()
|
||||
with open('worker.bat', 'w') as f:
|
||||
f.write('@echo off\nsetlocal ENABLEDELAYEDEXPANSION\n')
|
||||
if len(cmd) == 1:
|
||||
f.write('set THEANO_FLAGS=device=gpu%d\n' % GetGPUUSage())
|
||||
f.write('python ' + cmd[0])
|
||||
elif len(cmd) == 2:
|
||||
f.write('set THEANO_FLAGS=device=gpu' + cmd[1] + '\n')
|
||||
f.write('python ' + cmd[0])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print_script(sys.argv[1:])
|
||||
|
||||
# os.system('del /q ' + filename + rank)
|
|
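For example (the card index is hypothetical), running `python write_script.py train_clm_WithDropout_lr0.5.py` on a node whose card with the lowest reported memory usage is GPU 3 would generate a worker.bat containing:

    @echo off
    setlocal ENABLEDELAYEDEXPANSION
    set THEANO_FLAGS=device=gpu3
    python train_clm_WithDropout_lr0.5.py

Passing an explicit card index as a second argument (e.g. `python write_script.py train_clm_WithDropout_lr0.5.py 0`) skips the nvidia-smi probe and pins the job to that device.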
@ -0,0 +1,40 @@
|
|||
import sys
|
||||
|
||||
mapper_machine_freecard = {}
|
||||
mapper_machine_rank = {}
|
||||
|
||||
def MapIDs(m_machine):
|
||||
for i in range(m_machine):
|
||||
fo = open('record' + str(i))
|
||||
id = 0
|
||||
m_line = 0
|
||||
machine_name = ''
|
||||
for line in fo:
|
||||
if id == 0:
|
||||
machine_name = line[:-1]
|
||||
mapper_machine_freecard[machine_name] = []
|
||||
if mapper_machine_rank.has_key(machine_name):
|
||||
mapper_machine_rank[machine_name].append(i)
|
||||
else:
|
||||
mapper_machine_rank[machine_name] = [i]
|
||||
elif id > 1:
|
||||
mapper_machine_freecard[machine_name].append(int(line))
|
||||
id = id + 1
|
||||
fo.close()
|
||||
|
||||
def Map_Rank_Card(m_machine):
|
||||
MapIDs(m_machine)
|
||||
allocations = range(m_machine)
|
||||
for k in mapper_machine_rank.keys():
|
||||
ranks = mapper_machine_rank[k]
|
||||
cards = mapper_machine_freecard[k]
|
||||
#if len(ranks) == len(cards):
|
||||
for i in range(len(ranks)):
|
||||
allocations[ranks[i]] = cards[i]
|
||||
|
||||
for l in allocations:
|
||||
print l
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
Map_Rank_Card(int(sys.argv[1]))
|
|
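To make the mapping concrete, suppose two ranks land on the same machine and their probe output (hostname, number of free cards, then one free card index per line, as produced by the per-rank GPU-probe script later in this commit) has been redirected to record0 and record1, both reading:

    GCR-NODE-01
    2
    2
    5

Running this script with argument 2 then prints `2` and `5`, i.e. rank 0 is assigned card 2 and rank 1 card 5. The hostname and card indices are made up for illustration.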
@ -0,0 +1,369 @@
|
|||
"""
|
||||
data loading and minibatch generation
|
||||
"""
|
||||
__author__ = 'v-yirwan'
|
||||
|
||||
import cPickle as pkl
|
||||
import gzip
|
||||
import os
|
||||
import numpy
|
||||
from theano import config
|
||||
|
||||
def get_dataset_file(dataset, default_dataset, origin):
|
||||
'''
|
||||
Look for it as if it was a full path, if not, try local file,
|
||||
if not try in the data directory.
|
||||
|
||||
Download dataset if it is not present
|
||||
'''
|
||||
data_dir, data_file = os.path.split(dataset)
|
||||
if data_dir == "" and not os.path.isfile(dataset):
|
||||
# Check if dataset is in the data directory.
|
||||
new_path = os.path.join(
|
||||
os.path.split(__file__)[0],
|
||||
"..",
|
||||
"data",
|
||||
dataset
|
||||
)
|
||||
if os.path.isfile(new_path) or data_file == default_dataset:
|
||||
dataset = new_path
|
||||
|
||||
if (not os.path.isfile(dataset)) and data_file == default_dataset:
|
||||
from six.moves import urllib
|
||||
print('Downloading data from %s' % origin)
|
||||
urllib.request.urlretrieve(origin, dataset)
|
||||
|
||||
return dataset
|
||||
|
||||
def load_data(path="imdb.pkl", n_words=100000, maxlen=None,
|
||||
sort_by_len=True, fixed_valid=True, valid_portion=0.1):
|
||||
'''
|
||||
Loads the dataset
|
||||
:type path: String
|
||||
:param path: The path to the dataset (here IMDB)
|
||||
:type n_words: int
|
||||
:param n_words: The number of word to keep in the vocabulary.
|
||||
All extra words are set to unknown (1).
|
||||
:type maxlen: None or positive int
|
||||
:param maxlen: the max sequence length we use in the train/valid set.
|
||||
:type sort_by_len: bool
|
||||
:param sort_by_len: Sort by sequence length for the train,
|
||||
valid and test set. This allows faster execution as it causes
|
||||
less padding per minibatch. Another mechanism must be used to
|
||||
shuffle the train set at each epoch.
|
||||
:type fixed_valid: bool
|
||||
:param fixed_valid: load fixed validation set from the corpus file,
|
||||
which would otherwise be picked randomly from the training set with
|
||||
proportion [valid_portion]
|
||||
:type valid_portion: float
|
||||
:param valid_portion: The proportion of the full train set used for
|
||||
the validation set.
|
||||
|
||||
'''
|
||||
|
||||
# Load the dataset
|
||||
path = get_dataset_file(
|
||||
path, "imdb.pkl",
|
||||
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
|
||||
if path.endswith(".gz"):
|
||||
f = gzip.open(path, 'rb')
|
||||
else:
|
||||
f = open(path, 'rb')
|
||||
|
||||
train_set = pkl.load(f)
|
||||
if fixed_valid:
|
||||
valid_set = pkl.load(f)
|
||||
test_set = pkl.load(f)
|
||||
f.close()
|
||||
|
||||
def _truncate_data(train_set):
|
||||
'''
|
||||
truncate sequences whose lengths exceed the max-len threshold
|
||||
:param train_set: a list of sequences list and corresponding labels list
|
||||
:return: truncated train_set
|
||||
'''
|
||||
new_train_set_x = []
|
||||
new_train_set_y = []
|
||||
for x, y in zip(train_set[0], train_set[1]):
|
||||
if len(x) < maxlen:
|
||||
new_train_set_x.append(x)
|
||||
new_train_set_y.append(y)
|
||||
train_set = (new_train_set_x, new_train_set_y)
|
||||
del new_train_set_x, new_train_set_y
|
||||
return train_set
|
||||
|
||||
def _set_valid(train_set, valid_portion):
|
||||
'''
|
||||
set validation with [valid_portion] proportion of training set
|
||||
'''
|
||||
train_set_x, train_set_y = train_set
|
||||
n_samples = len(train_set_x)
|
||||
sidx = numpy.random.permutation(n_samples) # shuffle data
|
||||
n_train = int(numpy.round(n_samples * (1. - valid_portion)))
|
||||
valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
|
||||
valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
|
||||
train_set_x = [train_set_x[s] for s in sidx[:n_train]]
|
||||
train_set_y = [train_set_y[s] for s in sidx[:n_train]]
|
||||
train_set = (train_set_x, train_set_y)
|
||||
valid_set = (valid_set_x, valid_set_y)
|
||||
del train_set_x, train_set_y, valid_set_x, valid_set_y
|
||||
return train_set, valid_set
|
||||
|
||||
if maxlen:
|
||||
train_set = _truncate_data(train_set)
|
||||
if fixed_valid:
|
||||
print 'Loading with fixed validation set...',
|
||||
valid_set = _truncate_data(valid_set)
|
||||
else:
|
||||
print 'Setting validation set with proportion:', valid_portion, '...',
|
||||
train_set, valid_set = _set_valid(train_set, valid_portion)
|
||||
test_set = _truncate_data(test_set)
|
||||
|
||||
if maxlen is None and not fixed_valid:
|
||||
train_set, valid_set = _set_valid(train_set, valid_portion)
|
||||
|
||||
def remove_unk(x):
|
||||
return [[1 if w >= n_words else w for w in sen] for sen in x]
|
||||
|
||||
test_set_x, test_set_y = test_set
|
||||
valid_set_x, valid_set_y = valid_set
|
||||
train_set_x, train_set_y = train_set
|
||||
|
||||
# remove unk from dataset
|
||||
train_set_x = remove_unk(train_set_x) # use 1 if unk
|
||||
valid_set_x = remove_unk(valid_set_x)
|
||||
test_set_x = remove_unk(test_set_x)
|
||||
|
||||
def len_argsort(seq):
|
||||
return sorted(range(len(seq)), key=lambda x: len(seq[x]))
|
||||
|
||||
if sort_by_len:
|
||||
sorted_index = len_argsort(test_set_x)
|
||||
# ranked from shortest to longest
|
||||
test_set_x = [test_set_x[i] for i in sorted_index]
|
||||
test_set_y = [test_set_y[i] for i in sorted_index]
|
||||
|
||||
sorted_index = len_argsort(valid_set_x)
|
||||
valid_set_x = [valid_set_x[i] for i in sorted_index]
|
||||
valid_set_y = [valid_set_y[i] for i in sorted_index]
|
||||
|
||||
sorted_index = len_argsort(train_set_x)
|
||||
train_set_x = [train_set_x[i] for i in sorted_index]
|
||||
train_set_y = [train_set_y[i] for i in sorted_index]
|
||||
|
||||
train = (train_set_x, train_set_y)
|
||||
valid = (valid_set_x, valid_set_y)
|
||||
test = (test_set_x, test_set_y)
|
||||
|
||||
return train, valid, test
|
||||
|
||||
def load_mnist(path='mnist.pkl', fixed_permute=True, rand_permute=False):
|
||||
f = open(path, 'rb')
|
||||
train = pkl.load(f)
|
||||
valid = pkl.load(f)
|
||||
test = pkl.load(f)
|
||||
f.close()
|
||||
|
||||
def _permute(data, perm):
|
||||
x, y = data
|
||||
x_new = []
|
||||
for xx in x:
|
||||
xx_new = [xx[pp] for pp in perm]
|
||||
x_new.append(xx_new)
|
||||
return (x_new, y)
|
||||
|
||||
def _trans2list(data):
|
||||
x, y = data
|
||||
x = [list(xx) for xx in x]
|
||||
return (x, y)
|
||||
|
||||
if rand_permute:
|
||||
print 'Using a fixed random permutation of pixels...',
|
||||
perm = numpy.random.permutation(range(784))
|
||||
train = _permute(train, perm)
|
||||
valid = _permute(valid, perm)
|
||||
test = _permute(test, perm)
|
||||
elif fixed_permute:
|
||||
print 'Using permuted dataset...',
|
||||
|
||||
train = _trans2list(train)
|
||||
valid = _trans2list(valid)
|
||||
test = _trans2list(test)
|
||||
|
||||
return train, valid, test
|
||||
|
||||
def get_minibatches_idx(n, minibatch_size, shuffle=False):
|
||||
"""
|
||||
Used to shuffle the dataset at each iteration.
|
||||
"""
|
||||
|
||||
idx_list = numpy.arange(n, dtype="int32")
|
||||
|
||||
if shuffle:
|
||||
numpy.random.shuffle(idx_list)
|
||||
|
||||
minibatches = []
|
||||
minibatch_start = 0
|
||||
for i in range(n // minibatch_size):
|
||||
minibatches.append(idx_list[minibatch_start:
|
||||
minibatch_start + minibatch_size])
|
||||
minibatch_start += minibatch_size
|
||||
|
||||
if (minibatch_start != n):
|
||||
# Make a minibatch out of what is left
|
||||
minibatches.append(idx_list[minibatch_start:])
|
||||
|
||||
return zip(range(len(minibatches)), minibatches)
|
||||
|
||||
def get_minibatches_idx_bucket(dataset, minibatch_size, shuffle=False):
|
||||
"""
|
||||
divide into different buckets according to sequence lengths
|
||||
dynamic batch size
|
||||
"""
|
||||
# divide into buckets
|
||||
slen = [len(ss) for ss in dataset]
|
||||
bucket1000 = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 0 and slen[sidx] <= 1000]
|
||||
bucket3000 = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 1000 and slen[sidx] <= 3000]
|
||||
bucket_long = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 3000]
|
||||
|
||||
# shuffle each bucket
|
||||
if shuffle:
|
||||
numpy.random.shuffle(bucket1000)
|
||||
numpy.random.shuffle(bucket3000)
|
||||
numpy.random.shuffle(bucket_long)
|
||||
|
||||
# make minibatches
|
||||
def _make_batch(minibatches, bucket, minibatch_size):
|
||||
minibatch_start = 0
|
||||
n = len(bucket)
|
||||
for i in range(n // minibatch_size):
|
||||
minibatches.append(bucket[minibatch_start : minibatch_start + minibatch_size])
|
||||
minibatch_start += minibatch_size
|
||||
if (minibatch_start != n):
|
||||
# Make a minibatch out of what is left
|
||||
minibatches.append(bucket[minibatch_start:])
|
||||
return minibatches
|
||||
|
||||
minibatches = []
|
||||
_make_batch(minibatches, bucket1000, minibatch_size=minibatch_size)
|
||||
_make_batch(minibatches, bucket3000, minibatch_size=minibatch_size//2)
|
||||
_make_batch(minibatches, bucket_long, minibatch_size=minibatch_size//8)
|
||||
|
||||
# shuffle minibatches
|
||||
numpy.random.shuffle(minibatches)
|
||||
|
||||
return zip(range(len(minibatches)), minibatches)
|
||||
|
||||
def prepare_data(seqs, labels, maxlen=None, dataset='text'):
|
||||
"""Create the matrices from the datasets.
|
||||
|
||||
This pads each sequence to the same length: the length of the
|
||||
longest sequence or maxlen.
|
||||
|
||||
If maxlen is set, we will cut all sequences to this maximum
|
||||
length.
|
||||
|
||||
This swaps the axes (time becomes the first dimension)!
|
||||
"""
|
||||
# x: a list of sentences
|
||||
lengths = [len(s) for s in seqs]
|
||||
|
||||
if maxlen is not None:
|
||||
new_seqs = []
|
||||
new_labels = []
|
||||
new_lengths = []
|
||||
for l, s, y in zip(lengths, seqs, labels):
|
||||
if l < maxlen:
|
||||
new_seqs.append(s)
|
||||
new_labels.append(y)
|
||||
new_lengths.append(l)
|
||||
lengths = new_lengths
|
||||
labels = new_labels
|
||||
seqs = new_seqs
|
||||
|
||||
if len(lengths) < 1:
|
||||
return None, None, None
|
||||
|
||||
n_samples = len(seqs)
|
||||
maxlen = numpy.max(lengths)
|
||||
|
||||
if dataset == 'mnist':
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('float32')
|
||||
else:
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('int64')
|
||||
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
|
||||
for idx, s in enumerate(seqs):
|
||||
x[:lengths[idx], idx] = s
|
||||
x_mask[:lengths[idx], idx] = 1.
|
||||
|
||||
return x, x_mask, labels
|
||||
|
||||
def prepare_data_hier(seqs, labels, hier_len, maxlen=None, dataset='text'):
|
||||
'''
|
||||
prepare minibatch for hierarchical model
|
||||
'''
|
||||
# sort (long->short)
|
||||
sorted_idx = sorted(range(len(seqs)), key=lambda x: len(seqs[x]), reverse=True)
|
||||
seqs = [seqs[i] for i in sorted_idx]
|
||||
labels = [labels[i] for i in sorted_idx]
|
||||
|
||||
# truncate data
|
||||
lengths = [len(s) for s in seqs]
|
||||
if maxlen is not None:
|
||||
new_seqs = []
|
||||
new_labels = []
|
||||
new_lengths = []
|
||||
for l, s, y in zip(lengths, seqs, labels):
|
||||
if l < maxlen:
|
||||
new_seqs.append(s)
|
||||
new_labels.append(y)
|
||||
new_lengths.append(l)
|
||||
lengths = new_lengths
|
||||
labels = new_labels
|
||||
seqs = new_seqs
|
||||
if len(lengths) < 1:
|
||||
return None, None, None
|
||||
|
||||
# set batch size
|
||||
n_samples = len(seqs)
|
||||
maxlen = numpy.max(lengths)
|
||||
if maxlen % hier_len == 0:
|
||||
n_batch = maxlen/hier_len
|
||||
else:
|
||||
n_batch = maxlen//hier_len + 1
|
||||
maxlen = n_batch * hier_len
|
||||
|
||||
# padding whole batch
|
||||
if dataset == 'mnist':
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('float32')
|
||||
else:
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('int64')
|
||||
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
|
||||
for idx, s in enumerate(seqs):
|
||||
x[:lengths[idx], idx] = s
|
||||
x_mask[:lengths[idx], idx] = 1
|
||||
|
||||
# slice to mini-batches
|
||||
x_batch = [x[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
|
||||
if dataset == 'mnist':
|
||||
x_batch = numpy.array(x_batch).astype('float32')
|
||||
else:
|
||||
x_batch = numpy.array(x_batch).astype('int64')
|
||||
mask_batch = [x_mask[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
|
||||
mask_batch = numpy.array(mask_batch).astype(config.floatX)
|
||||
|
||||
# mask for hier-level
|
||||
mask_hier = numpy.ones((n_batch, n_samples)).astype(config.floatX)
|
||||
for idx in range(n_samples):
|
||||
mpos = numpy.where(x_mask[:, idx]==0)[0]
|
||||
if len(mpos) == 0:
|
||||
continue
|
||||
bidx = min(mpos[0]//hier_len+1, n_batch)
|
||||
if mpos[0] % hier_len == 0:
|
||||
bidx -= 1 # bug fixed TODO: more elegant solution?
|
||||
mask_hier[bidx:, idx] = 0
|
||||
|
||||
return x_batch, mask_batch, mask_hier, labels
|
|
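A small worked example of what prepare_data produces (the token ids are made up): with seqs = [[4, 7, 9], [3, 5]] and labels = [1, 0], the padded batch is time-major with shape (3, 2):

    x =          x_mask =
    [[4 3]       [[1. 1.]
     [7 5]        [1. 1.]
     [9 0]]       [1. 0.]]

Each column is one sample; the mask zeroes out the padded tail of the shorter sequence, which is exactly what the mask argument of the recurrent layers below consumes.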
@ -0,0 +1,38 @@
|
|||
import re
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
|
||||
filename = r'.\gpu_usage_draft_'
|
||||
default_gpu = 58 + 30
|
||||
|
||||
|
||||
|
||||
def GrabGPU(rank):
|
||||
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename + rank
|
||||
os.system(cmdstr)
|
||||
|
||||
def GetGPUUSage(rank):
|
||||
pattern = re.compile(r'(?P<num>[0-9]{1,5})MiB[\s]+/')
|
||||
id = 0
|
||||
GPUs = []
|
||||
fo = open(filename + rank, 'r')
|
||||
for line in fo:
|
||||
result = pattern.search(line)
|
||||
if result:
|
||||
if int(result.group("num")) < default_gpu:
|
||||
GPUs.append(id)
|
||||
id = id + 1
|
||||
fo.close()
|
||||
|
||||
print len(GPUs)
|
||||
for gpu in GPUs:
|
||||
print gpu
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
rank = sys.argv[1]
|
||||
GrabGPU(rank)
|
||||
print socket.gethostname()
|
||||
GetGPUUSage(rank)
|
||||
#os.system('del /q ' + filename + rank)
|
|
@ -0,0 +1,838 @@
|
|||
"""
|
||||
supports simple-rnn, lstm, hierarchical lstm
|
||||
supports lstm with identity skip-connections (soft), parametric skip-connections (soft)
|
||||
supports resnet, resnet with identity skip-connections (full and soft), parametric skip-connections (soft)
|
||||
supports hybrid structure (lstm+resnet)
|
||||
"""
|
||||
|
||||
__author__ = 'v-yirwan'
|
||||
|
||||
import theano.tensor as tensor
|
||||
from Util import *
|
||||
|
||||
layers = {'lstm': ('param_init_lstm', 'lstm_layer'),
|
||||
'lstm_skip': ('param_init_lstm', 'lstm_skip_layer'),
|
||||
'lstm_pskip': ('param_init_lstm_pskip', 'lstm_pskip_layer'),
|
||||
'residual': ('param_init_residual', 'residual_layer'),
|
||||
'residual_full_skip': ('param_init_residual', 'residual_full_skip_layer'),
|
||||
'residual_skip': ('param_init_residual', 'residual_skip_layer'),
|
||||
'residual_pskip': ('param_init_residual_pskip', 'residual_pskip_layer'),
|
||||
'rnn': ('param_init_rnn', 'rnn_layer'),
|
||||
'rnn_pskip': ('param_init_rnn_pskip', 'rnn_pskip_layer'),
|
||||
# modules for ResNet Modifications
|
||||
'presidual': ('param_init_presidual', 'presidual_layer'),
|
||||
'pxresidual': ('param_init_pxresidual', 'pxresidual_layer'),
|
||||
'residual_pskip_mod': ('param_init_residual_pskip', 'residual_pskip_mod_layer')
|
||||
}
|
||||
|
||||
def _p(pp, name):
|
||||
return '%s_%s' % (pp, name)
|
||||
|
||||
def get_layer(name):
|
||||
fns = layers[name]
|
||||
return (eval(fns[0]), eval(fns[1]))
|
||||
|
||||
# ===========================
|
||||
# LSTM-related layers
|
||||
# LSTM, LSTM with identity and parametric skip connections (soft)
|
||||
# ===========================
|
||||
|
||||
def param_init_lstm(options, params, prefix='lstm', hier_level=False):
|
||||
"""
|
||||
Init the LSTM parameter
|
||||
Support hierarchical architecture
|
||||
"""
|
||||
if hier_level:
|
||||
# bug fixed: dimension matching for hier-mode
|
||||
W = numpy.concatenate([ortho_weight(options['dim_proj']),
|
||||
ortho_weight(options['dim_proj']),
|
||||
ortho_weight(options['dim_proj']),
|
||||
ortho_weight(options['dim_proj'])], axis=1)
|
||||
else:
|
||||
# bug fixed: different dim for embedding and hidden state
|
||||
W = numpy.concatenate([norm_weight(options['dim_word'], options['dim_proj']),
|
||||
norm_weight(options['dim_word'], options['dim_proj']),
|
||||
norm_weight(options['dim_word'], options['dim_proj']),
|
||||
norm_weight(options['dim_word'], options['dim_proj'])], axis=1)
|
||||
params[_p(prefix, 'W')] = W
|
||||
U = numpy.concatenate([ortho_weight(options['dim_proj']),
|
||||
ortho_weight(options['dim_proj']),
|
||||
ortho_weight(options['dim_proj']),
|
||||
ortho_weight(options['dim_proj'])], axis=1)
|
||||
params[_p(prefix, 'U')] = U
|
||||
b = numpy.zeros((4 * options['dim_proj'],))
|
||||
params[_p(prefix, 'b')] = b.astype(config.floatX)
|
||||
|
||||
return params
|
||||
|
||||
def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
|
||||
nsteps = state_below.shape[0]
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
def _step(m_, x_, h_, c_):
|
||||
preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
|
||||
preact += x_
|
||||
|
||||
i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
|
||||
f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
|
||||
o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
|
||||
c = tensor.tanh(_slice(preact, 3, options['dim_proj']))
|
||||
|
||||
c = f * c_ + i * c
|
||||
c = m_[:, None] * c + (1. - m_)[:, None] * c_
|
||||
|
||||
h = o * tensor.tanh(c)
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_
|
||||
|
||||
return h, c
|
||||
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
dim_proj = options['dim_proj']
|
||||
rval, updates = theano.scan(_step,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[tensor.alloc(numpy_floatX(0.),
|
||||
n_samples,
|
||||
dim_proj),
|
||||
tensor.alloc(numpy_floatX(0.),
|
||||
n_samples,
|
||||
dim_proj)],
|
||||
name=_p(prefix, '_layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval[0]
|
||||
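# For reference, the _step function above is the standard LSTM recurrence
# (this comment only restates what the code already does):
#   i_t = sigmoid(W_i x_t + U_i h_{t-1} + b_i)
#   f_t = sigmoid(W_f x_t + U_f h_{t-1} + b_f)
#   o_t = sigmoid(W_o x_t + U_o h_{t-1} + b_o)
#   g_t = tanh(W_c x_t + U_c h_{t-1} + b_c)
#   c_t = f_t * c_{t-1} + i_t * g_t
#   h_t = o_t * tanh(c_t)
# W, U and b are stored concatenated (4 * dim_proj columns) and cut apart by
# _slice; the mask m_ carries h and c of padded positions through unchanged.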
|
||||
def lstm_skip_layer(tparams, state_below, options, prefix='lstm_skip', mask=None):
|
||||
'''
|
||||
lstm layer with soft identity skip connections
|
||||
'''
|
||||
nsteps = state_below.shape[0]
|
||||
n_skip = options['skip_steps']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
def _lstm_unit(m_, x_, h_, c_, h_skip, hcnt):
|
||||
skip_flag = tensor.eq(hcnt % n_skip, 0)
|
||||
preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
|
||||
preact += x_
|
||||
|
||||
# gates
|
||||
i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
|
||||
f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
|
||||
o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
|
||||
c = tensor.tanh(_slice(preact, 3, options['dim_proj']))
|
||||
|
||||
# cell state
|
||||
c = f * c_ + i * c
|
||||
c = m_[:, None] * c + (1. - m_)[:, None] * c_
|
||||
# new hidden state
|
||||
h = o * tensor.tanh(c) + h_skip * skip_flag
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_
|
||||
# update h_skip
|
||||
h_skip = h_skip * (1-skip_flag) + h * skip_flag
|
||||
hcnt += 1
|
||||
|
||||
return h, c, h_skip, hcnt
|
||||
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
dim_proj = options['dim_proj']
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
c = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
|
||||
rval, updates = theano.scan(_lstm_unit,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h, c, h_skip, hcnt],
|
||||
name=_p(prefix, 'layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
# return all hidden states h(t)
|
||||
return rval[0]
|
||||
|
||||
def param_init_lstm_pskip(options, params, prefix='lstm_pskip', hier_level=False):
|
||||
"""
|
||||
Init the LSTM-pskip parameter
|
||||
"""
|
||||
# same as vanilla lstm layer
|
||||
params = param_init_lstm(options, params, prefix=prefix, hier_level=hier_level)
|
||||
# weight for skip connection
|
||||
params[_p(prefix, 'W_skip')] = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
|
||||
# random value in (0,1)
|
||||
|
||||
return params
|
||||
|
||||
def lstm_pskip_layer(tparams, state_below, options, prefix='lstm_pskip', mask=None):
|
||||
'''
|
||||
lstm layer with soft parametric weighted skip connections
|
||||
'''
|
||||
nsteps = state_below.shape[0]
|
||||
n_skip = options['skip_steps']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
def _lstm_unit(m_, x_, h_, c_, h_skip, hcnt):
|
||||
'''
|
||||
lstm_soft_pskip unit at each time step
|
||||
:param m_: mask
|
||||
:param x_: x(t) input
|
||||
:param h_: h(t-1) recurrent hidden state
|
||||
:param c_: c(t-1) cell state
|
||||
:param h_skip: h(t-n_skip) for skip connection
|
||||
:param hcnt: mark current time stamp (to determine whether skip connection exists)
|
||||
:return: h(t), c(t), h_skip, hcnt
|
||||
'''
|
||||
skip_flag = tensor.eq(hcnt % n_skip, 0)
|
||||
preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
|
||||
preact += x_
|
||||
|
||||
# gates
|
||||
i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
|
||||
f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
|
||||
o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
|
||||
c = tensor.tanh(_slice(preact, 3, options['dim_proj']))
|
||||
|
||||
# cell state
|
||||
c = f * c_ + i * c
|
||||
c = m_[:, None] * c + (1. - m_)[:, None] * c_
|
||||
# new hidden state
|
||||
h = o * tensor.tanh(c) + h_skip * skip_flag * tparams[_p(prefix, 'W_skip')]
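# parametric variant: unlike the identity-skip unit above, the contribution of
# h(t - n_skip) is scaled by the learned scalar W_skip.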
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_
|
||||
# update h_skip
|
||||
h_skip = h_skip * (1-skip_flag) + h * skip_flag
|
||||
hcnt += 1 # advance the time-step counter (bug fixed)
|
||||
|
||||
return h, c, h_skip, hcnt
|
||||
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
dim_proj = options['dim_proj']
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
c = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
|
||||
rval, updates = theano.scan(_lstm_unit,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h, c, h_skip, hcnt],
|
||||
name=_p(prefix, 'layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
# return all hidden states h(t)
|
||||
return rval[0]
|
||||
|
||||
|
||||
# ===========================
|
||||
# ResNet-related layers
|
||||
# ResNet, ResNet with identity skip connections (full and soft),
|
||||
# ResNet with parametric skip connections(soft)
|
||||
# ===========================
|
||||
|
||||
def param_init_residual(options, params, prefix='residual'):
|
||||
"""
|
||||
Init the residual_network parameter:
|
||||
"""
|
||||
# weight for input x
|
||||
depth = options['unit_depth']
|
||||
Wx = dict()
|
||||
for idx in xrange(depth):
|
||||
Wx[idx] = norm_weight(options['dim_word'], options['dim_proj'])
|
||||
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
|
||||
params[_p(prefix, 'W')] = W
|
||||
b = numpy.zeros((depth * options['dim_proj'],))
|
||||
params[_p(prefix, 'b')] = b.astype(config.floatX)
|
||||
|
||||
# weight for identity connection
|
||||
'''
|
||||
w_res = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
|
||||
params[_p(prefix, 'w_res')] = w_res.astype(config.floatX)
|
||||
'''
|
||||
# weight for inter-states
|
||||
for idx in xrange(depth):
|
||||
U = ortho_weight(options['dim_proj'])
|
||||
params[_p(prefix, 'U'+str(idx+1))] = U
|
||||
|
||||
return params
|
||||
|
||||
def residual_layer(tparams, state_below, options, prefix='residual', mask=None):
|
||||
'''
|
||||
vanilla residual layer (recurrent depth adjustable)
|
||||
'''
|
||||
# here state_below in x_emb
|
||||
nsteps = state_below.shape[0]
|
||||
depth = options['unit_depth']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
def _resblock(m_, x_, h_):
|
||||
y = h_
|
||||
for idx in xrange(depth):
|
||||
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
|
||||
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
|
||||
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
|
||||
h = tensor.tanh(h_ + y)
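# residual update: h(t) = tanh(h(t-1) + F(x(t), h(t-1))), where F is the
# depth-step block computed by the loop above.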
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_
|
||||
return h
|
||||
|
||||
# state_below = W*x(t)+b (for all inter_state y)
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, options['dim_proj'])
|
||||
rval, updates = theano.scan(_resblock,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h],
|
||||
name=_p(prefix, 'layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval # bug fixed: scan has a single output here, so return rval itself, not rval[0]
|
||||
|
||||
def residual_full_skip_layer(tparams, state_below, options, prefix='residual_full_skip', mask=None):
|
||||
'''
|
||||
residual layer with full skip connections (direct link without weight)
|
||||
'''
|
||||
# here state_below in x_emb
|
||||
nsteps = state_below.shape[0]
|
||||
depth = options['unit_depth']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
# input mask, x(t), h(t-1), H(t-1)
|
||||
def _resblock(m_, x_, h_, H_):
|
||||
y = h_
|
||||
for idx in xrange(depth):
|
||||
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
|
||||
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
|
||||
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
|
||||
# new hidden state
|
||||
h = tensor.tanh(h_ + y + H_[:,:,0])
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_ # mask
|
||||
# update skip hidden matrix
|
||||
H = tensor.zeros_like(H_)
|
||||
H = tensor.set_subtensor(H[:,:,:-1], H_[:,:,1:])
|
||||
H = tensor.set_subtensor(H[:,:,-1], h)
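# H acts as a rolling buffer of the last n_skip hidden states: shift every
# slice one position to the left and write the newest state into the last
# slot, so H_[:,:,0] used above is always the state from n_skip steps back.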
|
||||
return h, H
|
||||
|
||||
# state_below = W*x(t)+b (for all inter_state y)
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
dim_proj = options['dim_proj']
|
||||
n_skip = options['skip_steps']
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
H = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj, n_skip)
|
||||
rval, updates = theano.scan(_resblock,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h, H],
|
||||
name=_p(prefix, '_layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval[0] # return all hidden states h
|
||||
|
||||
def residual_skip_layer(tparams, state_below, options, prefix='residual_skip', mask=None):
|
||||
'''
|
||||
residual layer with (soft) skip connections (direct link without weight)
|
||||
'''
|
||||
# here state_below in x_emb
|
||||
nsteps = state_below.shape[0]
|
||||
depth = options['unit_depth']
|
||||
dim_proj = options['dim_proj']
|
||||
n_skip = options['skip_steps']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
# input mask, x(t), h(t-1), h(skip), time_idx
|
||||
def _resblock(m_, x_, h_, h_skip, hcnt):
|
||||
y = h_
|
||||
skip_flag = theano.tensor.eq(hcnt%n_skip, 0)
|
||||
for idx in xrange(depth):
|
||||
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
|
||||
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
|
||||
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
|
||||
# new hidden state
|
||||
h = tensor.tanh(h_ + y + h_skip*skip_flag)
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_ # mask
|
||||
# update h(skip)
|
||||
h_skip = h_skip*(1-skip_flag) + h*skip_flag
|
||||
hcnt += 1
|
||||
return h, h_skip, hcnt
|
||||
|
||||
# state_below = W*x(t)+b (for all inter_state y)
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
# fixme: 0-dim init
|
||||
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
|
||||
rval, updates = theano.scan(_resblock,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h, h_skip, hcnt],
|
||||
name=_p(prefix, '_layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval[0] # return all hidden states h
|
||||
|
||||
def param_init_residual_pskip(options, params, prefix='residual_pskip'):
|
||||
"""
|
||||
Init the residual network with parametric weighted skip connections:
|
||||
"""
|
||||
# weight for input x
|
||||
depth = options['unit_depth']
|
||||
Wx = dict()
|
||||
for idx in xrange(depth):
|
||||
Wx[idx] = norm_weight(options['dim_word'], options['dim_proj'])
|
||||
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
|
||||
params[_p(prefix, 'W')] = W
|
||||
b = numpy.zeros((depth * options['dim_proj'],))
|
||||
params[_p(prefix, 'b')] = b.astype(config.floatX)
|
||||
|
||||
# weight for skip connection
|
||||
params[_p(prefix, 'W_skip')] = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
|
||||
# W_skip is a single scalar skip-connection weight, initialized uniformly in [0, 1)
|
||||
|
||||
# weight for inter-states
|
||||
for idx in xrange(depth):
|
||||
U = ortho_weight(options['dim_proj'])
|
||||
params[_p(prefix, 'U'+str(idx+1))] = U
|
||||
return params
|
||||
|
||||
def residual_pskip_layer(tparams, state_below, options, prefix='residual_pskip', mask=None):
|
||||
'''
|
||||
residual layer with soft parametric weighted skip connections
|
||||
'''
|
||||
# here state_below in x_emb
|
||||
nsteps = state_below.shape[0]
|
||||
depth = options['unit_depth']
|
||||
dim_proj = options['dim_proj']
|
||||
n_skip = options['skip_steps']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
# input mask, x(t), h(t-1), h(skip), time_idx
|
||||
def _resblock(m_, x_, h_, h_skip, hcnt):
|
||||
y = h_
|
||||
skip_flag = theano.tensor.eq(hcnt%n_skip, 0)
|
||||
for idx in xrange(depth):
|
||||
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
|
||||
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
|
||||
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
|
||||
# new hidden state
|
||||
h = tensor.tanh(h_ + y + h_skip * skip_flag * tparams[_p(prefix, 'W_skip')])
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_ # mask
|
||||
# update h(skip)
|
||||
h_skip = h_skip*(1-skip_flag) + h*skip_flag
|
||||
hcnt += 1
|
||||
return h, h_skip, hcnt
|
||||
|
||||
# state_below = W*x(t)+b (for all inter_state y)
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
# fixme: 0-dim init
|
||||
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
|
||||
rval, updates = theano.scan(_resblock,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h, h_skip, hcnt],
|
||||
name=_p(prefix, '_layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval[0] # return all hidden states h
|
||||
|
||||
|
||||
# ===========================
|
||||
# RNN-related layers
|
||||
# simple rnn and rnn with parametric skip connections (soft)
|
||||
# ===========================
|
||||
|
||||
def param_init_rnn(options, params, prefix='rnn', hier_level=False):
|
||||
'''
|
||||
Initialize parameters for simple rnn unit
|
||||
Support hierarchical architecture
|
||||
'''
|
||||
if hier_level:
|
||||
W = ortho_weight(options['dim_proj'])
|
||||
else:
|
||||
W = norm_weight(options['dim_word'], options['dim_proj'])
|
||||
params[_p(prefix, 'W')] = W
|
||||
U = ortho_weight(options['dim_proj'])
|
||||
params[_p(prefix, 'U')] = U
|
||||
b = numpy.zeros((options['dim_proj']))
|
||||
params[_p(prefix, 'b')] = b.astype(config.floatX)
|
||||
|
||||
return params
|
||||
|
||||
def rnn_layer(tparams, state_below, options, prefix='rnn', mask=None):
|
||||
nsteps = state_below.shape[0]
|
||||
dim_proj = options['dim_proj']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
# input: mask, x(t), h(t-1)
|
||||
def _rnn_unit(m_, x_, h_):
|
||||
h = tensor.tanh(tensor.dot(x_, tparams[_p(prefix, 'W')]) +
|
||||
tensor.dot(h_, tparams[_p(prefix, 'U')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
h = m_[:, None] * h + (1.-m_)[:, None] * h_ # mask
|
||||
return h
|
||||
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
rval, updates = theano.scan(_rnn_unit,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h],
|
||||
name=_p(prefix, 'layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval
|
||||
|
||||
def param_init_rnn_pskip(options, params, prefix='rnn_pskip', hier_level=False):
|
||||
'''
|
||||
Initialize parameters for simple-rnn unit with parametric soft skip connections
|
||||
'''
|
||||
# weight for vanilla simple-rnn
|
||||
params = param_init_rnn(options, params, prefix=prefix, hier_level=hier_level)
|
||||
# weight for skip connection
|
||||
params[_p(prefix, 'W_skip')] = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
|
||||
|
||||
return params
|
||||
|
||||
def rnn_pskip_layer(tparams, state_below, options, prefix='rnn_pskip', mask=None):
|
||||
nsteps = state_below.shape[0]
|
||||
n_skip = options['skip_steps']
|
||||
dim_proj = options['dim_proj']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
assert mask is not None
|
||||
|
||||
def _rnn_pskip(m_, x_, h_, h_skip, hcnt):
|
||||
skip_flag = tensor.eq(hcnt % n_skip, 0)
|
||||
h = tensor.tanh(tensor.dot(x_, tparams[_p(prefix, 'W')]) +
|
||||
tensor.dot(h_, tparams[_p(prefix, 'U')]) +
|
||||
tparams[_p(prefix, 'b')] +
|
||||
skip_flag * h_skip * tparams[_p(prefix, 'W_skip')])
|
||||
h = m_[:, None] * h + (1.-m_)[:, None] * h_
|
||||
h_skip = skip_flag * h + (1-skip_flag) * h_skip
|
||||
hcnt += 1
|
||||
return h, h_skip, hcnt
|
||||
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
|
||||
rval, updates = theano.scan(_rnn_pskip,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h, h_skip, hcnt],
|
||||
name=_p(prefix, 'layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval[0]
|
||||
|
||||
|
||||
# ===========================
|
||||
# ResNet modifications
|
||||
# ===========================
|
||||
|
||||
def residual_pskip_mod_layer(tparams, state_below, options, prefix='residual_pskip_mod', mask=None):
|
||||
'''
|
||||
residual layer with soft parametric weighted skip connections
|
||||
modifications on original pskip model
|
||||
'''
|
||||
# here state_below in x_emb
|
||||
nsteps = state_below.shape[0]
|
||||
depth = options['unit_depth']
|
||||
dim_proj = options['dim_proj']
|
||||
n_skip = options['skip_steps']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
assert mask is not None
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
# input mask, x(t), h(t-1), h(skip), time_idx
|
||||
def _resblock_mod(m_, x_, h_, h_skip, hcnt):
|
||||
y = h_
|
||||
skip_flag = theano.tensor.eq(hcnt % n_skip, 0)
|
||||
for idx in xrange(depth):
|
||||
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
|
||||
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
|
||||
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
|
||||
# modification: skip connection after activation
|
||||
h = tensor.tanh(h_ + y) + h_skip * skip_flag * tparams[_p(prefix, 'W_skip')]
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_
|
||||
h_skip = h_skip*(1-skip_flag) + h*skip_flag
|
||||
hcnt += 1
|
||||
return h, h_skip, hcnt
|
||||
|
||||
# state_below = W*x(t)+b (for all inter_state y)
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
|
||||
# fixme: 0-dim init
|
||||
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
|
||||
rval, updates = theano.scan(_resblock_mod,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[h, h_skip, hcnt],
|
||||
name=_p(prefix, '_layers'),
|
||||
n_steps=nsteps,
|
||||
truncate_gradient=options['truncate_grad'])
|
||||
return rval[0] # return all hidden states h
|
||||
|
||||
def param_init_presidual(options, params, prefix='presidual', nin=None, dim=None):
|
||||
"""
|
||||
Init the parametric_residual_network parameter:
|
||||
"""
|
||||
if nin is None:
|
||||
nin = options['dim_word']
|
||||
if dim is None:
|
||||
dim = options['dim_proj']
|
||||
|
||||
# weight for input x
|
||||
depth = options['unit_depth']
|
||||
Wx = dict()
|
||||
for idx in xrange(depth):
|
||||
Wx[idx] = norm_weight(nin, dim)
|
||||
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
|
||||
params[_p(prefix, 'W')] = W
|
||||
b = numpy.zeros((depth * dim,))
|
||||
params[_p(prefix, 'b')] = b.astype(config.floatX)
|
||||
w_res = rand_weight(dim, 1)
|
||||
params[_p(prefix, 'w_res')] = w_res.astype(config.floatX)
|
||||
b_res = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
|
||||
params[_p(prefix, 'b_res')] = b_res
|
||||
|
||||
# weight for inter-states
|
||||
for idx in xrange(depth):
|
||||
U = ortho_weight(dim)
|
||||
params[_p(prefix, 'U'+str(idx+1))] = U
|
||||
return params
|
||||
|
||||
def presidual_layer(tparams, state_below, options, prefix='presidual', mask=None,
|
||||
one_step=False, init_state=None, **kwargs):
|
||||
'''
|
||||
parametric residual layer (recurrent depth adjustable)
|
||||
parametric vector on identity connection
|
||||
'''
|
||||
if one_step:
|
||||
assert init_state, 'previous state must be provided'
|
||||
|
||||
# here state_below in x_emb
|
||||
nsteps = state_below.shape[0]
|
||||
depth = options['unit_depth']
|
||||
dim = options['dim_proj']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
if mask is None:
|
||||
mask = tensor.alloc(1., state_below.shape[0], 1)
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
# input mask, x(t), h(t-1)
|
||||
def _presblock(m_, x_, h_):
|
||||
y = h_
|
||||
for idx in xrange(depth):
|
||||
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
|
||||
y = tensor.nnet.sigmoid(_slice(x_, idx, dim) + hy)
|
||||
# p = 2*sigmoid(wh(t-1)+b)-1
|
||||
p = 2 * tensor.nnet.sigmoid(tensor.dot(h_, tparams[_p(prefix, 'w_res')]) + tparams[_p(prefix, 'b_res')]) - 1
|
||||
p_vec = p.reshape(p.shape[0], 1)
|
||||
# h(t) = tanh(ph(t-1)+y)
|
||||
h = tensor.tanh(tensor.dot(tensor.nlinalg.alloc_diag(p_vec), h_) + y)
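# p lies in (-1, 1); alloc_diag builds a diagonal matrix from the per-sample
# gates so that each row of h(t-1) is scaled by its own scalar weight before
# the residual term y is added.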
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_
|
||||
return h
|
||||
|
||||
# state_below = W*x(t)+b (for all inter_state y)
|
||||
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
|
||||
tparams[_p(prefix, 'b')])
|
||||
|
||||
if init_state is None:
|
||||
init_state = tensor.alloc(numpy_floatX(0.), n_samples, dim)
|
||||
|
||||
if one_step:
|
||||
rval = _presblock(mask, state_below, init_state)
|
||||
else:
|
||||
rval, updates = theano.scan(_presblock,
|
||||
sequences=[mask, state_below],
|
||||
outputs_info=[init_state],
|
||||
name=_p(prefix, 'layers'),
|
||||
n_steps=nsteps)
|
||||
# rval = [rval] # note: for consistency among model layers
|
||||
return rval
|
||||
|
||||
def param_init_pxresidual(options, params, prefix='pxresidual', nin=None, dim=None):
|
||||
"""
|
||||
Init the parametric (with respect to input) residual network parameter:
|
||||
"""
|
||||
if nin is None:
|
||||
nin = options['dim_word']
|
||||
if dim is None:
|
||||
dim = options['dim_proj']
|
||||
|
||||
# weight for input x
|
||||
depth = options['unit_depth']
|
||||
Wx = dict()
|
||||
for idx in xrange(depth):
|
||||
Wx[idx] = norm_weight(nin, dim)
|
||||
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
|
||||
params[_p(prefix, 'W')] = W
|
||||
b = numpy.zeros((depth * dim,))
|
||||
params[_p(prefix, 'b')] = b.astype(config.floatX)
|
||||
w_res = rand_weight(dim, 1)
|
||||
params[_p(prefix, 'w_res')] = w_res.astype(config.floatX)
|
||||
u_res = rand_weight(nin, 1)
|
||||
params[_p(prefix, 'u_res')] = u_res.astype(config.floatX)
|
||||
b_res = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
|
||||
params[_p(prefix, 'b_res')] = b_res
|
||||
|
||||
# weight for inter-states
|
||||
for idx in xrange(depth):
|
||||
U = ortho_weight(dim)
|
||||
params[_p(prefix, 'U'+str(idx+1))] = U
|
||||
return params
|
||||
|
||||
def pxresidual_layer(tparams, state_below, options, prefix='pxresidual', mask=None,
|
||||
one_step=False, init_state=None, **kwargs):
|
||||
'''
|
||||
parametric (with respect to input) residual layer (recurrent depth adjustable)
|
||||
parametric vector on identity connection
|
||||
'''
|
||||
if one_step:
|
||||
assert init_state, 'previous state must be provided'
|
||||
|
||||
# here state_below in x_emb
|
||||
nsteps = state_below.shape[0]
|
||||
depth = options['unit_depth']
|
||||
dim = options['dim_proj']
|
||||
if state_below.ndim == 3:
|
||||
n_samples = state_below.shape[1]
|
||||
else:
|
||||
n_samples = 1
|
||||
|
||||
if mask is None:
|
||||
mask = tensor.alloc(1., state_below.shape[0], 1)
|
||||
|
||||
def _slice(_x, n, dim):
|
||||
if _x.ndim == 3:
|
||||
return _x[:, :, n * dim:(n + 1) * dim]
|
||||
return _x[:, n * dim:(n + 1) * dim]
|
||||
|
||||
# input mask, x(t), h(t-1)
|
||||
def _presblock(m_, x_, px_, h_):
|
||||
y = h_
|
||||
for idx in xrange(depth):
|
||||
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
|
||||
y = tensor.nnet.sigmoid(_slice(x_, idx, dim) + hy)
|
||||
# p = 2 * sigmoid(wh(t-1) + (ux(t)+b)) - 1
|
||||
p = 2 * tensor.nnet.sigmoid(tensor.dot(h_, tparams[_p(prefix, 'w_res')]) + px_) - 1
|
||||
p_vec = p.reshape(p.shape[0], 1)
|
||||
# h(t) = tanh(p*h(t-1) + y)
|
||||
h = tensor.tanh(tensor.dot(tensor.nlinalg.alloc_diag(p_vec), h_) + y)
|
||||
h = m_[:, None] * h + (1. - m_)[:, None] * h_
|
||||
return h
|
||||
|
||||
# state_below_x = W*x(t)+b (for all inter_state y)
|
||||
state_below_x = tensor.dot(state_below, tparams[_p(prefix, 'W')]) \
|
||||
+ tparams[_p(prefix, 'b')]
|
||||
# state_below_px = u_res*x(t)+b_res (for parametric weight on identity connection)
|
||||
state_below_px = tensor.dot(state_below, tparams[_p(prefix, 'u_res')]) \
|
||||
+ tparams[_p(prefix, 'b_res')]
|
||||
|
||||
if init_state is None:
|
||||
init_state = tensor.alloc(numpy_floatX(0.), n_samples, dim)
|
||||
|
||||
if one_step:
|
||||
rval = _presblock(mask, state_below_x, state_below_px, init_state)
|
||||
else:
|
||||
rval, updates = theano.scan(_presblock,
|
||||
sequences=[mask, state_below_x, state_below_px],
|
||||
outputs_info=[init_state],
|
||||
name=_p(prefix, 'layers'),
|
||||
n_steps=nsteps)
|
||||
# rval = [rval] # note: for consistency among model layers
|
||||
return rval
|
|
@ -0,0 +1,17 @@
|
|||
import os
|
||||
def MapDeviceIds(comm):
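# Presumably: every MPI rank dumps its view of GPU usage to 'record<rank>',
# rank 0 then runs AllocateGPU.py to assign one card per rank (one id per
# line in 'DirtyRecord'), and each rank reads back the line matching its rank.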
|
||||
rank = comm.Get_rank()
|
||||
num_machine = comm.Get_size()
|
||||
os.system('python GPU_Usage.py ' + str(rank) + ' > record' + str(rank))
|
||||
comm.Barrier()
|
||||
if rank == 0:
|
||||
os.system('python AllocateGPU.py ' + str(num_machine) + ' > DirtyRecord')
|
||||
comm.Barrier()
|
||||
cardid = str(0)
|
||||
with open('DirtyRecord', 'r') as f:
|
||||
for idx, line in enumerate(f):
|
||||
if idx == rank:
|
||||
cardid = line.strip()
|
||||
break
|
||||
|
||||
return cardid
|
|
@ -0,0 +1,379 @@
|
|||
"""
|
||||
model for classification task
|
||||
supports simple-rnn, lstm, hierarchical lstm
|
||||
supports lstm with identity skip-connections(soft), parametric skip-connections(soft)
|
||||
supports resnet, resnet with identity skip-connections(hard and soft), parametric skip connections(soft)
|
||||
supports hybrid structure (lstm+resnet)
|
||||
supports dropout on non-recurrent layers, gradient clipping, L2-regularization
|
||||
"""
|
||||
__author__ = 'v-yirwan'
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
import numpy
|
||||
import cPickle as pkl
|
||||
import theano
|
||||
import theano.tensor as tensor
|
||||
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
|
||||
|
||||
from Layers import get_layer
|
||||
from Data import *
|
||||
from Util import *
|
||||
|
||||
# Set the random number generators' seeds for consistency
|
||||
SEED = 123
|
||||
numpy.random.seed(SEED)
|
||||
|
||||
def _p(pp, name):
|
||||
return '%s_%s' % (pp, name)
|
||||
|
||||
def init_params(options):
|
||||
"""
|
||||
Global (not LSTM) parameter. For the embedding and the classifier.
|
||||
"""
|
||||
params = OrderedDict()
|
||||
# embedding
|
||||
if options['dataset'] != 'mnist':
|
||||
randn = rand_weight(options['n_words'], options['dim_word'])
|
||||
params['Wemb'] = randn.astype(config.floatX)
|
||||
|
||||
# encoder layer
|
||||
params = get_layer(options['encoder'])[0](options, params,
|
||||
prefix=options['encoder'])
|
||||
|
||||
# classifier
|
||||
if options['lastHiddenLayer'] is not None:
|
||||
params['U'] = 0.01 * numpy.random.randn(options['lastHiddenLayer'],
|
||||
options['ydim']).astype(config.floatX)
|
||||
params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX)
|
||||
|
||||
params['ToLastHidden_W'] = 0.01 * numpy.random.randn(options['dim_proj'],
|
||||
options['lastHiddenLayer']).astype(config.floatX)
|
||||
params['ToLastHidden_b'] = numpy.zeros((options['lastHiddenLayer'],)).astype(config.floatX)
|
||||
|
||||
|
||||
else:
|
||||
params['U'] = 0.01 * numpy.random.randn(options['dim_proj'],
|
||||
options['ydim']).astype(config.floatX)
|
||||
params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX)
|
||||
|
||||
return params
|
||||
|
||||
def load_params(path, params):
|
||||
failer=0
|
||||
pp = numpy.load(path)
|
||||
for kk, vv in params.items():
|
||||
if kk not in pp:
|
||||
failer += 1
|
||||
print 'Warning: %s is not in the archive' % kk
|
||||
continue
|
||||
params[kk] = pp[kk]
|
||||
print failer, ' failed out of ', len(params)
|
||||
return params
|
||||
|
||||
def init_tparams(params):
|
||||
tparams = OrderedDict()
|
||||
for kk, pp in params.items():
|
||||
tparams[kk] = theano.shared(params[kk], name=kk)
|
||||
return tparams
|
||||
|
||||
def encoder_word_layer(tparams, state_below, options, mask=None):
|
||||
'''
|
||||
word(bottom)-level encoder for hierarchical architecture
|
||||
'''
|
||||
def _encode(x_sub, mask_sub, proj_sub):
|
||||
n_timesteps = x_sub.shape[0]
|
||||
n_samples = x_sub.shape[1]
|
||||
emb_sub = tparams['Wemb'][x_sub.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
|
||||
proj_sub = get_layer(options['encoder'])[1](tparams, emb_sub, options,
|
||||
prefix=options['encoder']+'_word',
|
||||
mask=mask_sub)
|
||||
return proj_sub[-1]
|
||||
proj_sub = tensor.alloc(numpy_floatX(0.), state_below.shape[2], options['dim_proj'])
|
||||
rval, update = theano.scan(_encode,
|
||||
sequences=[state_below, mask],
|
||||
outputs_info=[proj_sub],
|
||||
name='word_encoder_layer',
|
||||
n_steps=state_below.shape[0])
|
||||
return rval
|
||||
|
||||
def build_model(tparams, options):
|
||||
trng = RandomStreams(SEED)
|
||||
|
||||
# Used for dropout.
|
||||
use_noise = theano.shared(numpy_floatX(0.))
|
||||
|
||||
if options['dataset'] == 'mnist':
|
||||
print 'Using mnist dataset with single number input'
|
||||
x = tensor.matrix('x', dtype='float32')
|
||||
else:
|
||||
print 'Using text dataset with embedding input'
|
||||
x = tensor.matrix('x', dtype='int64')
|
||||
mask = tensor.matrix('mask', dtype=config.floatX)
|
||||
y = tensor.vector('y', dtype='int64')
|
||||
|
||||
n_timesteps = x.shape[0]
|
||||
n_samples = x.shape[1]
|
||||
|
||||
# input word embedding
|
||||
if options['dataset'] == 'mnist':
|
||||
emb = x.reshape([n_timesteps, n_samples, options['dim_word']])
|
||||
else:
|
||||
emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
|
||||
|
||||
# dropout on embedding
|
||||
if options['dropout_input'] > 0:
|
||||
print 'Applying drop-out on input embedding (dropout_input:', options['dropout_input'], ')'
|
||||
emb = dropout_layer(emb, options['dropout_input'], use_noise, trng)
|
||||
|
||||
# encoder information
|
||||
print 'Using', options['encoder'], 'unit'
|
||||
if options['truncate_grad'] is not None and options['truncate_grad'] > 0:
|
||||
print 'Using gradient truncation to', options['truncate_grad'], 'steps'
|
||||
else:
|
||||
options['truncate_grad'] = -1
|
||||
|
||||
# encoding layer
|
||||
proj = get_layer(options['encoder'])[1](tparams, emb, options,
|
||||
prefix=options['encoder'],
|
||||
mask=mask)
|
||||
|
||||
# pooling
|
||||
if options['mean_pooling']:
|
||||
print 'Using mean_pooling'
|
||||
proj = (proj * mask[:, :, None]).sum(axis=0) # mean pooling
|
||||
proj = proj / mask.sum(axis=0)[:, None]
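# mask-weighted mean over time: sum the hidden states at valid (unpadded)
# positions and divide by each sequence's true length.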
|
||||
else:
|
||||
print 'Using last hidden state'
|
||||
proj = proj[-1] # last hidden state
|
||||
|
||||
sys.stdout.flush()
|
||||
|
||||
# dropout on hidden states
|
||||
if options['lastHiddenLayer'] is not None:
|
||||
lastH = tensor.dot(proj, tparams['ToLastHidden_W']) + tparams['ToLastHidden_b']
|
||||
lastH = tensor.nnet.sigmoid(lastH)
|
||||
if options['dropout_output'] > 0:
|
||||
lastH = dropout_layer(lastH, options['dropout_output'], use_noise, trng)
|
||||
pred = tensor.nnet.softmax(tensor.dot(lastH, tparams['U']) + tparams['b'])
|
||||
else:
|
||||
if options['dropout_output'] > 0:
|
||||
print 'Applying drop-out on hidden states (dropout_output:', options['dropout_output'], ")"
|
||||
proj = dropout_layer(proj, options['dropout_output'], use_noise, trng)
|
||||
|
||||
pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])
|
||||
|
||||
# for training
|
||||
f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
|
||||
f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred') # sample by argmax
|
||||
|
||||
off = 1e-8
|
||||
if pred.dtype == 'float16':
|
||||
off = 1e-6
|
||||
nlls = -tensor.log(pred[tensor.arange(n_samples), y] + off)
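# per-sample negative log-likelihood of the gold label; `off` keeps the log
# away from log(0) when a predicted probability underflows.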
|
||||
|
||||
return use_noise, x, mask, y, f_pred_prob, f_pred, nlls
|
||||
|
||||
class Model:
|
||||
def __init__(self,
|
||||
dim_word=500, # word embedding dimension
|
||||
dim_proj=1024, # LSTM number of hidden units
|
||||
patience=10, # Number of epoch to wait before early stop if no progress
|
||||
max_epochs=5000, # The maximum number of epoch to run
|
||||
decay_c=-1., # Weight decay (for L2-regularization)
|
||||
clip_c=-1., # gradient clipping threshold
|
||||
lrate=1., # Learning rate for sgd (not used for adadelta and rmsprop)
|
||||
n_words=10000, # Vocabulary size
|
||||
optimizer='adadelta',
|
||||
encoder='lstm', # name of encoder unit, refer to 'layers'
|
||||
encoder2=None, # only used in hybrid mode
|
||||
hierarchical=False, # whether use hierarchical structure
|
||||
hier_len=None, # length of bottom (word-level) encoder
|
||||
hybrid=False, # whether use hybrid model
|
||||
mean_pooling=False, # use last hidden state if false
|
||||
unit_depth=-1, # recurrent depth of residual unit
|
||||
skip_steps=-1, # skip connection length (h(t) -> h(t+skip_steps))
|
||||
skip_steps2=-1, # only used in hybrid mode
|
||||
truncate_grad=-1, # the number of steps to use in truncated BPTT; set to -1 to disable
|
||||
saveto='model.npz', # The best model will be saved there
|
||||
dispFreq=50, # Display the training progress after this number of updates
|
||||
validFreq=300, # Compute the validation error after this number of updates
|
||||
newDumpFreq=5000000, # Dump model into a new file after this number of updates
|
||||
maxlen=None, # Sequences longer than this get ignored
|
||||
batch_size=16, # The batch size during training.
|
||||
batch_len_threshold=None, # use dynamic batch size if sequence lengths exceed this threshold
|
||||
valid_batch_size=16, # The batch size used for validation/test set.
|
||||
dataset='text', # dataset type
|
||||
corpus='imdb.pkl', # path to load training data
|
||||
start_iter=0,
|
||||
start_epoch=0,
|
||||
noise_std=0.,
|
||||
lastHiddenLayer=None,
|
||||
dropout_output=None, # Dropout on output hidden states (before softmax layer)
|
||||
dropout_input=None, # Dropout on input embeddings
|
||||
reload_options=None, # Path to a saved model options we want to start from
|
||||
reload_model=None, # Path to a saved model we want to start from.
|
||||
embedding=None, # Path to the word embedding file (otherwise randomized)
|
||||
warm_LM=None,
|
||||
test_size=None, # If >0, we keep only this number of test examples.
|
||||
monitor_grad=False, # Print gradient norm to log file at each iteration if set True
|
||||
logFile='log.txt' # Path to log file
|
||||
):
|
||||
|
||||
# Model options
|
||||
self.model_options = locals().copy()
|
||||
self.model_options['self'] = None
|
||||
|
||||
# log files
|
||||
self.F_log = open(logFile, "a")
|
||||
|
||||
if start_iter == 0:
|
||||
self.F_log.write("model options:\n")
|
||||
for kk, vv in self.model_options.iteritems():
|
||||
self.F_log.write("\t"+kk+":\t"+str(vv)+"\n")
|
||||
self.F_log.write("-----------------------------------------\n")
|
||||
|
||||
pkl.dump(self.model_options, open('%s.pkl' % saveto, 'wb'))
|
||||
|
||||
print 'Loading data...',
|
||||
if dataset == 'mnist':
|
||||
self.trainSet, self.validSet, self.testSet = load_mnist(path=corpus,
|
||||
fixed_permute=True,
|
||||
rand_permute=False)
|
||||
else:
|
||||
self.trainSet, self.validSet, self.testSet = load_data(path=corpus,
|
||||
n_words=n_words,
|
||||
maxlen=maxlen,
|
||||
sort_by_len=True,
|
||||
fixed_valid=True)
|
||||
print 'Done! '
|
||||
print 'Training', len(self.trainSet[0]), 'Valid', len(self.validSet[0]), 'Test', len(self.testSet[0])
|
||||
sys.stdout.flush()
|
||||
|
||||
if test_size > 0:
|
||||
test_size = min(test_size, len(self.testSet[0]))
|
||||
idx = numpy.arange(len(self.testSet[0]))
|
||||
numpy.random.shuffle(idx)
|
||||
idx = idx[:test_size]
|
||||
self.testSet = ([self.testSet[0][n] for n in idx], [self.testSet[1][n] for n in idx])
|
||||
|
||||
# number of classes
|
||||
ydim = numpy.max(self.trainSet[1]) + 1
|
||||
self.model_options['ydim'] = ydim
|
||||
|
||||
print 'Initializing model parameters...',
|
||||
params = init_params(self.model_options)
|
||||
print 'Done'
|
||||
print 'Model size:', self.model_options['dim_word'], '*', self.model_options['dim_proj']
|
||||
sys.stdout.flush()
|
||||
|
||||
# load pre-trained word embedding
|
||||
if embedding is not None and os.path.exists(embedding):
|
||||
Wemb = numpy.array(numpy.load(open(embedding, "rb")))
|
||||
if Wemb.shape[0] == self.model_options['n_words'] and \
|
||||
Wemb.shape[1] == self.model_options['dim_word']:
|
||||
print 'Using pre-trained word embedding'
|
||||
params['Wemb'] = Wemb.astype(numpy.float32) # bug fixed
|
||||
print 'vocab size', params['Wemb'].shape[0], ', dim', params['Wemb'].shape[1]
|
||||
|
||||
# reload options
|
||||
if reload_options is not None and os.path.exists(reload_options):
|
||||
print "Reloading model options...",
|
||||
with open(reload_options, 'rb') as f:
|
||||
self.model_options = pkl.load(f)
|
||||
print "Done"
|
||||
|
||||
# reload parameters
|
||||
self.start_iter = 0
|
||||
self.start_epoch = 0
|
||||
self.history_errs = []
|
||||
if reload_model is not None and os.path.exists(reload_model): # bug fixed
|
||||
print 'Reloading model parameters...',
|
||||
load_params(reload_model, params)
|
||||
self.start_iter = start_iter
|
||||
self.start_epoch = start_epoch
|
||||
#self.history_errs = list(numpy.load(self.model_options['reload_model'])['history_errs'])
|
||||
print 'Done'
|
||||
sys.stdout.flush()
|
||||
|
||||
if warm_LM is not None:
|
||||
print 'Steal from language model'
|
||||
warmLM_ = numpy.load(warm_LM)
|
||||
assert params['lstm_W'].shape == warmLM_['encoder_W'].shape
|
||||
assert params['lstm_b'].shape == warmLM_['encoder_b'].shape
|
||||
assert params['lstm_U'].shape == warmLM_['encoder_U'].shape
|
||||
assert params['Wemb'].shape == warmLM_['Wemb'].shape
|
||||
params['lstm_W'] = warmLM_['encoder_W']
|
||||
params['lstm_b'] = warmLM_['encoder_b']
|
||||
params['lstm_U'] = warmLM_['encoder_U']
|
||||
params['Wemb'] = warmLM_['Wemb']
|
||||
|
||||
self.tparams = init_tparams(params)
|
||||
|
||||
# build model
|
||||
mask_proj = None
|
||||
# vanilla structure
|
||||
def GetNll(self):
|
||||
print 'Using vanilla structure'
|
||||
self.use_noise, x, mask, y, \
|
||||
self.f_pred_prob, self.f_pred, nlls = \
|
||||
build_model(self.tparams, self.model_options)
|
||||
#inps = [x, mask, y]
|
||||
return x, mask, y, nlls
|
||||
|
||||
def get_accu(self, data, iterator, hier_len=None):
|
||||
"""
|
||||
Just compute the accuracy
|
||||
modified to support hierarchical mode
|
||||
"""
|
||||
valid_acc = 0
|
||||
for _, valid_index in iterator:
|
||||
if hier_len is not None:
|
||||
x, mask, mask_proj, y = prepare_data_hier([data[0][t] for t in valid_index],
|
||||
numpy.array(data[1])[valid_index],
|
||||
hier_len=hier_len)
|
||||
preds = self.f_pred(x, mask, mask_proj)
|
||||
else:
|
||||
x, mask, y = prepare_data([data[0][t] for t in valid_index],
|
||||
numpy.array(data[1])[valid_index],
|
||||
maxlen=None,
|
||||
dataset=self.model_options['dataset'])
|
||||
preds = self.f_pred(x, mask) # result obtained by argmax
|
||||
valid_acc += (preds == y).sum() # note that batch is sorted in hier-mode
|
||||
valid_acc = numpy_floatX(valid_acc) / numpy_floatX(len(data[0])) # accuracy
|
||||
|
||||
return valid_acc
|
||||
|
||||
def save_model(self, savefile, best_p=None):
|
||||
if best_p is not None: # save the best model so far
|
||||
params = best_p
|
||||
else:
|
||||
params = unzip(self.tparams)
|
||||
numpy.savez(savefile, history_errs=self.history_errs, **params)
|
||||
pkl.dump(self.model_options, open('%s.pkl' % self.model_options['saveto'], 'wb'))
|
||||
|
||||
def valid(self):
|
||||
train_acc = self.get_accu(self.trainSet, self.kf_train)
|
||||
#hier_len=self.model_options['hier_len'])
|
||||
valid_acc = self.get_accu(self.validSet, self.kf_valid)
|
||||
#hier_len=self.model_options['hier_len'])
|
||||
test_acc = self.get_accu(self.testSet, self.kf_test)
|
||||
#hier_len=self.model_options['hier_len'])
|
||||
return train_acc, valid_acc, test_acc
|
||||
|
||||
def evaluate(self, *dataset):
|
||||
acc = []
|
||||
for k in xrange(len(dataset)):
|
||||
data = dataset[k]
|
||||
idx = get_minibatches_idx(len(data[0]), 16)
|
||||
acc.append(self.get_accu(data, idx))
|
||||
return acc
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,257 @@
|
|||
import numpy
|
||||
from collections import OrderedDict
|
||||
|
||||
import theano
|
||||
import theano.tensor as tensor
|
||||
from theano import config
|
||||
|
||||
# ==========================
|
||||
# some operations with hyper-parameters
|
||||
# supports non-recurrent layer dropout, L2-regularization, gradient clipping
|
||||
# ==========================
|
||||
|
||||
def l2_regularization(tparams, cost, decay_c):
|
||||
decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
|
||||
weight_decay = 0.
|
||||
weight_decay += (tparams['U'] ** 2).sum()
|
||||
weight_decay *= decay_c
|
||||
cost += weight_decay
|
||||
return cost
|
||||
|
||||
def grad_clipping(grads, clip_c):
|
||||
g2 = 0.
|
||||
for g in grads:
|
||||
g2 += (g**2).sum()
|
||||
new_grads = []
|
||||
for g in grads:
|
||||
new_grads.append(tensor.switch(g2 > (clip_c**2), g/tensor.sqrt(g2) * clip_c, g))
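# global-norm clipping: if the total gradient norm exceeds clip_c, every
# gradient is rescaled by clip_c / ||g||; otherwise it is left unchanged.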
|
||||
grads = new_grads
|
||||
return grads
|
||||
|
||||
def dropout_layer(state_before, dropout, use_noise, trng):
|
||||
proj = tensor.switch(use_noise,
|
||||
(state_before *
|
||||
trng.binomial(state_before.shape,
|
||||
p=(1-dropout), n=1,
|
||||
dtype=state_before.dtype)),
|
||||
state_before * (1-dropout))
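# standard (non-inverted) dropout: with use_noise=1 units are zeroed with
# probability `dropout`; with use_noise=0 (evaluation) activations are scaled
# by (1 - dropout) to keep the same expected magnitude.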
|
||||
return proj
|
||||
|
||||
# ==========================
|
||||
# optimizers
|
||||
# supports sgd, adadelta and rmsprop
|
||||
# only adadelta supports hierarchical structure
|
||||
# ==========================
|
||||
|
||||
def sgd(lr, tparams, grads, x, mask, y, cost):
|
||||
""" Stochastic Gradient Descent
|
||||
|
||||
:note: A more complicated version of sgd than needed. This is
|
||||
done like that for adadelta and rmsprop.
|
||||
|
||||
"""
|
||||
# New set of shared variable that will contain the gradient
|
||||
# for a mini-batch.
|
||||
gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
|
||||
for k, p in tparams.items()]
|
||||
gsup = [(gs, g) for gs, g in zip(gshared, grads)]
|
||||
|
||||
# Function that computes gradients for a mini-batch, but does not
|
||||
# update the weights.
|
||||
f_grad_shared = theano.function([x, mask, y], cost, updates=gsup,
|
||||
name='sgd_f_grad_shared')
|
||||
|
||||
pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)]
|
||||
|
||||
# Function that updates the weights from the previously computed
|
||||
# gradient.
|
||||
f_update = theano.function([lr], [], updates=pup,
|
||||
name='sgd_f_update')
|
||||
|
||||
return f_grad_shared, f_update
|
||||
|
||||
def adadelta(lr, tparams, grads, x, mask, y, cost, mask_hier=None):
|
||||
"""
|
||||
An adaptive learning rate optimizer
|
||||
# modified to support hierarchical mode
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lr : Theano SharedVariable
|
||||
Initial learning rate
|
||||
tparams: Theano SharedVariable
|
||||
Model parameters
|
||||
grads: Theano variable
|
||||
Gradients of cost w.r.t. the parameters
|
||||
x: Theano variable
|
||||
Model inputs
|
||||
mask: Theano variable
|
||||
Sequence mask
|
||||
y: Theano variable
|
||||
Targets
|
||||
cost: Theano variable
|
||||
Objective function to minimize
|
||||
|
||||
Notes
|
||||
-----
|
||||
For more information, see [ADADELTA]_.
|
||||
|
||||
.. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
|
||||
Rate Method*, arXiv:1212.5701.
|
||||
"""
|
||||
|
||||
zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
|
||||
name='%s_grad' % k)
|
||||
for k, p in tparams.items()]
|
||||
running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
|
||||
name='%s_rup2' % k)
|
||||
for k, p in tparams.items()]
|
||||
running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
|
||||
name='%s_rgrad2' % k)
|
||||
for k, p in tparams.items()]
|
||||
|
||||
zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
|
||||
rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
|
||||
for rg2, g in zip(running_grads2, grads)]
|
||||
if mask_hier is not None:
|
||||
f_grad_shared = theano.function([x, mask, mask_hier, y], cost, updates=zgup + rg2up,
|
||||
name='adadelta_f_grad_shared')
|
||||
else:
|
||||
f_grad_shared = theano.function([x, mask, y], cost, updates=zgup + rg2up,
|
||||
name='adadelta_f_grad_shared')
|
||||
|
||||
updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
|
||||
for zg, ru2, rg2 in zip(zipped_grads,
|
||||
running_up2,
|
||||
running_grads2)]
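# ADADELTA update direction: delta = -(RMS of previous updates / RMS of
# gradients) * g, with a small epsilon (1e-6) for numerical stability.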
|
||||
ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
|
||||
for ru2, ud in zip(running_up2, updir)]
|
||||
param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]
|
||||
|
||||
f_update = theano.function([lr], [], updates=ru2up + param_up,
|
||||
on_unused_input='ignore',
|
||||
name='adadelta_f_update')
|
||||
|
||||
return f_grad_shared, f_update
|
||||
|
||||
def rmsprop(lr, tparams, grads, x, mask, y, cost):
|
||||
"""
|
||||
A variant of SGD that scales the step size by a running average of the
|
||||
recent gradient magnitudes.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lr : Theano SharedVariable
|
||||
Initial learning rate
|
||||
tparams: Theano SharedVariable
|
||||
Model parameters
|
||||
grads: Theano variable
|
||||
Gradients of cost w.r.t. the parameters
|
||||
x: Theano variable
|
||||
Model inputs
|
||||
mask: Theano variable
|
||||
Sequence mask
|
||||
y: Theano variable
|
||||
Targets
|
||||
cost: Theano variable
|
||||
Objective function to minimize
|
||||
|
||||
Notes
|
||||
-----
|
||||
For more information, see [Hint2014]_.
|
||||
|
||||
.. [Hint2014] Geoff Hinton, *Neural Networks for Machine Learning*,
|
||||
lecture 6a,
|
||||
http://cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
|
||||
"""
|
||||
|
||||
zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
|
||||
name='%s_grad' % k)
|
||||
for k, p in tparams.items()]
|
||||
running_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
|
||||
name='%s_rgrad' % k)
|
||||
for k, p in tparams.items()]
|
||||
running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
|
||||
name='%s_rgrad2' % k)
|
||||
for k, p in tparams.items()]
|
||||
|
||||
zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
|
||||
rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
|
||||
rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
|
||||
for rg2, g in zip(running_grads2, grads)]
|
||||
|
||||
f_grad_shared = theano.function([x, mask, y], cost,
|
||||
updates=zgup + rgup + rg2up,
|
||||
name='rmsprop_f_grad_shared')
|
||||
|
||||
updir = [theano.shared(p.get_value() * numpy_floatX(0.),
|
||||
name='%s_updir' % k)
|
||||
for k, p in tparams.items()]
|
||||
updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
|
||||
for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads,
|
||||
running_grads2)]
|
||||
param_up = [(p, p + udn[1])
|
||||
for p, udn in zip(tparams.values(), updir_new)]
|
||||
f_update = theano.function([lr], [], updates=updir_new + param_up,
|
||||
on_unused_input='ignore',
|
||||
name='rmsprop_f_update')
|
||||
|
||||
return f_grad_shared, f_update
|
||||
|
||||
# ==========================
|
||||
# matrix initializations
|
||||
# supports normalized, orthogonal and randomized
|
||||
# ==========================
|
||||
|
||||
def ortho_weight(ndim):
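# draw a random Gaussian matrix and take the U factor of its SVD, which is
# orthogonal; a common initialization for recurrent weight matrices.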
|
||||
W = numpy.random.randn(ndim, ndim)
|
||||
u, s, v = numpy.linalg.svd(W)
|
||||
return u.astype(config.floatX)
|
||||
|
||||
def norm_weight(nin, nout=None, scale=0.01, ortho=True):
|
||||
if nout is None:
|
||||
nout = nin
|
||||
if nout == nin and ortho:
|
||||
W = ortho_weight(nin)
|
||||
else:
|
||||
# bug fixed: set to be ortho_init
|
||||
# W = scale * numpy.random.randn(nin, nout)
|
||||
W = numpy.random.randn(nin, nout)
|
||||
u, s, v = numpy.linalg.svd(W)
|
||||
if nin > nout:
|
||||
W = u[:, :nout]
|
||||
else:
|
||||
W = v[:nin, :]
|
||||
return W.astype('float32')
|
||||
|
||||
def rand_weight(nin, nout=None, scale=0.01, ortho=True):
|
||||
if nout is None:
|
||||
nout = nin
|
||||
if nout == nin and ortho:
|
||||
W = ortho_weight(nin)
|
||||
else:
|
||||
W = scale * numpy.random.randn(nin, nout)
|
||||
return W.astype('float32')
|
||||
|
||||
# ==========================
|
||||
# some utility functions
|
||||
# ==========================
|
||||
|
||||
def zipp(params, tparams):
|
||||
"""
|
||||
When we reload the model. Needed for the GPU stuff.
|
||||
"""
|
||||
for kk, vv in params.items():
|
||||
tparams[kk].set_value(vv)
|
||||
|
||||
def unzip(zipped):
|
||||
"""
|
||||
When we pickle the model. Needed for the GPU stuff.
|
||||
"""
|
||||
new_params = OrderedDict()
|
||||
for kk, vv in zipped.items():
|
||||
new_params[kk] = vv.get_value()
|
||||
return new_params
|
||||
|
||||
def numpy_floatX(data):
|
||||
return numpy.asarray(data, dtype=config.floatX)
|
|
@ -0,0 +1,32 @@
|
|||
import sys
|
||||
import codecs
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
raise Exception('Not enough argv')
|
||||
|
||||
theano_rc = r"""
|
||||
[global]
|
||||
mode = FAST_RUN
|
||||
device = gpu
|
||||
floatX = float32
|
||||
on_unused_input = warn
|
||||
optimizer = fast_run
|
||||
#allow_gc=False
|
||||
cuda.disable_gcc_cudnn_check=True
|
||||
|
||||
[lib]
|
||||
cnmem = 0.75
|
||||
|
||||
[nvcc]
|
||||
flags=-L{0}\libs
|
||||
root=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
fast_math = True
|
||||
|
||||
"""
|
||||
|
||||
theano_rc = theano_rc.format(sys.argv[1])
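# sys.argv[1] is expected to be the Anaconda root that gets inserted into the
# nvcc flags above; sys.argv[2] is the output path for the generated .theanorc.txt.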
|
||||
|
||||
print(theano_rc)
|
||||
|
||||
with codecs.open(sys.argv[2], 'w', 'utf-8') as f:
|
||||
f.write(theano_rc)
|
|
@ -0,0 +1,4 @@
|
|||
@echo off
|
||||
setlocal ENABLEDELAYEDEXPANSION
|
||||
set THEANO_FLAGS=device=gpu5
|
||||
python train_classifier_LM_NoDrop_google_sgd0.2.py
|
|
@ -0,0 +1,148 @@
|
|||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem Windows batch file to use Theano on GCR
|
||||
@rem
|
||||
@rem Updated: April 7, 2016
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|
||||
@rem set the PATH system variable
|
||||
@rem keep the current path from character offset 26 onward (strip the share prefix)
|
||||
set working_sub_dir=%cd:~26%
|
||||
|
||||
set PATH=^
|
||||
C:\Windows\system32;^
|
||||
C:\Windows\System32\Wbem;^
|
||||
C:\Windows\System32\WindowsPowerShell\v1.0\;^
|
||||
C:\Windows;^
|
||||
C:\Program Files\Microsoft HPC Pack 2012\Bin\;^
|
||||
C:\Program Files\Microsoft MPI\Bin\;^
|
||||
C:\Program Files (x86)\Windows Kits\8.1\Windows Performance Toolkit\
|
||||
|
||||
pushd \\gcr\Scratch\RR1\v-yixia\Theano
|
||||
set ToolkitFolderDriver=%cd%
|
||||
|
||||
@rem set the environment variable for the CUDA 7.5 Toolkit
|
||||
rem set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 rem the old version
|
||||
set CUDA_HOME=%ToolkitFolderDriver%\CUDA\v7.0+cudnn4008
|
||||
set CUDA_BIN=%CUDA_HOME%\bin
|
||||
set CUDA_INCLUDE=%CUDA_HOME%\include
|
||||
set CUDA_LIB=%CUDA_HOME%\lib\x64
|
||||
set CUDA_LIBNVVP=%CUDA_HOME%\libnvvp
|
||||
|
||||
@rem add all CUDA Toolkit folders to the PATH system variable
|
||||
set PATH=^
|
||||
%CUDA_HOME%;^
|
||||
%CUDA_BIN%;^
|
||||
%CUDA_INCLUDE%;^
|
||||
%CUDA_LIB%;^
|
||||
%CUDA_LIBNVVP%;^
|
||||
%PATH%
|
||||
|
||||
@echo %PATH%
|
||||
|
||||
@rem setting up VC complier
|
||||
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem connect to the shared toolkit folder \\gcr\Tools\Shared_Toolkits\Theano
|
||||
rem pushd \\gcr\Tools\Shared_Toolkits\Theano
|
||||
|
||||
|
||||
|
||||
@rem unset these variables
|
||||
@set Framework40Version=
|
||||
@set FrameworkDIR32=
|
||||
@set FrameworkVersion32=
|
||||
@set FSHARPINSTALLDIR=
|
||||
@set VSINSTALLDIR=
|
||||
@set WindowsSDK_ExecutablePath_x64=
|
||||
@set WindowsSDK_ExecutablePath_x86=
|
||||
|
||||
@set VCINSTALLDIR=%ToolkitFolderDriver%\VS_portable\Microsoft Visual Studio 12.0v2\VC\
|
||||
@set WindowsSdkDir=%ToolkitFolderDriver%\Windows Kits\8.1v2\
|
||||
|
||||
:amd64
|
||||
|
||||
@rem set Windows SDK include/lib path
|
||||
@rem --------------------------------------------------
|
||||
if not "%WindowsSdkDir%" == "" @set PATH=%WindowsSdkDir%bin\x64;%WindowsSdkDir%bin\x86;%PATH%
|
||||
if not "%WindowsSdkDir%" == "" @set INCLUDE=%WindowsSdkDir%include\shared;%WindowsSdkDir%include\um;%WindowsSdkDir%include\winrt;%INCLUDE%
|
||||
if not "%WindowsSdkDir%" == "" @set LIB=%WindowsSdkDir%lib\winv6.3\um\x64;%LIB%
|
||||
if not "%WindowsSdkDir%" == "" @set LIBPATH=%WindowsLibPath%;%ExtensionSDKDir%\Microsoft.VCLibs\14.0\References\CommonConfiguration\neutral;%LIBPATH%
|
||||
|
||||
@rem set the environment variables for Microsoft Visual Studio
|
||||
@rem --------------------------------------------------
|
||||
@rem PATH
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH%
|
||||
if exist "%VCINSTALLDIR%BIN\amd64" set PATH=%VCINSTALLDIR%BIN\amd64;%PATH%
|
||||
@rem --------------------------------------------------
|
||||
@rem INCLUDE
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE%
|
||||
if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE%
|
||||
@rem --------------------------------------------------
|
||||
@rem LIB
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIB=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIB%
|
||||
if exist "%VCINSTALLDIR%LIB\amd64" set LIB=%VCINSTALLDIR%LIB\amd64;%LIB%
|
||||
@rem --------------------------------------------------
|
||||
@rem LIBPATH
|
||||
@rem --------------------------------------------------
|
||||
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIBPATH%
|
||||
if exist "%VCINSTALLDIR%LIB\amd64" set LIBPATH=%VCINSTALLDIR%LIB\amd64;%LIBPATH%
|
||||
|
||||
@rem set the environment variables for the cuDNN v4 (Feb 10, 2016) for CUDA 7.0 and later.
|
||||
rem set CUDNN_PATH=%ToolkitFolderDriver%\Shared_Toolkits\Theano\CUDA\cudnn-4.0.7\cuda
|
||||
rem set INCLUDE=%CUDNN_PATH%\include;%INCLUDE%
|
||||
rem set LIB=%CUDNN_PATH%\lib\x64;%LIB%
|
||||
rem set PATH=%CUDNN_PATH%\bin;%PATH%
|
||||
|
||||
set Platform=X64
|
||||
set CommandPromptType=Native
|
||||
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
@rem connect to your scratch storage \\gcr\Scratch\<location>\<alias>
|
||||
@rem Note: Please copy ANACONDA2 to \\gcr\Scratch\<location>\<alias>\Anaconda2
|
||||
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
pushd \\gcr\scratch\RR1\v-yixia
|
||||
set CONDANETDRIVE=%cd:~0,2%
|
||||
|
||||
@rem set the environment variable for the Anaconda2
|
||||
set ANACONDA2=%CONDANETDRIVE%\RR1\taoqin\Anaconda-gpu002
|
||||
set ANACONDA2_SCRIPTS=%ANACONDA2%\Scripts
|
||||
set ANACONDA2_BIN=%ANACONDA2%\Library\bin
|
||||
|
||||
@rem add Anaconda2 folders to the PATH system variable
|
||||
set PATH=^
|
||||
%ANACONDA2%;^
|
||||
%ANACONDA2_BIN%;^
|
||||
%ANACONDA2_SCRIPTS%;^
|
||||
%PATH%
|
||||
|
||||
@echo %PATH%
|
||||
|
||||
@rem example files from DeepLearningTutorials are available at \\gcr\Tools\Shared_Toolkits\Theano\Examples
|
||||
set PROJDRIVE=%CONDANETDRIVE%
|
||||
set MYHOME=%PROJDRIVE%\RR1\v-yixia
|
||||
set PROJHOME=%MYHOME%\%working_sub_dir%
|
||||
|
||||
%PROJDRIVE%
|
||||
|
||||
cd %PROJHOME%
|
||||
|
||||
@rem setup theano env (generate .theanorc.txt)
|
||||
call python gen_theanorc.py %ANACONDA2% .theanorc.txt
|
||||
del %userprofile%\.theanorc.txt /Q /F
|
||||
copy .theanorc.txt %userprofile% /Y
|
||||
|
||||
call python write_script.py %*
|
||||
|
||||
call worker.bat
|
||||
|
||||
@echo delete theano env
|
||||
del %userprofile%\.theanorc.txt /Q /F
|
||||
|
||||
popd
|
||||
|
||||
popd
|
||||
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
import re, os, numpy, sys
|
||||
|
||||
|
||||
filename = r'.\gpu_usage_draft'
|
||||
|
||||
|
||||
|
||||
def GrabGPU():
|
||||
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename
|
||||
os.system(cmdstr)
|
||||
|
||||
def GetGPUUSage():
|
||||
pattern = re.compile(r'(?P<num>[0-9]+)MiB[\s]+/')
|
||||
mem = []
|
||||
fo = open(filename, 'r')
|
||||
for line in fo:
|
||||
result = pattern.search(line)
|
||||
if result:
|
||||
mem.append(int(result.group('num')))
|
||||
fo.close()
|
||||
|
||||
return numpy.array(mem).argsort()[0]
|
||||
|
||||
def print_script(cmd):
|
||||
GrabGPU()
|
||||
with open('worker.bat', 'w') as f:
|
||||
f.write('@echo off\nsetlocal ENABLEDELAYEDEXPANSION\n')
|
||||
if len(cmd) == 1:
|
||||
f.write('set THEANO_FLAGS=device=gpu%d\n' % GetGPUUSage())
|
||||
f.write('python ' + cmd[0])
|
||||
elif len(cmd) == 2:
|
||||
f.write('set THEANO_FLAGS=device=gpu' + cmd[1] + '\n')
|
||||
f.write('python ' + cmd[0])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print_script(sys.argv[1:])
|
||||
|
||||
# os.system('del /q ' + filename + rank)
|
|
@ -0,0 +1,369 @@
|
|||
"""
|
||||
data loading and minibatch generation
|
||||
"""
|
||||
__author__ = 'v-yirwan'
|
||||
|
||||
import cPickle as pkl
|
||||
import gzip
|
||||
import os
|
||||
import numpy
|
||||
from theano import config
|
||||
|
||||
def get_dataset_file(dataset, default_dataset, origin):
|
||||
'''
|
||||
Look for it as if it were a full path; if not found, try a local file,
|
||||
then look in the data directory.
|
||||
|
||||
Download dataset if it is not present
|
||||
'''
|
||||
data_dir, data_file = os.path.split(dataset)
|
||||
if data_dir == "" and not os.path.isfile(dataset):
|
||||
# Check if dataset is in the data directory.
|
||||
new_path = os.path.join(
|
||||
os.path.split(__file__)[0],
|
||||
"..",
|
||||
"data",
|
||||
dataset
|
||||
)
|
||||
if os.path.isfile(new_path) or data_file == default_dataset:
|
||||
dataset = new_path
|
||||
|
||||
if (not os.path.isfile(dataset)) and data_file == default_dataset:
|
||||
from six.moves import urllib
|
||||
print('Downloading data from %s' % origin)
|
||||
urllib.request.urlretrieve(origin, dataset)
|
||||
|
||||
return dataset
|
||||
|
||||
def load_data(path="imdb.pkl", n_words=100000, maxlen=None,
|
||||
sort_by_len=True, fixed_valid=True, valid_portion=0.1):
|
||||
'''
|
||||
Loads the dataset
|
||||
:type path: String
|
||||
:param path: The path to the dataset (here IMDB)
|
||||
:type n_words: int
|
||||
:param n_words: The number of word to keep in the vocabulary.
|
||||
All extra words are set to unknown (1).
|
||||
:type maxlen: None or positive int
|
||||
:param maxlen: the max sequence length we use in the train/valid set.
|
||||
:type sort_by_len: bool
|
||||
:param sort_by_len: Sort by the sequence length for the train,
|
||||
valid and test set. This allows faster execution as it causes
|
||||
less padding per minibatch. Another mechanism must be used to
|
||||
shuffle the train set at each epoch.
|
||||
:type fixed_valid: bool
|
||||
:param fixed_valid: load fixed validation set from the corpus file,
|
||||
which would otherwise be picked randomly from the training set with
|
||||
proportion [valid_portion]
|
||||
:type valid_portion: float
|
||||
:param valid_portion: The proportion of the full train set used for
|
||||
the validation set.
|
||||
|
||||
'''
|
||||
|
||||
# Load the dataset
|
||||
path = get_dataset_file(
|
||||
path, "imdb.pkl",
|
||||
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
|
||||
if path.endswith(".gz"):
|
||||
f = gzip.open(path, 'rb')
|
||||
else:
|
||||
f = open(path, 'rb')
|
||||
|
||||
train_set = pkl.load(f)
|
||||
if fixed_valid:
|
||||
valid_set = pkl.load(f)
|
||||
test_set = pkl.load(f)
|
||||
f.close()
|
||||
|
||||
def _truncate_data(train_set):
|
||||
'''
|
||||
drop sequences whose lengths exceed the max-len threshold
|
||||
:param train_set: a tuple of (list of sequences, list of corresponding labels)
|
||||
:return: truncated train_set
|
||||
'''
|
||||
new_train_set_x = []
|
||||
new_train_set_y = []
|
||||
for x, y in zip(train_set[0], train_set[1]):
|
||||
if len(x) < maxlen:
|
||||
new_train_set_x.append(x)
|
||||
new_train_set_y.append(y)
|
||||
train_set = (new_train_set_x, new_train_set_y)
|
||||
del new_train_set_x, new_train_set_y
|
||||
return train_set
|
||||
|
||||
def _set_valid(train_set, valid_portion):
|
||||
'''
|
||||
set validation with [valid_portion] proportion of training set
|
||||
'''
|
||||
train_set_x, train_set_y = train_set
|
||||
n_samples = len(train_set_x)
|
||||
sidx = numpy.random.permutation(n_samples) # shuffle data
|
||||
n_train = int(numpy.round(n_samples * (1. - valid_portion)))
|
||||
valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
|
||||
valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
|
||||
train_set_x = [train_set_x[s] for s in sidx[:n_train]]
|
||||
train_set_y = [train_set_y[s] for s in sidx[:n_train]]
|
||||
train_set = (train_set_x, train_set_y)
|
||||
valid_set = (valid_set_x, valid_set_y)
|
||||
del train_set_x, train_set_y, valid_set_x, valid_set_y
|
||||
return train_set, valid_set
|
||||
|
||||
if maxlen:
|
||||
train_set = _truncate_data(train_set)
|
||||
if fixed_valid:
|
||||
print 'Loading with fixed validation set...',
|
||||
valid_set = _truncate_data(valid_set)
|
||||
else:
|
||||
print 'Setting validation set with proportion:', valid_portion, '...',
|
||||
train_set, valid_set = _set_valid(train_set, valid_portion)
|
||||
test_set = _truncate_data(test_set)
|
||||
|
||||
if maxlen is None and not fixed_valid:
|
||||
train_set, valid_set = _set_valid(train_set, valid_portion)
|
||||
|
||||
def remove_unk(x):
|
||||
return [[1 if w >= n_words else w for w in sen] for sen in x]
|
||||
|
||||
test_set_x, test_set_y = test_set
|
||||
valid_set_x, valid_set_y = valid_set
|
||||
train_set_x, train_set_y = train_set
|
||||
|
||||
# remove unk from dataset
|
||||
train_set_x = remove_unk(train_set_x) # use 1 if unk
|
||||
valid_set_x = remove_unk(valid_set_x)
|
||||
test_set_x = remove_unk(test_set_x)
|
||||
|
||||
def len_argsort(seq):
|
||||
return sorted(range(len(seq)), key=lambda x: len(seq[x]))
|
||||
|
||||
if sort_by_len:
|
||||
sorted_index = len_argsort(test_set_x)
|
||||
# ranked from shortest to longest
|
||||
test_set_x = [test_set_x[i] for i in sorted_index]
|
||||
test_set_y = [test_set_y[i] for i in sorted_index]
|
||||
|
||||
sorted_index = len_argsort(valid_set_x)
|
||||
valid_set_x = [valid_set_x[i] for i in sorted_index]
|
||||
valid_set_y = [valid_set_y[i] for i in sorted_index]
|
||||
|
||||
sorted_index = len_argsort(train_set_x)
|
||||
train_set_x = [train_set_x[i] for i in sorted_index]
|
||||
train_set_y = [train_set_y[i] for i in sorted_index]
|
||||
|
||||
train = (train_set_x, train_set_y)
|
||||
valid = (valid_set_x, valid_set_y)
|
||||
test = (test_set_x, test_set_y)
|
||||
|
||||
return train, valid, test
|
||||
|
||||
def load_mnist(path='mnist.pkl', fixed_permute=True, rand_permute=False):
|
||||
f = open(path, 'rb')
|
||||
train = pkl.load(f)
|
||||
valid = pkl.load(f)
|
||||
test = pkl.load(f)
|
||||
f.close()
|
||||
|
||||
def _permute(data, perm):
|
||||
x, y = data
|
||||
x_new = []
|
||||
for xx in x:
|
||||
xx_new = [xx[pp] for pp in perm]
|
||||
x_new.append(xx_new)
|
||||
return (x_new, y)
|
||||
|
||||
def _trans2list(data):
|
||||
x, y = data
|
||||
x = [list(xx) for xx in x]
|
||||
return (x, y)
|
||||
|
||||
if rand_permute:
|
||||
print 'Using a fixed random permutation of pixels...',
|
||||
perm = numpy.random.permutation(range(784))
|
||||
train = _permute(train, perm)
|
||||
valid = _permute(valid, perm)
|
||||
test = _permute(test, perm)
|
||||
elif fixed_permute:
|
||||
print 'Using permuted dataset...',
|
||||
|
||||
train = _trans2list(train)
|
||||
valid = _trans2list(valid)
|
||||
test = _trans2list(test)
|
||||
|
||||
return train, valid, test
|
||||
|
||||
def get_minibatches_idx(n, minibatch_size, shuffle=False):
|
||||
"""
|
||||
Used to shuffle the dataset at each iteration.
|
||||
"""
|
||||
|
||||
idx_list = numpy.arange(n, dtype="int32")
|
||||
|
||||
if shuffle:
|
||||
numpy.random.shuffle(idx_list)
|
||||
|
||||
minibatches = []
|
||||
minibatch_start = 0
|
||||
for i in range(n // minibatch_size):
|
||||
minibatches.append(idx_list[minibatch_start:
|
||||
minibatch_start + minibatch_size])
|
||||
minibatch_start += minibatch_size
|
||||
|
||||
if (minibatch_start != n):
|
||||
# Make a minibatch out of what is left
|
||||
minibatches.append(idx_list[minibatch_start:])
|
||||
|
||||
return zip(range(len(minibatches)), minibatches)
|
||||
|
||||
def get_minibatches_idx_bucket(dataset, minibatch_size, shuffle=False):
|
||||
"""
|
||||
divide into different buckets according to sequence lengths
|
||||
dynamic batch size
|
||||
"""
|
||||
# divide into buckets
|
||||
slen = [len(ss) for ss in dataset]
|
||||
bucket1000 = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 0 and slen[sidx] <= 1000]
|
||||
bucket3000 = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 1000 and slen[sidx] <= 3000]
|
||||
bucket_long = [sidx for sidx in xrange(len(dataset))
|
||||
if slen[sidx] > 3000]
|
||||
|
||||
# shuffle each bucket
|
||||
if shuffle:
|
||||
numpy.random.shuffle(bucket1000)
|
||||
numpy.random.shuffle(bucket3000)
|
||||
numpy.random.shuffle(bucket_long)
|
||||
|
||||
# make minibatches
|
||||
def _make_batch(minibatches, bucket, minibatch_size):
|
||||
minibatch_start = 0
|
||||
n = len(bucket)
|
||||
for i in range(n // minibatch_size):
|
||||
minibatches.append(bucket[minibatch_start : minibatch_start + minibatch_size])
|
||||
minibatch_start += minibatch_size
|
||||
if (minibatch_start != n):
|
||||
# Make a minibatch out of what is left
|
||||
minibatches.append(bucket[minibatch_start:])
|
||||
return minibatches
|
||||
|
||||
minibatches = []
|
||||
_make_batch(minibatches, bucket1000, minibatch_size=minibatch_size)
|
||||
_make_batch(minibatches, bucket3000, minibatch_size=minibatch_size//2)
|
||||
_make_batch(minibatches, bucket_long, minibatch_size=minibatch_size//8)
|
||||
|
||||
# shuffle minibatches
|
||||
numpy.random.shuffle(minibatches)
|
||||
|
||||
return zip(range(len(minibatches)), minibatches)
|
||||
|
||||
def prepare_data(seqs, labels, maxlen=None, dataset='text'):
|
||||
"""Create the matrices from the datasets.
|
||||
|
||||
This pads each sequence to the same length: the length of the
|
||||
longest sequence or maxlen.
|
||||
|
||||
If maxlen is set, we will cut all sequences to this maximum
|
||||
length.
|
||||
|
||||
This swaps the axes!
|
||||
"""
|
||||
# x: a list of sentences
|
||||
lengths = [len(s) for s in seqs]
|
||||
|
||||
if maxlen is not None:
|
||||
new_seqs = []
|
||||
new_labels = []
|
||||
new_lengths = []
|
||||
for l, s, y in zip(lengths, seqs, labels):
|
||||
if l < maxlen:
|
||||
new_seqs.append(s)
|
||||
new_labels.append(y)
|
||||
new_lengths.append(l)
|
||||
lengths = new_lengths
|
||||
labels = new_labels
|
||||
seqs = new_seqs
|
||||
|
||||
if len(lengths) < 1:
|
||||
return None, None, None
|
||||
|
||||
n_samples = len(seqs)
|
||||
maxlen = numpy.max(lengths)
|
||||
|
||||
if dataset == 'mnist':
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('float32')
|
||||
else:
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('int64')
|
||||
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
|
||||
for idx, s in enumerate(seqs):
|
||||
x[:lengths[idx], idx] = s
|
||||
x_mask[:lengths[idx], idx] = 1.
|
||||
|
||||
return x, x_mask, labels
|
||||
|
||||
def prepare_data_hier(seqs, labels, hier_len, maxlen=None, dataset='text'):
|
||||
'''
|
||||
prepare minibatch for hierarchical model
|
||||
'''
|
||||
# sort (long->short)
|
||||
sorted_idx = sorted(range(len(seqs)), key=lambda x: len(seqs[x]), reverse=True)
|
||||
seqs = [seqs[i] for i in sorted_idx]
|
||||
labels = [labels[i] for i in sorted_idx]
|
||||
|
||||
# truncate data
|
||||
lengths = [len(s) for s in seqs]
|
||||
if maxlen is not None:
|
||||
new_seqs = []
|
||||
new_labels = []
|
||||
new_lengths = []
|
||||
for l, s, y in zip(lengths, seqs, labels):
|
||||
if l < maxlen:
|
||||
new_seqs.append(s)
|
||||
new_labels.append(y)
|
||||
new_lengths.append(l)
|
||||
lengths = new_lengths
|
||||
labels = new_labels
|
||||
seqs = new_seqs
|
||||
if len(lengths) < 1:
|
||||
return None, None, None
|
||||
|
||||
# set batch size
|
||||
n_samples = len(seqs)
|
||||
maxlen = numpy.max(lengths)
|
||||
if maxlen % hier_len == 0:
|
||||
n_batch = maxlen/hier_len
|
||||
else:
|
||||
n_batch = maxlen//hier_len + 1
|
||||
maxlen = n_batch * hier_len
|
||||
|
||||
# padding whole batch
|
||||
if dataset == 'mnist':
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('float32')
|
||||
else:
|
||||
x = numpy.zeros((maxlen, n_samples)).astype('int64')
|
||||
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
|
||||
for idx, s in enumerate(seqs):
|
||||
x[:lengths[idx], idx] = s
|
||||
x_mask[:lengths[idx], idx] = 1
|
||||
|
||||
# slice to mini-batches
|
||||
x_batch = [x[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
|
||||
if dataset == 'mnist':
|
||||
x_batch = numpy.array(x_batch).astype('float32')
|
||||
else:
|
||||
x_batch = numpy.array(x_batch).astype('int64')
|
||||
mask_batch = [x_mask[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
|
||||
mask_batch = numpy.array(mask_batch).astype(config.floatX)
|
||||
|
||||
# mask for hier-level
|
||||
mask_hier = numpy.ones((n_batch, n_samples)).astype(config.floatX)
|
||||
for idx in range(n_samples):
|
||||
mpos = numpy.where(x_mask[:, idx]==0)[0]
|
||||
if len(mpos) == 0:
|
||||
continue
|
||||
bidx = min(mpos[0]//hier_len+1, n_batch)
|
||||
if mpos[0] % hier_len == 0:
|
||||
bidx -= 1 # bug fixed TODO: more elegant solution?
|
||||
mask_hier[bidx:, idx] = 0
|
||||
|
||||
return x_batch, mask_batch, mask_hier, labels
|
|
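For orientation, a minimal usage sketch of the loader above (assuming the module is importable as Data; the corpus path and the batch size of 16 are placeholders), mirroring how the training code consumes it:

from Data import load_data, get_minibatches_idx, prepare_data

# load the IMDB corpus (imdb.pkl is downloaded if missing) and keep at most 10000 words
train, valid, test = load_data(path='imdb.pkl', n_words=10000,
                               maxlen=None, sort_by_len=True, fixed_valid=True)

# iterate over shuffled minibatch indices and build padded (maxlen, n_samples) matrices
for _, idx in get_minibatches_idx(len(train[0]), 16, shuffle=True):
    x, x_mask, y = prepare_data([train[0][i] for i in idx],
                                [train[1][i] for i in idx])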
@ -0,0 +1,38 @@
|
|||
import re
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
|
||||
filename = r'.\gpu_usage_draft_'
|
||||
default_gpu = 58 + 30
|
||||
|
||||
|
||||
|
||||
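# GrabGPU dumps nvidia-smi output to a per-rank draft file; GetGPUUSage then prints how many
# GPUs have less than default_gpu MiB in use, followed by their ids (one per line).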
def GrabGPU(rank):
|
||||
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename + rank
|
||||
os.system(cmdstr)
|
||||
|
||||
def GetGPUUSage(rank):
|
||||
pattern = re.compile(r'(?P<num>[0-9]{1,5})MiB[\s]+/')
|
||||
id = 0
|
||||
GPUs = []
|
||||
fo = open(filename + rank, 'r')
|
||||
for line in fo:
|
||||
result = pattern.search(line)
|
||||
if result:
|
||||
if int(result.group("num")) < default_gpu:
|
||||
GPUs.append(id)
|
||||
id = id + 1
|
||||
fo.close()
|
||||
|
||||
print len(GPUs)
|
||||
for gpu in GPUs:
|
||||
print gpu
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
rank = sys.argv[1]
|
||||
GrabGPU(rank)
|
||||
print socket.gethostname()
|
||||
GetGPUUSage(rank)
|
||||
#os.system('del /q ' + filename + rank)
|
|
@ -0,0 +1,17 @@
|
|||
import os
|
||||
def MapDeviceIds(comm):
|
||||
rank = comm.Get_rank()
|
||||
num_machine = comm.Get_size()
|
||||
os.system('python GPU_Usage.py ' + str(rank) + ' > record' + str(rank))
|
||||
comm.Barrier()
|
||||
if rank == 0:
|
||||
os.system('python AllocateGPU.py ' + str(num_machine) + ' > DirtyRecord')
|
||||
comm.Barrier()
|
||||
cardid = str(0)
|
||||
with open('DirtyRecord', 'r') as f:
|
||||
for idx, line in enumerate(f):
|
||||
if idx == rank:
|
||||
cardid = line.strip()
|
||||
break
|
||||
|
||||
return cardid
|
|
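A minimal sketch of how MapDeviceIds is meant to be called under MPI (assuming mpi4py is available, that GPU_Usage.py and AllocateGPU.py from this commit sit in the working directory, and that the helper above is saved as MapDeviceIds.py; that module name is an assumption):

from mpi4py import MPI
from MapDeviceIds import MapDeviceIds  # hypothetical module name for the helper above

comm = MPI.COMM_WORLD
cardid = MapDeviceIds(comm)  # rank 0 runs AllocateGPU.py; every rank then reads its GPU id
print 'rank %d -> gpu %s' % (comm.Get_rank(), cardid)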
@ -0,0 +1,6 @@
|
|||
Dual supervised learning for sentiment analysis.
|
||||
|
||||
The models are at:
|
||||
https://www.dropbox.com/sh/sbl9lv6q0agsrrz/AADIYiS_4stp36X2waW2Wfiaa?dl=0
|
||||
|
||||
Please refer to "train.bat"/"train_linux.sh" and "valid.bat"/"valid_linux.sh" for how to train and evaluate with our code.
|
(Diff for one file is not shown here because of its size.)
|
@ -0,0 +1,45 @@
|
|||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
# data I/O
|
||||
|
||||
parser.add_argument('--data_dir', type=str, default='./data/imdb.pkl', help='Location for the dataset')
|
||||
parser.add_argument('--LMScoreFile', type=str, default='./data/LMScore.npz', help='Location for the LMScoreFile')
|
||||
parser.add_argument('--GCRmode', dest='GCRmode', action='store_true', help='GCRmode')
|
||||
parser.add_argument('--gpu', type=int, default=0, help='')
|
||||
|
||||
|
||||
# optimization parameters
|
||||
parser.add_argument('--model_dir', type=str, default=None)
|
||||
parser.add_argument('--model_S2L', type=str, default='warmClassifier.npz')
|
||||
parser.add_argument('--model_S2L_pkl', type=str, default=None)
|
||||
parser.add_argument('--model_L2S', type=str, default='warmCLM.npz')
|
||||
parser.add_argument('--model_L2S_pkl', type=str, default=None)
|
||||
parser.add_argument('--dual_style', type=str, default='all', help='all | S2L | L2S ')
|
||||
parser.add_argument('--optim', type=str, default='adadelta')
|
||||
|
||||
parser.add_argument('--minibatch', type=int, default=16, help='')
|
||||
parser.add_argument('--trade_off_S2L', type=float, default=5e-3, help='the consistency trade-off (S2L)')
|
||||
parser.add_argument('--trade_off_L2S', type=float, default=5e-3, help='the consistency trade-off (L2S)')
|
||||
parser.add_argument('--clip_S2L', type=float, default=-1., help='gradient clip S2L')
|
||||
parser.add_argument('--clip_L2S', type=float, default=5., help='gradient clip L2S')
|
||||
parser.add_argument('--bias', type=float, default=0.02, help='the bias')
|
||||
parser.add_argument('--FreezeEmb', dest='FreezeEmb', action='store_true', help='FreezeEmb')
|
||||
parser.add_argument('--lrS2L', type=float, default=0.1, help='')
|
||||
parser.add_argument('--lrL2S', type=float, default=0.1, help='learning rate for the L2S model')
|
||||
parser.add_argument('--lrate', type=float, default=0.1, help='learning rate passed to the optimizer update')
|
||||
parser.add_argument('--maxEpoch', type=int, default=100, help='')
|
||||
parser.add_argument('--validFreq', type=int, default=2000, help='')
|
||||
parser.add_argument('--classifier_drop_in', type=float, default=0.8, help='classifier_drop_in')
|
||||
parser.add_argument('--classifier_drop_out', type=float, default=0.5, help='classifier_drop_out')
|
||||
parser.add_argument('--CLM_drop_in', type=float, default=0.5, help='CLM_drop_in')
|
||||
parser.add_argument('--CLM_drop_out', type=float, default=0.5, help='CLM_drop_out')
|
||||
|
||||
config_params = parser.parse_args()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
Please download the files from
|
||||
https://www.dropbox.com/sh/j9l5hhnjsyhtd02/AABMk8m6b_8tS8fuURqk66zCa?dl=0
|
|
@ -0,0 +1,6 @@
|
|||
from monitor import *
|
||||
|
||||
|
||||
runner = monitor()
|
||||
print 'valid classifier', runner.valid_S2L()
|
||||
print 'valid CLM:', runner.valid_L2S()
|
|
@ -0,0 +1,24 @@
|
|||
# Copyright (c) 2007, 2010, 2011, 2012 Godefroid Chapelle
|
||||
#
|
||||
# This file is part of ipdb.
|
||||
# GNU package is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free
|
||||
# Software Foundation, either version 2 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU package is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
|
||||
|
||||
from __main__ import set_trace, post_mortem, pm, run, runcall, runeval, launch_ipdb_on_exception
|
||||
|
||||
pm # please pyflakes
|
||||
post_mortem # please pyflakes
|
||||
run # please pyflakes
|
||||
runcall # please pyflakes
|
||||
runeval # please pyflakes
|
||||
set_trace # please pyflakes
|
||||
launch_ipdb_on_exception # please pyflakes
|
|
@ -0,0 +1,184 @@
|
|||
# Copyright (c) 2011, 2012 Godefroid Chapelle
|
||||
#
|
||||
# This file is part of ipdb.
|
||||
# GNU package is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free
|
||||
# Software Foundation, either version 2 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU package is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# for more details.
|
||||
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
try:
|
||||
from pdb import Restart
|
||||
except ImportError:
|
||||
class Restart(Exception):
|
||||
pass
|
||||
|
||||
import IPython
|
||||
|
||||
if IPython.__version__ > '0.10.2':
|
||||
from IPython.core.debugger import Pdb, BdbQuit_excepthook
|
||||
try:
|
||||
get_ipython
|
||||
except NameError:
|
||||
# Make it more resilient to different versions of IPython and try to
|
||||
# find a module.
|
||||
possible_modules = ['IPython.terminal.embed', # Newer IPython
|
||||
'IPython.frontend.terminal.embed'] # Older IPython
|
||||
|
||||
count = len(possible_modules)
|
||||
for module in possible_modules:
|
||||
try:
|
||||
embed = __import__(module, fromlist=["InteractiveShellEmbed"])
|
||||
InteractiveShellEmbed = embed.InteractiveShellEmbed
|
||||
except ImportError:
|
||||
count -= 1
|
||||
if count == 0:
|
||||
raise
|
||||
else:
|
||||
break
|
||||
|
||||
ipshell = InteractiveShellEmbed()
|
||||
def_colors = ipshell.colors
|
||||
else:
|
||||
def_colors = get_ipython.im_self.colors
|
||||
|
||||
from IPython.utils import io
|
||||
|
||||
if 'nose' in sys.modules.keys():
|
||||
def update_stdout():
|
||||
# setup stdout to ensure output is available with nose
|
||||
io.stdout = sys.stdout = sys.__stdout__
|
||||
else:
|
||||
def update_stdout():
|
||||
pass
|
||||
else:
|
||||
from IPython.Debugger import Pdb, BdbQuit_excepthook
|
||||
from IPython.Shell import IPShell
|
||||
from IPython import ipapi
|
||||
|
||||
ip = ipapi.get()
|
||||
if ip is None:
|
||||
IPShell(argv=[''])
|
||||
ip = ipapi.get()
|
||||
def_colors = ip.options.colors
|
||||
|
||||
from IPython.Shell import Term
|
||||
|
||||
if 'nose' in sys.modules.keys():
|
||||
def update_stdout():
|
||||
# setup stdout to ensure output is available with nose
|
||||
Term.cout = sys.stdout = sys.__stdout__
|
||||
else:
|
||||
def update_stdout():
|
||||
pass
|
||||
|
||||
|
||||
def wrap_sys_excepthook():
|
||||
# make sure we wrap it only once or we would end up with a cycle
|
||||
# BdbQuit_excepthook.excepthook_ori == BdbQuit_excepthook
|
||||
if sys.excepthook != BdbQuit_excepthook:
|
||||
BdbQuit_excepthook.excepthook_ori = sys.excepthook
|
||||
sys.excepthook = BdbQuit_excepthook
|
||||
|
||||
|
||||
def set_trace(frame=None):
|
||||
update_stdout()
|
||||
wrap_sys_excepthook()
|
||||
if frame is None:
|
||||
frame = sys._getframe().f_back
|
||||
Pdb(def_colors).set_trace(frame)
|
||||
|
||||
|
||||
def post_mortem(tb):
|
||||
update_stdout()
|
||||
wrap_sys_excepthook()
|
||||
p = Pdb(def_colors)
|
||||
p.reset()
|
||||
if tb is None:
|
||||
return
|
||||
p.interaction(None, tb)
|
||||
|
||||
|
||||
def pm():
|
||||
post_mortem(sys.last_traceback)
|
||||
|
||||
|
||||
def run(statement, globals=None, locals=None):
|
||||
Pdb(def_colors).run(statement, globals, locals)
|
||||
|
||||
|
||||
def runcall(*args, **kwargs):
|
||||
return Pdb(def_colors).runcall(*args, **kwargs)
|
||||
|
||||
|
||||
def runeval(expression, globals=None, locals=None):
|
||||
return Pdb(def_colors).runeval(expression, globals, locals)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def launch_ipdb_on_exception():
|
||||
try:
|
||||
yield
|
||||
except Exception:
|
||||
e, m, tb = sys.exc_info()
|
||||
print(m.__repr__(), file=sys.stderr)
|
||||
post_mortem(tb)
|
||||
finally:
|
||||
pass
|
||||
|
||||
|
||||
def main():
|
||||
if not sys.argv[1:] or sys.argv[1] in ("--help", "-h"):
|
||||
print("usage: ipdb.py scriptfile [arg] ...")
|
||||
sys.exit(2)
|
||||
|
||||
mainpyfile = sys.argv[1] # Get script filename
|
||||
if not os.path.exists(mainpyfile):
|
||||
print('Error:', mainpyfile, 'does not exist')
|
||||
sys.exit(1)
|
||||
|
||||
del sys.argv[0] # Hide "pdb.py" from argument list
|
||||
|
||||
# Replace pdb's dir with script's dir in front of module search path.
|
||||
sys.path[0] = os.path.dirname(mainpyfile)
|
||||
|
||||
# Note on saving/restoring sys.argv: it's a good idea when sys.argv was
|
||||
# modified by the script being debugged. It's a bad idea when it was
|
||||
# changed by the user from the command line. There is a "restart" command
|
||||
# which allows explicit specification of command line arguments.
|
||||
pdb = Pdb(def_colors)
|
||||
while 1:
|
||||
try:
|
||||
pdb._runscript(mainpyfile)
|
||||
if pdb._user_requested_quit:
|
||||
break
|
||||
print("The program finished and will be restarted")
|
||||
except Restart:
|
||||
print("Restarting", mainpyfile, "with arguments:")
|
||||
print("\t" + " ".join(sys.argv[1:]))
|
||||
except SystemExit:
|
||||
# In most cases SystemExit does not warrant a post-mortem session.
|
||||
print("The program exited via sys.exit(). Exit status: ", end='')
|
||||
print(sys.exc_info()[1])
|
||||
except:
|
||||
traceback.print_exc()
|
||||
print("Uncaught exception. Entering post mortem debugging")
|
||||
print("Running 'cont' or 'step' will restart the program")
|
||||
t = sys.exc_info()[2]
|
||||
pdb.interaction(None, t)
|
||||
print("Post mortem debugger finished. The " + mainpyfile +
|
||||
" will be restarted")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -0,0 +1,128 @@
|
|||
(dp1
|
||||
S'monitor_grad'
|
||||
p2
|
||||
I00
|
||||
sS'dropout_output'
|
||||
p3
|
||||
F0.5
|
||||
sS'n_words'
|
||||
p4
|
||||
I10000
|
||||
sS'start_epoch'
|
||||
p5
|
||||
I0
|
||||
sS'dataset'
|
||||
p6
|
||||
S'text'
|
||||
p7
|
||||
sS'patience'
|
||||
p8
|
||||
I10
|
||||
sS'skip_steps2'
|
||||
p9
|
||||
I-1
|
||||
sS'hier_len'
|
||||
p10
|
||||
NsS'max_epochs'
|
||||
p11
|
||||
I5000
|
||||
sS'dispFreq'
|
||||
p12
|
||||
I50
|
||||
sS'newDumpFreq'
|
||||
p13
|
||||
I5000000
|
||||
sS'self'
|
||||
p14
|
||||
NsS'hybrid'
|
||||
p15
|
||||
I00
|
||||
sS'clip_c'
|
||||
p16
|
||||
F-1
|
||||
sS'dim_proj'
|
||||
p17
|
||||
I1024
|
||||
sS'saveto'
|
||||
p18
|
||||
S'model.npz'
|
||||
p19
|
||||
sS'start_iter'
|
||||
p20
|
||||
I0
|
||||
sS'lastHiddenLayer'
|
||||
p21
|
||||
NsS'noise_std'
|
||||
p22
|
||||
F0
|
||||
sS'batch_len_threshold'
|
||||
p23
|
||||
NsS'valid_batch_size'
|
||||
p24
|
||||
I16
|
||||
sS'corpus'
|
||||
p25
|
||||
S'imdb.pkl'
|
||||
p26
|
||||
sS'reload_options'
|
||||
p27
|
||||
NsS'optimizer'
|
||||
p28
|
||||
S'adadelta'
|
||||
p29
|
||||
sS'validFreq'
|
||||
p30
|
||||
I2000
|
||||
sS'dropout_input'
|
||||
p31
|
||||
F0.80000000000000004
|
||||
sS'warm_LM'
|
||||
p32
|
||||
NsS'batch_size'
|
||||
p33
|
||||
I16
|
||||
sS'encoder'
|
||||
p34
|
||||
S'lstm'
|
||||
p35
|
||||
sS'hierarchical'
|
||||
p36
|
||||
I00
|
||||
sS'reload_model'
|
||||
p37
|
||||
S'winner/warmClassifier.npz'
|
||||
p38
|
||||
sS'lrate'
|
||||
p39
|
||||
F1
|
||||
sS'truncate_grad'
|
||||
p40
|
||||
I-1
|
||||
sS'decay_c'
|
||||
p41
|
||||
F-1
|
||||
sS'encoder2'
|
||||
p42
|
||||
NsS'test_size'
|
||||
p43
|
||||
NsS'dim_word'
|
||||
p44
|
||||
I500
|
||||
sS'unit_depth'
|
||||
p45
|
||||
I-1
|
||||
sS'maxlen'
|
||||
p46
|
||||
NsS'skip_steps'
|
||||
p47
|
||||
I-1
|
||||
sS'embedding'
|
||||
p48
|
||||
NsS'logFile'
|
||||
p49
|
||||
S'log2'
|
||||
p50
|
||||
sS'mean_pooling'
|
||||
p51
|
||||
I00
|
||||
s.
|
|
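The block above is a protocol-0 Python pickle of the saved model options (an ordinary dict holding dropout rates, dim_proj, optimizer, reload_model path, and so on). A minimal sketch for inspecting it; the file name is a placeholder:

import cPickle as pkl

with open('model.npz.pkl', 'rb') as f:  # hypothetical name of the pickled options file
    options = pkl.load(f)
print options['dim_proj'], options['optimizer'], options['reload_model']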
@ -0,0 +1,209 @@
|
|||
from config import config_params
|
||||
import os
|
||||
os.environ['THEANO_FLAGS']='floatX=float32,device=cuda%d' % (config_params.gpu)
|
||||
if os.name == 'nt':
|
||||
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" '
|
||||
os.system(cmdstr)
|
||||
else:
|
||||
os.system(r'nvidia-smi')
|
||||
|
||||
from CLM.CLM import CLM_worker
|
||||
from Classifier.Models import Model as Classifier
|
||||
import theano
|
||||
import theano.tensor as tensor
|
||||
import numpy
|
||||
from Util_basic import sgd_joint, prepare_data_x, unzip, itemlist_NoEmb, adadelta_joint, Optim
|
||||
from Data import load_data, get_minibatches_idx, get_minibatches_idx_bucket
|
||||
from collections import OrderedDict
|
||||
|
||||
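# grad_clipping rescales the whole gradient list when its global L2 norm exceeds clip_c,
# and also returns that norm so training can log it.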
def grad_clipping(grads, clip_c):
|
||||
g2 = 0.
|
||||
for g in grads:
|
||||
g2 += (g**2).sum()
|
||||
new_grads = []
|
||||
for g in grads:
|
||||
new_grads.append(tensor.switch(g2 > (clip_c**2), g/tensor.sqrt(g2) * clip_c, g))
|
||||
return new_grads, tensor.sqrt(g2)
|
||||
|
||||
class monitor(object):
|
||||
def __init__(self):
|
||||
print config_params
|
||||
self.CLM = CLM_worker(lrate=1.,
|
||||
optimizer='adadelta',
|
||||
batch_size=config_params.minibatch,
|
||||
saveto='model.npz',
|
||||
validFreq=2000,
|
||||
dispFreq=100,
|
||||
dropout_input=config_params.CLM_drop_in,
|
||||
dropout_output=config_params.CLM_drop_out,
|
||||
reload_model=config_params.model_dir + '/' + config_params.model_L2S,
|
||||
reload_option=None,
|
||||
log='log1'
|
||||
)
|
||||
self.classifier = Classifier(lrate=1., # Learning rate for sgd (not used for adadelta and rmsprop)
|
||||
optimizer='adadelta',
|
||||
saveto='model.npz', # The best model will be saved there
|
||||
dispFreq=50, # Display the training progress after this number of updates
|
||||
validFreq=2000, # Compute the validation error after this number of updates
|
||||
batch_size=config_params.minibatch, # The batch size during training.
|
||||
batch_len_threshold=None, # use dynamic batch size if sequence lengths exceed this threshold
|
||||
valid_batch_size=config_params.minibatch, # The batch size used for validation/test set.
|
||||
lastHiddenLayer=None,
|
||||
dropout_output=config_params.classifier_drop_out,
|
||||
dropout_input=config_params.classifier_drop_in,
|
||||
reload_options=None, # Path to a saved model options we want to start from
|
||||
reload_model=config_params.model_dir + '/' + config_params.model_S2L,
|
||||
embedding=None, # Path to the word embedding file (otherwise randomized)
|
||||
warm_LM=None,
|
||||
logFile='log2' # Path to log file
|
||||
)
|
||||
self.trainSet, self.validSet, self.testSet = \
|
||||
load_data(path=config_params.data_dir, n_words=10000, maxlen=None, sort_by_len=True, fixed_valid=True)
|
||||
self.LMscore = numpy.load(config_params.LMScoreFile)
|
||||
self.LMscore = self.LMscore[self.LMscore.files[0]]
|
||||
self.build()
|
||||
|
||||
def build(self):
|
||||
LMsores = tensor.vector('LMScore', dtype='float32')
|
||||
lrate = tensor.scalar(dtype='float32')
|
||||
|
||||
CLM_srcx, CLM_srcx_mask, CLM_ctx_, CLM_cost, CLM_sentenceLen = self.CLM.GetNll()
|
||||
classifier_x, classifier_mask, classifier_y, classifier_nlls = self.classifier.GetNll()
|
||||
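# Duality regularizer of dual supervised learning: penalize the squared mismatch between the two
# factorizations of log p(x, y), with log(0.5) standing in for the uniform prior over the two
# sentiment labels and the precomputed LMScore supplying the marginal language-model term
# (classifier terms are normalized by sentence length to match the per-word CLM cost).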
consistent_loss = (((classifier_nlls + numpy.log(0.5))/CLM_sentenceLen + LMsores - CLM_cost) ** 2).mean()
|
||||
CLM_cost_avg = CLM_cost.mean()
|
||||
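# note: the consistency term enters with the trade-off coefficient applied squared
# (trade_off_L2S * trade_off_L2S here, and likewise trade_off_S2L below)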
overall_L2S = CLM_cost_avg + config_params.trade_off_L2S * config_params.trade_off_L2S * consistent_loss
|
||||
classifier_nlls_avg = classifier_nlls.mean()
|
||||
overall_S2L = classifier_nlls_avg + config_params.trade_off_S2L * config_params.trade_off_S2L * consistent_loss
|
||||
|
||||
if config_params.FreezeEmb:
|
||||
grads_L2S = tensor.grad(overall_L2S, wrt=itemlist_NoEmb(self.CLM.tparams))
|
||||
else:
|
||||
grads_L2S = tensor.grad(overall_L2S, wrt=self.CLM.tparams.values())
|
||||
if config_params.clip_L2S > 0.:
|
||||
grads_L2S, norm_grads_L2S = grad_clipping(grads_L2S, config_params.clip_L2S)
|
||||
else:
|
||||
norm_grads_L2S = tensor.alloc(-1.)
|
||||
|
||||
if config_params.FreezeEmb:
|
||||
grads_S2L = tensor.grad(overall_S2L, wrt=itemlist_NoEmb(self.classifier.tparams))
|
||||
else:
|
||||
grads_S2L = tensor.grad(overall_S2L, wrt=self.classifier.tparams.values())
|
||||
if config_params.clip_S2L > 0.:
|
||||
grads_S2L, norm_grads_S2L = grad_clipping(grads_S2L, config_params.clip_S2L)
|
||||
else:
|
||||
norm_grads_S2L = tensor.alloc(-1.)
|
||||
|
||||
if config_params.dual_style == 'all':
|
||||
merged_var_dic = OrderedDict()
|
||||
if config_params.FreezeEmb:
|
||||
merged_var_dic.update(OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems() if 'Wemb' not in k ))
|
||||
merged_var_dic.update(OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems() if 'Wemb' not in k ))
|
||||
else:
|
||||
merged_var_dic.update(OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems()))
|
||||
merged_var_dic.update(OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems()))
|
||||
|
||||
inps = [CLM_srcx, CLM_srcx_mask, CLM_ctx_, classifier_x, classifier_mask, classifier_y, LMsores]
|
||||
outs = [CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L]
|
||||
self.f_grad_shared, self.f_update = Optim[config_params.optim](lrate, merged_var_dic, grads_S2L + grads_L2S, inps, outs)
|
||||
elif config_params.dual_style == 'S2L':
|
||||
if config_params.FreezeEmb:
|
||||
merged_var_dic = OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems() if 'Wemb' not in k )
|
||||
else:
|
||||
merged_var_dic = OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems())
|
||||
|
||||
inps = [CLM_srcx, CLM_srcx_mask, CLM_ctx_, classifier_x, classifier_mask, classifier_y, LMsores]
|
||||
norm_grads_L2S = tensor.alloc(-1.)
|
||||
outs = [CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L]
|
||||
self.f_grad_shared, self.f_update = Optim[config_params.optim](lrate, merged_var_dic, grads_S2L, inps, outs)
|
||||
elif config_params.dual_style == 'L2S':
|
||||
if config_params.FreezeEmb:
|
||||
merged_var_dic = OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems() if 'Wemb' not in k )
|
||||
else:
|
||||
merged_var_dic = OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems())
|
||||
|
||||
inps = [CLM_srcx, CLM_srcx_mask, CLM_ctx_, classifier_x, classifier_mask, classifier_y, LMsores]
|
||||
norm_grads_S2L = tensor.alloc(-1.)
|
||||
outs = [CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L]
|
||||
self.f_grad_shared, self.f_update = Optim[config_params.optim](lrate, merged_var_dic, grads_L2S, inps, outs)
|
||||
else:
|
||||
raise Exception('Unsupported dual_style: {}'.format(config_params.dual_style))
|
||||
|
||||
def train_one_minibatch(self, seqx, seqy, LMscore):
|
||||
CLM_x, CLM_xmask = prepare_data_x(seqx, pad_eos=True)
|
||||
labels = numpy.array(seqy).astype('int64')
|
||||
classifier_x, classifier_xmask = prepare_data_x(seqx, pad_eos=False)
|
||||
CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L = self.f_grad_shared(
|
||||
CLM_x, CLM_xmask, labels, classifier_x, classifier_xmask, labels, LMscore
|
||||
)
|
||||
print 'CLM_cost_avg=%f, classifier_nlls_avg=%f, norm_grads_L2S=%f, norm_grads_S2L=%f, consistent_loss=%f,' \
|
||||
' overall_L2S=%f, overall_S2L=%f' % (
|
||||
CLM_cost_avg, classifier_nlls_avg, norm_grads_L2S, norm_grads_S2L, consistent_loss, overall_L2S, overall_S2L )
|
||||
self.f_update(config_params.lrate)
|
||||
|
||||
def train(self):
|
||||
uidx = 0
|
||||
for eidx in xrange(0, config_params.maxEpoch):
|
||||
n_samples = 0
|
||||
self.kf_train = get_minibatches_idx_bucket(self.trainSet[0],config_params.minibatch,shuffle=True)
|
||||
|
||||
for _, train_index in self.kf_train:
|
||||
uidx += 1
|
||||
self.classifier.use_noise.set_value(1.)
|
||||
self.CLM.use_noise.set_value(1.)
|
||||
|
||||
# Select the random examples for this minibatch
|
||||
seqx = [self.trainSet[0][t] for t in train_index]
|
||||
seqy = [self.trainSet[1][t] for t in train_index]
|
||||
LMscore = [self.LMscore[t] for t in train_index]
|
||||
self.train_one_minibatch(seqx, seqy, numpy.array(LMscore).astype('float32'))
|
||||
|
||||
if uidx % config_params.validFreq == 0:
|
||||
self.classifier.use_noise.set_value(0.)
|
||||
self.CLM.use_noise.set_value(0.)
|
||||
|
||||
if config_params.dual_style == 'all':
|
||||
suffix_S2L = self.valid_S2L()
|
||||
suffix_L2S = self.valid_L2S()
|
||||
|
||||
S2Lpath = config_params.model_dir + '/model_S2L_' + suffix_S2L + '_uidx' + str(uidx)
|
||||
L2Spath = config_params.model_dir + '/model_L2S_' + suffix_L2S + '_uidx' + str(uidx)
|
||||
|
||||
numpy.savez(S2Lpath, history_errs=[], **unzip(self.classifier.tparams) )
|
||||
numpy.savez(L2Spath, history_errs=[], **unzip(self.CLM.tparams) )
|
||||
elif config_params.dual_style == 'S2L':
|
||||
suffix_S2L = self.valid_S2L()
|
||||
S2Lpath = config_params.model_dir + '/model_S2L_' + suffix_S2L + '_uidx' + str(uidx)
|
||||
numpy.savez(S2Lpath, history_errs=[], **unzip(self.classifier.tparams) )
|
||||
elif config_params.dual_style == 'L2S':
|
||||
suffix_L2S = self.valid_L2S()
|
||||
L2Spath = config_params.model_dir + '/model_L2S_' + suffix_L2S + '_uidx' + str(uidx)
|
||||
numpy.savez(L2Spath, history_errs=[], **unzip(self.CLM.tparams) )
|
||||
|
||||
|
||||
def valid_S2L(self):
|
||||
acc = self.classifier.evaluate(self.trainSet, self.validSet, self.testSet)
|
||||
print 'TrainAcc=%f, ValidAcc=%f, TestAcc=%f' % (acc[0], acc[1], acc[2])
|
||||
return 'train_{}_valid_{}_test_{}'.format(acc[0], acc[1], acc[2])
|
||||
|
||||
def valid_L2S(self):
|
||||
valid_ppl, test_ppl = self.CLM.evaluate(self.validSet, self.testSet)
|
||||
print 'Valid_PPL=%f, Test_PPL=%f' % (valid_ppl, test_ppl)
|
||||
return 'valid_{}_test_{}'.format(valid_ppl, test_ppl)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
runner = monitor()
|
||||
runner.train()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
python monitor.py --classifier_drop_in=0.8 --classifier_drop_out=0.5 --clip_L2S=5.0 --trade_off_S2L=5.0 --trade_off_L2S=5.0 --validFreq=5000 --model_dir=your_model_folder --lrS2L=0.1 --lrL2S=0.1 --lrate=0.1 --bias=0.0 --dual_style=all --optim=adadelta
|
|
@ -0,0 +1 @@
|
|||
python monitor.py --classifier_drop_in=0.8 --classifier_drop_out=0.5 --clip_L2S=5.0 --trade_off_S2L=5.0 --trade_off_L2S=5.0 --validFreq=5000 --model_dir=Sentiment_model --lrS2L=0.1 --lrL2S=0.1 --lrate=0.1 --bias=0.0 --dual_style=all --optim=adadelta
|
|
@ -0,0 +1 @@
|
|||
python inference.py --model_dir=winner --model_S2L=warmClassifier.npz --model_L2S=warmCLM.npz
|
|
@ -0,0 +1 @@
|
|||
python inference.py --model_dir=winner --model_S2L=warmClassifier.npz --model_L2S=warmCLM.npz --gpu=3
|
|
@ -12,3 +12,7 @@ provided by the bot. You will only need to do this once across all repos using o
|
|||
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
|
||||
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
|
||||
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
|
||||
|
||||
The code consists of two parts:
|
||||
(1) dual supervised learning for image processing: DSL_ImgProcess
|
||||
(2) dual supervised learning for sentiment analysis: DSL_SentimentAnalysis
|