This commit is contained in:
xyc1207 2017-12-02 22:55:11 +08:00
Parent a8a8eefebd
Commit 1068687f83
88 changed files: 13349 additions and 0 deletions

11
DSL_ImgProcess/README.MD Normal file
View File

@@ -0,0 +1,11 @@
Thank you for your interest in our work.
We have quickly wrapped up a multi-GPU version of the code. (Note that in the submission phase we used a single-GPU version. Please let us know if there is anything we can improve.)
Training demo code: example.sh
Inference demo code: batch_test_script_mainbody.sh
The data and the checkpoint are available at:
https://www.dropbox.com/sh/fpnvtcmyj4mul2s/AAB4wvsxoS8pf7ExnZYe4VV1a?dl=0
You need to download them and put them in the working directory. An example is given in `example.sh`.
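
A minimal setup sketch (assuming the Dropbox share can be downloaded as a zip archive by changing the link suffix to `?dl=1`; the exact folder layout inside the archive may differ):

    wget -O dsl_data.zip "https://www.dropbox.com/sh/fpnvtcmyj4mul2s/AAB4wvsxoS8pf7ExnZYe4VV1a?dl=1"
    unzip dsl_data.zip -d .               # extract the data and checkpoint folders into the working dir
    bash example.sh                       # training demo
    bash batch_test_script_mainbody.sh    # inference demo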

View File

@@ -0,0 +1,40 @@
export PATH=/usr/anaconda2/bin:$PATH
#export LD_LIBRARY_PATH=~/Downloads/cuda/lib64:"$LD_LIBRARY_PATH"
export CUDA_VISIBLE_DEVICES=6
model_dir=checkpoints
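# For each selected epoch e, find the matching checkpoint (grep matches the ".index"
# file; the trailing 6 characters are stripped to recover the .ckpt name), then
# evaluate it in classification (I2L) and in generation (L2I) mode.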
for (( e=345;e<=345;e+=2 ));do
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
filename=${filename:0:-6}
python monitor_checkleft.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=I2L --useSoftLabel=0
done
for (( e=345;e<=345;e+=2 ));do
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
filename=${filename:0:-6}
python monitor_checkleft.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=L2I --useSoftLabel=0
done
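# The heredoc below is a no-op that comments out an alternative single-GPU virtualenv setup.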
: <<'VIRTUAL_ENV'
source ~/virtual_py/bin/activate
export CUDA_VISIBLE_DEVICES=0
model_dir=debug_room
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=params_9uidx13880.ckpt --mode=L2I --useSoftLabel=0
deactivate
VIRTUAL_ENV

View File

@@ -0,0 +1,42 @@
export PATH=/usr/anaconda2/bin:$PATH
#export LD_LIBRARY_PATH=~/Downloads/cuda/lib64:"$LD_LIBRARY_PATH"
export CUDA_VISIBLE_DEVICES=6
model_dir=checkpoints
for (( e=345;e<=345;e+=2 ));do
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
filename=${filename:0:-6}
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=I2L --useSoftLabel=0
done
for (( e=345;e<=345;e+=2 ));do
filename=$(ls "$model_dir" | grep -o 'params_'${e}'uidx[^\.]*\.ckpt\.index')
filename=${filename:0:-6}
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=${filename} --mode=L2I --useSoftLabel=0
done
# When using "--oneside" in training mode, you should also add the
# corresponding "--oneside" in the inference phase.
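# For example, if the model was trained with "--oneside=I2L" (as in example.sh), run the
# corresponding inference like this (checkpoint name and paths are illustrative):
#   python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --load_params=${filename} --mode=I2L --useSoftLabel=0 --oneside=I2L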
: <<'VIRTUAL_ENV'
source ~/virtual_py/bin/activate
export CUDA_VISIBLE_DEVICES=0
model_dir=debug_room
python monitor.py --data_dir=./cifar10_data --save_dir=$model_dir --batch_size=12 --show_interval=100 --load_params=params_9uidx13880.ckpt --mode=L2I --useSoftLabel=0
deactivate
VIRTUAL_ENV

View File

@@ -0,0 +1,117 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""CIFAR dataset input module.
"""
import tensorflow as tf
def build_input(dataset, data_path, batch_size, mode):
"""Build CIFAR image and labels.
Args:
dataset: Either 'cifar10' or 'cifar100'.
data_path: Filename for data.
batch_size: Input batch size.
mode: Either 'train' or 'eval'.
Returns:
images: Batches of images. [batch_size, image_size, image_size, 3]
labels: Batches of labels. [batch_size, num_classes]
Raises:
ValueError: when the specified dataset is not supported.
"""
image_size = 32
if dataset == 'cifar10':
label_bytes = 1
label_offset = 0
num_classes = 10
elif dataset == 'cifar100':
label_bytes = 1
label_offset = 1
num_classes = 100
else:
    raise ValueError('Not supported dataset %s' % dataset)
depth = 3
image_bytes = image_size * image_size * depth
record_bytes = label_bytes + label_offset + image_bytes
data_files = tf.gfile.Glob(data_path)
file_queue = tf.train.string_input_producer(data_files, shuffle=True)
# Read examples from files in the filename queue.
reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
_, value = reader.read(file_queue)
# Convert these examples to dense labels and processed images.
record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
label = tf.cast(tf.slice(record, [label_offset], [label_bytes]), tf.int32)
# Convert from string to [depth * height * width] to [depth, height, width].
depth_major = tf.reshape(tf.slice(record, [label_bytes], [image_bytes]),
[depth, image_size, image_size])
# Convert from [depth, height, width] to [height, width, depth].
image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
if mode == 'train':
image = tf.image.resize_image_with_crop_or_pad(
image, image_size+4, image_size+4)
image = tf.random_crop(image, [image_size, image_size, 3])
image = tf.image.random_flip_left_right(image)
    # Brightness/saturation/contrast provides small gains .2%~.5% on cifar.
# image = tf.image.random_brightness(image, max_delta=63. / 255.)
# image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
# image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
image = tf.image.per_image_standardization(image)
example_queue = tf.RandomShuffleQueue(
capacity=16 * batch_size,
min_after_dequeue=8 * batch_size,
dtypes=[tf.float32, tf.int32],
shapes=[[image_size, image_size, depth], [1]])
num_threads = 16
else:
image = tf.image.resize_image_with_crop_or_pad(
image, image_size, image_size)
image = tf.image.per_image_whitening(image)
example_queue = tf.FIFOQueue(
3 * batch_size,
dtypes=[tf.float32, tf.int32],
shapes=[[image_size, image_size, depth], [1]])
num_threads = 1
example_enqueue_op = example_queue.enqueue([image, label])
tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
example_queue, [example_enqueue_op] * num_threads))
# Read 'batch' labels + images from the example queue.
images, labels = example_queue.dequeue_many(batch_size)
labels = tf.reshape(labels, [batch_size, 1])
indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
labels = tf.sparse_to_dense(
tf.concat(1, [indices, labels]),
[batch_size, num_classes], 1.0, 0.0)
assert len(images.get_shape()) == 4
assert images.get_shape()[0] == batch_size
assert images.get_shape()[-1] == 3
assert len(labels.get_shape()) == 2
assert labels.get_shape()[0] == batch_size
assert labels.get_shape()[1] == num_classes
# Display the training images in the visualizer.
tf.image_summary('images', images)
return images, labels
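# Minimal usage sketch (assumes the same legacy TensorFlow queue-runner APIs used above;
# the glob below is illustrative and should point at the CIFAR-10 binary data batches):
if __name__ == '__main__':
  images, labels = build_input('cifar10', './cifar10_data/data_batch*', 64, 'eval')
  with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    image_batch, label_batch = sess.run([images, labels])  # (64, 32, 32, 3) and (64, 10)
    coord.request_stop()
    coord.join(threads)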

View File

View File

@@ -0,0 +1,129 @@
"""
Utilities for downloading and unpacking the CIFAR-10 dataset, originally published
by Krizhevsky et al. and hosted here: https://www.cs.toronto.edu/~kriz/cifar.html
"""
import os
import sys
import tarfile
from six.moves import urllib
import numpy as np
def maybe_download_and_extract(data_dir, url='http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'):
if not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py')):
if not os.path.exists(data_dir):
os.makedirs(data_dir)
filename = url.split('/')[-1]
filepath = os.path.join(data_dir, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(data_dir)
def unpickle(file):
fo = open(file, 'rb')
if (sys.version_info >= (3, 0)):
import pickle
d = pickle.load(fo, encoding='latin1')
else:
import cPickle
d = cPickle.load(fo)
fo.close()
return {'x': d['data'].reshape((10000,3,32,32)), 'y': np.array(d['labels']).astype(np.uint8)}
def load(data_dir, subset='train'):
maybe_download_and_extract(data_dir)
if subset=='train':
train_data = [unpickle(os.path.join(data_dir,'cifar-10-batches-py','data_batch_' + str(i))) for i in range(1,6)]
trainx = np.concatenate([d['x'] for d in train_data],axis=0)
trainy = np.concatenate([d['y'] for d in train_data],axis=0)
return trainx, trainy
elif subset=='test':
test_data = unpickle(os.path.join(data_dir,'cifar-10-batches-py','test_batch'))
testx = test_data['x']
testy = test_data['y']
return testx, testy
else:
raise NotImplementedError('subset should be either train or test')
class DataLoader(object):
""" an object that generates batches of CIFAR-10 data for training """
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False, return_labels=False, filter_labels=None):
"""
- data_dir is location where to store files
- subset is train|test
- batch_size is int, of #examples to load at once
- rng is np.random.RandomState object for reproducibility
"""
self.data_dir = data_dir
self.batch_size = batch_size
self.shuffle = shuffle
self.return_labels = return_labels
# create temporary storage for the data, if not yet created
if not os.path.exists(data_dir):
print('creating folder', data_dir)
os.makedirs(data_dir)
# load CIFAR-10 training data to RAM
self.data, self.labels = load(os.path.join(data_dir,'cifar-10-python'), subset=subset)
if filter_labels is not None:
selected_idx = self.labels == filter_labels
self.data = self.data[selected_idx]
self.labels = self.labels[selected_idx]
print('There are %d samples left' % self.labels.size)
self.data = np.transpose(self.data, (0,2,3,1)) # (N,3,32,32) -> (N,32,32,3)
self.p = 0 # pointer to where we are in iteration
self.rng = np.random.RandomState(1) if rng is None else rng
def get_observation_size(self):
return self.data.shape[1:]
def get_num_labels(self):
return np.amax(self.labels) + 1
def reset(self):
self.p = 0
def __iter__(self):
return self
def __next__(self, n=None):
""" n is the number of examples to fetch """
if n is None: n = self.batch_size
# on first iteration lazily permute all data
if self.p == 0 and self.shuffle:
inds = self.rng.permutation(self.data.shape[0])
self.data = self.data[inds]
self.labels = self.labels[inds]
# on last iteration reset the counter and raise StopIteration
if self.p + n > self.data.shape[0]:
self.reset() # reset for next time we get called
raise StopIteration
# on intermediate iterations fetch the next batch
x = self.data[self.p : self.p + n]
y = self.labels[self.p : self.p + n]
self.p += self.batch_size
if self.return_labels:
return x,y
else:
return x
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
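# Minimal usage sketch (data_dir is illustrative; CIFAR-10 is downloaded into it on first use):
if __name__ == '__main__':
    loader = DataLoader('./cifar10_data', 'train', batch_size=16, shuffle=True, return_labels=True)
    x, y = next(loader)  # x: (16, 32, 32, 3) uint8 images, y: (16,) uint8 labels
    print(x.shape, y.shape)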

View File

@@ -0,0 +1,36 @@
import cifar10_data
import argparse
import plotting
import numpy as np
data_dir = '/home/tim/data'
parser = argparse.ArgumentParser()
parser.add_argument('--save_dir', type=str, default='./log')
parser.add_argument('--data_dir', type=str, default='/home/tim/data')
parser.add_argument('--plot_title', type=str, default=None)
args = parser.parse_args()
print(args)
data_dir = args.data_dir
trainx, trainy = cifar10_data.load(data_dir)
ids = [[] for i in range(10)]
for i in range(trainx.shape[0]):
if len(ids[trainy[i]]) < 10:
ids[trainy[i]].append(i)
if np.alltrue(np.asarray([len(_ids) >= 10 for _ids in ids])):
break
images = np.zeros((10*10,32,32,3),dtype='uint8')
for i in range(len(ids)):
for j in range(len(ids[i])):
images[10*j+i] = trainx[ids[i][j]].transpose([1,2,0])
print(ids)
img_tile = plotting.img_tile(images, aspect_ratio=1.0, border_color=1.0, stretch=True)
img = plotting.plot_img(img_tile, title=args.plot_title if args.plot_title != 'None' else None)
plotting.plt.savefig(args.save_dir + '/cifar10_orig_images.png')
plotting.plt.close('all')

View File

@@ -0,0 +1,137 @@
"""
Utilities for loading the small ImageNet dataset used in Oord et al.
use scripts/png_to_npz.py to create the npz files
The code here currently assumes that the preprocessing was done manually.
TODO: make automatic and painless
"""
import os
import sys
import tarfile
from six.moves import urllib
import numpy as np
from scipy.misc import imread
def fetch(url, filepath):
filename = url.split('/')[-1]
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
print(url)
filepath, headers = urllib.request.urlretrieve(url, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
def maybe_download_and_extract(data_dir):
# more info on the dataset at http://image-net.org/small/download.php
# downloads and extracts the two tar files for train/val
train_dir = os.path.join(data_dir, 'train_32x32')
if not os.path.exists(train_dir):
train_url = 'http://image-net.org/small/train_32x32.tar' # 4GB
filepath = os.path.join(data_dir, 'train_32x32.tar')
fetch(train_url, filepath)
print('unpacking the tar file', filepath)
tarfile.open(filepath, 'r').extractall(data_dir) # creates the train_32x32 folder
test_dir = os.path.join(data_dir, 'valid_32x32')
if not os.path.exists(test_dir):
test_url = 'http://image-net.org/small/valid_32x32.tar' # 154MB
filepath = os.path.join(data_dir, 'valid_32x32.tar')
fetch(test_url, filepath)
print('unpacking the tar file', filepath)
tarfile.open(filepath, 'r').extractall(data_dir) # creates the valid_32x32 folder
def maybe_preprocess(data_dir):
npz_file = os.path.join(data_dir, 'imgnet_32x32.npz')
if os.path.exists(npz_file):
return # all good
trainx = []
train_dir = os.path.join(data_dir, 'train_32x32')
for f in os.listdir(train_dir):
if f.endswith('.png'):
print('reading', f)
filepath = os.path.join(train_dir, f)
trainx.append(imread(filepath).reshape((1,32,32,3)))
trainx = np.concatenate(trainx, axis=0)
testx = []
test_dir = os.path.join(data_dir, 'valid_32x32')
for f in os.listdir(test_dir):
if f.endswith('.png'):
print('reading', f)
filepath = os.path.join(test_dir, f)
testx.append(imread(filepath).reshape((1,32,32,3)))
testx = np.concatenate(testx, axis=0)
np.savez(npz_file, trainx=trainx, testx=testx)
def load(data_dir, subset='train'):
if not os.path.exists(data_dir):
print('creating folder', data_dir)
os.makedirs(data_dir)
maybe_download_and_extract(data_dir)
maybe_preprocess(data_dir)
imagenet_data = np.load(os.path.join(data_dir,'imgnet_32x32.npz'))
return imagenet_data['trainx'] if subset == 'train' else imagenet_data['testx']
class DataLoader(object):
""" an object that generates batches of CIFAR-10 data for training """
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False):
"""
- data_dir is location where the files are stored
- subset is train|test
- batch_size is int, of #examples to load at once
- rng is np.random.RandomState object for reproducibility
"""
self.data_dir = data_dir
self.batch_size = batch_size
self.shuffle = shuffle
self.data = load(os.path.join(data_dir,'small_imagenet'), subset=subset)
self.p = 0 # pointer to where we are in iteration
self.rng = np.random.RandomState(1) if rng is None else rng
def get_observation_size(self):
return self.data.shape[1:]
def reset(self):
self.p = 0
def __iter__(self):
return self
def __next__(self, n=None):
""" n is the number of examples to fetch """
if n is None: n = self.batch_size
# on first iteration lazily permute all data
if self.p == 0 and self.shuffle:
inds = self.rng.permutation(self.data.shape[0])
self.data = self.data[inds]
# on last iteration reset the counter and raise StopIteration
if self.p + n > self.data.shape[0]:
self.reset() # reset for next time we get called
raise StopIteration
# on intermediate iterations fetch the next batch
x = self.data[self.p : self.p + n]
self.p += self.batch_size
return x
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)

Binary data
DSL_ImgProcess/data/pixelcnn_samples.png Normal file

Binary file not shown. (Size: 577 KiB)

View File

View File

@@ -0,0 +1,131 @@
"""
Utilities for downloading and unpacking the CIFAR-10 dataset, originally published
by Krizhevsky et al. and hosted here: https://www.cs.toronto.edu/~kriz/cifar.html
"""
import os
import sys
import tarfile
from six.moves import urllib
import numpy as np
def maybe_download_and_extract(data_dir, url='http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'):
if not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py')):
if not os.path.exists(data_dir):
os.makedirs(data_dir)
filename = url.split('/')[-1]
filepath = os.path.join(data_dir, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(data_dir)
def unpickle(file):
fo = open(file, 'rb')
if (sys.version_info >= (3, 0)):
import pickle
d = pickle.load(fo, encoding='latin1')
else:
import cPickle
d = cPickle.load(fo)
fo.close()
return {'x': d['data'].reshape((10000,3,32,32)), 'y': np.array(d['labels']).astype(np.uint8)}
def load(data_dir, subset='train'):
maybe_download_and_extract(data_dir)
if subset=='train':
train_data = [unpickle(os.path.join(data_dir,'cifar-10-batches-py','data_batch_' + str(i))) for i in range(1,6)]
trainx = np.concatenate([d['x'] for d in train_data],axis=0)
trainy = np.concatenate([d['y'] for d in train_data],axis=0)
return trainx, trainy
elif subset=='test':
test_data = unpickle(os.path.join(data_dir,'cifar-10-batches-py','test_batch'))
testx = test_data['x']
testy = test_data['y']
return testx, testy
else:
raise NotImplementedError('subset should be either train or test')
class DataLoader(object):
""" an object that generates batches of CIFAR-10 data for training """
def __init__(self, data_dir, subset, batch_size, LMscore=None, rng=None, shuffle=False, return_labels=False):
"""
- data_dir is location where to store files
- subset is train|test
- batch_size is int, of #examples to load at once
- rng is np.random.RandomState object for reproducibility
"""
self.data_dir = data_dir
self.batch_size = batch_size
self.shuffle = shuffle
self.return_labels = return_labels
# create temporary storage for the data, if not yet created
if not os.path.exists(data_dir):
print('creating folder', data_dir)
os.makedirs(data_dir)
# load CIFAR-10 training data to RAM
self.data, self.labels = load(os.path.join(data_dir,'cifar-10-python'), subset=subset)
self.data = np.transpose(self.data, (0,2,3,1)) # (N,3,32,32) -> (N,32,32,3)
        if subset == 'train':
            self.LM = np.load(LMscore + '.train.npz')['arr_0']
        elif subset == 'test':
            # assume the test scores are stored the same way as the train scores
            self.LM = np.load(LMscore + '.test.npz')['arr_0']
        else:
            raise ValueError('LM scores are only available for the train/test subsets, got %r' % subset)
self.p = 0 # pointer to where we are in iteration
self.rng = np.random.RandomState(1) if rng is None else rng
def get_observation_size(self):
return self.data.shape[1:]
def get_num_labels(self):
return np.amax(self.labels) + 1
def reset(self):
self.p = 0
def __iter__(self):
return self
def __next__(self, n=None):
""" n is the number of examples to fetch """
if n is None: n = self.batch_size
# on first iteration lazily permute all data
if self.p == 0 and self.shuffle:
inds = self.rng.permutation(self.data.shape[0])
self.data = self.data[inds]
self.labels = self.labels[inds]
self.LM = self.LM[inds]
# on last iteration reset the counter and raise StopIteration
if self.p + n > self.data.shape[0]:
self.reset() # reset for next time we get called
raise StopIteration
# on intermediate iterations fetch the next batch
x = self.data[self.p : self.p + n]
y = self.labels[self.p : self.p + n]
lmscore = self.LM[self.p : self.p + n]
self.p += self.batch_size
if self.return_labels:
return x,y, lmscore
else:
return x, lmscore
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)
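# Minimal usage sketch (paths are illustrative; LMscore must be the prefix of precomputed
# score files <prefix>.train.npz / <prefix>.test.npz, as passed in monitor.py):
if __name__ == '__main__':
    loader = DataLoader('./cifar10_data', 'train', 16, LMscore='./cifar10_data/cifar10-LMscore',
                        shuffle=True, return_labels=True)
    x, y, lm = next(loader)  # images, labels and the per-image LM scores
    print(x.shape, y.shape, lm.shape)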

View File

@@ -0,0 +1,36 @@
import cifar10_data
import argparse
import plotting
import numpy as np
data_dir = '/home/tim/data'
parser = argparse.ArgumentParser()
parser.add_argument('--save_dir', type=str, default='./log')
parser.add_argument('--data_dir', type=str, default='/home/tim/data')
parser.add_argument('--plot_title', type=str, default=None)
args = parser.parse_args()
print(args)
data_dir = args.data_dir
trainx, trainy = cifar10_data.load(data_dir)
ids = [[] for i in range(10)]
for i in range(trainx.shape[0]):
if len(ids[trainy[i]]) < 10:
ids[trainy[i]].append(i)
if np.alltrue(np.asarray([len(_ids) >= 10 for _ids in ids])):
break
images = np.zeros((10*10,32,32,3),dtype='uint8')
for i in range(len(ids)):
for j in range(len(ids[i])):
images[10*j+i] = trainx[ids[i][j]].transpose([1,2,0])
print(ids)
img_tile = plotting.img_tile(images, aspect_ratio=1.0, border_color=1.0, stretch=True)
img = plotting.plot_img(img_tile, title=args.plot_title if args.plot_title != 'None' else None)
plotting.plt.savefig(args.save_dir + '/cifar10_orig_images.png')
plotting.plt.close('all')

View File

@@ -0,0 +1,137 @@
"""
Utilities for loading the small ImageNet dataset used in Oord et al.
use scripts/png_to_npz.py to create the npz files
The code here currently assumes that the preprocessing was done manually.
TODO: make automatic and painless
"""
import os
import sys
import tarfile
from six.moves import urllib
import numpy as np
from scipy.misc import imread
def fetch(url, filepath):
filename = url.split('/')[-1]
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
print(url)
filepath, headers = urllib.request.urlretrieve(url, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
def maybe_download_and_extract(data_dir):
# more info on the dataset at http://image-net.org/small/download.php
# downloads and extracts the two tar files for train/val
train_dir = os.path.join(data_dir, 'train_32x32')
if not os.path.exists(train_dir):
train_url = 'http://image-net.org/small/train_32x32.tar' # 4GB
filepath = os.path.join(data_dir, 'train_32x32.tar')
fetch(train_url, filepath)
print('unpacking the tar file', filepath)
tarfile.open(filepath, 'r').extractall(data_dir) # creates the train_32x32 folder
test_dir = os.path.join(data_dir, 'valid_32x32')
if not os.path.exists(test_dir):
test_url = 'http://image-net.org/small/valid_32x32.tar' # 154MB
filepath = os.path.join(data_dir, 'valid_32x32.tar')
fetch(test_url, filepath)
print('unpacking the tar file', filepath)
tarfile.open(filepath, 'r').extractall(data_dir) # creates the valid_32x32 folder
def maybe_preprocess(data_dir):
npz_file = os.path.join(data_dir, 'imgnet_32x32.npz')
if os.path.exists(npz_file):
return # all good
trainx = []
train_dir = os.path.join(data_dir, 'train_32x32')
for f in os.listdir(train_dir):
if f.endswith('.png'):
print('reading', f)
filepath = os.path.join(train_dir, f)
trainx.append(imread(filepath).reshape((1,32,32,3)))
trainx = np.concatenate(trainx, axis=0)
testx = []
test_dir = os.path.join(data_dir, 'valid_32x32')
for f in os.listdir(test_dir):
if f.endswith('.png'):
print('reading', f)
filepath = os.path.join(test_dir, f)
testx.append(imread(filepath).reshape((1,32,32,3)))
testx = np.concatenate(testx, axis=0)
np.savez(npz_file, trainx=trainx, testx=testx)
def load(data_dir, subset='train'):
if not os.path.exists(data_dir):
print('creating folder', data_dir)
os.makedirs(data_dir)
maybe_download_and_extract(data_dir)
maybe_preprocess(data_dir)
imagenet_data = np.load(os.path.join(data_dir,'imgnet_32x32.npz'))
return imagenet_data['trainx'] if subset == 'train' else imagenet_data['testx']
class DataLoader(object):
""" an object that generates batches of CIFAR-10 data for training """
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False):
"""
- data_dir is location where the files are stored
- subset is train|test
- batch_size is int, of #examples to load at once
- rng is np.random.RandomState object for reproducibility
"""
self.data_dir = data_dir
self.batch_size = batch_size
self.shuffle = shuffle
self.data = load(os.path.join(data_dir,'small_imagenet'), subset=subset)
self.p = 0 # pointer to where we are in iteration
self.rng = np.random.RandomState(1) if rng is None else rng
def get_observation_size(self):
return self.data.shape[1:]
def reset(self):
self.p = 0
def __iter__(self):
return self
def __next__(self, n=None):
""" n is the number of examples to fetch """
if n is None: n = self.batch_size
# on first iteration lazily permute all data
if self.p == 0 and self.shuffle:
inds = self.rng.permutation(self.data.shape[0])
self.data = self.data[inds]
# on last iteration reset the counter and raise StopIteration
if self.p + n > self.data.shape[0]:
self.reset() # reset for next time we get called
raise StopIteration
# on intermediate iterations fetch the next batch
x = self.data[self.p : self.p + n]
self.p += self.batch_size
return x
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)

Binary data
DSL_ImgProcess/data2/pixelcnn_samples.png Normal file

Binary file not shown. (Size: 577 KiB)

View File

View File

@@ -0,0 +1,133 @@
"""
Utilities for downloading and unpacking the CIFAR-10 dataset, originally published
by Krizhevsky et al. and hosted here: https://www.cs.toronto.edu/~kriz/cifar.html
"""
import os
import sys
import tarfile
from six.moves import urllib
import numpy as np
def maybe_download_and_extract(data_dir, url='http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'):
if not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py')):
if not os.path.exists(data_dir):
os.makedirs(data_dir)
filename = url.split('/')[-1]
filepath = os.path.join(data_dir, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(data_dir)
def unpickle(file):
fo = open(file, 'rb')
if (sys.version_info >= (3, 0)):
import pickle
d = pickle.load(fo, encoding='latin1')
else:
import cPickle
d = cPickle.load(fo)
fo.close()
return {'x': d['data'].reshape((10000,3,32,32)), 'y': np.array(d['labels']).astype(np.uint8)}
def load(data_dir, subset='train'):
maybe_download_and_extract(data_dir)
if subset=='train':
train_data = [unpickle(os.path.join(data_dir,'cifar-10-batches-py','data_batch_' + str(i))) for i in range(1,6)]
trainx = np.concatenate([d['x'] for d in train_data],axis=0)
trainy = np.concatenate([d['y'] for d in train_data],axis=0)
return trainx, trainy
elif subset=='test':
test_data = unpickle(os.path.join(data_dir,'cifar-10-batches-py','test_batch'))
testx = test_data['x']
testy = test_data['y']
return testx, testy
else:
raise NotImplementedError('subset should be either train or test')
class DataLoader(object):
""" an object that generates batches of CIFAR-10 data for training """
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False, return_labels=False, filter_labels=None,final=8):
"""
- data_dir is location where to store files
- subset is train|test
- batch_size is int, of #examples to load at once
- rng is np.random.RandomState object for reproducibility
"""
self.data_dir = data_dir
self.batch_size = batch_size
self.shuffle = shuffle
self.return_labels = return_labels
# create temporary storage for the data, if not yet created
if not os.path.exists(data_dir):
print('creating folder', data_dir)
os.makedirs(data_dir)
# load CIFAR-10 training data to RAM
self.data, self.labels = load(os.path.join(data_dir,'cifar-10-python'), subset=subset)
if final > 0:
self.data = np.tile(self.data[-final:],[3,1,1,1])
self.labels = np.tile(self.labels[-final:],[3])
if filter_labels is not None:
selected_idx = self.labels == filter_labels
self.data = self.data[selected_idx]
self.labels = self.labels[selected_idx]
print('There are %d samples left' % self.labels.size)
self.data = np.transpose(self.data, (0,2,3,1)) # (N,3,32,32) -> (N,32,32,3)
self.p = 0 # pointer to where we are in iteration
self.rng = np.random.RandomState(1) if rng is None else rng
def get_observation_size(self):
return self.data.shape[1:]
def get_num_labels(self):
return np.amax(self.labels) + 1
def reset(self):
self.p = 0
def __iter__(self):
return self
def __next__(self, n=None):
""" n is the number of examples to fetch """
if n is None: n = self.batch_size
# on first iteration lazily permute all data
if self.p == 0 and self.shuffle:
inds = self.rng.permutation(self.data.shape[0])
self.data = self.data[inds]
self.labels = self.labels[inds]
# on last iteration reset the counter and raise StopIteration
if self.p + n > self.data.shape[0]:
self.reset() # reset for next time we get called
raise StopIteration
# on intermediate iterations fetch the next batch
x = self.data[self.p : self.p + n]
y = self.labels[self.p : self.p + n]
self.p += self.batch_size
if self.return_labels:
return x,y
else:
return x
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)

View File

@@ -0,0 +1,36 @@
import cifar10_data
import argparse
import plotting
import numpy as np
data_dir = '/home/tim/data'
parser = argparse.ArgumentParser()
parser.add_argument('--save_dir', type=str, default='./log')
parser.add_argument('--data_dir', type=str, default='/home/tim/data')
parser.add_argument('--plot_title', type=str, default=None)
args = parser.parse_args()
print(args)
data_dir = args.data_dir
trainx, trainy = cifar10_data.load(data_dir)
ids = [[] for i in range(10)]
for i in range(trainx.shape[0]):
if len(ids[trainy[i]]) < 10:
ids[trainy[i]].append(i)
if np.alltrue(np.asarray([len(_ids) >= 10 for _ids in ids])):
break
images = np.zeros((10*10,32,32,3),dtype='uint8')
for i in range(len(ids)):
for j in range(len(ids[i])):
images[10*j+i] = trainx[ids[i][j]].transpose([1,2,0])
print(ids)
img_tile = plotting.img_tile(images, aspect_ratio=1.0, border_color=1.0, stretch=True)
img = plotting.plot_img(img_tile, title=args.plot_title if args.plot_title != 'None' else None)
plotting.plt.savefig(args.save_dir + '/cifar10_orig_images.png')
plotting.plt.close('all')

View File

@@ -0,0 +1,137 @@
"""
Utilities for loading the small ImageNet dataset used in Oord et al.
use scripts/png_to_npz.py to create the npz files
The code here currently assumes that the preprocessing was done manually.
TODO: make automatic and painless
"""
import os
import sys
import tarfile
from six.moves import urllib
import numpy as np
from scipy.misc import imread
def fetch(url, filepath):
filename = url.split('/')[-1]
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
print(url)
filepath, headers = urllib.request.urlretrieve(url, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
def maybe_download_and_extract(data_dir):
# more info on the dataset at http://image-net.org/small/download.php
# downloads and extracts the two tar files for train/val
train_dir = os.path.join(data_dir, 'train_32x32')
if not os.path.exists(train_dir):
train_url = 'http://image-net.org/small/train_32x32.tar' # 4GB
filepath = os.path.join(data_dir, 'train_32x32.tar')
fetch(train_url, filepath)
print('unpacking the tar file', filepath)
tarfile.open(filepath, 'r').extractall(data_dir) # creates the train_32x32 folder
test_dir = os.path.join(data_dir, 'valid_32x32')
if not os.path.exists(test_dir):
test_url = 'http://image-net.org/small/valid_32x32.tar' # 154MB
filepath = os.path.join(data_dir, 'valid_32x32.tar')
fetch(test_url, filepath)
print('unpacking the tar file', filepath)
tarfile.open(filepath, 'r').extractall(data_dir) # creates the valid_32x32 folder
def maybe_preprocess(data_dir):
npz_file = os.path.join(data_dir, 'imgnet_32x32.npz')
if os.path.exists(npz_file):
return # all good
trainx = []
train_dir = os.path.join(data_dir, 'train_32x32')
for f in os.listdir(train_dir):
if f.endswith('.png'):
print('reading', f)
filepath = os.path.join(train_dir, f)
trainx.append(imread(filepath).reshape((1,32,32,3)))
trainx = np.concatenate(trainx, axis=0)
testx = []
test_dir = os.path.join(data_dir, 'valid_32x32')
for f in os.listdir(test_dir):
if f.endswith('.png'):
print('reading', f)
filepath = os.path.join(test_dir, f)
testx.append(imread(filepath).reshape((1,32,32,3)))
testx = np.concatenate(testx, axis=0)
np.savez(npz_file, trainx=trainx, testx=testx)
def load(data_dir, subset='train'):
if not os.path.exists(data_dir):
print('creating folder', data_dir)
os.makedirs(data_dir)
maybe_download_and_extract(data_dir)
maybe_preprocess(data_dir)
imagenet_data = np.load(os.path.join(data_dir,'imgnet_32x32.npz'))
return imagenet_data['trainx'] if subset == 'train' else imagenet_data['testx']
class DataLoader(object):
""" an object that generates batches of CIFAR-10 data for training """
def __init__(self, data_dir, subset, batch_size, rng=None, shuffle=False):
"""
- data_dir is location where the files are stored
- subset is train|test
- batch_size is int, of #examples to load at once
- rng is np.random.RandomState object for reproducibility
"""
self.data_dir = data_dir
self.batch_size = batch_size
self.shuffle = shuffle
self.data = load(os.path.join(data_dir,'small_imagenet'), subset=subset)
self.p = 0 # pointer to where we are in iteration
self.rng = np.random.RandomState(1) if rng is None else rng
def get_observation_size(self):
return self.data.shape[1:]
def reset(self):
self.p = 0
def __iter__(self):
return self
def __next__(self, n=None):
""" n is the number of examples to fetch """
if n is None: n = self.batch_size
# on first iteration lazily permute all data
if self.p == 0 and self.shuffle:
inds = self.rng.permutation(self.data.shape[0])
self.data = self.data[inds]
# on last iteration reset the counter and raise StopIteration
if self.p + n > self.data.shape[0]:
self.reset() # reset for next time we get called
raise StopIteration
# on intermediate iterations fetch the next batch
x = self.data[self.p : self.p + n]
self.p += self.batch_size
return x
next = __next__ # Python 2 compatibility (https://stackoverflow.com/questions/29578469/how-to-make-an-object-both-a-python2-and-python3-iterator)

12
DSL_ImgProcess/example.sh Normal file
View File

@@ -0,0 +1,12 @@
export PATH=/usr/anaconda2/bin:$PATH
#export LD_LIBRARY_PATH=~/Downloads/cuda/lib64:"$LD_LIBRARY_PATH"
export CUDA_VISIBLE_DEVICES=0,1,2,3
# train both models jointly (tested with 4 GPUs)
python monitor.py --data_dir=./cifar10_data --save_dir=./checkpoints_All --batch_size=12 --show_interval=10 --learning_rate=1e-4 --load_params=params_345uidx480248.ckpt --learning_rate_I2L=2e-4 --trade_off_I2L=30. --trade_off_L2I=1.5 --save_interval=1 --bias=0.02 --valid_interval=8 --lr_decay=1. --nr_gpu=4
# train the image classifier only (tested with a single GPU)
# python monitor.py --data_dir=./cifar10_data --save_dir=./checkpoints_I2L --batch_size=12 --show_interval=10 --learning_rate=1e-4 --load_params=params_345uidx480248.ckpt --learning_rate_I2L=2e-4 --trade_off_I2L=30. --trade_off_L2I=1.5 --save_interval=1 --bias=0.02 --valid_interval=8 --lr_decay=1. --nr_gpu=1 --oneside=I2L
# train the image generator only (tested with 2 GPUs)
# python monitor.py --data_dir=./cifar10_data --save_dir=./checkpoints_L2I --batch_size=12 --show_interval=10 --learning_rate=1e-4 --load_params=params_345uidx480248.ckpt --learning_rate_I2L=2e-4 --trade_off_I2L=30. --trade_off_L2I=1.5 --save_interval=1 --bias=0.02 --valid_interval=8 --lr_decay=1. --nr_gpu=2 --oneside=L2I
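# After training, checkpoints are written to --save_dir as params_<epoch>uidx<step>.ckpt;
# evaluate them with batch_test_script_mainbody.sh, or directly, e.g. (names are illustrative):
# python monitor.py --data_dir=./cifar10_data --save_dir=./checkpoints_All --batch_size=12 --load_params=params_<epoch>uidx<step>.ckpt --mode=I2L --useSoftLabel=0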

462
DSL_ImgProcess/monitor.py Normal file
View File

@@ -0,0 +1,462 @@
import time
import sys
import os
import cifar_input
import numpy as np
import resnet_model_basic as resnet_model
import tensorflow as tf
import data.cifar10_data as cifar10_data
import data2.cifar10_data as cifar_10data2
import json
from worker_I2L import worker_I2L, lr_I2L
from worker_L2I import worker_L2I
import argparse
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# data I/O
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
parser.add_argument('-t', '--save_interval', type=int, default=2, help='Every how many epochs to write checkpoint/samples?')
parser.add_argument('--valid_interval', type=int, default=1, help='Every how many epochs to valid?')
parser.add_argument('-r', '--load_params', type=str, default=None, help='The detailed model name')
# model
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
parser.add_argument('--trade_off_I2L', type=float, default=5e-3, help='the consistence tradeoff')
parser.add_argument('--trade_off_L2I', type=float, default=0.3, help='the consistence tradeoff')
parser.add_argument('-w', '--use_wide_resnet', dest='use_wide_resnet', action='store_true', help='Condition generative model on labels?')
parser.add_argument('--show_interval', type=int, default=100, help='Batch size during training per GPU')
parser.add_argument('--steal_params_L2I', type=str, default=None, help='Provide the file, which stores the warm values of L2I')
parser.add_argument('--steal_params_I2L', type=str, default=None, help='Provide the file, which stores the warm values of I2L')
parser.add_argument('--oneside', dest='oneside', type=str, default=None, help='None | I2L | L2I')
# optimization
parser.add_argument('--learning_rate_I2L', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
parser.add_argument('-g', '--nr_gpu', type=int, default=1, help='How many GPUs to distribute the training across?')
parser.add_argument('--num_classes', type=int, default=10, help='number of classes')
parser.add_argument('--L2I_normalization', dest='L2I_normalization', action='store_true', help='Use L2I normalization')
parser.add_argument('--L2IuseSGD', dest='L2IuseSGD', action='store_true', help='Whether to use pure SGD to tune L2I')
parser.add_argument('--useSoftLabel', type=int, default=0, help='0: no use | 1: use | 2: -0.1')
# Enabling "useSoftLabel" or not does not make a significant difference, so our suggestion is to leave it off. Also, useSoftLabel has not been tested under multi-GPU settings.
parser.add_argument('--bias', type=float, default=0.0, help='introduce the bias')
# evaluation
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
parser.add_argument('--mode', type=str, default='train', help='train | I2L | L2I | ImgGen' )
# reproducibility
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
args = parser.parse_args()
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
DataLoader = cifar10_data.DataLoader
DataLoader_train = cifar_10data2.DataLoader
rng = np.random.RandomState(args.seed)
train_data_iterator = DataLoader_train(args.data_dir, 'train', args.batch_size * args.nr_gpu,
'./cifar10_data/cifar10-LMscore',
rng=rng, shuffle=True, return_labels=True)
test_data_iterator = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=True)
class monitor(object):
def __init__(self):
if not os.path.exists(args.save_dir):
os.makedirs(args.save_dir)
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
self.Worker_I2L = worker_I2L(args)
self.image_LM = [tf.placeholder(tf.float32, shape=(args.batch_size,)) for _ in range(args.nr_gpu)]
self.trade_off_I2L = tf.placeholder(tf.float32, shape=())
self.trade_off_L2I = tf.placeholder(tf.float32, shape=())
self.I2L_grads = []
self.train_uidx = 0
self._build_onestep(oneside=args.oneside)
self.lr_l2i = self.Worker_L2I.args.learning_rate
self.current_epoch = 0
self.assign_op = lambda ref_, val_: tf.assign(ref_, val_)
def get_I2L_lr(self):
if args.use_wide_resnet:
step_wise = [60, 120, 160]
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
if self.current_epoch < step_wise[0]:
return args.learning_rate_I2L
elif self.current_epoch < step_wise[1]:
return args.learning_rate_I2L * 0.2
elif self.current_epoch < step_wise[2]:
return args.learning_rate_I2L * 0.04
else:
return args.learning_rate_I2L * 0.008
else:
step_wise = [102, 153, 204]
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
if self.current_epoch < step_wise[0]:
return args.learning_rate_I2L
elif self.current_epoch < step_wise[1]:
return args.learning_rate_I2L * 0.1
elif self.current_epoch < step_wise[2]:
return args.learning_rate_I2L * 0.01
else:
return args.learning_rate_I2L * 0.001
def get_L2I_lr(self):
self.lr_l2i *= self.Worker_L2I.args.lr_decay
return self.lr_l2i
def __del__(self):
self.sess.close()
def _build_onestep(self, oneside=None):
# Calculate all the costs and gradients
        # Let us NOT use weight decay, since we already have a regularization term
# self.weightDecay_I2L = self.Worker_I2L.model.GetWeightDecay()
self.nlls_I2L = self.Worker_I2L.model.nlls
        self.soft_labels = self.Worker_I2L.model.predictions # these are the soft labels (optional; they may not be used)
nlls_L2I, loss_gen_test = self.Worker_L2I.GetLoss()
nlls_L2I_train_bpd_list, nlls_L2I_test_bpd_list, consistent_loss_list = \
[None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)]
overall_cost_I2L_list, overall_cost_L2I_list, nlls_I2L_batchMean_list = \
[None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)]
grads_I2L_list, grads_L2I_list = [None for _ in xrange(args.nr_gpu)], [None for _ in xrange(args.nr_gpu)]
for i in range(args.nr_gpu):
with tf.device('/gpu:%d' % i):
nlls_L2I_train_bpd = tf.reduce_mean(nlls_L2I[i]) / (np.log(2.) * 32 * 32 * 3 )
nlls_L2I_test_bpd = tf.reduce_mean(loss_gen_test[i]) / (np.log(2.) * 32 * 32 * 3 * args.batch_size)
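                # Duality consistency term (cf. dual supervised learning): it penalizes deviation
                # from the identity log p(x) + log p(y|x) = log p(y) + log p(x|y). Here image_LM is
                # the precomputed marginal score of the image (the LMscore fed by the data loader,
                # in bits, hence the log(2) conversion), nlls_I2L / nlls_L2I are the classifier /
                # generator negative log-likelihoods, and tf.log(0.1) corresponds to a uniform
                # prior over the 10 CIFAR-10 classes.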
if args.L2I_normalization:
consistent_loss = tf.reduce_mean(
(self.image_LM[i] * np.log(2.) + self.nlls_I2L[i] + tf.log(0.1) - nlls_L2I[i] / (32. * 32 * 3)) ** 2.)
else:
consistent_loss = tf.reduce_mean(
(self.image_LM[i] * np.log(2.) + (self.nlls_I2L[i] + tf.log(0.1) - nlls_L2I[i]) / 3072. + args.bias) ** 2.)
nlls_L2I_train_bpd_list[i] = nlls_L2I_train_bpd
nlls_L2I_test_bpd_list[i] = nlls_L2I_test_bpd
consistent_loss_list[i] = consistent_loss
nlls_I2L_batchMean = tf.reduce_mean(self.nlls_I2L[i])
overall_cost_I2L = nlls_I2L_batchMean + (self.trade_off_I2L ** 2.) * consistent_loss
overall_cost_L2I = nlls_L2I_train_bpd + (self.trade_off_L2I ** 2.) * consistent_loss
nlls_I2L_batchMean_list[i] = nlls_I2L_batchMean
overall_cost_I2L_list[i] = overall_cost_I2L
overall_cost_L2I_list[i] = overall_cost_L2I
if oneside is None:
grads_I2L_list[i] = tf.gradients(overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
grads_L2I_list[i] = tf.gradients(overall_cost_L2I, self.Worker_L2I.all_params)
elif oneside == 'I2L':
grads_I2L_list[i] = tf.gradients(overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
elif oneside == 'L2I':
grads_L2I_list[i] = tf.gradients(overall_cost_L2I, self.Worker_L2I.all_params)
with tf.device('/gpu:0'):
for i in range(1, args.nr_gpu):
nlls_L2I_train_bpd_list[0] += nlls_L2I_train_bpd_list[i]
nlls_L2I_test_bpd_list[0] += nlls_L2I_test_bpd_list[i]
consistent_loss_list[0] += consistent_loss_list[i]
overall_cost_I2L_list[0] += overall_cost_I2L_list[i]
overall_cost_L2I_list[0] += overall_cost_L2I_list[i]
nlls_I2L_batchMean_list[0] += nlls_I2L_batchMean_list[i]
if oneside != 'L2I':
for j in range(len(grads_I2L_list[0])):
grads_I2L_list[0][j] += grads_I2L_list[i][j]
if oneside != 'I2L':
for j in range(len(grads_L2I_list[0])):
grads_L2I_list[0][j] += grads_L2I_list[i][j]
if oneside != 'L2I':
for j in range(len(grads_I2L_list[0])):
grads_I2L_list[0][j] /= (args.nr_gpu * 1.)
if oneside != 'I2L':
for j in range(len(grads_L2I_list[0])):
grads_L2I_list[0][j] /= (args.nr_gpu * 1.)
if oneside is None:
self.Worker_I2L.model.Update(grads_I2L_list[0])
self.Worker_L2I.Update(grads_L2I_list[0])
elif oneside == 'I2L':
self.Worker_I2L.model.Update(grads_I2L_list[0])
elif oneside == 'L2I':
self.Worker_L2I.Update(grads_L2I_list[0])
self.nlls_L2I_train_bpd = nlls_L2I_train_bpd_list[0] / args.nr_gpu
self.nlls_L2I_test_bpd = nlls_L2I_test_bpd_list[0] / args.nr_gpu
self.consistent_loss = consistent_loss_list[0] /args.nr_gpu
self.nlls_I2L_batchMean = nlls_I2L_batchMean_list[0] / args.nr_gpu
self.overall_cost_I2L = overall_cost_I2L_list[0] / args.nr_gpu
self.overall_cost_L2I = overall_cost_L2I_list[0] / args.nr_gpu
# Build the sampler
self.Worker_L2I.build_sample_from_model()
def step(self, images, labels, LMscores, currEpoch, use_soft_label=0):
fetches = [self.nlls_I2L_batchMean, self.nlls_L2I_train_bpd, self.nlls_L2I_test_bpd, self.consistent_loss,
self.overall_cost_I2L, self.overall_cost_L2I]
if args.oneside is None:
fetches.append(self.Worker_I2L.model.update_ops)
fetches.append(self.Worker_L2I.update_ops)
elif args.oneside == 'I2L':
fetches.append(self.Worker_I2L.model.update_ops)
elif args.oneside == 'L2I':
fetches.append(self.Worker_L2I.update_ops)
else:
            raise Exception('Currently, only None | I2L | L2I are supported')
feed_dict={
# self.Worker_I2L.model.input_image: images.astype('float32'),
# self.Worker_I2L.model.input_label: labels[:,None],
self.Worker_I2L.model.lrn_rate: self.get_I2L_lr(),
self.Worker_I2L.model.needImgAug: True,
self.Worker_L2I.tf_lr: self.get_L2I_lr(),
# self.image_LM: LMscores,
self.trade_off_I2L: args.trade_off_I2L,
self.trade_off_L2I: args.trade_off_L2I
}
splitted_image = np.split(images.astype('float32'), args.nr_gpu)
splitted_label = np.split(labels, args.nr_gpu)
splitted_LM = np.split(LMscores, args.nr_gpu)
feed_dict.update({self.image_LM[i]: splitted_LM[i] for i in range(args.nr_gpu)})
feed_dict.update({self.Worker_I2L.model.input_image[i]: splitted_image[i] for i in range(args.nr_gpu)})
feed_dict.update({self.Worker_I2L.model.input_label[i]: splitted_label[i][:,None] for i in range(args.nr_gpu)})
# Deal with xs and ys:
x = np.cast[np.float32]((images - 127.5) / 127.5)
x = np.split(x, self.Worker_L2I.args.nr_gpu)
feed_dict.update({self.Worker_L2I.xs[i] : x[i] for i in range(self.Worker_L2I.args.nr_gpu)})
if (use_soft_label == 2) or (use_soft_label == 1 and np.random.rand() < 0.8):
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.needImgAug: True
})
if use_soft_label == 2:
soft_labels_ -= 0.1
feed_dict.update({self.Worker_L2I.hs[i]: soft_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
else:
y = np.split(labels, self.Worker_L2I.args.nr_gpu)
feed_dict.update({self.Worker_L2I.ys[i] : y[i] for i in range(self.Worker_L2I.args.nr_gpu)})
if args.oneside is None:
nlls_I2L_mean, nlls_L2I_mean, nlls_L2I_mean_test, consistent_loss, overall_cost_I2L, overall_cost_L2I, _, _ = \
self.sess.run(fetches, feed_dict)
else:
nlls_I2L_mean, nlls_L2I_mean, nlls_L2I_mean_test, consistent_loss, overall_cost_I2L, overall_cost_L2I, _, = \
self.sess.run(fetches, feed_dict)
if self.train_uidx % args.show_interval == (args.show_interval - 1):
print('iter={}, I2L={}, L2I={}/{}, Consistent={}, Overall_I2L={}, Overall_L2I={}'.format(
self.train_uidx, '{0:.4f}'.format(nlls_I2L_mean), '{0:.4f}'.format(nlls_L2I_mean),
'{0:.4f}'.format(nlls_L2I_mean_test), '{0:.4f}'.format(consistent_loss), '{0:.4f}'.format(overall_cost_I2L),
'{0:.4f}'.format(overall_cost_L2I)
))
self.train_uidx += 1
def data_dependent_init(self):
global_init = tf.global_variables_initializer()
_images, _labels, _ = train_data_iterator.next(self.Worker_L2I.args.init_batch_size)
initializer_dict = {
self.Worker_L2I.x_init: (np.cast[np.float32](_images) - 127.5)/127.5,
self.Worker_L2I.y_init: _labels
}
train_data_iterator.reset()
self.sess.run(global_init, initializer_dict)
def L2I_TestNll(self, alpha_=1.):
all_testnll = []
for images, labels in test_data_iterator:
feed_dict = {}
x = np.cast[np.float32]((images - 127.5) / 127.5)
x = np.split(x, args.nr_gpu)
feed_dict.update({self.Worker_L2I.xs[i]: x[i] for i in range(args.nr_gpu)})
if args.useSoftLabel == 1:
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.needImgAug: False
})
one_hot_labels_ = np.zeros((args.batch_size, 10), dtype=np.float32)
one_hot_labels_[np.arange(args.batch_size), labels] = 1.
feed_dict.update({self.Worker_L2I.hs[i]: (1. - alpha_)*soft_labels_+alpha_*one_hot_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
else:
y = np.split(labels, args.nr_gpu)
feed_dict.update({self.Worker_L2I.ys[i]: y[i] for i in range(args.nr_gpu)})
all_testnll.append(self.sess.run([self.nlls_L2I_test_bpd], feed_dict))
avg_testnll = np.mean(all_testnll)
print('testnll=%f' % avg_testnll)
def build_saver(self):
self.saver = tf.train.Saver(max_to_keep=None)
#tf.reset_default_graph()
if args.load_params is not None:
print('Reload from ', args.save_dir)
self.saver.restore(self.sess, args.save_dir + '/' + args.load_params)
print('Done')
else:
print('Start to initialize the two models')
self.data_dependent_init()
print('Done')
def _steal_L2I(self):
if args.steal_params_L2I is not None:
            # try to retrieve parameters that do not belong to the I2L part (names not starting with "I2L/") from a well-trained model
success_ = 0
import pickle
with open(args.steal_params_L2I, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(tf.global_variables()):
if v.name in old_model and not v.name.startswith('I2L/'):
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(tf.global_variables()))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
'''
# this version can only reload "trainable vars"
success_ = 0
import pickle
with open(args.steal_params_L2I, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(self.Worker_L2I.all_params):
if v.name in old_model:
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(self.Worker_L2I.all_params))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
'''
def _steal_I2L(self):
if args.steal_params_I2L is not None:
# try to retrieve parameters from a well-trained model
success_ = 0
import pickle
with open(args.steal_params_I2L, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
if v.name[4:] in old_model:
self.sess.run(self.assign_op(v, old_model[v.name[4:]][0]))
success_ += 1
print(vidx, len(self.Worker_I2L.model.all_variables))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_I2L))
def _reload_from_pkl(self, filename):
success_ = 0
import pickle
with open(filename, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
if v.name in old_model:
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(self.Worker_I2L.model.all_variables))
for vidx, v in enumerate(self.Worker_L2I.all_params):
if v.name in old_model:
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(tf.global_variables()))
print('Retrieve %d / %d parameters from model ' % (success_, len(old_model)))
def train(self):
# do not delete the following three lines
# self._reload_from_pkl('warm_values')
# self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
# return
if args.load_params is None:
self._steal_L2I()
self._steal_I2L()
self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
for epoch in range(args.max_epochs):
self.current_epoch = epoch
for images, labels, LMscores in train_data_iterator:
self.step(images, labels, LMscores, epoch, args.useSoftLabel)
# if epoch % args.valid_interval == (args.valid_interval - 1):
# self.Worker_I2L.Valid(test_data_iterator, self.sess)
# self.L2I_TestNll()
if epoch % args.save_interval == (args.save_interval - 1):
self.saver.save(self.sess, args.save_dir + '/params_' + str(epoch) + 'uidx' + str(self.train_uidx) + '.ckpt')
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
def valid_I2L(self):
self.Worker_I2L.Valid(test_data_iterator, self.sess)
def valid_L2I(self):
self.L2I_TestNll()
'''
for alpha_ in range(11):
print('alpha=%f' % (alpha_ * 0.1))
self.L2I_TestNll(alpha_ * 0.1)
'''
def valid_ImgGen(self):
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
def dump_model_to_pkl(self):
warm_models = {}
print('Classifier')
classifier_size = len(self.Worker_I2L.model.all_variables)
for idx, v in enumerate(self.Worker_I2L.model.all_variables):
vv = self.sess.run([v])
warm_models[v.name] = vv
if idx % 10 == 0:
print('{}-{}'.format(idx, classifier_size))
print('Generator')
generator_size = len(self.Worker_L2I.all_params)
for idx, v in enumerate(self.Worker_L2I.all_params):
vv = self.sess.run([v])
warm_models[v.name] = vv
if idx % 10 == 0:
print('{}-{}'.format(idx, generator_size))
import pickle
with open('warm_values', 'wb') as f:
pickle.dump(warm_models, f, protocol=2)
def main(_):
#L2Ipath='./pxpp_c_2.95/params_cifar.ckpt'
monitor_ = monitor()
monitor_.build_saver()
if args.mode == 'train':
monitor_.train()
elif args.mode == 'I2L':
monitor_.valid_I2L()
elif args.mode == 'L2I':
monitor_.valid_L2I()
elif args.mode == 'ImgGen':
monitor_.valid_ImgGen()
else:
        print('Unsupported mode: ' + args.mode)
if __name__ == '__main__':
tf.app.run()

View File

@@ -0,0 +1,332 @@
import time
import sys
import os
import cifar_input
import numpy as np
import resnet_model_basic as resnet_model
import tensorflow as tf
import data.cifar10_data as cifar10_data
import data2.cifar10_data as cifar_10data2
import json
from worker_I2L import worker_I2L, lr_I2L
from worker_L2I import worker_L2I
import argparse
import time
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# data I/O
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
parser.add_argument('-t', '--save_interval', type=int, default=2, help='Every how many epochs to write checkpoint/samples?')
parser.add_argument('--valid_interval', type=int, default=1, help='Every how many epochs to valid?')
parser.add_argument('-r', '--load_params', type=str, default=None, help='The detailed model name')
# model
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
parser.add_argument('--trade_off_I2L', type=float, default=5e-3, help='the consistency trade-off for I2L')
parser.add_argument('--trade_off_L2I', type=float, default=0.3, help='the consistency trade-off for L2I')
parser.add_argument('-w', '--use_wide_resnet', dest='use_wide_resnet', action='store_true', help='Use the wide ResNet variant for the I2L classifier?')
parser.add_argument('--show_interval', type=int, default=100, help='Every how many iterations to print training statistics?')
parser.add_argument('--steal_params_L2I', type=str, default=None, help='Provide the file, which stores the warm values of L2I')
parser.add_argument('--steal_params_I2L', type=str, default=None, help='Provide the file, which stores the warm values of I2L')
parser.add_argument('--freezeL2I', dest='freezeL2I', action='store_true', help='Freeze L2I to quickly train L2I')
# optimization
parser.add_argument('--learning_rate_I2L', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
parser.add_argument('-g', '--nr_gpu', type=int, default=1, help='How many GPUs to distribute the training across?')
parser.add_argument('--num_classes', type=int, default=10, help='number of classes')
parser.add_argument('--L2I_normalization', dest='L2I_normalization', action='store_true', help='Use L2I normalization')
parser.add_argument('--L2IuseSGD', dest='L2IuseSGD', action='store_true', help='Whether to use pure SGD to tune L2I')
parser.add_argument('--useSoftLabel', type=int, default=0, help='0: no use | 1: use | 2: -0.1')
parser.add_argument('--bias', type=float, default=0.0, help='introduce the bias')
# evaluation
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
parser.add_argument('--mode', type=str, default='train', help='train | I2L | L2I | ImgGen' )
# reproducibility
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
args = parser.parse_args()
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
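# Example invocation (a hypothetical sketch; script name and paths are placeholders, only flags defined above are used):
#   python <this script> --data_dir=./cifar10_data --save_dir=checkpoints --batch_size=12 --nr_gpu=1 --mode=train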
DataLoader = cifar10_data.DataLoader
DataLoader_train = cifar_10data2.DataLoader
rng = np.random.RandomState(args.seed)
train_data_iterator = DataLoader_train(args.data_dir, 'train', args.batch_size * args.nr_gpu, './cifar10_data/cifar10-LMscore', './cifar10_data/cifar10-CLMscore', rng=rng, shuffle=True, return_labels=True)
test_data_iterator = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=True)
class monitor(object):
def __init__(self):
if not os.path.exists(args.save_dir):
os.makedirs(args.save_dir)
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
self.Worker_I2L = worker_I2L(args)
self.image_LM = tf.placeholder(tf.float32, shape=(args.batch_size,))
self.trade_off_I2L = tf.placeholder(tf.float32, shape=())
self.trade_off_L2I = tf.placeholder(tf.float32, shape=())
self.I2L_grads = []
self.train_uidx = 0
self._build_onestep()
self.lr_l2i = self.Worker_L2I.args.learning_rate
self.current_epoch = 0
self.assign_op = lambda ref_, val_: tf.assign(ref_, val_)
def get_I2L_lr(self):
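# Step-wise LR schedule for the classifier: drop at epochs [60, 120, 160]
# (wide ResNet, factor 0.2 per step) or [102, 153, 204] (default ResNet,
# factor 0.1 per step).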
if args.use_wide_resnet:
step_wise = [60, 120, 160]
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
if self.current_epoch < step_wise[0]:
return args.learning_rate_I2L
elif self.current_epoch < step_wise[1]:
return args.learning_rate_I2L * 0.2
elif self.current_epoch < step_wise[2]:
return args.learning_rate_I2L * 0.04
else:
return args.learning_rate_I2L * 0.008
else:
step_wise = [102, 153, 204]
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
if self.current_epoch < step_wise[0]:
return args.learning_rate_I2L
elif self.current_epoch < step_wise[1]:
return args.learning_rate_I2L * 0.1
elif self.current_epoch < step_wise[2]:
return args.learning_rate_I2L * 0.01
else:
return args.learning_rate_I2L * 0.001
def get_L2I_lr(self):
self.lr_l2i *= self.Worker_L2I.args.lr_decay
return self.lr_l2i
def __del__(self):
self.sess.close()
def _build_onestep(self):
# Calculate all the costs and gradients
# Let us NOT use weight decay, since we already have a regularization term
# self.weightDecay_I2L = self.Worker_I2L.model.GetWeightDecay()
self.nlls_I2L = self.Worker_I2L.model.nlls
self.soft_labels = self.Worker_I2L.model.predictions # this is the soft labels [optional, may not use it]
nlls_L2I, loss_gen_test = self.Worker_L2I.GetLoss()
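# Bits-per-dimension conversion: divide the generator NLLs by log(2) and by
# the 32 * 32 * 3 sub-pixels of a CIFAR image (with the extra nr_gpu /
# batch_size factors matching how the losses are aggregated).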
self.nlls_L2I_train_bpd = tf.reduce_mean(nlls_L2I) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu )
self.nlls_L2I_test_bpd = tf.reduce_mean(loss_gen_test) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu * args.batch_size)
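# Consistency loss: squared residual combining the marginal image LM score
# (bits converted to nats via log 2), the classifier NLL nlls_I2L, and the
# uniform label prior log(0.1) over the 10 classes; the normalized variant
# also subtracts the generator NLL per sub-pixel.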
if args.L2I_normalization:
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + self.nlls_I2L + tf.log(0.1) - nlls_L2I / (32. * 32 * 3)) ** 2.)
else:
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + (self.nlls_I2L + tf.log(0.1))/3072. + args.bias) ** 2.)
self.nlls_I2L_batchMean = tf.reduce_mean(self.nlls_I2L)
self.overall_cost_I2L = self.nlls_I2L_batchMean + (self.trade_off_I2L ** 2.) * self.consistent_loss
self.overall_cost_L2I = self.nlls_L2I_train_bpd + (self.trade_off_L2I ** 2.) * self.consistent_loss
#self.overall_cost_L2I = self.nlls_I2L_batchMean + self.nlls_L2I_train_bpd + self.weightDecay_I2L + self.trade_off * self.consistent_loss
grads_I2L = tf.gradients(self.overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
grads_L2I = tf.gradients(self.overall_cost_L2I, self.Worker_L2I.all_params)
# Update the parameters
self.Worker_I2L.model.Update(grads_I2L)
self.Worker_L2I.Update(grads_L2I, args.L2IuseSGD)
# Build the sampler
self.Worker_L2I.build_sample_from_model()
def step(self, images, labels, LMscores, currEpoch, use_soft_label=0):
fetches = [self.nlls_I2L_batchMean, self.consistent_loss,
self.overall_cost_I2L,
self.Worker_I2L.model.update_ops]
feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.input_label: labels[:,None],
self.Worker_I2L.model.lrn_rate: self.get_I2L_lr(),
self.Worker_I2L.model.needImgAug: True,
self.Worker_L2I.tf_lr: self.get_L2I_lr(),
self.image_LM: LMscores,
self.trade_off_I2L: args.trade_off_I2L,
self.trade_off_L2I: args.trade_off_L2I
}
# Deal with xs and ys:
x = np.cast[np.float32]((images - 127.5) / 127.5)
x = np.split(x, self.Worker_L2I.args.nr_gpu)
feed_dict.update({self.Worker_L2I.xs[i] : x[i] for i in range(self.Worker_L2I.args.nr_gpu)})
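# Conditioning for the generator: feed the classifier's soft predictions as h
# (always when useSoftLabel == 2, with prob. 0.8 when == 1; shifted by -0.1
# for == 2), otherwise feed the hard labels y.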
if (use_soft_label == 2) or (use_soft_label == 1 and np.random.rand() < 0.8):
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.needImgAug: True
})
if use_soft_label == 2:
soft_labels_ -= 0.1
feed_dict.update({self.Worker_L2I.hs[i]: soft_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
else:
y = np.split(labels, self.Worker_L2I.args.nr_gpu)
feed_dict.update({self.Worker_L2I.ys[i] : y[i] for i in range(self.Worker_L2I.args.nr_gpu)})
nlls_I2L_mean, consistent_loss, overall_cost_I2L, _ = \
self.sess.run(fetches, feed_dict)
if self.train_uidx % args.show_interval == (args.show_interval - 1):
print('iter={}, I2L={}, Consistent={}, Overall_I2L={}'.format(
self.train_uidx, '{0:.6f}'.format(nlls_I2L_mean), '{0:.6f}'.format(consistent_loss), '{0:.6f}'.format(overall_cost_I2L),
))
self.train_uidx += 1
def data_dependent_init(self):
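# Data-dependent initialization of the weight-normalized PixelCNN++ layers:
# run the global initializer while feeding one init batch through
# x_init / y_init, then rewind the training iterator.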
global_init = tf.global_variables_initializer()
_images, _labels, _ = train_data_iterator.next(self.Worker_L2I.args.init_batch_size)
initializer_dict = {
self.Worker_L2I.x_init: (np.cast[np.float32](_images) - 127.5)/127.5,
self.Worker_L2I.y_init: _labels
}
train_data_iterator.reset()
self.sess.run(global_init, initializer_dict)
def L2I_TestNll(self, alpha_=1.):
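# Average test-set bits/dim of the generator; with useSoftLabel == 1 the
# conditioning is (1 - alpha) * soft predictions + alpha * one-hot labels,
# otherwise the hard labels are fed.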
all_testnll = []
for images, labels in test_data_iterator:
feed_dict = {}
x = np.cast[np.float32]((images - 127.5) / 127.5)
x = np.split(x, args.nr_gpu)
feed_dict.update({self.Worker_L2I.xs[i]: x[i] for i in range(args.nr_gpu)})
if args.useSoftLabel == 1:
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.needImgAug: False
})
one_hot_labels_ = np.zeros((args.batch_size, 10), dtype=np.float32)
one_hot_labels_[np.arange(args.batch_size), labels] = 1.
feed_dict.update({self.Worker_L2I.hs[i]: (1. - alpha_)*soft_labels_+alpha_*one_hot_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
else:
y = np.split(labels, args.nr_gpu)
feed_dict.update({self.Worker_L2I.ys[i]: y[i] for i in range(args.nr_gpu)})
all_testnll.append(self.sess.run([self.nlls_L2I_test_bpd], feed_dict))
avg_testnll = np.mean(all_testnll)
print('[L2I], testnll={0:.6f}'.format(avg_testnll))
def build_saver(self):
self.saver = tf.train.Saver(max_to_keep=None)
if args.load_params is not None:
print('Reload from ', args.save_dir)
self.saver.restore(self.sess, args.save_dir + '/' + args.load_params)
print('Done')
else:
print('Start to initialize the two models')
self.data_dependent_init()
print('Done')
def _steal_L2I(self):
if args.steal_params_L2I is not None:
# try to retrieve parameters outside the 'I2L/' scope from a well-trained model
success_ = 0
import pickle
with open(args.steal_params_L2I, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(tf.global_variables()):
if v.name in old_model and not v.name.startswith('I2L/'):
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(tf.global_variables()))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
'''
# this version can only reload "trainable vars"
success_ = 0
import pickle
with open(args.steal_params_L2I, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(self.Worker_L2I.all_params):
if v.name in old_model:
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(self.Worker_L2I.all_params))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
'''
def _steal_I2L(self):
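# Warm-start the classifier from a pickled {name: value} dict; the stored
# names carry no 'I2L/' scope prefix, so v.name[4:] strips it before lookup.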
if args.steal_params_I2L is not None:
# try to retrieve parameters from a well-trained model
success_ = 0
import pickle
with open(args.steal_params_I2L, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
if v.name[4:] in old_model:
self.sess.run(self.assign_op(v, old_model[v.name[4:]][0]))
success_ += 1
print(vidx, len(self.Worker_I2L.model.all_variables))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_I2L))
def train(self):
if args.load_params is None:
self._steal_L2I()
self._steal_I2L()
self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
for epoch in range(args.max_epochs):
self.current_epoch = epoch
for images, labels, LMscores, CLMscores in train_data_iterator:
self.step(images, labels, LMscores - CLMscores, epoch, args.useSoftLabel)
#if epoch % args.valid_interval == (args.valid_interval - 1):
#self.Worker_I2L.Valid(test_data_iterator, self.sess)
#self.L2I_TestNll()
if epoch % args.save_interval == (args.save_interval - 1):
self.saver.save(self.sess, args.save_dir + '/params_' + str(epoch) + 'uidx' + str(self.train_uidx) + '.ckpt')
#self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
def valid_I2L(self):
self.Worker_I2L.Valid(test_data_iterator, self.sess)
def valid_L2I(self):
self.L2I_TestNll()
'''
for alpha_ in range(11):
print('alpha=%f' % (alpha_ * 0.1))
self.L2I_TestNll(alpha_ * 0.1)
'''
def valid_ImgGen(self):
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
def main(_):
#L2Ipath='./pxpp_c_2.95/params_cifar.ckpt'
monitor_ = monitor()
monitor_.build_saver()
if args.mode == 'train':
monitor_.train()
elif args.mode == 'I2L':
monitor_.valid_I2L()
elif args.mode == 'L2I':
monitor_.valid_L2I()
elif args.mode == 'ImgGen':
monitor_.valid_ImgGen()
else:
print('Unsupported mode: ' + args.mode)
if __name__ == '__main__':
tf.app.run()


@ -0,0 +1,358 @@
import time
import sys
import os
import cifar_input
import numpy as np
import resnet_model_basic as resnet_model
import tensorflow as tf
import data.cifar10_data as cifar10_data
import data2.cifar10_data as cifar_10data2
import data4.cifar10_data as cifar_10data3
import json
from worker_I2L import worker_I2L, lr_I2L
from worker_L2I import worker_L2I
import argparse
import time
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# data I/O
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
parser.add_argument('-t', '--save_interval', type=int, default=2, help='Every how many epochs to write checkpoint/samples?')
parser.add_argument('--valid_interval', type=int, default=1, help='Every how many epochs to valid?')
parser.add_argument('-r', '--load_params', type=str, default=None, help='The detailed model name')
# model
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
parser.add_argument('--trade_off_I2L', type=float, default=5e-3, help='the consistency trade-off for I2L')
parser.add_argument('--trade_off_L2I', type=float, default=0.3, help='the consistency trade-off for L2I')
parser.add_argument('-w', '--use_wide_resnet', dest='use_wide_resnet', action='store_true', help='Use the wide ResNet variant for the I2L classifier?')
parser.add_argument('--show_interval', type=int, default=100, help='Every how many iterations to print training statistics?')
parser.add_argument('--steal_params_L2I', type=str, default=None, help='Provide the file, which stores the warm values of L2I')
parser.add_argument('--steal_params_I2L', type=str, default=None, help='Provide the file, which stores the warm values of I2L')
# optimization
parser.add_argument('--learning_rate_I2L', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
parser.add_argument('-g', '--nr_gpu', type=int, default=1, help='How many GPUs to distribute the training across?')
parser.add_argument('--num_classes', type=int, default=10, help='number of classes')
parser.add_argument('--L2I_normalization', dest='L2I_normalization', action='store_true', help='Use L2I normalization')
parser.add_argument('--L2IuseSGD', dest='L2IuseSGD', action='store_true', help='Whether to use pure SGD to tune L2I')
parser.add_argument('--useSoftLabel', type=int, default=0, help='0: no use | 1: use | 2: -0.1')
parser.add_argument('--bias', type=float, default=0.0, help='introduce the bias')
# evaluation
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
parser.add_argument('--mode', type=str, default='train', help='train | I2L | L2I | ImgGen' )
# reproducibility
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
args = parser.parse_args()
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
DataLoader = cifar_10data3.DataLoader
DataLoader_train = cifar_10data2.DataLoader
rng = np.random.RandomState(args.seed)
train_data_iterator = DataLoader_train(args.data_dir, 'train', args.batch_size * args.nr_gpu, './cifar10_data/cifar10-LMscore', rng=rng, shuffle=True, return_labels=True)
test_data_iterator = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=True,final=4)
class monitor(object):
def __init__(self):
if not os.path.exists(args.save_dir):
os.makedirs(args.save_dir)
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
self.Worker_I2L = worker_I2L(args)
self.image_LM = tf.placeholder(tf.float32, shape=(args.batch_size,))
self.trade_off_I2L = tf.placeholder(tf.float32, shape=())
self.trade_off_L2I = tf.placeholder(tf.float32, shape=())
self.I2L_grads = []
self.train_uidx = 0
self._build_onestep()
self.lr_l2i = self.Worker_L2I.args.learning_rate
self.current_epoch = 0
self.assign_op = lambda ref_, val_: tf.assign(ref_, val_)
'''
self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
self.Worker_L2I = worker_L2I(args, train_data_iterator.get_num_labels(), train_data_iterator.get_observation_size())
self.saver = tf.train.Saver()
if load_warm_start_models is None:
print('Start to retrieve the (warm) initial L2I model')
self.saver.restore(self.sess, L2Ipath)
print('Done')
self.Worker_I2L = worker_I2L(args)
if load_warm_start_models is None:
print('Start to initialize I2L model')
self.sess.run(tf.variables_initializer(self.Worker_I2L.model.all_variables, name='coldInit_I2L_model'))
print('Done')
if load_warm_start_models:
self.saver.restore(self.sess, load_warm_start_models)
self.image_LM = tf.placeholder(tf.float32, shape=(args.batch_size,))
self.trade_off = tf.placeholder(tf.float32, shape=())
self.I2L_grads = []
self.train_uidx = 0
'''
def get_I2L_lr(self):
if args.use_wide_resnet:
step_wise = [60, 120, 160]
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
if self.current_epoch < step_wise[0]:
return args.learning_rate_I2L
elif self.current_epoch < step_wise[1]:
return args.learning_rate_I2L * 0.2
elif self.current_epoch < step_wise[2]:
return args.learning_rate_I2L * 0.04
else:
return args.learning_rate_I2L * 0.008
else:
step_wise = [102, 153, 204]
#step_wise = [51000, 76000, 102000] # counted by iter with batch_size 100
if self.current_epoch < step_wise[0]:
return args.learning_rate_I2L
elif self.current_epoch < step_wise[1]:
return args.learning_rate_I2L * 0.1
elif self.current_epoch < step_wise[2]:
return args.learning_rate_I2L * 0.01
else:
return args.learning_rate_I2L * 0.001
def get_L2I_lr(self):
self.lr_l2i *= self.Worker_L2I.args.lr_decay
return self.lr_l2i
def __del__(self):
self.sess.close()
def _build_onestep(self):
# Calculate all the costs and gradients
# Let us NOT use weight decay, since we already have a regularization term
# self.weightDecay_I2L = self.Worker_I2L.model.GetWeightDecay()
self.nlls_I2L = self.Worker_I2L.model.nlls
self.soft_labels = self.Worker_I2L.model.predictions # this is the soft labels [optional, may not use it]
nlls_L2I, loss_gen_test = self.Worker_L2I.GetLoss()
self.nlls_L2I_train_bpd = tf.reduce_mean(nlls_L2I) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu )
self.nlls_L2I_test_bpd = tf.reduce_mean(loss_gen_test) / (np.log(2.) * 32 * 32 * 3 * args.nr_gpu * args.batch_size)
if args.L2I_normalization:
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + self.nlls_I2L + tf.log(0.1) - nlls_L2I / (32. * 32 * 3)) ** 2.)
else:
self.consistent_loss = tf.reduce_mean((self.image_LM * np.log(2.) + (self.nlls_I2L + tf.log(0.1) - nlls_L2I)/3072. + args.bias) ** 2.)
self.nlls_I2L_batchMean = tf.reduce_mean(self.nlls_I2L)
self.overall_cost_I2L = self.nlls_I2L_batchMean + (self.trade_off_I2L ** 2.) * self.consistent_loss
self.overall_cost_L2I = self.nlls_L2I_train_bpd + (self.trade_off_L2I ** 2.) * self.consistent_loss
#self.overall_cost_L2I = self.nlls_I2L_batchMean + self.nlls_L2I_train_bpd + self.weightDecay_I2L + self.trade_off * self.consistent_loss
grads_I2L = tf.gradients(self.overall_cost_I2L, self.Worker_I2L.model.trainable_variables)
grads_L2I = tf.gradients(self.overall_cost_L2I, self.Worker_L2I.all_params)
# Update the parameters
self.Worker_I2L.model.Update(grads_I2L)
self.Worker_L2I.Update(grads_L2I, args.L2IuseSGD)
# Build the sampler
self.Worker_L2I.build_sample_from_model()
def step(self, images, labels, LMscores, currEpoch, use_soft_label=0):
fetches = [self.nlls_I2L_batchMean, self.nlls_L2I_train_bpd, self.nlls_L2I_test_bpd, self.consistent_loss,
self.overall_cost_I2L, self.overall_cost_L2I,
self.Worker_I2L.model.update_ops, self.Worker_L2I.update_ops]
feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.input_label: labels[:,None],
self.Worker_I2L.model.lrn_rate: self.get_I2L_lr(),
self.Worker_I2L.model.needImgAug: True,
self.Worker_L2I.tf_lr: self.get_L2I_lr(),
self.image_LM: LMscores,
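# Warm-up: keep both consistency trade-offs at 0 for the first 4 epochs.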
self.trade_off_I2L: args.trade_off_I2L if currEpoch>3 else 0.,
self.trade_off_L2I: args.trade_off_L2I if currEpoch>3 else 0.
}
# Deal with xs and ys:
x = np.cast[np.float32]((images - 127.5) / 127.5)
x = np.split(x, self.Worker_L2I.args.nr_gpu)
feed_dict.update({self.Worker_L2I.xs[i] : x[i] for i in range(self.Worker_L2I.args.nr_gpu)})
if (use_soft_label == 2) or (use_soft_label == 1 and np.random.rand() < 0.8):
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.needImgAug: True
})
if use_soft_label == 2:
soft_labels_ -= 0.1
feed_dict.update({self.Worker_L2I.hs[i]: soft_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
else:
y = np.split(labels, self.Worker_L2I.args.nr_gpu)
feed_dict.update({self.Worker_L2I.ys[i] : y[i] for i in range(self.Worker_L2I.args.nr_gpu)})
nlls_I2L_mean, nlls_L2I_mean, nlls_L2I_mean_test, consistent_loss, overall_cost_I2L, overall_cost_L2I, _, _ = \
self.sess.run(fetches, feed_dict)
if self.train_uidx % args.show_interval == (args.show_interval - 1):
print('iter={}, I2L={}, L2I={}/{}, Consistent={}, Overall_I2L={}, Overall_L2I={}'.format(
self.train_uidx, '{0:.4f}'.format(nlls_I2L_mean), '{0:.4f}'.format(nlls_L2I_mean),
'{0:.4f}'.format(nlls_L2I_mean_test), '{0:.4f}'.format(consistent_loss), '{0:.4f}'.format(overall_cost_I2L),
'{0:.4f}'.format(overall_cost_L2I)
))
self.train_uidx += 1
def data_dependent_init(self):
global_init = tf.global_variables_initializer()
_images, _labels, _ = train_data_iterator.next(self.Worker_L2I.args.init_batch_size)
initializer_dict = {
self.Worker_L2I.x_init: (np.cast[np.float32](_images) - 127.5)/127.5,
self.Worker_L2I.y_init: _labels
}
train_data_iterator.reset()
self.sess.run(global_init, initializer_dict)
def L2I_TestNll(self, alpha_=1.):
all_testnll = []
for images, labels in test_data_iterator:
feed_dict = {}
x = np.cast[np.float32]((images - 127.5) / 127.5)
x = np.split(x, args.nr_gpu)
feed_dict.update({self.Worker_L2I.xs[i]: x[i] for i in range(args.nr_gpu)})
if args.useSoftLabel == 1:
soft_labels_ = self.sess.run(self.soft_labels, feed_dict={
self.Worker_I2L.model.input_image: images.astype('float32'),
self.Worker_I2L.model.needImgAug: False
})
one_hot_labels_ = np.zeros((args.batch_size, 10), dtype=np.float32)
one_hot_labels_[np.arange(args.batch_size), labels] = 1.
feed_dict.update({self.Worker_L2I.hs[i]: (1. - alpha_)*soft_labels_+alpha_*one_hot_labels_ for i in range(self.Worker_L2I.args.nr_gpu)})
else:
y = np.split(labels, args.nr_gpu)
feed_dict.update({self.Worker_L2I.ys[i]: y[i] for i in range(args.nr_gpu)})
all_testnll.append(self.sess.run([self.nlls_L2I_test_bpd], feed_dict))
avg_testnll = np.mean(all_testnll)
print('[L2I], testnll={0:.6f}'.format(avg_testnll))
def build_saver(self):
self.saver = tf.train.Saver(max_to_keep=None)
if args.load_params is not None:
print('Reload from ', args.save_dir)
self.saver.restore(self.sess, args.save_dir + '/' + args.load_params)
print('Done')
else:
print('Start to initialize the two models')
self.data_dependent_init()
print('Done')
def _steal_L2I(self):
if args.steal_params_L2I is not None:
# try to retrieve all matching parameters from a well-trained model
success_ = 0
import pickle
with open(args.steal_params_L2I, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(tf.global_variables()):
if v.name in old_model:
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(tf.global_variables()))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
'''
# this version can only reload "trainable vars"
success_ = 0
import pickle
with open(args.steal_params_L2I, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(self.Worker_L2I.all_params):
if v.name in old_model:
self.sess.run(self.assign_op(v, old_model[v.name][0]))
success_ += 1
print(vidx, len(self.Worker_L2I.all_params))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_L2I))
'''
def _steal_I2L(self):
if args.steal_params_I2L is not None:
# try to retrieve parameters from a well-trained model
success_ = 0
import pickle
with open(args.steal_params_I2L, 'rb') as f:
old_model = pickle.load(f)
for vidx, v in enumerate(self.Worker_I2L.model.all_variables):
if v.name[4:] in old_model:
self.sess.run(self.assign_op(v, old_model[v.name[4:]][0]))
success_ += 1
print(vidx, len(self.Worker_I2L.model.all_variables))
print('Retrieve %d / %d parameters from model %s' % (success_, len(old_model), args.steal_params_I2L))
def train(self):
if args.load_params is None:
self._steal_L2I()
self._steal_I2L()
self.saver.save(self.sess, args.save_dir + '/params_stealt_models.ckpt')
for epoch in range(args.max_epochs):
self.current_epoch = epoch
for images, labels, LMscores in train_data_iterator:
self.step(images, labels, LMscores, epoch, args.useSoftLabel)
if epoch % args.valid_interval == (args.valid_interval - 1):
self.Worker_I2L.Valid(test_data_iterator, self.sess)
self.L2I_TestNll()
if epoch % args.save_interval == (args.save_interval - 1):
self.saver.save(self.sess, args.save_dir + '/params_' + str(epoch) + 'uidx' + str(self.train_uidx) + '.ckpt')
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
def valid_I2L(self):
self.Worker_I2L.Valid(test_data_iterator, self.sess)
def valid_L2I(self):
self.L2I_TestNll()
'''
for alpha_ in range(11):
print('alpha=%f' % (alpha_ * 0.1))
self.L2I_TestNll(alpha_ * 0.1)
'''
def valid_ImgGen(self):
self.Worker_L2I.Gen_Images(self.sess, self.current_epoch)
def main(_):
#L2Ipath='./pxpp_c_2.95/params_cifar.ckpt'
monitor_ = monitor()
monitor_.build_saver()
if args.mode == 'train':
monitor_.train()
elif args.mode == 'I2L':
monitor_.valid_I2L()
elif args.mode == 'L2I':
monitor_.valid_L2I()
elif args.mode == 'ImgGen':
monitor_.valid_ImgGen()
else:
print('Unsupported mode: ' + args.mode)
if __name__ == '__main__':
tf.app.run()


@ -0,0 +1,85 @@
"""
The core Pixel-CNN model
"""
import tensorflow as tf
from tensorflow.contrib.framework.python.ops import arg_scope
import pixel_cnn_pp.nn as nn
def model_spec(x, h=None, init=False, ema=None, dropout_p=0.5, nr_resnet=5, nr_filters=160, nr_logistic_mix=10, resnet_nonlinearity='concat_elu'):
"""
We receive a Tensor x of shape (N,H,W,D1) (e.g. (12,32,32,3)) and produce
a Tensor x_out of shape (N,H,W,D2) (e.g. (12,32,32,100)), where each fiber
of the x_out tensor describes the predictive distribution for the RGB at
that position.
'h' is an optional N x K matrix of values to condition our generative model on
"""
counters = {}
with arg_scope([nn.conv2d, nn.deconv2d, nn.gated_resnet, nn.dense], counters=counters, init=init, ema=ema, dropout_p=dropout_p):
# parse resnet nonlinearity argument
if resnet_nonlinearity == 'concat_elu':
resnet_nonlinearity = nn.concat_elu
elif resnet_nonlinearity == 'elu':
resnet_nonlinearity = tf.nn.elu
elif resnet_nonlinearity == 'relu':
resnet_nonlinearity = tf.nn.relu
else:
raise ValueError('resnet nonlinearity ' + resnet_nonlinearity + ' is not supported')
with arg_scope([nn.gated_resnet], nonlinearity=resnet_nonlinearity, h=h):
# ////////// up pass through pixelCNN ////////
xs = nn.int_shape(x)
x_pad = tf.concat([x,tf.ones(xs[:-1]+[1])],3) # add channel of ones to distinguish image from padding later on
u_list = [nn.down_shift(nn.down_shifted_conv2d(x_pad, num_filters=nr_filters, filter_size=[2, 3]))] # stream for pixels above
ul_list = [nn.down_shift(nn.down_shifted_conv2d(x_pad, num_filters=nr_filters, filter_size=[1,3])) + \
nn.right_shift(nn.down_right_shifted_conv2d(x_pad, num_filters=nr_filters, filter_size=[2,1]))] # stream for up and to the left
for rep in range(nr_resnet):
u_list.append(nn.gated_resnet(u_list[-1], conv=nn.down_shifted_conv2d))
ul_list.append(nn.gated_resnet(ul_list[-1], u_list[-1], conv=nn.down_right_shifted_conv2d))
u_list.append(nn.down_shifted_conv2d(u_list[-1], num_filters=nr_filters, stride=[2, 2]))
ul_list.append(nn.down_right_shifted_conv2d(ul_list[-1], num_filters=nr_filters, stride=[2, 2]))
for rep in range(nr_resnet):
u_list.append(nn.gated_resnet(u_list[-1], conv=nn.down_shifted_conv2d))
ul_list.append(nn.gated_resnet(ul_list[-1], u_list[-1], conv=nn.down_right_shifted_conv2d))
u_list.append(nn.down_shifted_conv2d(u_list[-1], num_filters=nr_filters, stride=[2, 2]))
ul_list.append(nn.down_right_shifted_conv2d(ul_list[-1], num_filters=nr_filters, stride=[2, 2]))
for rep in range(nr_resnet):
u_list.append(nn.gated_resnet(u_list[-1], conv=nn.down_shifted_conv2d))
ul_list.append(nn.gated_resnet(ul_list[-1], u_list[-1], conv=nn.down_right_shifted_conv2d))
# /////// down pass ////////
u = u_list.pop()
ul = ul_list.pop()
for rep in range(nr_resnet):
u = nn.gated_resnet(u, u_list.pop(), conv=nn.down_shifted_conv2d)
ul = nn.gated_resnet(ul, tf.concat([u, ul_list.pop()],3), conv=nn.down_right_shifted_conv2d)
u = nn.down_shifted_deconv2d(u, num_filters=nr_filters, stride=[2, 2])
ul = nn.down_right_shifted_deconv2d(ul, num_filters=nr_filters, stride=[2, 2])
for rep in range(nr_resnet+1):
u = nn.gated_resnet(u, u_list.pop(), conv=nn.down_shifted_conv2d)
ul = nn.gated_resnet(ul, tf.concat([u, ul_list.pop()],3), conv=nn.down_right_shifted_conv2d)
u = nn.down_shifted_deconv2d(u, num_filters=nr_filters, stride=[2, 2])
ul = nn.down_right_shifted_deconv2d(ul, num_filters=nr_filters, stride=[2, 2])
for rep in range(nr_resnet+1):
u = nn.gated_resnet(u, u_list.pop(), conv=nn.down_shifted_conv2d)
ul = nn.gated_resnet(ul, tf.concat([u, ul_list.pop()],3), conv=nn.down_right_shifted_conv2d)
x_out = nn.nin(tf.nn.elu(ul),10*nr_logistic_mix)
assert len(u_list) == 0
assert len(ul_list) == 0
return x_out


@ -0,0 +1,319 @@
"""
Various tensorflow utilities
"""
import numpy as np
import tensorflow as tf
from tensorflow.contrib.framework.python.ops import add_arg_scope
def int_shape(x):
return list(map(int, x.get_shape()))
def concat_elu(x):
""" like concatenated ReLU (http://arxiv.org/abs/1603.05201), but then with ELU """
axis = len(x.get_shape())-1
return tf.nn.elu(tf.concat([x, -x],axis))
def log_sum_exp(x):
""" numerically stable log_sum_exp implementation that prevents overflow """
axis = len(x.get_shape())-1
m = tf.reduce_max(x, axis)
m2 = tf.reduce_max(x, axis, keep_dims=True)
return m + tf.log(tf.reduce_sum(tf.exp(x-m2), axis))
def log_prob_from_logits(x):
""" numerically stable log_softmax implementation that prevents overflow """
axis = len(x.get_shape())-1
m = tf.reduce_max(x, axis, keep_dims=True)
return x - m - tf.log(tf.reduce_sum(tf.exp(x-m), axis, keep_dims=True))
def discretized_mix_logistic_loss(x,l,sum_all=True):
""" log-likelihood for mixture of discretized logistics, assumes the data has been rescaled to [-1,1] interval """
xs = int_shape(x) # true image (i.e. labels) to regress to, e.g. (B,32,32,3)
ls = int_shape(l) # predicted distribution, e.g. (B,32,32,100)
nr_mix = int(ls[-1] / 10) # here and below: unpacking the params of the mixture of logistics
logit_probs = l[:,:,:,:nr_mix]
l = tf.reshape(l[:,:,:,nr_mix:], xs + [nr_mix*3])
means = l[:,:,:,:,:nr_mix]
log_scales = tf.maximum(l[:,:,:,:,nr_mix:2*nr_mix], -7.)
coeffs = tf.nn.tanh(l[:,:,:,:,2*nr_mix:3*nr_mix])
x = tf.reshape(x, xs + [1]) + tf.zeros(xs + [nr_mix]) # here and below: getting the means and adjusting them based on preceding sub-pixels
m2 = tf.reshape(means[:,:,:,1,:] + coeffs[:, :, :, 0, :] * x[:, :, :, 0, :], [xs[0],xs[1],xs[2],1,nr_mix])
m3 = tf.reshape(means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x[:, :, :, 0, :] + coeffs[:, :, :, 2, :] * x[:, :, :, 1, :], [xs[0],xs[1],xs[2],1,nr_mix])
means = tf.concat([tf.reshape(means[:,:,:,0,:], [xs[0],xs[1],xs[2],1,nr_mix]), m2, m3],3)
centered_x = x - means
inv_stdv = tf.exp(-log_scales)
plus_in = inv_stdv * (centered_x + 1./255.)
cdf_plus = tf.nn.sigmoid(plus_in)
min_in = inv_stdv * (centered_x - 1./255.)
cdf_min = tf.nn.sigmoid(min_in)
log_cdf_plus = plus_in - tf.nn.softplus(plus_in) # log probability for edge case of 0 (before scaling)
log_one_minus_cdf_min = -tf.nn.softplus(min_in) # log probability for edge case of 255 (before scaling)
cdf_delta = cdf_plus - cdf_min # probability for all other cases
mid_in = inv_stdv * centered_x
log_pdf_mid = mid_in - log_scales - 2.*tf.nn.softplus(mid_in) # log probability in the center of the bin, to be used in extreme cases (not actually used in our code)
# now select the right output: left edge case, right edge case, normal case, extremely low prob case (doesn't actually happen for us)
# this is what we are really doing, but using the robust version below for extreme cases in other applications and to avoid NaN issue with tf.select()
# log_probs = tf.select(x < -0.999, log_cdf_plus, tf.select(x > 0.999, log_one_minus_cdf_min, tf.log(cdf_delta)))
# robust version, that still works if probabilities are below 1e-5 (which never happens in our code)
# tensorflow backpropagates through tf.select() by multiplying with zero instead of selecting: this requires us to use some ugly tricks to avoid potential NaNs
# the 1e-12 in tf.maximum(cdf_delta, 1e-12) is never actually used as output, it's purely there to get around the tf.select() gradient issue
# if the probability on a sub-pixel is below 1e-5, we use an approximation based on the assumption that the log-density is constant in the bin of the observed sub-pixel value
log_probs = tf.where(x < -0.999, log_cdf_plus, tf.where(x > 0.999, log_one_minus_cdf_min, tf.where(cdf_delta > 1e-5, tf.log(tf.maximum(cdf_delta, 1e-12)), log_pdf_mid - np.log(127.5))))
log_probs = tf.reduce_sum(log_probs,3) + log_prob_from_logits(logit_probs)
if sum_all:
return -tf.reduce_sum(log_sum_exp(log_probs))
else:
return -tf.reduce_sum(log_sum_exp(log_probs),[1,2])
def sample_from_discretized_mix_logistic(l,nr_mix):
ls = int_shape(l)
xs = ls[:-1] + [3]
# unpack parameters
logit_probs = l[:, :, :, :nr_mix]
l = tf.reshape(l[:, :, :, nr_mix:], xs + [nr_mix*3])
# sample the mixture indicator from the softmax via the Gumbel-max trick (argmax of logits plus Gumbel noise)
sel = tf.one_hot(tf.argmax(logit_probs - tf.log(-tf.log(tf.random_uniform(logit_probs.get_shape(), minval=1e-5, maxval=1. - 1e-5))), 3), depth=nr_mix, dtype=tf.float32)
sel = tf.reshape(sel, xs[:-1] + [1,nr_mix])
# select logistic parameters
means = tf.reduce_sum(l[:,:,:,:,:nr_mix]*sel,4)
log_scales = tf.maximum(tf.reduce_sum(l[:,:,:,:,nr_mix:2*nr_mix]*sel,4), -7.)
coeffs = tf.reduce_sum(tf.nn.tanh(l[:,:,:,:,2*nr_mix:3*nr_mix])*sel,4)
# sample from logistic & clip to interval
# we don't actually round to the nearest 8bit value when sampling
u = tf.random_uniform(means.get_shape(), minval=1e-5, maxval=1. - 1e-5)
x = means + tf.exp(log_scales)*(tf.log(u) - tf.log(1. - u))
x0 = tf.minimum(tf.maximum(x[:,:,:,0], -1.), 1.)
x1 = tf.minimum(tf.maximum(x[:,:,:,1] + coeffs[:,:,:,0]*x0, -1.), 1.)
x2 = tf.minimum(tf.maximum(x[:,:,:,2] + coeffs[:,:,:,1]*x0 + coeffs[:,:,:,2]*x1, -1.), 1.)
return tf.concat([tf.reshape(x0,xs[:-1]+[1]), tf.reshape(x1,xs[:-1]+[1]), tf.reshape(x2,xs[:-1]+[1])],3)
def get_var_maybe_avg(var_name, ema, **kwargs):
''' utility for retrieving polyak averaged params '''
v = tf.get_variable(var_name, **kwargs)
if ema is not None:
v = ema.average(v)
return v
def get_vars_maybe_avg(var_names, ema, **kwargs):
''' utility for retrieving polyak averaged params '''
vars = []
for vn in var_names:
vars.append(get_var_maybe_avg(vn, ema, **kwargs))
return vars
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
''' Adam optimizer '''
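# Hand-rolled Adam: v accumulates the (biased) first moment, mg the second
# moment, and t drives the bias correction of both.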
updates = []
if type(cost_or_grads) is not list:
grads = tf.gradients(cost_or_grads, params)
else:
grads = cost_or_grads
t = tf.Variable(1., 'adam_t')
for p, g in zip(params, grads):
mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
if mom1>0:
v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
v_t = mom1*v + (1. - mom1)*g
v_hat = v_t / (1. - tf.pow(mom1,t))
updates.append(v.assign(v_t))
else:
v_hat = g
mg_t = mom2*mg + (1. - mom2)*tf.square(g)
mg_hat = mg_t / (1. - tf.pow(mom2,t))
g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
p_t = p - lr * g_t
updates.append(mg.assign(mg_t))
updates.append(p.assign(p_t))
updates.append(t.assign_add(1))
return tf.group(*updates)
def get_name(layer_name, counters):
''' utility for keeping track of layer names '''
if not layer_name in counters:
counters[layer_name] = 0
name = layer_name + '_' + str(counters[layer_name])
counters[layer_name] += 1
return name
@add_arg_scope
def dense(x, num_units, nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
''' fully connected layer '''
name = get_name('dense', counters)
with tf.variable_scope(name):
if init:
# data based initialization of parameters
V = tf.get_variable('V', [int(x.get_shape()[1]),num_units], tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0])
x_init = tf.matmul(x, V_norm)
m_init, v_init = tf.nn.moments(x_init, [0])
scale_init = init_scale/tf.sqrt(v_init + 1e-10)
g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init*scale_init, trainable=True)
x_init = tf.reshape(scale_init,[1,num_units])*(x_init-tf.reshape(m_init,[1,num_units]))
if nonlinearity is not None:
x_init = nonlinearity(x_init)
return x_init
else:
V,g,b = get_vars_maybe_avg(['V','g','b'], ema)
# According to the comments at
# https://github.com/openai/pixel-cnn/issues/17,
# I simply comment out the following line
# tf.assert_variables_initialized([V,g,b])
# use weight normalization (Salimans & Kingma, 2016)
x = tf.matmul(x, V)
scaler = g/tf.sqrt(tf.reduce_sum(tf.square(V),[0]))
x = tf.reshape(scaler,[1,num_units])*x + tf.reshape(b,[1,num_units])
# apply nonlinearity
if nonlinearity is not None:
x = nonlinearity(x)
return x
@add_arg_scope
def conv2d(x, num_filters, filter_size=[3,3], stride=[1,1], pad='SAME', nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
''' convolutional layer '''
name = get_name('conv2d', counters)
with tf.variable_scope(name):
if init:
# data based initialization of parameters
V = tf.get_variable('V', filter_size+[int(x.get_shape()[-1]),num_filters], tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0,1,2])
x_init = tf.nn.conv2d(x, V_norm, [1]+stride+[1], pad)
m_init, v_init = tf.nn.moments(x_init, [0,1,2])
scale_init = init_scale/tf.sqrt(v_init + 1e-8)
g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init*scale_init, trainable=True)
x_init = tf.reshape(scale_init,[1,1,1,num_filters])*(x_init-tf.reshape(m_init,[1,1,1,num_filters]))
if nonlinearity is not None:
x_init = nonlinearity(x_init)
return x_init
else:
V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
# tf.assert_variables_initialized([V,g,b])
# use weight normalization (Salimans & Kingma, 2016)
W = tf.reshape(g,[1,1,1,num_filters])*tf.nn.l2_normalize(V,[0,1,2])
# calculate convolutional layer output
x = tf.nn.bias_add(tf.nn.conv2d(x, W, [1]+stride+[1], pad), b)
# apply nonlinearity
if nonlinearity is not None:
x = nonlinearity(x)
return x
@add_arg_scope
def deconv2d(x, num_filters, filter_size=[3,3], stride=[1,1], pad='SAME', nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
''' transposed convolutional layer '''
name = get_name('deconv2d', counters)
xs = int_shape(x)
if pad=='SAME':
target_shape = [xs[0], xs[1]*stride[0], xs[2]*stride[1], num_filters]
else:
target_shape = [xs[0], xs[1]*stride[0] + filter_size[0]-1, xs[2]*stride[1] + filter_size[1]-1, num_filters]
with tf.variable_scope(name):
if init:
# data based initialization of parameters
V = tf.get_variable('V', filter_size+[num_filters,int(x.get_shape()[-1])], tf.float32, tf.random_normal_initializer(0, 0.05), trainable=True)
V_norm = tf.nn.l2_normalize(V.initialized_value(), [0,1,3])
x_init = tf.nn.conv2d_transpose(x, V_norm, target_shape, [1]+stride+[1], padding=pad)
m_init, v_init = tf.nn.moments(x_init, [0,1,2])
scale_init = init_scale/tf.sqrt(v_init + 1e-8)
g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init*scale_init, trainable=True)
x_init = tf.reshape(scale_init,[1,1,1,num_filters])*(x_init-tf.reshape(m_init,[1,1,1,num_filters]))
if nonlinearity is not None:
x_init = nonlinearity(x_init)
return x_init
else:
V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)
# tf.assert_variables_initialized([V,g,b])
# use weight normalization (Salimans & Kingma, 2016)
W = tf.reshape(g,[1,1,num_filters,1])*tf.nn.l2_normalize(V,[0,1,3])
# calculate convolutional layer output
x = tf.nn.conv2d_transpose(x, W, target_shape, [1]+stride+[1], padding=pad)
x = tf.nn.bias_add(x, b)
# apply nonlinearity
if nonlinearity is not None:
x = nonlinearity(x)
return x
@add_arg_scope
def nin(x, num_units, **kwargs):
""" a network in network layer (1x1 CONV) """
s = int_shape(x)
x = tf.reshape(x, [np.prod(s[:-1]),s[-1]])
x = dense(x, num_units, **kwargs)
return tf.reshape(x, s[:-1]+[num_units])
''' meta-layer consisting of multiple base layers '''
@add_arg_scope
def gated_resnet(x, a=None, h=None, nonlinearity=concat_elu, conv=conv2d, init=False, counters={}, ema=None, dropout_p=0., **kwargs):
xs = int_shape(x)
num_filters = xs[-1]
c1 = conv(nonlinearity(x), num_filters)
if a is not None: # add short-cut connection if auxiliary input 'a' is given
c1 += nin(nonlinearity(a), num_filters)
c1 = nonlinearity(c1)
if dropout_p > 0:
c1 = tf.nn.dropout(c1, keep_prob=1. - dropout_p)
c2 = conv(c1, num_filters * 2, init_scale=0.1)
# add projection of h vector if included: conditional generation
if h is not None:
with tf.variable_scope(get_name('conditional_weights', counters)):
hw = get_var_maybe_avg('hw', ema, shape=[int_shape(h)[-1], 2 * num_filters], dtype=tf.float32,
initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
if init:
hw = hw.initialized_value()
c2 += tf.reshape(tf.matmul(h, hw), [xs[0], 1, 1, 2 * num_filters])
a, b = tf.split(c2, 2, 3)
c3 = a * tf.nn.sigmoid(b)
return x + c3
''' utilities for shifting the image around, efficient alternative to masking convolutions '''
def down_shift(x):
xs = int_shape(x)
return tf.concat([tf.zeros([xs[0],1,xs[2],xs[3]]), x[:,:xs[1]-1,:,:]],1)
def right_shift(x):
xs = int_shape(x)
return tf.concat([tf.zeros([xs[0],xs[1],1,xs[3]]), x[:,:,:xs[2]-1,:]],2)
@add_arg_scope
def down_shifted_conv2d(x, num_filters, filter_size=[2,3], stride=[1,1], **kwargs):
x = tf.pad(x, [[0,0],[filter_size[0]-1,0], [int((filter_size[1]-1)/2),int((filter_size[1]-1)/2)],[0,0]])
return conv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
@add_arg_scope
def down_shifted_deconv2d(x, num_filters, filter_size=[2,3], stride=[1,1], **kwargs):
x = deconv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
xs = int_shape(x)
return x[:,:(xs[1]-filter_size[0]+1),int((filter_size[1]-1)/2):(xs[2]-int((filter_size[1]-1)/2)),:]
@add_arg_scope
def down_right_shifted_conv2d(x, num_filters, filter_size=[2,2], stride=[1,1], **kwargs):
x = tf.pad(x, [[0,0],[filter_size[0]-1, 0], [filter_size[1]-1, 0],[0,0]])
return conv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
@add_arg_scope
def down_right_shifted_deconv2d(x, num_filters, filter_size=[2,2], stride=[1,1], **kwargs):
x = deconv2d(x, num_filters, filter_size=filter_size, pad='VALID', stride=stride, **kwargs)
xs = int_shape(x)
return x[:,:(xs[1]-filter_size[0]+1):,:(xs[2]-filter_size[1]+1),:]


@ -0,0 +1,194 @@
import numpy as np
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
# Plot image examples.
def plot_img(img, title=None):
plt.figure()
plt.imshow(img, interpolation='nearest')
if title is not None:
plt.title(title)
plt.axis('off')
plt.tight_layout()
def img_stretch(img):
img = img.astype(float)
img -= np.min(img)
img /= np.max(img)+1e-12
return img
def img_tile(imgs, aspect_ratio=1.0, tile_shape=None, border=1,
border_color=0, stretch=False):
''' Tile images in a grid.
If tile_shape is provided only as many images as specified in tile_shape
will be included in the output.
'''
# Prepare images
if stretch:
imgs = img_stretch(imgs)
imgs = np.array(imgs)
if imgs.ndim != 3 and imgs.ndim != 4:
raise ValueError('imgs has wrong number of dimensions.')
n_imgs = imgs.shape[0]
# Grid shape
img_shape = np.array(imgs.shape[1:3])
if tile_shape is None:
img_aspect_ratio = img_shape[1] / float(img_shape[0])
aspect_ratio *= img_aspect_ratio
tile_height = int(np.ceil(np.sqrt(n_imgs * aspect_ratio)))
tile_width = int(np.ceil(np.sqrt(n_imgs / aspect_ratio)))
grid_shape = np.array((tile_height, tile_width))
else:
assert len(tile_shape) == 2
grid_shape = np.array(tile_shape)
# Tile image shape
tile_img_shape = np.array(imgs.shape[1:])
tile_img_shape[:2] = (img_shape[:2] + border) * grid_shape[:2] - border
# Assemble tile image
tile_img = np.empty(tile_img_shape)
tile_img[:] = border_color
for i in range(grid_shape[0]):
for j in range(grid_shape[1]):
img_idx = j + i*grid_shape[1]
if img_idx >= n_imgs:
# No more images - stop filling out the grid.
break
img = imgs[img_idx]
yoff = (img_shape[0] + border) * i
xoff = (img_shape[1] + border) * j
tile_img[yoff:yoff+img_shape[0], xoff:xoff+img_shape[1], ...] = img
return tile_img
def conv_filter_tile(filters):
n_filters, n_channels, height, width = filters.shape
tile_shape = None
if n_channels == 3:
# Interpret 3 color channels as RGB
filters = np.transpose(filters, (0, 2, 3, 1))
else:
# Organize tile such that each row corresponds to a filter and the
# columns are the filter channels
tile_shape = (n_channels, n_filters)
filters = np.transpose(filters, (1, 0, 2, 3))
filters = np.resize(filters, (n_filters*n_channels, height, width))
filters = img_stretch(filters)
return img_tile(filters, tile_shape=tile_shape)
def scale_to_unit_interval(ndar, eps=1e-8):
""" Scales all values in the ndarray ndar to be between 0 and 1 """
ndar = ndar.copy()
ndar -= ndar.min()
ndar *= 1.0 / (ndar.max() + eps)
return ndar
def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
scale_rows_to_unit_interval=True,
output_pixel_vals=True):
"""
Transform an array with one flattened image per row, into an array in
which images are reshaped and laid out like tiles on a floor.
This function is useful for visualizing datasets whose rows are images,
and also columns of matrices for transforming those rows
(such as the first layer of a neural net).
:type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
be 2-D ndarrays or None;
:param X: a 2-D array in which every row is a flattened image.
:type img_shape: tuple; (height, width)
:param img_shape: the original shape of each image
:type tile_shape: tuple; (rows, cols)
:param tile_shape: the number of images to tile (rows, cols)
:param output_pixel_vals: if output should be pixel values (i.e. int8
values) or floats
:param scale_rows_to_unit_interval: if the values need to be scaled before
being plotted to [0,1] or not
:returns: array suitable for viewing as an image.
(See:`PIL.Image.fromarray`.)
:rtype: a 2-d array with same dtype as X.
"""
assert len(img_shape) == 2
assert len(tile_shape) == 2
assert len(tile_spacing) == 2
# The expression below can be re-written in a more C style as
# follows :
#
# out_shape = [0,0]
# out_shape[0] = (img_shape[0] + tile_spacing[0]) * tile_shape[0] -
# tile_spacing[0]
# out_shape[1] = (img_shape[1] + tile_spacing[1]) * tile_shape[1] -
# tile_spacing[1]
out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
in zip(img_shape, tile_shape, tile_spacing)]
if isinstance(X, tuple):
assert len(X) == 4
# Create an output numpy ndarray to store the image
if output_pixel_vals:
out_array = np.zeros((out_shape[0], out_shape[1], 4), dtype='uint8')
else:
out_array = np.zeros((out_shape[0], out_shape[1], 4), dtype=X.dtype)
#colors default to 0, alpha defaults to 1 (opaque)
if output_pixel_vals:
channel_defaults = [0, 0, 0, 255]
else:
channel_defaults = [0., 0., 0., 1.]
for i in range(4):
if X[i] is None:
# if channel is None, fill it with zeros of the correct
# dtype
out_array[:, :, i] = np.zeros(out_shape,
dtype='uint8' if output_pixel_vals else out_array.dtype
) + channel_defaults[i]
else:
# use a recurrent call to compute the channel and store it
# in the output
out_array[:, :, i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
return out_array
else:
# if we are dealing with only one channel
H, W = img_shape
Hs, Ws = tile_spacing
# generate a matrix to store the output
out_array = np.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
for tile_row in range(tile_shape[0]):
for tile_col in range(tile_shape[1]):
if tile_row * tile_shape[1] + tile_col < X.shape[0]:
if scale_rows_to_unit_interval:
# if we should scale values to be between 0 and 1
# do this by calling the `scale_to_unit_interval`
# function
this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
else:
this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
# add the slice to the corresponding position in the
# output array
out_array[
tile_row * (H+Hs): tile_row * (H + Hs) + H,
tile_col * (W+Ws): tile_col * (W + Ws) + W
] \
= this_img * (255 if output_pixel_vals else 1)
return out_array


@ -0,0 +1,436 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet model.
Related papers:
https://arxiv.org/pdf/1603.05027v2.pdf
https://arxiv.org/pdf/1512.03385v1.pdf
https://arxiv.org/pdf/1605.07146v1.pdf
"""
from collections import namedtuple
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages
HParams = namedtuple('HParams',
'batch_size, num_classes, min_lrn_rate, lrn_rate, '
'num_residual_units, use_bottleneck, weight_decay_rate, '
'relu_leakiness, optimizer')
class ResNet(object):
"""ResNet model."""
def __init__(self, hps, mode, image_size=32, use_wide_resnet=False, nr_gpu=1):
self.hps = hps
self.batch_size = self.hps.batch_size
self.input_image = [tf.placeholder(tf.float32, shape=(self.batch_size,image_size,image_size,3)) for _ in range(nr_gpu)]
self.input_label = [tf.placeholder(tf.int32, shape=(self.batch_size,1)) for _ in range(nr_gpu)]
self.mode = mode
self.needImgAug = tf.placeholder(tf.bool, shape=())
self.image_size = image_size
self.nr_gpu = nr_gpu
self._extra_train_ops = []
self.lrn_rate = tf.placeholder(tf.float32, shape=())
self.use_wide_resnet = use_wide_resnet
def build_graph(self):
"""Build a whole graph for the model."""
with tf.variable_scope('I2L'):
self.global_step = tf.contrib.framework.get_or_create_global_step()
self._build_model()
self.trainable_variables = [v for v in tf.trainable_variables() if v.name.startswith('I2L/')]
self.all_variables = [v for v in tf.global_variables() if v.name.startswith('I2L/')]
#if self.mode == 'train':
# self._build_train_op()
def _stride_arr(self, stride):
"""Map a stride scalar to the stride array for tf.nn.conv2d."""
return [1, stride, stride, 1]
def _PreprocessImages(self):
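# Augmentation (only when needImgAug is True): pad by 4 pixels, take a random
# image_size crop, and flip horizontally at random; per-image standardization
# is always applied afterwards.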
def _aug_one_img(img):
img = tf.image.resize_image_with_crop_or_pad(img, self.image_size+4, self.image_size+4)
img = tf.random_crop(img, [self.image_size, self.image_size, 3])
img = tf.image.random_flip_left_right(img)
return img
def _deal_one_img(img):
img = tf.cond(self.needImgAug, lambda: _aug_one_img(img), lambda: img)
img = tf.image.per_image_standardization(img)
return img
#images = tf.map_fn(lambda img: _deal_one_img(img), self.input_image)
#self.image = images
self.image = [tf.map_fn(lambda img: _deal_one_img(img), X) for X in self.input_image]
def _make_1hot_labels(self):
self.labels = []
for L in self.input_label:
labels = tf.reshape(L, [self.batch_size, 1])
indices = tf.reshape(tf.range(0, self.batch_size, 1), [self.batch_size, 1])
labels = tf.sparse_to_dense(
tf.concat([indices, labels],1),
[self.batch_size, self.hps.num_classes], 1.0, 0.0)
self.labels.append(labels)
def _build_basic_structure(self, x, y):
with tf.variable_scope('init'):
x = self._conv('init_conv', x, 3, 3, 16, self._stride_arr(1))
strides = [1, 2, 2]
activate_before_residual = [True, False, False]
if self.hps.use_bottleneck:
res_func = self._bottleneck_residual
filters = [16, 64, 128, 256]
else:
res_func = self._residual
if self.use_wide_resnet:
filters = [16, 160, 320, 640]
else:
filters = [16, 16, 32, 64]
# Uncomment the following code to use the w28-10 wide residual network.
# It is more memory efficient than a very deep residual network and has
# comparably good performance.
# https://arxiv.org/pdf/1605.07146v1.pdf
# filters = [16, 160, 320, 640]
# Update hps.num_residual_units to 9
with tf.variable_scope('unit_1_0'):
x = res_func(x, filters[0], filters[1], self._stride_arr(strides[0]),
activate_before_residual[0])
for i in range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_1_%d' % i):
x = res_func(x, filters[1], filters[1], self._stride_arr(1), False)
with tf.variable_scope('unit_2_0'):
x = res_func(x, filters[1], filters[2], self._stride_arr(strides[1]),
activate_before_residual[1])
for i in range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_2_%d' % i):
x = res_func(x, filters[2], filters[2], self._stride_arr(1), False)
with tf.variable_scope('unit_3_0'):
x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]),
activate_before_residual[2])
for i in range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_3_%d' % i):
x = res_func(x, filters[3], filters[3], self._stride_arr(1), False)
with tf.variable_scope('unit_last'):
x = self._batch_norm('final_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
x = self._global_avg_pool(x)
with tf.variable_scope('logit'):
logits = self._fully_connected(x, self.hps.num_classes)
predictions_ = tf.nn.softmax(logits)
with tf.variable_scope('costs'):
xent = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
nlls_ = xent
cost_ = tf.reduce_mean(xent, name='xent')
cost_ += self._decay()
return nlls_, cost_, predictions_
#tf.scalar_summary('cost', self.cost)
def _build_model(self):
"""Build the core model within the graph."""
# Preprocess
self._PreprocessImages()
self._make_1hot_labels()
self.nlls = [None for _ in range(self.nr_gpu)]
self.cost = [None for _ in range(self.nr_gpu)]
self.predictions = [None for _ in range(self.nr_gpu)]
for i in range(self.nr_gpu):
with tf.variable_scope('I2L', reuse=True if i >= 1 else None):
with tf.device('/gpu:%d' % i):
nll_, cost_, predicted_ = self._build_basic_structure(self.image[i], self.labels[i])
self.nlls[i] = nll_
self.cost[i] = cost_
self.predictions[i] = predicted_
'''
def _build_model(self):
"""Build the core model within the graph."""
# Preprocess
self._PreprocessImages()
self._make_1hot_labels()
with tf.variable_scope('init'):
x = self.image
x = self._conv('init_conv', x, 3, 3, 16, self._stride_arr(1))
strides = [1, 2, 2]
activate_before_residual = [True, False, False]
if self.hps.use_bottleneck:
res_func = self._bottleneck_residual
filters = [16, 64, 128, 256]
else:
res_func = self._residual
if self.use_wide_resnet:
filters = [16, 160, 320, 640]
else:
filters = [16, 16, 32, 64]
# Uncomment the following code to use the w28-10 wide residual network.
# It is more memory efficient than a very deep residual network and has
# comparably good performance.
# https://arxiv.org/pdf/1605.07146v1.pdf
# filters = [16, 160, 320, 640]
# Update hps.num_residual_units to 9
with tf.variable_scope('unit_1_0'):
x = res_func(x, filters[0], filters[1], self._stride_arr(strides[0]),
activate_before_residual[0])
for i in range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_1_%d' % i):
x = res_func(x, filters[1], filters[1], self._stride_arr(1), False)
with tf.variable_scope('unit_2_0'):
x = res_func(x, filters[1], filters[2], self._stride_arr(strides[1]),
activate_before_residual[1])
for i in range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_2_%d' % i):
x = res_func(x, filters[2], filters[2], self._stride_arr(1), False)
with tf.variable_scope('unit_3_0'):
x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]),
activate_before_residual[2])
for i in range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_3_%d' % i):
x = res_func(x, filters[3], filters[3], self._stride_arr(1), False)
with tf.variable_scope('unit_last'):
x = self._batch_norm('final_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
x = self._global_avg_pool(x)
with tf.variable_scope('logit'):
logits = self._fully_connected(x, self.hps.num_classes)
self.predictions = tf.nn.softmax(logits)
with tf.variable_scope('costs'):
xent = tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=logits)
self.nlls = xent
self.cost = tf.reduce_mean(xent, name='xent')
self.cost += self._decay()
#tf.scalar_summary('cost', self.cost)
'''
def _build_train_op(self):
"""Build training specific ops for the graph."""
self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
#tf.scalar_summary('learning rate', self.lrn_rate)
trainable_variables = tf.trainable_variables()
#self.trainable_variables = [v for v in tf.trainable_variables() if v.name.startswith('LM/')]
grads = tf.gradients(self.cost, trainable_variables)
if self.hps.optimizer == 'sgd':
optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
elif self.hps.optimizer == 'mom':
optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
apply_op = optimizer.apply_gradients(
zip(grads, trainable_variables),
global_step=self.global_step, name='train_step')
train_ops = [apply_op] + self._extra_train_ops
self.train_op = tf.group(*train_ops)
def Update(self, grads):
"""Build training specific ops for the graph."""
if self.hps.optimizer == 'sgd':
optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
elif self.hps.optimizer == 'mom':
optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
apply_op = optimizer.apply_gradients(
zip(grads, self.trainable_variables),
global_step=self.global_step, name='train_step')
train_ops = [apply_op] + self._extra_train_ops
self.update_ops = tf.group(*train_ops)
# TODO(xpan): Consider batch_norm in contrib/layers/python/layers/layers.py
def _batch_norm(self, name, x):
"""Batch normalization."""
with tf.variable_scope(name):
params_shape = [x.get_shape()[-1]]
beta = tf.get_variable(
'beta', params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32))
gamma = tf.get_variable(
'gamma', params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32))
if self.mode == 'train':
mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
moving_mean = tf.get_variable(
'moving_mean', params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32),
trainable=False)
moving_variance = tf.get_variable(
'moving_variance', params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
self._extra_train_ops.append(moving_averages.assign_moving_average(
moving_mean, mean, 0.9))
self._extra_train_ops.append(moving_averages.assign_moving_average(
moving_variance, variance, 0.9))
else:
mean = tf.get_variable(
'moving_mean', params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32),
trainable=False)
variance = tf.get_variable(
'moving_variance', params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
#tf.histogram_summary(mean.op.name, mean)
#tf.histogram_summary(variance.op.name, variance)
# epsilon used to be 1e-5. Maybe 0.001 solves the NaN problem in deeper nets.
y = tf.nn.batch_normalization(
x, mean, variance, beta, gamma, 0.001)
y.set_shape(x.get_shape())
return y
def _residual(self, x, in_filter, out_filter, stride,
activate_before_residual=False):
"""Residual unit with 2 sub layers."""
if activate_before_residual:
with tf.variable_scope('shared_activation'):
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
orig_x = x
else:
with tf.variable_scope('residual_only_activation'):
orig_x = x
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
with tf.variable_scope('sub1'):
x = self._conv('conv1', x, 3, in_filter, out_filter, stride)
with tf.variable_scope('sub2'):
x = self._batch_norm('bn2', x)
x = self._relu(x, self.hps.relu_leakiness)
x = self._conv('conv2', x, 3, out_filter, out_filter, [1, 1, 1, 1])
with tf.variable_scope('sub_add'):
if in_filter != out_filter:
orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID')
orig_x = tf.pad(
orig_x, [[0, 0], [0, 0], [0, 0],
[(out_filter-in_filter)//2, (out_filter-in_filter)//2]])
x += orig_x
tf.logging.info('image after unit %s', x.get_shape())
return x
def _bottleneck_residual(self, x, in_filter, out_filter, stride,
activate_before_residual=False):
"""Bottleneck resisual unit with 3 sub layers."""
if activate_before_residual:
with tf.variable_scope('common_bn_relu'):
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
orig_x = x
else:
with tf.variable_scope('residual_bn_relu'):
orig_x = x
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
with tf.variable_scope('sub1'):
x = self._conv('conv1', x, 1, in_filter, out_filter/4, stride)
with tf.variable_scope('sub2'):
x = self._batch_norm('bn2', x)
x = self._relu(x, self.hps.relu_leakiness)
x = self._conv('conv2', x, 3, out_filter/4, out_filter/4, [1, 1, 1, 1])
with tf.variable_scope('sub3'):
x = self._batch_norm('bn3', x)
x = self._relu(x, self.hps.relu_leakiness)
x = self._conv('conv3', x, 1, out_filter/4, out_filter, [1, 1, 1, 1])
with tf.variable_scope('sub_add'):
if in_filter != out_filter:
orig_x = self._conv('project', orig_x, 1, in_filter, out_filter, stride)
x += orig_x
tf.logging.info('image after unit %s', x.get_shape())
return x
def _decay(self):
"""L2 weight decay loss."""
costs = []
for var in tf.trainable_variables():
if var.op.name.find(r'DW') > 0:
costs.append(tf.nn.l2_loss(var))
# tf.histogram_summary(var.op.name, var)
return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))
def GetWeightDecay(self):
"""L2 weight decay loss."""
costs = []
for var in self.trainable_variables:
if var.op.name.find(r'DW') > 0:
costs.append(tf.nn.l2_loss(var))
# tf.histogram_summary(var.op.name, var)
return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))
def _conv(self, name, x, filter_size, in_filters, out_filters, strides):
"""Convolution."""
with tf.variable_scope(name):
n = filter_size * filter_size * out_filters
kernel = tf.get_variable(
'DW', [filter_size, filter_size, in_filters, out_filters],
tf.float32, initializer=tf.random_normal_initializer(
stddev=np.sqrt(2.0/n)))
return tf.nn.conv2d(x, kernel, strides, padding='SAME')
def _relu(self, x, leakiness=0.0):
"""Relu, with optional leaky support."""
return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
def _fully_connected(self, x, out_dim):
"""FullyConnected layer for final output."""
x = tf.reshape(x, [self.batch_size, -1])
w = tf.get_variable(
'DW', [x.get_shape()[1], out_dim],
initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
b = tf.get_variable('biases', [out_dim],
initializer=tf.constant_initializer())
return tf.nn.xw_plus_b(x, w, b)
def _global_avg_pool(self, x):
assert x.get_shape().ndims == 4
return tf.reduce_mean(x, [1, 2])
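A minimal usage sketch for the ResNet class above, assuming the TF 1.x API used in this file; the hyperparameter values, single-GPU setup, and zero-filled feed are illustrative only:

import numpy as np
import tensorflow as tf

hps = HParams(batch_size=12, num_classes=10, min_lrn_rate=0.0001, lrn_rate=0.1,
              num_residual_units=4, use_bottleneck=False, weight_decay_rate=0.0002,
              relu_leakiness=0.1, optimizer='mom')
net = ResNet(hps, mode='train', image_size=32, nr_gpu=1)
net.build_graph()

config = tf.ConfigProto(allow_soft_placement=True)  # fall back to CPU if no GPU is visible
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    feed = {net.needImgAug: True,
            net.input_image[0]: np.zeros((12, 32, 32, 3), np.float32),
            net.input_label[0]: np.zeros((12, 1), np.int32)}
    nll = sess.run(net.nlls[0], feed)  # per-example cross-entropy computed on device 0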

Просмотреть файл

@ -0,0 +1,196 @@
"""
Evaluates a trained Pixel-CNN++ generative model (bits/dim) on CIFAR-10 or Tiny ImageNet data.
Uses multiple GPUs, indicated by the flag --nr_gpu
Example usage:
CUDA_VISIBLE_DEVICES=0,1,2,3 python train_double_cnn.py --nr_gpu 4
"""
import os
import sys
import time
import json
import argparse
import numpy as np
import tensorflow as tf
import pixel_cnn_pp.nn as nn
import pixel_cnn_pp.plotting as plotting
from pixel_cnn_pp.model import model_spec
import data.cifar10_data as cifar10_data
import data.imagenet_data as imagenet_data
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# data I/O
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
parser.add_argument('-t', '--save_interval', type=int, default=20, help='Every how many epochs to write checkpoint/samples?')
parser.add_argument('-r', '--load_params', dest='load_params', action='store_true', help='Restore training from previous model checkpoint?')
# model
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
# optimization
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
parser.add_argument('-g', '--nr_gpu', type=int, default=8, help='How many GPUs to distribute the training across?')
# evaluation
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
# reproducibility
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
args = parser.parse_args()
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
# -----------------------------------------------------------------------------
# fix random seed for reproducibility
rng = np.random.RandomState(args.seed)
tf.set_random_seed(args.seed)
# initialize data loaders for train/test splits
if args.data_set == 'imagenet' and args.class_conditional:
raise("We currently don't have labels for the small imagenet data set")
DataLoader = {'cifar':cifar10_data.DataLoader, 'imagenet':imagenet_data.DataLoader}[args.data_set]
train_data = DataLoader(args.data_dir, 'train', args.batch_size * args.nr_gpu, rng=rng, shuffle=False, return_labels=args.class_conditional)
test_data = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=args.class_conditional)
obs_shape = train_data.get_observation_size() # e.g. a tuple (32,32,3)
assert len(obs_shape) == 3, 'assumed right now'
# data place holders
x_init = tf.placeholder(tf.float32, shape=(args.init_batch_size,) + obs_shape)
xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + obs_shape) for i in range(args.nr_gpu)]
# if the model is class-conditional we'll set up label placeholders + one-hot encodings 'h' to condition on
if args.class_conditional:
num_labels = train_data.get_num_labels()
y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
h_init = tf.one_hot(y_init, num_labels)
y_sample = np.split(np.mod(np.arange(args.batch_size*args.nr_gpu), num_labels), args.nr_gpu)
h_sample = [tf.one_hot(tf.Variable(y_sample[i], trainable=False), num_labels) for i in range(args.nr_gpu)]
ys = [tf.placeholder(tf.int32, shape=(args.batch_size,)) for i in range(args.nr_gpu)]
hs = [tf.one_hot(ys[i], num_labels) for i in range(args.nr_gpu)]
else:
h_init = None
h_sample = [None] * args.nr_gpu
hs = h_sample
# create the model
model_opt = { 'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters, 'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity }
model = tf.make_template('model', model_spec)
# run once for data dependent initialization of parameters
gen_par = model(x_init, h_init, init=True, dropout_p=args.dropout_p, **model_opt)
# keep track of moving average
all_params = tf.trainable_variables()
ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
maintain_averages_op = tf.group(ema.apply(all_params))
# get loss gradients over multiple GPUs
grads = []
loss_gen = []
loss_gen_test = []
for i in range(args.nr_gpu):
with tf.device('/gpu:%d' % i):
# train
gen_par = model(xs[i], hs[i], ema=None, dropout_p=args.dropout_p, **model_opt)
loss_gen.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
# gradients
grads.append(tf.gradients(loss_gen[i], all_params))
# test
gen_par = model(xs[i], hs[i], ema=ema, dropout_p=0., **model_opt)
loss_gen_test.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
# add losses and gradients together and get training updates
tf_lr = tf.placeholder(tf.float32, shape=[])
with tf.device('/gpu:0'):
for i in range(1,args.nr_gpu):
loss_gen[0] += loss_gen[i]
loss_gen_test[0] += loss_gen_test[i]
for j in range(len(grads[0])):
grads[0][j] += grads[i][j]
# training op
optimizer = tf.group(nn.adam_updates(all_params, grads[0], lr=tf_lr, mom1=0.95, mom2=0.9995), maintain_averages_op)
# convert loss to bits/dim
bits_per_dim = loss_gen[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
bits_per_dim_test = loss_gen_test[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
# sample from the model
new_x_gen = []
for i in range(args.nr_gpu):
with tf.device('/gpu:%d' % i):
gen_par = model(xs[i], h_sample[i], ema=ema, dropout_p=0, **model_opt)
new_x_gen.append(nn.sample_from_discretized_mix_logistic(gen_par, args.nr_logistic_mix))
def sample_from_model(sess):
x_gen = [np.zeros((args.batch_size,) + obs_shape, dtype=np.float32) for i in range(args.nr_gpu)]
for yi in range(obs_shape[0]):
for xi in range(obs_shape[1]):
new_x_gen_np = sess.run(new_x_gen, {xs[i]: x_gen[i] for i in range(args.nr_gpu)})
for i in range(args.nr_gpu):
x_gen[i][:,yi,xi,:] = new_x_gen_np[i][:,yi,xi,:]
return np.concatenate(x_gen, axis=0)
# init & save
initializer = tf.initialize_all_variables()
saver = tf.train.Saver()
# turn numpy inputs into feed_dict for use with tensorflow
def make_feed_dict(data, init=False):
if type(data) is tuple:
x,y = data
else:
x = data
y = None
x = np.cast[np.float32]((x - 127.5) / 127.5) # input to pixelCNN is scaled from uint8 [0,255] to float in range [-1,1]
if init:
feed_dict = {x_init: x}
if y is not None:
feed_dict.update({y_init: y})
else:
x = np.split(x, args.nr_gpu)
feed_dict = {xs[i]: x[i] for i in range(args.nr_gpu)}
if y is not None:
y = np.split(y, args.nr_gpu)
feed_dict.update({ys[i]: y[i] for i in range(args.nr_gpu)})
return feed_dict
# //////////// perform testing //////////////
print('starting testing')
test_bpd = []
lr = args.learning_rate
with tf.Session() as sess:
# compute likelihood over test data
ckpt_file = args.save_dir + '/params_' + args.data_set + '.ckpt'
print('restoring parameters from', ckpt_file)
saver.restore(sess, ckpt_file)
test_losses = []
uidx = 0
for d in train_data:
feed_dict = make_feed_dict(d)
l = sess.run(bits_per_dim_test, feed_dict)
test_losses.append(l)
uidx += 1
if uidx % 100 == 0:
print(uidx, l)
test_loss_gen = np.mean(test_losses)
print(uidx, ' -- ', test_loss_gen)
test_bpd.append(test_loss_gen)
print('Test nll=%.2f' % test_loss_gen)
np.savez('./TMD', np.array(test_losses))
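The bits_per_dim expressions above convert the discretized-logistic loss (nats, summed over every pixel, channel and GPU in the combined batch) into bits per dimension. A small worked sketch of that conversion with made-up numbers:

import numpy as np

nr_gpu, batch_size = 8, 12
obs_shape = (32, 32, 3)
total_nats = 600000.0  # stand-in for loss_gen[0] after summing across GPUs
dims_per_step = nr_gpu * batch_size * np.prod(obs_shape)  # all pixel/channel dims scored per step
bits_per_dim = total_nats / (np.log(2.) * dims_per_step)  # nats -> bits, then average per dimension
print(bits_per_dim)  # ~2.94 bits/dim for these made-up numbers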

222
DSL_ImgProcess/train.py Normal file
Просмотреть файл

@ -0,0 +1,222 @@
"""
Trains a Pixel-CNN++ generative model on CIFAR-10 or Tiny ImageNet data.
Uses multiple GPUs, indicated by the flag --nr_gpu
Example usage:
CUDA_VISIBLE_DEVICES=0,1,2,3 python train_double_cnn.py --nr_gpu 4
"""
import os
import sys
import time
import json
import argparse
import numpy as np
import tensorflow as tf
import pixel_cnn_pp.nn as nn
import pixel_cnn_pp.plotting as plotting
from pixel_cnn_pp.model import model_spec
import data.cifar10_data as cifar10_data
import data.imagenet_data as imagenet_data
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# data I/O
parser.add_argument('-i', '--data_dir', type=str, default='/tmp/pxpp/data', help='Location for the dataset')
parser.add_argument('-o', '--save_dir', type=str, default='/tmp/pxpp/save', help='Location for parameter checkpoints and samples')
parser.add_argument('-d', '--data_set', type=str, default='cifar', help='Can be either cifar|imagenet')
parser.add_argument('-t', '--save_interval', type=int, default=20, help='Every how many epochs to write checkpoint/samples?')
parser.add_argument('-r', '--load_params', dest='load_params', action='store_true', help='Restore training from previous model checkpoint?')
# model
parser.add_argument('-q', '--nr_resnet', type=int, default=5, help='Number of residual blocks per stage of the model')
parser.add_argument('-n', '--nr_filters', type=int, default=160, help='Number of filters to use across the model. Higher = larger model.')
parser.add_argument('-m', '--nr_logistic_mix', type=int, default=10, help='Number of logistic components in the mixture. Higher = more flexible model')
parser.add_argument('-z', '--resnet_nonlinearity', type=str, default='concat_elu', help='Which nonlinearity to use in the ResNet layers. One of "concat_elu", "elu", "relu" ')
parser.add_argument('-c', '--class_conditional', dest='class_conditional', action='store_true', help='Condition generative model on labels?')
# optimization
parser.add_argument('-l', '--learning_rate', type=float, default=0.001, help='Base learning rate')
parser.add_argument('-e', '--lr_decay', type=float, default=0.999995, help='Learning rate decay, applied every step of the optimization')
parser.add_argument('-b', '--batch_size', type=int, default=12, help='Batch size during training per GPU')
parser.add_argument('-a', '--init_batch_size', type=int, default=100, help='How much data to use for data-dependent initialization.')
parser.add_argument('-p', '--dropout_p', type=float, default=0.5, help='Dropout strength (i.e. 1 - keep_prob). 0 = No dropout, higher = more dropout.')
parser.add_argument('-x', '--max_epochs', type=int, default=5000, help='How many epochs to run in total?')
parser.add_argument('-g', '--nr_gpu', type=int, default=8, help='How many GPUs to distribute the training across?')
# evaluation
parser.add_argument('--polyak_decay', type=float, default=0.9995, help='Exponential decay rate of the sum of previous model iterates during Polyak averaging')
# reproducibility
parser.add_argument('-s', '--seed', type=int, default=1, help='Random seed to use')
args = parser.parse_args()
print('input args:\n', json.dumps(vars(args), indent=4, separators=(',',':'))) # pretty print args
# -----------------------------------------------------------------------------
# fix random seed for reproducibility
rng = np.random.RandomState(args.seed)
tf.set_random_seed(args.seed)
# initialize data loaders for train/test splits
if args.data_set == 'imagenet' and args.class_conditional:
raise("We currently don't have labels for the small imagenet data set")
DataLoader = {'cifar':cifar10_data.DataLoader, 'imagenet':imagenet_data.DataLoader}[args.data_set]
train_data = DataLoader(args.data_dir, 'train', args.batch_size * args.nr_gpu, rng=rng, shuffle=True, return_labels=args.class_conditional)
test_data = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=args.class_conditional)
obs_shape = train_data.get_observation_size() # e.g. a tuple (32,32,3)
assert len(obs_shape) == 3, 'assumed right now'
# data place holders
x_init = tf.placeholder(tf.float32, shape=(args.init_batch_size,) + obs_shape)
xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + obs_shape) for i in range(args.nr_gpu)]
# if the model is class-conditional we'll set up label placeholders + one-hot encodings 'h' to condition on
if args.class_conditional:
num_labels = train_data.get_num_labels()
y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
h_init = tf.one_hot(y_init, num_labels)
y_sample = np.split(np.mod(np.arange(args.batch_size*args.nr_gpu), num_labels), args.nr_gpu)
h_sample = [tf.one_hot(tf.Variable(y_sample[i], trainable=False), num_labels) for i in range(args.nr_gpu)]
ys = [tf.placeholder(tf.int32, shape=(args.batch_size,)) for i in range(args.nr_gpu)]
hs = [tf.one_hot(ys[i], num_labels) for i in range(args.nr_gpu)]
else:
h_init = None
h_sample = [None] * args.nr_gpu
hs = h_sample
# create the model
model_opt = { 'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters, 'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity }
model = tf.make_template('model', model_spec)
# run once for data dependent initialization of parameters
gen_par = model(x_init, h_init, init=True, dropout_p=args.dropout_p, **model_opt)
# keep track of moving average
all_params = tf.trainable_variables()
ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
maintain_averages_op = tf.group(ema.apply(all_params))
# get loss gradients over multiple GPUs
grads = []
loss_gen = []
loss_gen_test = []
for i in range(args.nr_gpu):
with tf.device('/gpu:%d' % i):
# train
gen_par = model(xs[i], hs[i], ema=None, dropout_p=args.dropout_p, **model_opt)
loss_gen.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
# gradients
grads.append(tf.gradients(loss_gen[i], all_params))
# test
gen_par = model(xs[i], hs[i], ema=ema, dropout_p=0., **model_opt)
loss_gen_test.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))
# add losses and gradients together and get training updates
tf_lr = tf.placeholder(tf.float32, shape=[])
with tf.device('/gpu:0'):
for i in range(1,args.nr_gpu):
loss_gen[0] += loss_gen[i]
loss_gen_test[0] += loss_gen_test[i]
for j in range(len(grads[0])):
grads[0][j] += grads[i][j]
# training op
optimizer = tf.group(nn.adam_updates(all_params, grads[0], lr=tf_lr, mom1=0.95, mom2=0.9995), maintain_averages_op)
# convert loss to bits/dim
bits_per_dim = loss_gen[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
bits_per_dim_test = loss_gen_test[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
# sample from the model
new_x_gen = []
for i in range(args.nr_gpu):
with tf.device('/gpu:%d' % i):
gen_par = model(xs[i], h_sample[i], ema=ema, dropout_p=0, **model_opt)
new_x_gen.append(nn.sample_from_discretized_mix_logistic(gen_par, args.nr_logistic_mix))
def sample_from_model(sess):
x_gen = [np.zeros((args.batch_size,) + obs_shape, dtype=np.float32) for i in range(args.nr_gpu)]
for yi in range(obs_shape[0]):
for xi in range(obs_shape[1]):
new_x_gen_np = sess.run(new_x_gen, {xs[i]: x_gen[i] for i in range(args.nr_gpu)})
for i in range(args.nr_gpu):
x_gen[i][:,yi,xi,:] = new_x_gen_np[i][:,yi,xi,:]
return np.concatenate(x_gen, axis=0)
# init & save
initializer = tf.initialize_all_variables()
saver = tf.train.Saver()
# turn numpy inputs into feed_dict for use with tensorflow
def make_feed_dict(data, init=False):
if type(data) is tuple:
x,y = data
else:
x = data
y = None
x = np.cast[np.float32]((x - 127.5) / 127.5) # input to pixelCNN is scaled from uint8 [0,255] to float in range [-1,1]
if init:
feed_dict = {x_init: x}
if y is not None:
feed_dict.update({y_init: y})
else:
x = np.split(x, args.nr_gpu)
feed_dict = {xs[i]: x[i] for i in range(args.nr_gpu)}
if y is not None:
y = np.split(y, args.nr_gpu)
feed_dict.update({ys[i]: y[i] for i in range(args.nr_gpu)})
return feed_dict
# //////////// perform training //////////////
if not os.path.exists(args.save_dir):
os.makedirs(args.save_dir)
print('starting training')
test_bpd = []
lr = args.learning_rate
with tf.Session() as sess:
for epoch in range(args.max_epochs):
begin = time.time()
# init
if epoch == 0:
feed_dict = make_feed_dict(train_data.next(args.init_batch_size), init=True) # manually retrieve exactly init_batch_size examples
train_data.reset() # rewind the iterator back to 0 to do one full epoch
sess.run(initializer, feed_dict)
print('initializing the model...')
if args.load_params:
ckpt_file = args.save_dir + '/params_' + args.data_set + '.ckpt'
print('restoring parameters from', ckpt_file)
saver.restore(sess, ckpt_file)
# train for one epoch
train_losses = []
for d in train_data:
feed_dict = make_feed_dict(d)
# forward/backward/update model on each gpu
lr *= args.lr_decay
feed_dict.update({ tf_lr: lr })
l,_ = sess.run([bits_per_dim, optimizer], feed_dict)
train_losses.append(l)
train_loss_gen = np.mean(train_losses)
# compute likelihood over test data
test_losses = []
for d in test_data:
feed_dict = make_feed_dict(d)
l = sess.run(bits_per_dim_test, feed_dict)
test_losses.append(l)
test_loss_gen = np.mean(test_losses)
test_bpd.append(test_loss_gen)
# log progress to console
print("Iteration %d, time = %ds, train bits_per_dim = %.4f, test bits_per_dim = %.4f" % (epoch, time.time()-begin, train_loss_gen, test_loss_gen))
sys.stdout.flush()
if epoch % args.save_interval == 0:
# generate samples from the model
sample_x = sample_from_model(sess)
img_tile = plotting.img_tile(sample_x[:int(np.floor(np.sqrt(args.batch_size*args.nr_gpu))**2)], aspect_ratio=1.0, border_color=1.0, stretch=True)
img = plotting.plot_img(img_tile, title=args.data_set + ' samples')
plotting.plt.savefig(os.path.join(args.save_dir,'%s_sample%d.png' % (args.data_set, epoch)))
plotting.plt.close('all')
# save params
saver.save(sess, args.save_dir + '/params_' + args.data_set + '.ckpt')
np.savez(args.save_dir + '/test_bpd_' + args.data_set + '.npz', test_bpd=np.array(test_bpd))
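In the training loop above the learning rate is multiplied by --lr_decay once per minibatch, not per epoch, so the effective rate after N updates is learning_rate * lr_decay**N. A quick sketch with the default flag values, purely illustrative:

base_lr, lr_decay = 0.001, 0.999995
for n_updates in (10000, 100000, 1000000):
    print('lr after %d updates: %.3g' % (n_updates, base_lr * lr_decay ** n_updates))
# roughly 9.5e-04 after 10k updates, 6.1e-04 after 100k, 6.7e-06 after 1M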

Просмотреть файл

@ -0,0 +1,91 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet Train/Eval module.
"""
import time
import sys
import os
import cifar_input
import numpy as np
import resnet_model_basic as resnet_model
import tensorflow as tf
import data.cifar10_data as cifar10_data
def lr_I2L(train_step):
#step_wise = [40000,60000,80000] # this is the schedule used in the original setup
step_wise = [51000,76000,102000]
if train_step < step_wise[0]:
return 0.1
elif train_step < step_wise[1]:
return 0.01
elif train_step < step_wise[2]:
return 0.001
else:
return 0.0001
class worker_I2L(object):
def __init__(self, args):
hps = resnet_model.HParams(batch_size=args.batch_size,
num_classes=10,
min_lrn_rate=0.0001,
lrn_rate=0.1,
num_residual_units=18,
use_bottleneck=False,
weight_decay_rate=0.0002,
relu_leakiness=0.1,
optimizer='mom')
self.args = args
self.model = resnet_model.ResNet(hps, args.mode, use_wide_resnet=args.use_wide_resnet, nr_gpu=args.nr_gpu)
self.model.build_graph()
truth = tf.argmax(tf.concat(self.model.labels, axis=0), axis=1)
predictions = tf.argmax(tf.concat(self.model.predictions,axis=0), axis=1)
self.right_decision = tf.reduce_sum(tf.to_float(tf.equal(predictions, truth)))
def GetLoss(self):
return self.model.nlls, self.model.GetWeightDecay()
def Valid(self, test_data, sess):
with tf.device('/gpu:0'):
cost_all = self.model.nlls[0]
for i in range(1, self.args.nr_gpu):
cost_all += self.model.nlls[i]
m_sample = 0
m_correct = 0.
costs = 0.
for test_image, test_label in test_data:
m_sample += test_image.shape[0]
splitted_image = np.split(test_image.astype('float32'), self.args.nr_gpu)
splitted_label = np.split(test_label, self.args.nr_gpu)
feed_dict = {self.model.needImgAug: False}
feed_dict.update({self.model.input_image[i]: splitted_image[i] for i in range(self.args.nr_gpu)})
feed_dict.update({self.model.input_label[i]: splitted_label[i][:, None] for i in range(self.args.nr_gpu)})
_cost, _right_decision = sess.run([cost_all, self.right_decision], feed_dict)
costs += np.sum(_cost)
m_correct += _right_decision
test_loss = costs / m_sample
test_acc = m_correct * 1. / m_sample
print('[I2L] test_nll={},test_acc={}'.format(
'{0:.4f}'.format(test_loss), '{0:.6f}'.format(test_acc) )
)
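lr_I2L above is a piecewise-constant learning-rate schedule keyed on the global update count. A tiny sketch of how a training driver might query it (the probed step values are illustrative; the real driver lives in the training script):

# assumes lr_I2L from the module above is in scope
for step in (0, 51000, 76000, 102000, 200000):
    print('step %d -> lr %g' % (step, lr_I2L(step)))
# 0.1 before step 51000, then 0.01, 0.001 from 76000, and 0.0001 from 102000 onwards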

Просмотреть файл

@ -0,0 +1,134 @@
"""
Label-to-image (L2I) worker wrapping a Pixel-CNN++ generative model, for CIFAR-10 or Tiny ImageNet data.
Uses multiple GPUs, indicated by the flag --nr_gpu
Example usage:
CUDA_VISIBLE_DEVICES=0,1,2,3 python train_double_cnn.py --nr_gpu 4
"""
import os
import sys
import time
import json
import argparse
import numpy as np
import tensorflow as tf
import pixel_cnn_pp.nn as nn
import pixel_cnn_pp.plotting as plotting
from pixel_cnn_pp.model import model_spec
import data.cifar10_data as cifar10_data
class worker_L2I(object):
def __init__(self, args, num_labels, image_shape):
# Default parameters
self.num_labels = num_labels
self.image_shape=image_shape
self.args = args
# Data used for data-dependent parameter initialization
self.x_init = tf.placeholder(tf.float32, shape=(args.init_batch_size,) + self.image_shape)
self.xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + self.image_shape) for _ in range(args.nr_gpu)]
self.y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
self.h_init = tf.one_hot(self.y_init, self.num_labels)
# parameters used for sampling
self.y_sample = np.split(np.mod(np.arange(args.batch_size*args.nr_gpu), self.num_labels), args.nr_gpu)
# self.h_sample = [tf.one_hot(tf.Variable(self.y_sample[i], trainable=False), self.num_labels) for i in range(args.nr_gpu)]
# the line above is the version used for the ICML paper; it is revised as follows
self.h_sample = [tf.one_hot(self.y_sample[i], self.num_labels) for i in range(args.nr_gpu)]
self.ys = [tf.placeholder(tf.int32, shape=(args.batch_size,)) for i in range(args.nr_gpu)]
self.hs = [tf.one_hot(self.ys[i], self.num_labels) for i in range(args.nr_gpu)]
# create the model
self.model_opt = { 'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters, 'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity }
self.model = tf.make_template('model', model_spec)
# run once for data dependent initialization of parameters
# in the original code this is `gen_par = self.model(...)`; with init=True it runs the data-dependent initialization automatically
self.model(self.x_init, self.h_init, init=True, dropout_p=args.dropout_p, **self.model_opt)
# keep track of moving average
self.all_params = tf.trainable_variables()
self.ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
self.maintain_averages_op = tf.group(self.ema.apply(self.all_params))
# parameters for optimization
self.tf_lr = tf.placeholder(tf.float32, shape=())
def GetLoss(self):
# get loss gradients over multiple GPUs
loss_gen = []
loss_gen_test = []
for i in range(self.args.nr_gpu):
with tf.device('/gpu:%d' % i):
# train
gen_par = self.model(self.xs[i], self.hs[i], ema=None, dropout_p=self.args.dropout_p, **self.model_opt)
loss_gen.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par, sum_all=False))
# test
gen_par = self.model(self.xs[i], self.hs[i], ema=self.ema, dropout_p=0., **self.model_opt)
loss_gen_test.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par))
return loss_gen, loss_gen_test
def GetOverallLoss(self):
# get loss gradients over multiple GPUs
loss_gen = []
loss_gen_test = []
for i in range(self.args.nr_gpu):
with tf.device('/gpu:%d' % i):
# train
gen_par = self.model(self.xs[i], self.hs[i], ema=None, dropout_p=self.args.dropout_p, **self.model_opt)
loss_gen.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par, sum_all=False))
# test
gen_par = self.model(self.xs[i], self.hs[i], ema=self.ema, dropout_p=0., **self.model_opt)
loss_gen_test.append(nn.discretized_mix_logistic_loss(self.xs[i], gen_par))
# accumulate the per-GPU losses on /gpu:0
with tf.device('/gpu:0'):
for i in range(1,self.args.nr_gpu):
loss_gen[0] += loss_gen[i]
loss_gen_test[0] += loss_gen_test[i]
# training op
#optimizer = tf.group(nn.adam_updates(self.all_params, grads[0], lr=self.tf_lr, mom1=0.95, mom2=0.9995), self.maintain_averages_op)
# convert loss to bits/dim
self.bits_per_dim = loss_gen[0]/(self.args.nr_gpu*np.log(2.)*np.prod(self.image_shape)*self.args.batch_size)
self.bits_per_dim_test = loss_gen_test[0]/(self.args.nr_gpu*np.log(2.)*np.prod(self.image_shape)*self.args.batch_size)
def Update(self, grads, useSGD=False):
if useSGD:
print('Use pure SGD for Label-->Image tasks')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.tf_lr)
apply_op = optimizer.apply_gradients(zip(grads, self.all_params))
self.update_ops = tf.group(apply_op)
else:
self.update_ops = tf.group(nn.adam_updates(self.all_params, grads, lr=self.tf_lr, mom1=0.95, mom2=0.9995), self.maintain_averages_op)
def build_sample_from_model(self):
# sample from the model
self.new_x_gen = []
for i in range(self.args.nr_gpu):
with tf.device('/gpu:%d' % i):
gen_par = self.model(self.xs[i], self.h_sample[i], ema=self.ema, dropout_p=0, **self.model_opt)
self.new_x_gen.append(nn.sample_from_discretized_mix_logistic(gen_par, self.args.nr_logistic_mix))
def _sample_from_model(self, sess):
x_gen = [np.zeros((self.args.batch_size,) + self.image_shape, dtype=np.float32) for _ in range(self.args.nr_gpu)]
for yi in range(self.image_shape[0]):
for xi in range(self.image_shape[1]):
new_x_gen_np = sess.run(self.new_x_gen, {self.xs[i]: x_gen[i] for i in range(self.args.nr_gpu)})
for i in range(self.args.nr_gpu):
x_gen[i][:,yi,xi,:] = new_x_gen_np[i][:,yi,xi,:]
return np.concatenate(x_gen, axis=0)
def Gen_Images(self, sess, epoch):
sample_x = self._sample_from_model(sess)
img_tile = plotting.img_tile(sample_x[:int(np.floor(np.sqrt(self.args.batch_size*self.args.nr_gpu))**2)], aspect_ratio=1.0, border_color=1.0, stretch=True)
img = plotting.plot_img(img_tile, title=self.args.data_set + ' samples')
plotting.plt.savefig(os.path.join(self.args.save_dir,'%s_sample%d.png' % (self.args.data_set, epoch)))
plotting.plt.close('all')
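worker_L2I exposes GetLoss/GetOverallLoss and Update(grads) but leaves the gradient computation to the caller. A hedged sketch of how a driver could sum per-GPU gradients and wire them into Update, mirroring the pattern used in train.py above; `worker` and the reduction here are illustrative, not the original driver code:

import tensorflow as tf

# assumes `worker` is an already-constructed worker_L2I instance
loss_gen, loss_gen_test = worker.GetLoss()
grads = []
for i in range(worker.args.nr_gpu):
    with tf.device('/gpu:%d' % i):
        # per-GPU gradients of the training loss w.r.t. the shared parameters
        grads.append(tf.gradients(tf.reduce_sum(loss_gen[i]), worker.all_params))
with tf.device('/gpu:0'):
    # add the per-GPU gradients together, as train.py does
    summed = [tf.add_n([g[j] for g in grads]) for j in range(len(grads[0]))]
worker.Update(summed)  # builds worker.update_ops (Adam + EMA maintenance by default)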

Просмотреть файл

@ -0,0 +1,40 @@
import sys
mapper_machine_freecard = {}
mapper_machine_rank = {}
def MapIDs(m_machine):
for i in range(m_machine):
fo = open('record' + str(i))
id = 0
m_line = 0
machine_name = ''
for line in fo:
if id == 0:
machine_name = line[:-1]
mapper_machine_freecard[machine_name] = []
if mapper_machine_rank.has_key(machine_name):
mapper_machine_rank[machine_name].append(i)
else:
mapper_machine_rank[machine_name] = [i]
elif id > 1:
mapper_machine_freecard[machine_name].append(int(line))
id = id + 1
fo.close()
def Map_Rank_Card(m_machine):
MapIDs(m_machine)
allocations = range(m_machine)
for k in mapper_machine_rank.keys():
ranks = mapper_machine_rank[k]
cards = mapper_machine_freecard[k]
#if len(ranks) == len(cards):
for i in range(len(ranks)):
allocations[ranks[i]] = cards[i]
for l in allocations:
print l
if __name__ == '__main__':
Map_Rank_Card(int(sys.argv[1]))
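MapIDs above expects each rank i to have written a file named 'record' + str(i): line 0 is the machine name, line 1 is skipped, and every later line is a free GPU id on that machine; Map_Rank_Card then prints one GPU id per rank. A hedged Python 2 sketch of that assumed layout, with made-up machine names and card ids, runnable if pasted into the same module:

# write two fake record files for ranks 0 and 1 on the same machine
for rank, (machine, cards) in enumerate([('node01', [2, 3]), ('node01', [2, 3])]):
    with open('record' + str(rank), 'w') as fo:
        fo.write(machine + '\n')   # line 0: machine name
        fo.write('header\n')       # line 1: ignored by MapIDs
        for c in cards:            # lines 2+: free GPU ids on this machine
            fo.write(str(c) + '\n')
Map_Rank_Card(2)  # prints "2" then "3": rank 0 gets card 2, rank 1 gets card 3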

Просмотреть файл

@ -0,0 +1,40 @@
import sys
mapper_machine_freecard = {}
mapper_machine_rank = {}
def MapIDs(m_machine):
for i in range(m_machine):
fo = open('record' + str(i))
id = 0
m_line = 0
machine_name = ''
for line in fo:
if id == 0:
machine_name = line[:-1]
mapper_machine_freecard[machine_name] = []
if mapper_machine_rank.has_key(machine_name):
mapper_machine_rank[machine_name].append(i)
else:
mapper_machine_rank[machine_name] = [i]
elif id > 1:
mapper_machine_freecard[machine_name].append(int(line))
id = id + 1
fo.close()
def Map_Rank_Card(m_machine):
MapIDs(m_machine)
allocations = range(m_machine)
for k in mapper_machine_rank.keys():
ranks = mapper_machine_rank[k]
cards = mapper_machine_freecard[k]
#if len(ranks) == len(cards):
for i in range(len(ranks)):
allocations[ranks[i]] = cards[i]
for l in allocations:
print l
if __name__ == '__main__':
Map_Rank_Card(int(sys.argv[1]))

Просмотреть файл

@ -0,0 +1,455 @@
from nmt_base import *
from Data import *
def _p(pp, name):
return '%s_%s' % (pp, name)
class CLM_worker(object):
def __init__(self,
round = 0,
dim_word=500, # word vector dimensionality
dim_proj=1024, # the number of GRU units
encoder='lstm',
patience=10, # early stopping patience
max_epochs=5000,
finish_after=10000000000000, # finish after this many updates
decay_c=-1., # L2 weight decay penalty
clip_c=5.,
lrate=1.,
n_words=10000, # vocabulary size
maxlen=None, # maximum length of the description
minlen=1,
start_iter=0,
start_epoch=0,
optimizer='adadelta',
batch_size=16,
valid_batch_size=16,
saveto='model.npz',
validFreq=2000,
dispFreq=100,
saveFreq=100000, # save the parameters after every saveFreq updates
newDumpFreq=10000,
syncFreq = 500000000000,
sampleFreq=10000000000, # generate some samples after every sampleFreq
valid_dataset=None,
test_dataset=None,
dictionary=None,
sampleFileName="sampleFile.txt",
embedding=None,
dropout_input=None,
dropout_output=None,
reload_model=None,
reload_option=None,
log=None,
monitor_grad=False,
pad_sos=False):
# Model options
if pad_sos:
n_words += 1
self.options = locals().copy()
print('log = ', log)
F_log = open(log, "a")
voc_size = n_words - 1 if pad_sos else n_words
# reload options
if reload_option is not None and os.path.exists(reload_option):
print "Reloading model options...",
with open('%s' % reload_option, 'rb') as f:
model_options = pkl.load(f)
print "Done"
# init parameters
print 'Initializing model parameters...',
params = init_lm_params(self.options)
print 'Done'
# load pre-trained word embedding
if embedding is not None and os.path.exists(embedding):
print 'Load Embedding from ', embedding
Wemb = numpy.array(numpy.load(open(embedding, "rb")))
assert Wemb.shape[0] == self.options['n_words']
assert Wemb.shape[1] == self.options['dim_word']
print 'Using pre-trained word embedding...',
params['Wemb'] = Wemb.astype(numpy.float32)
print 'vocab size', params['Wemb'].shape[0], ', dim', params['Wemb'].shape[1]
# reload parameters
if reload_model is not None and os.path.exists(reload_model):
print "Reloading model parameters...",
params = load_params(reload_model, params)
print "Done"
# create shared variables for parameters
self.tparams = init_tparams(params)
# build the symbolic computational graph
print 'Building model...'
self.trng = RandomStreams(1234)
self.use_noise = theano.shared(numpy.float32(0.))
def GetNll(self):
srcx, srcx_mask, ctx_, cost, sentenceLen = self.build_lm_model()
print 'Done'
print 'Building f_log_probs',
self.f_log_probs = theano.function([srcx, srcx_mask, ctx_], cost, profile = profile)
print 'Done'
return srcx, srcx_mask, ctx_, cost, sentenceLen
# build a training model
def build_lm_model(self):
srcx = tensor.matrix('x', dtype='int64')
srcx_mask = tensor.matrix('x_mask', dtype='float32')
ctx_ = tensor.vector('ctx_', dtype='int64')
x = srcx[:-1, :]
y = srcx[1:,:]
n_timesteps = x.shape[0]
n_samples = x.shape[1]
print('check init ok')
emb = self.tparams['Wemb'][x.flatten()]
emb = emb.reshape([n_timesteps, n_samples, self.options['dim_word']])
emb_ctx = self.tparams['Wemb_ctx'][ctx_].reshape([n_samples, self.options['dim_word']])
print('check embed ok')
# input
if self.options['dropout_input'] is not None and self.options['dropout_input'] > 0:
print 'Applying drop-out on input embedding (dropout_input:', self.options['dropout_input'], ")"
emb = dropout_layer(emb, self.use_noise, self.trng, self.options['dropout_input'])
emb_ctx = dropout_layer(emb_ctx, self.use_noise, self.trng, self.options['dropout_input'])
init_state = tensor.alloc(0., n_samples, self.options['dim_proj'])
init_cell = tensor.alloc(0., n_samples, self.options['dim_proj'])
# pass through gru layer, recurrence here
print 'Using', self.options['encoder'], 'unit for encoder'
print 'Training with successive sentences'
init_states = [init_state, init_cell]
proj = lstm_layer(self.tparams, emb, emb_ctx, self.options,
prefix='encoder',
init_state=init_state,
cell_state=init_cell,
mask = srcx_mask[:-1,:])
proj_h = proj[0] # all hidden states
next_states = [st[-1] for st in proj] # [last hidden state, last cell state]
if self.options['dropout_output'] is not None and self.options['dropout_output'] > 0:
print 'Applying drop-out on hidden states (dropout_proj:', self.options['dropout_output'], ")"
proj_h = dropout_layer(proj_h, self.use_noise, self.trng, self.options['dropout_output'])
# compute word probabilities
def _prob(proj_h):
logit_lstm = get_layer('ff')[1](self.tparams, proj_h, self.options, prefix='ff_logit_lstm', activ='linear')
logit_prev = get_layer('ff')[1](self.tparams, emb, self.options, prefix='ff_logit_prev', activ='linear')
logit_label = get_layer('ff')[1](self.tparams, emb_ctx, self.options, prefix='ff_logit_label', activ='linear')
logit = tensor.tanh(logit_lstm + logit_prev + logit_label)
#logit = tensor.tanh(logit_lstm)
# split to calculate
logit = get_layer('ff')[1](self.tparams, logit, self.options, prefix='ff_logit', activ='linear')
logit_shp = logit.shape # n_timesteps * n_samples * n_words
probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))
return probs
probs = _prob(proj_h)
# cost
y_flat = y.flatten()
y_flat_idx = tensor.arange(y_flat.shape[0]) * self.options['n_words'] + y_flat
# probs has shape (seq*batch, n_words); y_flat_idx picks out the probability of the target word at each position
# y: (seq_len, batch_size)
def _cost(probs):
cost = -tensor.log(probs.flatten()[y_flat_idx] + 1e-10)
cost = cost.reshape([y.shape[0], y.shape[1]])
sentenceLen = srcx_mask[1:,:].sum(axis=0)
cost = (cost * srcx_mask[1:, :]).sum(axis=0) / sentenceLen
return cost, sentenceLen
cost, sentenceLen = _cost(probs)
return srcx, srcx_mask, ctx_, cost, sentenceLen #(seq, batch, worddim)
# calculate the log probablities on a given corpus using language model
def pred_probs(self, valid_Data, valid_batch_size):
self.use_noise.set_value(0.)
nlls = []
dataLen = []
valid_x, valid_y = valid_Data[0], valid_Data[1]
for idx in xrange((len(valid_x) + valid_batch_size - 1) // valid_batch_size ):
data = valid_x[idx * valid_batch_size : (idx + 1) * valid_batch_size]
label = valid_y[idx * valid_batch_size : (idx + 1) * valid_batch_size]
dataLen += [len(tt) for tt in data]
x, x_mask = prepare_data_x(data, pad_sos=self.options['pad_sos'], n_word=self.options['n_words'])
cost = self.f_log_probs(x, x_mask, numpy.array(label).astype('int64'))
nlls += cost.tolist()
nlls = numpy.array(nlls).astype('float32')
dataLen = numpy.array(dataLen).astype('float32')
return numpy.exp((nlls * dataLen).sum() / dataLen.sum())
def evaluate(self, validSet, testSet):
valid_ppl = self.pred_probs(validSet, 32)
test_ppl = self.pred_probs(testSet, 32)
return valid_ppl, test_ppl
'''
def train(round = 0,
dim_word=1000, # word vector dimensionality
dim_proj=1000, # the number of GRU units
encoder='lstm',
patience=10, # early stopping patience
max_epochs=5000,
finish_after=10000000000000, # finish after this many updates
decay_c=0., # L2 weight decay penalty
clip_c=5.,
lrate=1.,
n_words = 10000, # vocabulary size
maxlen=None, # maximum length of the description
minlen=1,
start_iter=0,
start_epoch=0,
optimizer='adadelta',
batch_size=32,
valid_batch_size=20,
saveto='model.npz',
validFreq=1000,
dispFreq=100,
saveFreq=1000, # save the parameters after every saveFreq updates
newDumpFreq=10000,
syncFreq = 50,
sampleFreq=100, # generate some samples after every sampleFreq
sampleNum = 50, # generate sampleNum sentences
dataset=None,
valid_dataset=None,
test_dataset=None,
dictionary=None,
sampleFileName="sampleFile.txt",
embedding=None,
dropout_input=None,
dropout_output=None,
reload_model=None,
reload_option=None,
log=None,
monitor_grad=False,
pad_sos=False):
# Model options
if pad_sos:
n_words += 1
model_options = locals().copy()
print "model options:"
for kk, vv in model_options.iteritems():
print "\t"+kk+":\t"+str(vv)
print('log = ', log)
F_log = open(log, "a")
if start_iter == 0:
F_log.write("model options:\n")
for kk, vv in model_options.iteritems():
F_log.write("\t"+kk+":\t"+str(vv)+"\n")
F_log.write("-----------------------------------------\n\n")
print 'Loading training dataset...'
voc_size = n_words - 1 if pad_sos else n_words
trainSet, validSet, testSet = load_data(path=dataset, n_words=n_words, maxlen=maxlen, sort_by_len=True, fixed_valid=True)
# reload options
if reload_option is not None and os.path.exists(reload_option):
print "Reloading model options...",
with open('%s' % reload_option, 'rb') as f:
model_options = pkl.load(f)
print "Done"
# init parameters
print 'Initializing model parameters...',
params = init_lm_params(model_options)
print 'Done'
# load pre-trained word embedding
if embedding is not None and os.path.exists(embedding):
print 'Load Embedding from ', embedding
Wemb = numpy.array(numpy.load(open(embedding, "rb")))
if Wemb.shape[0] == model_options['n_words'] and Wemb.shape[1] == model_options['dim_word']:
print 'Using pre-trained word embedding...',
params['Wemb'] = Wemb.astype(numpy.float32)
print 'vocab size', params['Wemb'].shape[0], ', dim', params['Wemb'].shape[1]
# reload parameters
if reload_model is not None and os.path.exists(reload_model):
print "Reloading model parameters...",
params = load_params(reload_model, params)
print "Done"
# create shared variables for parameters
tparams = init_tparams(params)
# build the symbolic computational graph
print 'Building model...'
trng, use_noise, srcx, srcx_mask, ctx_, cost = build_lm_model(tparams, model_options)
print 'Building f_log_probs',
f_log_probs = theano.function([srcx, srcx_mask, ctx_], cost, profile = profile)
print 'Done'
cost = cost.mean(axis=0)
# apply L2 regularization on weights
if decay_c > 0.:
print "Applying L2 regularization (decay_c: "+str(decay_c)+')...',
cost = l2_regularization(tparams, cost, decay_c)
print "Done"
# after any regularizer - compile the computational graph for cost
print 'Building f_cost',
f_cost = theano.function([srcx, srcx_mask, ctx_], cost, profile = profile)
print 'Done'
print 'Computing gradient',
grads = tensor.grad(cost, wrt=itemlist(tparams))
print 'Done'
# apply gradient clipping here
if clip_c > 0.:
print 'Applying gradient clipping (clip_c:'+str(clip_c)+')...',
grads = grad_clipping(grads, clip_c)
print 'Done'
# compile the optimizer, the actual computational graph is compiled here
print 'Building optimizers...',
lr = tensor.scalar(name='lr')
f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, [srcx, srcx_mask, ctx_], cost)
#f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
print 'Done'
sys.stdout.flush()
history_errs = []
# reload history
if reload_model is not None and os.path.exists(reload_model):
history_errs = list(numpy.load(reload_model)['history_errs'])
best_p = None
bad_count = 0
# Training loop
bad_counter = 0
uidx = start_iter
estop = False
start_time = time.time()
n_samples = 0
cost_accu = 0
for eidx in xrange(start_epoch, max_epochs):
epoch_start_time = time.time()
print 'Start epoch', eidx
n_samples = 0
kf_train = get_minibatches_idx(len(trainSet[0]), batch_size, shuffle=True)
for _, train_index in kf_train:
uidx += 1
x = [trainSet[0][t] for t in train_index]
y = [trainSet[1][t] for t in train_index]
n_samples += len(x)
use_noise.set_value(1.) #training mode
# pad batch and create mask
x, x_mask = prepare_data_x(x, pad_eos=True,pad_sos=model_options['pad_sos'],n_word=model_options['n_words'])
if x is None:
print 'Minibatch with zero sample under length ', maxlen
uidx -= 1
continue
ud_start = time.time()
# compute cost, grads and copy grads to shared variables
cost = f_grad_shared(x, x_mask, y) # input argument issue fixed
# do the update on parameters
f_update(lrate)
ud = time.time() - ud_start
# check for bad numbers
if numpy.isnan(cost) or numpy.isinf(cost):
print 'NaN detected'
F_log.write("=========================================\nNaN detected\n")
F_log.write('Epoch'+str(eidx)+'\tIter '+str(uidx)+'\tBatch Length '+str(x.shape[0])+'\n')
return 1.
cost_accu += cost
if numpy.mod(uidx, dispFreq) == 0:
print 'Epoch ', eidx, '\tIter ', uidx, '\tLoss ', cost_accu/float(dispFreq), '\tUD ', ud,
print '\tLength', x.shape[0], '\tSize ', x.shape[1]
F_log.write('Epoch '+str(eidx)+'\tIter '+str(uidx)+'\tLoss '+str(cost_accu/float(dispFreq))
+'\tUD '+str(ud)+'\tLength '+str(x.shape[0])+'\tSize '+str(x.shape[1])+'\n')
cost_accu = 0
sys.stdout.flush()
# validate model on validation set and early stop if necessary
if numpy.mod(uidx, validFreq) == 0:
print "Validating...",
use_noise.set_value(0.)
# fixed for successive mode
valid_ppl = pred_probs(f_log_probs, prepare_data_x, model_options, validSet, batch_size)
history_errs.append(valid_ppl)
print "Done"
if uidx == 0 or valid_ppl <= numpy.array(history_errs).min():
best_p = unzip(tparams)
bad_counter = 0
if len(history_errs) > patience and valid_ppl >= numpy.array(history_errs)[:-patience].min():
bad_counter += 1
if bad_counter > patience:
print 'Early Stop!'
F_log.write('##############\nEarly Stop!\n##############\n')
estop = True
break
# perplexity
test_ppl = pred_probs(f_log_probs, prepare_data_x, model_options, testSet, batch_size)
print 'Perplexity: { Valid', valid_ppl, ', Test', test_ppl, '}'
F_log.write('Perplexity: Valid '+str(valid_ppl)+'\tTest '+str(test_ppl)+'\n')
F_log.write('====================================\n')
sys.stdout.flush()
# save the current models
savefile = saveto + "_e" + str(eidx) + "_i" + str(uidx) + "_valid_" + str(valid_ppl) + '_test_' + str(test_ppl)
numpy.savez(savefile, history_errs=history_errs, **unzip(tparams))
pkl.dump(model_options, open('%s.option.pkl' % saveto, 'wb'))
# finish after this many updates
if uidx >= finish_after:
print 'Finishing after %d iterations!' % uidx
F_log.write('##############\nFinishing after '+str(uidx)+' iterations!\n##############\n')
estop = True
break
epoch_end_time = time.time()
print 'Epoch', eidx, 'completed, Seen', n_samples, 'samples, Time', epoch_end_time-epoch_start_time
F_log.write("-----------------------------------------------------------\n")
F_log.write("Epoch "+str(eidx)+" completed, Seen "+str(n_samples)+" samples, Time "+str(epoch_end_time-epoch_start_time)+"\n")
F_log.write("------------------------------------------------------------\n")
if estop:
break
end_time = time.time()
'''
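pred_probs above turns per-sentence average NLLs back into a corpus-level perplexity by re-weighting each sentence by its length before exponentiating. A small numpy sketch of the same computation, with made-up numbers:

import numpy

nlls = numpy.array([4.2, 3.8, 5.0], dtype='float32')  # per-sentence mean NLL in nats/word
lens = numpy.array([12., 7., 20.], dtype='float32')   # sentence lengths used as weights
ppl = numpy.exp((nlls * lens).sum() / lens.sum())     # length-weighted mean NLL, exponentiated
print(ppl)  # ~93.5 for these made-up numbers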

Просмотреть файл

@ -0,0 +1,369 @@
"""
data loading and minibatch generation
"""
__author__ = 'v-yirwan'
import cPickle as pkl
import gzip
import os
import numpy
from theano import config
def get_dataset_file(dataset, default_dataset, origin):
'''
Look for it as if it was a full path, if not, try local file,
if not try in the data directory.
Download dataset if it is not present
'''
data_dir, data_file = os.path.split(dataset)
if data_dir == "" and not os.path.isfile(dataset):
# Check if dataset is in the data directory.
new_path = os.path.join(
os.path.split(__file__)[0],
"..",
"data",
dataset
)
if os.path.isfile(new_path) or data_file == default_dataset:
dataset = new_path
if (not os.path.isfile(dataset)) and data_file == default_dataset:
from six.moves import urllib
print('Downloading data from %s' % origin)
urllib.request.urlretrieve(origin, dataset)
return dataset
def load_data(path="imdb.pkl", n_words=100000, maxlen=None,
sort_by_len=True, fixed_valid=True, valid_portion=0.1):
'''
Loads the dataset
:type path: String
:param path: The path to the dataset (here IMDB)
:type n_words: int
:param n_words: The number of words to keep in the vocabulary.
All extra words are set to unknown (1).
:type maxlen: None or positive int
:param maxlen: the max sequence length we use in the train/valid set.
:type sort_by_len: bool
:param sort_by_len: Sort by the sequence length for the train,
valid and test set. This allows faster execution as it causes
less padding per minibatch. Another mechanism must be used to
shuffle the train set at each epoch.
:type fixed_valid: bool
:param fixed_valid: load fixed validation set from the corpus file,
which would otherwise be picked randomly from the training set with
proportion [valid_portion]
:type valid_portion: float
:param valid_portion: The proportion of the full train set used for
the validation set.
'''
# Load the dataset
path = get_dataset_file(
path, "imdb.pkl",
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
if path.endswith(".gz"):
f = gzip.open(path, 'rb')
else:
f = open(path, 'rb')
train_set = pkl.load(f)
if fixed_valid:
valid_set = pkl.load(f)
test_set = pkl.load(f)
f.close()
def _truncate_data(train_set):
'''
        truncate sequences whose lengths exceed the max-len threshold
:param train_set: a list of sequences list and corresponding labels list
:return: truncated train_set
'''
new_train_set_x = []
new_train_set_y = []
for x, y in zip(train_set[0], train_set[1]):
if len(x) < maxlen:
new_train_set_x.append(x)
new_train_set_y.append(y)
train_set = (new_train_set_x, new_train_set_y)
del new_train_set_x, new_train_set_y
return train_set
def _set_valid(train_set, valid_portion):
'''
set validation with [valid_portion] proportion of training set
'''
train_set_x, train_set_y = train_set
n_samples = len(train_set_x)
sidx = numpy.random.permutation(n_samples) # shuffle data
n_train = int(numpy.round(n_samples * (1. - valid_portion)))
valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
train_set_x = [train_set_x[s] for s in sidx[:n_train]]
train_set_y = [train_set_y[s] for s in sidx[:n_train]]
train_set = (train_set_x, train_set_y)
valid_set = (valid_set_x, valid_set_y)
del train_set_x, train_set_y, valid_set_x, valid_set_y
return train_set, valid_set
if maxlen:
train_set = _truncate_data(train_set)
if fixed_valid:
print 'Loading with fixed validation set...',
valid_set = _truncate_data(valid_set)
else:
print 'Setting validation set with proportion:', valid_portion, '...',
train_set, valid_set = _set_valid(train_set, valid_portion)
test_set = _truncate_data(test_set)
if maxlen is None and not fixed_valid:
train_set, valid_set = _set_valid(train_set, valid_portion)
def remove_unk(x):
return [[1 if w >= n_words else w for w in sen] for sen in x]
test_set_x, test_set_y = test_set
valid_set_x, valid_set_y = valid_set
train_set_x, train_set_y = train_set
# remove unk from dataset
train_set_x = remove_unk(train_set_x) # use 1 if unk
valid_set_x = remove_unk(valid_set_x)
test_set_x = remove_unk(test_set_x)
def len_argsort(seq):
return sorted(range(len(seq)), key=lambda x: len(seq[x]))
if sort_by_len:
sorted_index = len_argsort(test_set_x)
# ranked from shortest to longest
test_set_x = [test_set_x[i] for i in sorted_index]
test_set_y = [test_set_y[i] for i in sorted_index]
sorted_index = len_argsort(valid_set_x)
valid_set_x = [valid_set_x[i] for i in sorted_index]
valid_set_y = [valid_set_y[i] for i in sorted_index]
sorted_index = len_argsort(train_set_x)
train_set_x = [train_set_x[i] for i in sorted_index]
train_set_y = [train_set_y[i] for i in sorted_index]
train = (train_set_x, train_set_y)
valid = (valid_set_x, valid_set_y)
test = (test_set_x, test_set_y)
return train, valid, test
def load_mnist(path='mnist.pkl', fixed_permute=True, rand_permute=False):
f = open(path, 'rb')
train = pkl.load(f)
valid = pkl.load(f)
test = pkl.load(f)
f.close()
def _permute(data, perm):
x, y = data
x_new = []
for xx in x:
xx_new = [xx[pp] for pp in perm]
x_new.append(xx_new)
return (x_new, y)
def _trans2list(data):
x, y = data
x = [list(xx) for xx in x]
return (x, y)
if rand_permute:
print 'Using a fixed random permutation of pixels...',
perm = numpy.random.permutation(range(784))
train = _permute(train, perm)
valid = _permute(valid, perm)
test = _permute(test, perm)
elif fixed_permute:
print 'Using permuted dataset...',
        train = _trans2list(train)
        valid = _trans2list(valid)
        test = _trans2list(test)
return train, valid, test
def get_minibatches_idx(n, minibatch_size, shuffle=False):
"""
Used to shuffle the dataset at each iteration.
"""
idx_list = numpy.arange(n, dtype="int32")
if shuffle:
numpy.random.shuffle(idx_list)
minibatches = []
minibatch_start = 0
for i in range(n // minibatch_size):
minibatches.append(idx_list[minibatch_start:
minibatch_start + minibatch_size])
minibatch_start += minibatch_size
if (minibatch_start != n):
# Make a minibatch out of what is left
minibatches.append(idx_list[minibatch_start:])
return zip(range(len(minibatches)), minibatches)
def get_minibatches_idx_bucket(dataset, minibatch_size, shuffle=False):
"""
    divide the data into buckets according to sequence length and use a
    dynamic (per-bucket) batch size
"""
# divide into buckets
slen = [len(ss) for ss in dataset]
bucket1000 = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 0 and slen[sidx] <= 1000]
bucket3000 = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 1000 and slen[sidx] <= 3000]
bucket_long = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 3000]
# shuffle each bucket
if shuffle:
numpy.random.shuffle(bucket1000)
numpy.random.shuffle(bucket3000)
numpy.random.shuffle(bucket_long)
# make minibatches
def _make_batch(minibatches, bucket, minibatch_size):
minibatch_start = 0
n = len(bucket)
for i in range(n // minibatch_size):
minibatches.append(bucket[minibatch_start : minibatch_start + minibatch_size])
minibatch_start += minibatch_size
if (minibatch_start != n):
# Make a minibatch out of what is left
minibatches.append(bucket[minibatch_start:])
return minibatches
minibatches = []
_make_batch(minibatches, bucket1000, minibatch_size=minibatch_size)
_make_batch(minibatches, bucket3000, minibatch_size=minibatch_size//2)
_make_batch(minibatches, bucket_long, minibatch_size=minibatch_size//8)
# shuffle minibatches
numpy.random.shuffle(minibatches)
return zip(range(len(minibatches)), minibatches)
def prepare_data(seqs, labels, maxlen=None, dataset='text'):
"""Create the matrices from the datasets.
    This pads each sequence to the same length: the length of the
    longest sequence or maxlen.
    If maxlen is set, sequences longer than maxlen are dropped from
    the batch.
    This swaps the axes!
"""
# x: a list of sentences
lengths = [len(s) for s in seqs]
if maxlen is not None:
new_seqs = []
new_labels = []
new_lengths = []
for l, s, y in zip(lengths, seqs, labels):
if l < maxlen:
new_seqs.append(s)
new_labels.append(y)
new_lengths.append(l)
lengths = new_lengths
labels = new_labels
seqs = new_seqs
if len(lengths) < 1:
return None, None, None
n_samples = len(seqs)
maxlen = numpy.max(lengths)
if dataset == 'mnist':
x = numpy.zeros((maxlen, n_samples)).astype('float32')
else:
x = numpy.zeros((maxlen, n_samples)).astype('int64')
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
for idx, s in enumerate(seqs):
x[:lengths[idx], idx] = s
x_mask[:lengths[idx], idx] = 1.
return x, x_mask, labels
def prepare_data_hier(seqs, labels, hier_len, maxlen=None, dataset='text'):
'''
prepare minibatch for hierarchical model
'''
# sort (long->short)
sorted_idx = sorted(range(len(seqs)), key=lambda x: len(seqs[x]), reverse=True)
seqs = [seqs[i] for i in sorted_idx]
labels = [labels[i] for i in sorted_idx]
# truncate data
lengths = [len(s) for s in seqs]
if maxlen is not None:
new_seqs = []
new_labels = []
new_lengths = []
for l, s, y in zip(lengths, seqs, labels):
            if l < maxlen:
new_seqs.append(s)
new_labels.append(y)
new_lengths.append(l)
lengths = new_lengths
labels = new_labels
seqs = new_seqs
if len(lengths) < 1:
return None, None, None
# set batch size
n_samples = len(seqs)
maxlen = numpy.max(lengths)
if maxlen % hier_len == 0:
n_batch = maxlen/hier_len
else:
n_batch = maxlen//hier_len + 1
maxlen = n_batch * hier_len
# padding whole batch
if dataset == 'mnist':
x = numpy.zeros((maxlen, n_samples)).astype('float32')
else:
x = numpy.zeros((maxlen, n_samples)).astype('int64')
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
for idx, s in enumerate(seqs):
x[:lengths[idx], idx] = s
x_mask[:lengths[idx], idx] = 1
# slice to mini-batches
x_batch = [x[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
if dataset == 'mnist':
x_batch = numpy.array(x_batch).astype('float32')
else:
x_batch = numpy.array(x_batch).astype('int64')
mask_batch = [x_mask[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
mask_batch = numpy.array(mask_batch).astype(config.floatX)
# mask for hier-level
mask_hier = numpy.ones((n_batch, n_samples)).astype(config.floatX)
for idx in range(n_samples):
mpos = numpy.where(x_mask[:, idx]==0)[0]
if len(mpos) == 0:
continue
bidx = min(mpos[0]//hier_len+1, n_batch)
if mpos[0] % hier_len == 0:
bidx -= 1 # bug fixed TODO: more elegant solution?
mask_hier[bidx:, idx] = 0
return x_batch, mask_batch, mask_hier, labels
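The functions above are normally chained: load_data builds the (train, valid, test) splits, get_minibatches_idx yields shuffled index batches, and prepare_data pads one batch and builds its mask (prepare_data_hier does the same but additionally slices each batch into hier_len-long chunks). A minimal sketch of that flow; the module name `data`, the corpus path and the sizes are illustrative assumptions, not values fixed by this commit:

import data  # assumed module name for the file above

# illustrative settings: 10k-word vocabulary, batches of 16 sequences
train, valid, test = data.load_data(path='imdb.pkl', n_words=10000, maxlen=None)

for _, train_index in data.get_minibatches_idx(len(train[0]), 16, shuffle=True):
    x = [train[0][t] for t in train_index]
    y = [train[1][t] for t in train_index]
    # pad to the longest sequence in the batch; x is a (maxlen, n_samples)
    # int64 matrix and x_mask marks the real (non-padding) tokens
    x, x_mask, y = data.prepare_data(x, y)
    # feed x, x_mask, y to the compiled theano training function here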


@ -0,0 +1,38 @@
import re
import os
import socket
import sys
filename = r'.\gpu_usage_draft_'
default_gpu = 58 + 30
def GrabGPU(rank):
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename + rank
os.system(cmdstr)
def GetGPUUSage(rank):
pattern = re.compile(r'(?P<num>[0-9]{1,5})MiB[\s]+/')
id = 0
GPUs = []
fo = open(filename + rank, 'r')
for line in fo:
result = pattern.search(line)
if result:
if int(result.group("num")) < default_gpu:
GPUs.append(id)
id = id + 1
fo.close()
print len(GPUs)
for gpu in GPUs:
print gpu
if __name__ == '__main__':
rank = sys.argv[1]
GrabGPU(rank)
print socket.gethostname()
GetGPUUSage(rank)
#os.system('del /q ' + filename + rank)


@ -0,0 +1,17 @@
import os
def MapDeviceIds(comm):
rank = comm.Get_rank()
num_machine = comm.Get_size()
os.system('python GPU_Usage.py ' + str(rank) + ' > record' + str(rank))
comm.Barrier()
if rank == 0:
os.system('python AllocateGPU.py ' + str(num_machine) + ' > DirtyRecord')
comm.Barrier()
cardid = str(0)
with open('DirtyRecord', 'r') as f:
for idx, line in enumerate(f):
if idx == rank:
cardid = line.strip()
break
return cardid
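MapDeviceIds expects an mpi4py-style communicator and relies on GPU_Usage.py and AllocateGPU.py writing their record<rank> and DirtyRecord files into the working directory. A hedged sketch of how a training script might use it; the mpi4py import, the module name, and the THEANO_FLAGS convention are assumptions, not something this commit shows:

import os
from mpi4py import MPI                       # assumed MPI binding, not part of this commit
from map_device_ids import MapDeviceIds      # hypothetical module name for the file above

comm = MPI.COMM_WORLD
cardid = MapDeviceIds(comm)                  # one free GPU id per rank, e.g. '0', '1', ...

# select the card before theano is imported (assumed Theano convention)
os.environ['THEANO_FLAGS'] = 'device=gpu' + cardid
import theano                                # imported only after the flag is set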


@ -0,0 +1,32 @@
import sys
import codecs
if len(sys.argv) < 3:
raise Exception('Not enough argv')
theano_rc = r"""
[global]
mode = FAST_RUN
device = gpu
floatX = float32
on_unused_input = warn
optimizer = fast_run
#allow_gc=False
cuda.disable_gcc_cudnn_check=True
[lib]
cnmem = 0.75
[nvcc]
flags=-L{0}\libs
root=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
fast_math = True
"""
theano_rc = theano_rc.format(sys.argv[1])
print(theano_rc)
with codecs.open(sys.argv[2], 'w', 'utf-8') as f:
f.write(theano_rc)


@ -0,0 +1,24 @@
# Copyright (c) 2007, 2010, 2011, 2012 Godefroid Chapelle
#
# This file is part of ipdb.
# GNU package is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 2 of the License, or (at your option)
# any later version.
#
# GNU package is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
from __main__ import set_trace, post_mortem, pm, run, runcall, runeval, launch_ipdb_on_exception
pm # please pyflakes
post_mortem # please pyflakes
run # please pyflakes
runcall # please pyflakes
runeval # please pyflakes
set_trace # please pyflakes
launch_ipdb_on_exception # please pyflakes


@ -0,0 +1,184 @@
# Copyright (c) 2011, 2012 Godefroid Chapelle
#
# This file is part of ipdb.
# GNU package is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 2 of the License, or (at your option)
# any later version.
#
# GNU package is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
from __future__ import print_function
import sys
import os
import traceback
from contextlib import contextmanager
try:
from pdb import Restart
except ImportError:
class Restart(Exception):
pass
import IPython
if IPython.__version__ > '0.10.2':
from IPython.core.debugger import Pdb, BdbQuit_excepthook
try:
get_ipython
except NameError:
# Make it more resilient to different versions of IPython and try to
# find a module.
possible_modules = ['IPython.terminal.embed', # Newer IPython
'IPython.frontend.terminal.embed'] # Older IPython
count = len(possible_modules)
for module in possible_modules:
try:
embed = __import__(module, fromlist=["InteractiveShellEmbed"])
InteractiveShellEmbed = embed.InteractiveShellEmbed
except ImportError:
count -= 1
if count == 0:
raise
else:
break
ipshell = InteractiveShellEmbed()
def_colors = ipshell.colors
else:
def_colors = get_ipython.im_self.colors
from IPython.utils import io
if 'nose' in sys.modules.keys():
def update_stdout():
# setup stdout to ensure output is available with nose
io.stdout = sys.stdout = sys.__stdout__
else:
def update_stdout():
pass
else:
from IPython.Debugger import Pdb, BdbQuit_excepthook
from IPython.Shell import IPShell
from IPython import ipapi
ip = ipapi.get()
if ip is None:
IPShell(argv=[''])
ip = ipapi.get()
def_colors = ip.options.colors
from IPython.Shell import Term
if 'nose' in sys.modules.keys():
def update_stdout():
# setup stdout to ensure output is available with nose
Term.cout = sys.stdout = sys.__stdout__
else:
def update_stdout():
pass
def wrap_sys_excepthook():
# make sure we wrap it only once or we would end up with a cycle
# BdbQuit_excepthook.excepthook_ori == BdbQuit_excepthook
if sys.excepthook != BdbQuit_excepthook:
BdbQuit_excepthook.excepthook_ori = sys.excepthook
sys.excepthook = BdbQuit_excepthook
def set_trace(frame=None):
update_stdout()
wrap_sys_excepthook()
if frame is None:
frame = sys._getframe().f_back
Pdb(def_colors).set_trace(frame)
def post_mortem(tb):
update_stdout()
wrap_sys_excepthook()
p = Pdb(def_colors)
p.reset()
if tb is None:
return
p.interaction(None, tb)
def pm():
post_mortem(sys.last_traceback)
def run(statement, globals=None, locals=None):
Pdb(def_colors).run(statement, globals, locals)
def runcall(*args, **kwargs):
return Pdb(def_colors).runcall(*args, **kwargs)
def runeval(expression, globals=None, locals=None):
return Pdb(def_colors).runeval(expression, globals, locals)
@contextmanager
def launch_ipdb_on_exception():
try:
yield
except Exception:
e, m, tb = sys.exc_info()
print(m.__repr__(), file=sys.stderr)
post_mortem(tb)
finally:
pass
def main():
if not sys.argv[1:] or sys.argv[1] in ("--help", "-h"):
print("usage: ipdb.py scriptfile [arg] ...")
sys.exit(2)
mainpyfile = sys.argv[1] # Get script filename
if not os.path.exists(mainpyfile):
print('Error:', mainpyfile, 'does not exist')
sys.exit(1)
del sys.argv[0] # Hide "pdb.py" from argument list
# Replace pdb's dir with script's dir in front of module search path.
sys.path[0] = os.path.dirname(mainpyfile)
# Note on saving/restoring sys.argv: it's a good idea when sys.argv was
# modified by the script being debugged. It's a bad idea when it was
# changed by the user from the command line. There is a "restart" command
# which allows explicit specification of command line arguments.
pdb = Pdb(def_colors)
while 1:
try:
pdb._runscript(mainpyfile)
if pdb._user_requested_quit:
break
print("The program finished and will be restarted")
except Restart:
print("Restarting", mainpyfile, "with arguments:")
print("\t" + " ".join(sys.argv[1:]))
except SystemExit:
# In most cases SystemExit does not warrant a post-mortem session.
print("The program exited via sys.exit(). Exit status: ", end='')
print(sys.exc_info()[1])
except:
traceback.print_exc()
print("Uncaught exception. Entering post mortem debugging")
print("Running 'cont' or 'step' will restart the program")
t = sys.exc_info()[2]
pdb.interaction(None, t)
print("Post mortem debugger finished. The " + mainpyfile +
" will be restarted")
if __name__ == '__main__':
main()

Binary data: DSL_SentimentAnalysis/CLM/multiverso/Multiverso.dll (new file; binary content not shown)


@ -0,0 +1,5 @@
#!/usr/bin/env python
# coding:utf8
from api import init, shutdown, barrier, workers_num, worker_id, server_id, is_master_worker
from tables import ArrayTableHandler, MatrixTableHandler


@ -0,0 +1,66 @@
#!/usr/bin/env python
# coding:utf8
import ctypes
from utils import Loader
import numpy as np
mv_lib = Loader.get_lib()
def init(sync=False):
    '''Initialize multiverso.
This should be called only once before training at the beginning of the
whole project.
If sync is True, a sync server will be created. Otherwise an async server
will be created.
'''
args = [""] # the first argument will be ignored. So we put a placeholder here
if sync:
args.append("-sync=true")
n = len(args)
args_type = ctypes.c_char_p * n
mv_lib.MV_Init(ctypes.pointer(ctypes.c_int(n)), args_type(*[ctypes.c_char_p(arg) for arg in args]))
def shutdown():
    '''Shutdown multiverso.
    This should be called only once after finishing training at the end of the
    whole project.
    '''
    mv_lib.MV_ShutDown()
def barrier():
    '''Set a barrier for all workers to wait.
    Workers will wait until all workers reach a specific barrier.
    '''
    mv_lib.MV_Barrier()
def workers_num():
'''Return the total number of workers.'''
return mv_lib.MV_NumWorkers()
def worker_id():
'''Return the id (zero-based index) for current worker.'''
return mv_lib.MV_WorkerId()
def server_id():
return mv_lib.MV_ServerId()
def is_master_worker():
    '''Return True if this worker is the master worker.
    Some things only need one worker process, such as validation, outputting
    the result, initializing the parameters and so on. So we mark worker 0 as
    the master worker to finish these things.
'''
return worker_id() == 0
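Together these functions form the usual multiverso lifecycle described in the docstrings: initialize once, separate per-worker phases with barriers, and shut down once at the very end. A minimal sketch (the master-only work is a placeholder):

import multiverso as mv

mv.init()  # start the async server/workers (pass sync=True for a sync server)
print('worker %d of %d' % (mv.worker_id(), mv.workers_num()))

if mv.is_master_worker():
    pass   # e.g. initialize parameters or write logs on worker 0 only

mv.barrier()   # wait here until every worker reaches this point
# ... training happens here ...
mv.shutdown()  # call exactly once when everything is finished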


@ -0,0 +1,163 @@
#!/usr/bin/env python
# coding:utf8
import ctypes
from utils import Loader
from utils import convert_data
import numpy as np
import api
mv_lib = Loader.get_lib()
class TableHandler(object):
'''`TableHandler` is an interface to sync different kinds of values.
If you are not writing python code based on theano or lasagne, you are
supposed to sync models (for initialization) and gradients (during
training) so as to let multiverso help you manage the models in distributed
environments.
Otherwise, you'd better use the classes in `multiverso.theano_ext` or
`multiverso.theano_ext.lasagne_ext`
'''
def __init__(self, size, init_value=None):
raise NotImplementedError("You must implement the __init__ method.")
def get(self, size):
raise NotImplementedError("You must implement the get method.")
def add(self, data, sync=False):
raise NotImplementedError("You must implement the add method.")
# types
C_FLOAT_P = ctypes.POINTER(ctypes.c_float)
class ArrayTableHandler(TableHandler):
'''`ArrayTableHandler` is used to sync array-like (one-dimensional) value.'''
def __init__(self, size, init_value=None):
'''Constructor for syncing array-like (one-dimensional) value.
        The `size` should be an int equal to the size of the value we want to sync.
If init_value is None, zeros will be used to initialize the tables,
otherwise the table will be initialized as the init_value.
Notice: if the init_value is different in different processes, the
average of them will be used.
'''
self._handler = ctypes.c_void_p()
self._size = size
mv_lib.MV_NewArrayTable(size, ctypes.byref(self._handler))
if init_value is not None:
init_value = convert_data(init_value)
# sync add is used because we want to make sure that the initial
# value has taken effect when the call returns.
self.add(init_value / api.workers_num(), sync=True)
def get(self):
'''get the latest value from multiverso ArrayTable
        The return value is a one-dimensional numpy.ndarray.
'''
data = np.zeros((self._size, ), dtype=np.dtype("float32"))
mv_lib.MV_GetArrayTable(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
return data
def add(self, data, sync=False):
'''add the data to the multiverso ArrayTable
        `data` should be a one-dimensional numpy.ndarray (or convertible to one).
        If sync is True, this call blocks until the add has finished.
        Otherwise it returns immediately.
'''
data = convert_data(data)
assert(data.size == self._size)
if sync:
mv_lib.MV_AddArrayTable(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
else:
mv_lib.MV_AddAsyncArrayTable(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
class MatrixTableHandler(TableHandler):
def __init__(self, num_row, num_col, init_value=None):
'''Constructor for syncing matrix-like (two-dimensional) value.
The `num_row` should be the number of rows and the `num_col` should be
the number of columns.
If init_value is None, zeros will be used to initialize the tables,
otherwise the table will be initialized as the init_value.
Notice: if the init_value is different in different processes, the
average of them will be used.
'''
self._handler = ctypes.c_void_p()
self._num_row = num_row
self._num_col = num_col
self._size = num_col * num_row
mv_lib.MV_NewMatrixTable(num_row, num_col, ctypes.byref(self._handler))
if init_value is not None:
init_value = convert_data(init_value)
# sync add is used because we want to make sure that the initial
# value has taken effect when the call returns.
self.add(init_value / api.workers_num(), sync=True)
def get(self, row_ids=None):
'''get the latest value from multiverso MatrixTable
        If row_ids is None, we will return all rows as a numpy.ndarray, e.g.
        array([[1, 3], [3, 4]]).
        Otherwise we will return the data according to row_ids (e.g. you can
        pass [0] to row_ids to get only the first row; it will return a
        two-dimensional numpy.ndarray with one row).
        The return value is a two-dimensional numpy.ndarray.
'''
if row_ids is None:
data = np.zeros((self._num_row, self._num_col), dtype=np.dtype("float32"))
mv_lib.MV_GetMatrixTableAll(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
return data
else:
row_ids_n = len(row_ids)
int_array_type = ctypes.c_int * row_ids_n
data = np.zeros((row_ids_n, self._num_col), dtype=np.dtype("float32"))
mv_lib.MV_GetMatrixTableByRows(self._handler, data.ctypes.data_as(C_FLOAT_P),
row_ids_n * self._num_col,
int_array_type(*row_ids), row_ids_n)
return data
def add(self, data=None, row_ids=None, sync=False):
'''add the data to the multiverso MatrixTable
If row_ids is None, we will add all data, and the data
should be a list, e.g. [1, 2, 3, ...]
Otherwise we will add the data according to the row_ids
        `data` should be a two-dimensional numpy.ndarray (or convertible to one).
        If sync is True, this call blocks until the add has finished.
        Otherwise it returns immediately.
'''
assert(data is not None)
data = convert_data(data)
if row_ids is None:
assert(data.size == self._size)
if sync:
mv_lib.MV_AddMatrixTableAll(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
else:
mv_lib.MV_AddAsyncMatrixTableAll(self._handler, data.ctypes.data_as(C_FLOAT_P), self._size)
else:
row_ids_n = len(row_ids)
assert(data.size == row_ids_n * self._num_col)
int_array_type = ctypes.c_int * row_ids_n
if sync:
mv_lib.MV_AddMatrixTableByRows(self._handler, data.ctypes.data_as(C_FLOAT_P),
row_ids_n * self._num_col,
int_array_type(*row_ids), row_ids_n)
else:
mv_lib.MV_AddAsyncMatrixTableByRows(self._handler, data.ctypes.data_as(C_FLOAT_P),
row_ids_n * self._num_col,
int_array_type(*row_ids), row_ids_n)
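A short usage sketch for the two handlers: every worker pushes local deltas with add and pulls the aggregated state with get, with barriers around the adds so the reads observe them (sizes and values below are illustrative; the array size mirrors the unit test later in this commit):

import numpy as np
import multiverso as mv

mv.init()
arr = mv.ArrayTableHandler(10000)            # same size as the unit test uses
mat = mv.MatrixTableHandler(2, 3)
mv.barrier()

arr.add(np.arange(10000, dtype='float32'))   # each worker adds its local delta
mat.add(np.ones((2, 3), dtype='float32'))
mv.barrier()                                 # make sure every add has arrived

print(arr.get()[:5])                         # aggregated sum over all workers
print(mat.get(row_ids=[1]))                  # fetch only the second row
mv.shutdown()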


@ -0,0 +1,111 @@
#!/usr/bin/env python
# coding:utf8
import multiverso as mv
import unittest
import numpy as np
import theano
from multiverso.theano_ext import sharedvar
def setUpModule():
mv.init()
def tearDownModule():
mv.shutdown()
class TestMultiversoTables(unittest.TestCase):
'''
Use the commands below to run test
$ nosetests
'''
def _test_array(self, size):
tbh = mv.ArrayTableHandler(size)
mv.barrier()
for i in xrange(100):
tbh.add(range(1, size + 1))
tbh.add(range(1, size + 1))
mv.barrier()
for j, actual in enumerate(tbh.get()):
self.assertEqual((j + 1) * (i + 1) * 2 * mv.workers_num(), actual)
mv.barrier()
def test_small_array(self):
        # TODO: this is not supported by multiverso because of the size
        # limit. Waiting for a solution to this issue
# https://github.com/Microsoft/multiverso/issues/69
# self._test_array(1)
pass
def test_array(self):
self._test_array(10000)
def test_matrix(self):
num_row = 11
num_col = 10
size = num_col * num_row
workers_num = mv.workers_num()
tbh = mv.MatrixTableHandler(num_row, num_col)
mv.barrier()
for count in xrange(1, 21):
row_ids = [0, 1, 5, 10]
tbh.add(range(size))
tbh.add([range(rid * num_col, (1 + rid) * num_col) for rid in row_ids], row_ids)
mv.barrier()
data = tbh.get()
mv.barrier()
for i, row in enumerate(data):
for j, actual in enumerate(row):
expected = (i * num_col + j) * count * workers_num
if i in row_ids:
expected += (i * num_col + j) * count * workers_num
self.assertEqual(expected, actual)
data = tbh.get(row_ids)
mv.barrier()
for i, row in enumerate(data):
for j, actual in enumerate(row):
expected = (row_ids[i] * num_col + j) * count * workers_num * 2
self.assertEqual(expected, actual)
class TestMultiversoSharedVariable(unittest.TestCase):
'''
Use the commands below to run test
$ nosetests
'''
def _test_sharedvar(self, row, col):
W = sharedvar.mv_shared(
value=np.zeros(
(row, col),
dtype=theano.config.floatX
),
name='W',
borrow=True
)
delta = np.array(range(1, row * col + 1),
dtype=theano.config.floatX).reshape((row, col))
train_model = theano.function([], updates=[(W, W + delta)])
mv.barrier()
for i in xrange(100):
train_model()
train_model()
sharedvar.sync_all_mv_shared_vars()
mv.barrier()
# to get the newest value, we must sync again
sharedvar.sync_all_mv_shared_vars()
for j, actual in enumerate(W.get_value().reshape(-1)):
self.assertEqual((j + 1) * (i + 1) * 2 * mv.workers_num(), actual)
mv.barrier()
def test_sharedvar(self):
self._test_sharedvar(200, 200)
if __name__ == '__main__':
unittest.main()


@ -0,0 +1,64 @@
#!/usr/bin/env python
# coding:utf8
import lasagne
import numpy as np
import multiverso as mv
class MVNetParamManager(object):
'''
    MVNetParamManager is a manager that makes managing and synchronizing the
    variables in lasagne easier
'''
def __init__(self, network):
''' The constructor of MVNetParamManager
        The constructor will associate the parameters with a multiverso array
        table. The initial value of the ArrayTableHandler will be the same as the
        parameters of the network. If different parameters are used in different
        processes, their average will be used as the initial value
'''
self.shapes = []
self.dtypes = []
self.sizes = []
self.all_param_list = []
self.network = network
for arr in lasagne.layers.get_all_param_values(self.network):
self.shapes.append(arr.shape)
# TODO: Now only float32 is supported in multiverso. So I store all
            # the parameters in a float32 array. This place needs modification
            # once other types are supported
assert(np.dtype("float32") == arr.dtype)
self.dtypes.append(arr.dtype)
self.sizes.append(arr.size)
self.all_param_list.extend([i for i in np.nditer(arr)])
self.all_param_list = np.array(self.all_param_list)
self.tbh = mv.ArrayTableHandler(len(self.all_param_list), init_value=self.all_param_list)
        mv.barrier()  # add barrier to make sure the initial values have taken effect
self.all_param_list = self.tbh.get()
self._set_all_param_to_net()
def _set_all_param_to_net(self):
n = 0
params = []
for i, size in enumerate(self.sizes):
params.append(self.all_param_list[n:n + size].reshape(self.shapes[i]))
n += size
lasagne.layers.set_all_param_values(self.network, params)
def sync_all_param(self):
'''sync all parameters with multiverso server
This function will
1) calc all the delta of params in the network and add the delta to multiverso server
2) get the latest value from the multiverso server
'''
cur_network_params = np.concatenate([
arr.reshape(-1) for arr in lasagne.layers.get_all_param_values(self.network)])
params_delta = cur_network_params - self.all_param_list
self.tbh.add(params_delta)
self.all_param_list = self.tbh.get()
self._set_all_param_to_net()
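In a lasagne training script the manager is typically created once after the network is built, and sync_all_param is called every few updates so each worker pushes its accumulated delta and pulls back the averaged parameters. A hedged sketch; the tiny network, the sync frequency and the import path (assumed to follow the usual multiverso package layout) are illustrative, and the local update step is left as a placeholder:

import lasagne
import multiverso as mv
from multiverso.theano_ext.lasagne_ext.param_manager import MVNetParamManager  # assumed path

mv.init()
# a tiny illustrative network: 10 inputs -> 2 outputs
l_in = lasagne.layers.InputLayer((None, 10))
l_out = lasagne.layers.DenseLayer(l_in, num_units=2)
manager = MVNetParamManager(l_out)    # ties all parameters to one ArrayTable

for it in range(100):
    # ... run one ordinary local update of the network here ...
    if it % 10 == 0:                  # sync every 10 updates (arbitrary choice)
        manager.sync_all_param()      # push local delta, pull averaged params

mv.barrier()
mv.shutdown()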


@ -0,0 +1,100 @@
#!/usr/bin/env python
# coding:utf8
from theano.tensor.basic import TensorType, _tensor_py_operators
from theano.compile import SharedVariable
from theano.compile.sharedvalue import shared
from theano.gof import Variable, utils
import numpy
import multiverso as mv
class MVSharedVariable(object):
    '''MVSharedVariable is a wrapper of SharedVariable.
    It acts the same as SharedVariable. The only difference is that a multiverso
    ArrayTable is added to make it easier to sync values.
'''
def __init__(self, svobj):
'''Constructor of the MVSharedVariable
        The constructor will create an ArrayTableHandler and associate the shared
        variable with it. The initial value of the ArrayTableHandler will be the
        same as the value of the SharedVariable. If different initial values are
        used in different processes, their average will be used as the initial
        value
'''
assert(isinstance(svobj, SharedVariable))
self._svobj = svobj
self._mv_array = mv.ArrayTableHandler(self._svobj.get_value().size,
init_value=self._svobj.get_value().reshape((-1,)))
        mv.barrier()  # add barrier to make sure the initial values have taken effect
        # _last_mv_data stores a copy of the value. It will be used to calculate
        # the update for multiverso when calling mv_sync
self._last_mv_data = self._mv_array.get().reshape(self._svobj.get_value().shape)
self._svobj.set_value(self._last_mv_data, borrow=False)
def mv_sync(self):
''' sync values with multiverso server
mv_sync will add the delta of SharedVariable, which is usually the
gradients in typical examples, to parameter server and then get the
latest value in multiverso.
'''
        # because multiverso always uses the add method to sync values, the delta
        # is the difference between the current value and the last synced value
self._mv_array.add(self._svobj.get_value() - self._last_mv_data)
self._svobj.set_value(self._mv_array.get().reshape(self._svobj.get_value().shape))
self._last_mv_data = self._svobj.get_value(borrow=False)
def __getstate__(self):
'''This is for cPickle to store state.
It is usually called when you want to dump the model to file with
cPickle
'''
odict = self.__dict__.copy() # copy the dict since we change it
del odict['_mv_array'] # remove mv_array, because we can't pickle it
return odict
def __getattribute__(self, attr):
        '''This function makes MVSharedVariable act the same as SharedVariable'''
if attr in ['_svobj', '_mv_array', '_last_mv_data']:
            # If getting an attribute of self, use the parent __getattribute__ to
            # get the attribute from the object; otherwise it would fall into an
            # infinite loop
return object.__getattribute__(self, attr)
elif attr in ['mv_sync', "__getstate__"]:
            # If calling a method of MVSharedVariable, call the method directly
            # and bind the method to the self object
return getattr(MVSharedVariable, attr).__get__(self)
else:
            # Otherwise get the attribute from the wrapped object
return getattr(self._svobj, attr)
def mv_shared(*args, **kwargs):
    '''mv_shared works the same as `theano.shared`
It calls `theano.shared` to create the SharedVariable and use
MVSharedVariable to wrap it.
'''
var = shared(*args, **kwargs)
mv_shared.shared_vars.append(MVSharedVariable(var))
return var
mv_shared.shared_vars = [] # all shared_vars in multiverso will be recorded here
def sync_all_mv_shared_vars():
    '''Sync shared values created by `mv_shared` with multiverso.
    It is often used while training a model; it adds the gradients
    (delta values) to the server and fetches the latest value from the server.
    Notice: it will **only** sync shared values created by `mv_shared`
'''
for sv in mv_shared.shared_vars:
sv.mv_sync()
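The intended pattern is to create parameters with mv_shared instead of theano.shared and to call sync_all_mv_shared_vars periodically; the unit test earlier in this commit exercises exactly this. A compressed sketch of the same flow (sizes and the number of steps are illustrative):

import numpy as np
import theano
import multiverso as mv
from multiverso.theano_ext import sharedvar

mv.init()
W = sharedvar.mv_shared(value=np.zeros((5, 5), dtype=theano.config.floatX),
                        name='W', borrow=True)
delta = np.ones((5, 5), dtype=theano.config.floatX)
step = theano.function([], updates=[(W, W + delta)])

mv.barrier()
for _ in range(100):
    step()                               # local update of the shared variable
    sharedvar.sync_all_mv_shared_vars()  # push the delta, pull the averaged value
mv.barrier()
mv.shutdown()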


@ -0,0 +1,77 @@
#!/usr/bin/env python
# coding:utf8
import ctypes
import os
import platform
from ctypes.util import find_library
import numpy as np
PACKAGE_PATH = os.path.abspath(os.path.dirname(__file__))
class Loader(object):
'''
    This loader is responsible for loading the multiverso dynamic library on
    both *nix and Windows
'''
LIB = None
@classmethod
def _find_mv_path(cls):
if platform.system() == "Windows":
mv_lib_path = find_library("Multiverso")
if mv_lib_path is None:
print "* Fail to load Multiverso.dll from the windows $PATH."\
"Because Multiverso.dll can not be found in the $PATH "\
"directories. Go on loading Multiverso from the package."
else:
return mv_lib_path
mv_lib_path = os.path.join(PACKAGE_PATH, "Multiverso.dll")
if not os.path.exists(mv_lib_path):
print "* Fail to load Multiverso.dll from the package. Because"\
" the file " + mv_lib_path + " can not be found."
else:
return mv_lib_path
else:
mv_lib_path = find_library("multiverso")
if mv_lib_path is None:
print "* Fail to load libmultiverso.so from the system"\
"libraries. Because libmultiverso.so can't be found in"\
"library paths. Go on loading Multiverso from the package."
else:
return mv_lib_path
mv_lib_path = os.path.join(PACKAGE_PATH, "libmultiverso.so")
if not os.path.exists(mv_lib_path):
print "* Fail to load libmultiverso.so from the package. Because"\
" the file " + mv_lib_path + " can not be found."
else:
return mv_lib_path
return None
@classmethod
def load_lib(cls):
mv_lib_path = cls._find_mv_path()
if mv_lib_path is None:
print "Fail to load the multiverso library. Please make sure you"\
" have installed multiverso successfully"
else:
print "Find the multiverso library successfully(%s)" % mv_lib_path
return ctypes.cdll.LoadLibrary(mv_lib_path)
@classmethod
def get_lib(cls):
if not cls.LIB:
cls.LIB = cls.load_lib()
cls.LIB.MV_NumWorkers.restype = ctypes.c_int
return cls.LIB
def convert_data(data):
'''convert the data to float32 ndarray'''
if not isinstance(data, np.ndarray):
data = np.array(data)
return data.astype(np.float32)

Diff not shown because of its large size.


@ -0,0 +1,58 @@
from CLM import train
def log_with_print(log, context):
print >>log, context
print context
logfile = __file__ + 'log'
log = open(logfile, 'w')
round = 0
log_with_print(log, 'round ' + str(round) + ' begin ------------------------------- !!')
# change some for round
max_epochs = 100000
obj_directory = r'..\Sentiment_CLM_WithDropout'
reload_model = obj_directory + r'\T.npz'
train(round = round,
saveto = obj_directory + '\\round%d_model_lstm.npz'%(round),
reload_model = reload_model,
reload_option = reload_model + '.pkl',
dataset = r'../data/imdb.pkl', #%(work_id + 1),
encoder = 'lstm',
dropout_input = 0.5,
dropout_output= 0.5,
clip_c = 5.,
dim_word = 500,
dim_proj = 1024,
n_words = 10000,
#n_words_sqrt = n_words_sqrt,
optimizer = 'adadelta',
lrate = 0.5,
maxlen = None,
minlen = 1,
start_iter = 0,
start_epoch = 0,
max_epochs = max_epochs, #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
batch_size = 16,
patience = 100,
validFreq = 5000,
saveFreq = 50000000,
dispFreq = 1,
sampleFreq = 20000000,
newDumpFreq = 20000,
syncFreq = 5000000000,
sampleNum = 25,
decay_c = 0.,
log = logfile,
monitor_grad = False,
sampleFileName= obj_directory + '\\round%d_sample.txt'%(round),
pad_sos = False,
embedding = '../data/embedding500.npz'
)


@ -0,0 +1,58 @@
from CLM import train
def log_with_print(log, context):
print >>log, context
print context
logfile = __file__ + 'log'
log = open(logfile, 'w')
round = 0
log_with_print(log, 'round ' + str(round) + ' begin ------------------------------- !!')
# change some for round
max_epochs = 100000
obj_directory = r'..\Sentiment_CLM_nodrop'
reload_model = obj_directory + r'\de.npz'
train(round = round,
saveto = obj_directory + '\\round%d_model_lstm.npz'%(round),
reload_model = None, #reload_model,
reload_option = None, #reload_model + '.pkl',
dataset = r'../data/imdb.pkl', #%(work_id + 1),
encoder = 'lstm',
dropout_input = None,
dropout_output= None,
clip_c = 5.,
dim_word = 500,
dim_proj = 1024,
n_words = 10000,
#n_words_sqrt = n_words_sqrt,
optimizer = 'adadelta',
lrate = 1.0,
maxlen = None,
minlen = 1,
start_iter = 0,
start_epoch = 0,
max_epochs = max_epochs, #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
batch_size = 16,
patience = 100,
validFreq = 10000,
saveFreq = 50000000,
dispFreq = 1,
sampleFreq = 20000000,
newDumpFreq = 20000,
syncFreq = 5000000000,
sampleNum = 25,
decay_c = 0.,
log = logfile,
monitor_grad = False,
sampleFileName= obj_directory + '\\round%d_sample.txt'%(round),
pad_sos = False,
embedding = '../data/embedding500.npz'
)


@ -0,0 +1,58 @@
from CLM import train
def log_with_print(log, context):
print >>log, context
print context
logfile = __file__ + 'log'
log = open(logfile, 'w')
round = 0
log_with_print(log, 'round ' + str(round) + ' begin ------------------------------- !!')
# change some for round
max_epochs = 100000
obj_directory = r'..\Sentiment_CLM_nodrop_lr0.5'
reload_model = obj_directory + r'\T.npz'
train(round = round,
saveto = obj_directory + '\\round%d_model_lstm.npz'%(round),
reload_model = reload_model,
reload_option = reload_model + '.pkl',
dataset = r'../data/imdb.pkl', #%(work_id + 1),
encoder = 'lstm',
dropout_input = None,
dropout_output= None,
clip_c = 5.,
dim_word = 500,
dim_proj = 1024,
n_words = 10000,
#n_words_sqrt = n_words_sqrt,
optimizer = 'adadelta',
lrate = 0.5,
maxlen = None,
minlen = 1,
start_iter = 0,
start_epoch = 0,
max_epochs = max_epochs, #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
batch_size = 16,
patience = 100,
validFreq = 5000,
saveFreq = 50000000,
dispFreq = 1,
sampleFreq = 20000000,
newDumpFreq = 20000,
syncFreq = 5000000000,
sampleNum = 25,
decay_c = 0.,
log = logfile,
monitor_grad = False,
sampleFileName= obj_directory + '\\round%d_sample.txt'%(round),
pad_sos = False,
embedding = '../data/embedding500.npz'
)


@ -0,0 +1,4 @@
@echo off
setlocal ENABLEDELAYEDEXPANSION
set THEANO_FLAGS=device=gpu1
python train_clm_WithDropout_lr0.5.py


@ -0,0 +1,148 @@
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem Windows batch file to use Theano on GCR
@rem
@rem Updated: April 7, 2016
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem set the PATH system variable
@rem Start from the 26th letter
set working_sub_dir=%cd:~26%
set PATH=^
C:\Windows\system32;^
C:\Windows\System32\Wbem;^
C:\Windows\System32\WindowsPowerShell\v1.0\;^
C:\Windows;^
C:\Program Files\Microsoft HPC Pack 2012\Bin\;^
C:\Program Files\Microsoft MPI\Bin\;^
C:\Program Files (x86)\Windows Kits\8.1\Windows Performance Toolkit\
pushd \\gcr\Scratch\RR1\v-yixia\Theano
set ToolkitFolderDriver=%cd%
@rem set the environment variable for the CUDA 7.5 Toolkit
rem set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 rem the old version
set CUDA_HOME=%ToolkitFolderDriver%\CUDA\v7.0+cudnn4008
set CUDA_BIN=%CUDA_HOME%\bin
set CUDA_INCLUDE=%CUDA_HOME%\include
set CUDA_LIB=%CUDA_HOME%\lib\x64
set CUDA_LIBNVVP=%CUDA_HOME%\libnvvp
@rem add all CUDA Toolkit folders to the PATH system variable
set PATH=^
%CUDA_HOME%;^
%CUDA_BIN%;^
%CUDA_INCLUDE%;^
%CUDA_LIB%;^
%CUDA_LIBNVVP%;^
%PATH%
@echo %PATH%
@rem setting up VC compiler
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem connect to the shared toolkit folder \\gcr\Tools\Shared_Toolkits\Theano
rem pushd \\gcr\Tools\Shared_Toolkits\Theano
@rem unset these variables
@set Framework40Version=
@set FrameworkDIR32=
@set FrameworkVersion32=
@set FSHARPINSTALLDIR=
@set VSINSTALLDIR=
@set WindowsSDK_ExecutablePath_x64=
@set WindowsSDK_ExecutablePath_x86=
@set VCINSTALLDIR=%ToolkitFolderDriver%\VS_portable\Microsoft Visual Studio 12.0v2\VC\
@set WindowsSdkDir=%ToolkitFolderDriver%\Windows Kits\8.1v2\
:amd64
@rem set Windows SDK include/lib path
@rem --------------------------------------------------
if not "%WindowsSdkDir%" == "" @set PATH=%WindowsSdkDir%bin\x64;%WindowsSdkDir%bin\x86;%PATH%
if not "%WindowsSdkDir%" == "" @set INCLUDE=%WindowsSdkDir%include\shared;%WindowsSdkDir%include\um;%WindowsSdkDir%include\winrt;%INCLUDE%
if not "%WindowsSdkDir%" == "" @set LIB=%WindowsSdkDir%lib\winv6.3\um\x64;%LIB%
if not "%WindowsSdkDir%" == "" @set LIBPATH=%WindowsLibPath%;%ExtensionSDKDir%\Microsoft.VCLibs\14.0\References\CommonConfiguration\neutral;%LIBPATH%
@rem set the environment variables for Microsoft Visual Studio
@rem --------------------------------------------------
@rem PATH
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH%
if exist "%VCINSTALLDIR%BIN\amd64" set PATH=%VCINSTALLDIR%BIN\amd64;%PATH%
@rem --------------------------------------------------
@rem INCLUDE
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE%
if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE%
@rem --------------------------------------------------
@rem LIB
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIB=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIB%
if exist "%VCINSTALLDIR%LIB\amd64" set LIB=%VCINSTALLDIR%LIB\amd64;%LIB%
@rem --------------------------------------------------
@rem LIBPATH
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIBPATH%
if exist "%VCINSTALLDIR%LIB\amd64" set LIBPATH=%VCINSTALLDIR%LIB\amd64;%LIBPATH%
@rem set the environment variables for the cuDNN v4 (Feb 10, 2016) for CUDA 7.0 and later.
rem set CUDNN_PATH=%ToolkitFolderDriver%\Shared_Toolkits\Theano\CUDA\cudnn-4.0.7\cuda
rem set INCLUDE=%CUDNN_PATH%\include;%INCLUDE%
rem set LIB=%CUDNN_PATH%\lib\x64;%LIB%
rem set PATH=%CUDNN_PATH%\bin;%PATH%
set Platform=X64
set CommandPromptType=Native
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem connect to your scratch storage \\gcr\Scratch\<location>\<alias>
@rem Note: Please copy ANACONDA2 to \\gcr\Scratch\<location>\<alias>\Anaconda2
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
pushd \\gcr\scratch\RR1\v-yixia
set CONDANETDRIVE=%cd:~0,2%
@rem set the environment variable for the Anaconda2
set ANACONDA2=%CONDANETDRIVE%\RR1\taoqin\Anaconda-gpu002
set ANACONDA2_SCRIPTS=%ANACONDA2%\Scripts
set ANACONDA2_BIN=%ANACONDA2%\Library\bin
@rem add Anaconda2 folders to the PATH system variable
set PATH=^
%ANACONDA2%;^
%ANACONDA2_BIN%;^
%ANACONDA2_SCRIPTS%;^
%PATH%
@echo %PATH%
@rem example files from DeepLearningTutorials are available at \\gcr\Tools\Shared_Toolkits\Theano\Examples
set PROJDRIVE=%CONDANETDRIVE%
set MYHOME=%PROJDRIVE%\RR1\v-yixia
set PROJHOME=%MYHOME%\%working_sub_dir%
%PROJDRIVE%
cd %PROJHOME%
@rem setup theano env (generate .theanorc.txt)
call python gen_theanorc.py %ANACONDA2% .theanorc.txt
del %userprofile%\.theanorc.txt /Q /F
copy .theanorc.txt %userprofile% /Y
call python write_script.py %*
call worker.bat
@echo delete theano env
del %userprofile%\.theanorc.txt /Q /F
popd
popd


@ -0,0 +1,148 @@
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem Windows batch file to use Theano on GCR
@rem
@rem Updated: April 7, 2016
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem set the PATH system variable
@rem Start from the 26th letter
set working_sub_dir=%cd:~26%
set PATH=^
C:\Windows\system32;^
C:\Windows\System32\Wbem;^
C:\Windows\System32\WindowsPowerShell\v1.0\;^
C:\Windows;^
C:\Program Files\Microsoft HPC Pack 2012\Bin\;^
C:\Program Files\Microsoft MPI\Bin\;^
C:\Program Files (x86)\Windows Kits\8.1\Windows Performance Toolkit\
pushd \\gcr\Scratch\RR1\v-yixia\Theano
set ToolkitFolderDriver=%cd%
@rem set the environment variable for the CUDA 7.5 Toolkit
rem set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 rem the old version
set CUDA_HOME=%ToolkitFolderDriver%\CUDA\v7.0
set CUDA_BIN=%CUDA_HOME%\bin
set CUDA_INCLUDE=%CUDA_HOME%\include
set CUDA_LIB=%CUDA_HOME%\lib\x64
set CUDA_LIBNVVP=%CUDA_HOME%\libnvvp
@rem add all CUDA Toolkit folders to the PATH system variable
set PATH=^
%CUDA_HOME%;^
%CUDA_BIN%;^
%CUDA_INCLUDE%;^
%CUDA_LIB%;^
%CUDA_LIBNVVP%;^
%PATH%
@echo %PATH%
@rem setting up VC compiler
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem connect to the shared toolkit folder \\gcr\Tools\Shared_Toolkits\Theano
rem pushd \\gcr\Tools\Shared_Toolkits\Theano
@rem unset these variables
@set Framework40Version=
@set FrameworkDIR32=
@set FrameworkVersion32=
@set FSHARPINSTALLDIR=
@set VSINSTALLDIR=
@set WindowsSDK_ExecutablePath_x64=
@set WindowsSDK_ExecutablePath_x86=
@set VCINSTALLDIR=%ToolkitFolderDriver%\VS_portable\Microsoft Visual Studio 12.0\VC\
@set WindowsSdkDir=%ToolkitFolderDriver%\Windows Kits\8.1\
:amd64
@rem set Windows SDK include/lib path
@rem --------------------------------------------------
if not "%WindowsSdkDir%" == "" @set PATH=%WindowsSdkDir%bin\x64;%WindowsSdkDir%bin\x86;%PATH%
if not "%WindowsSdkDir%" == "" @set INCLUDE=%WindowsSdkDir%include\%WindowsSDKVersion%shared;%WindowsSdkDir%include\%WindowsSDKVersion%um;%WindowsSdkDir%include\%WindowsSDKVersion%winrt;%INCLUDE%
if not "%WindowsSdkDir%" == "" @set LIB=%WindowsSdkDir%lib\%WindowsSDKLibVersion%um\x64;%LIB%
if not "%WindowsSdkDir%" == "" @set LIBPATH=%WindowsLibPath%;%ExtensionSDKDir%\Microsoft.VCLibs\14.0\References\CommonConfiguration\neutral;%LIBPATH%
@rem set the environment variables for Microsoft Visual Studio
@rem --------------------------------------------------
@rem PATH
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH%
if exist "%VCINSTALLDIR%BIN\amd64" set PATH=%VCINSTALLDIR%BIN\amd64;%PATH%
@rem --------------------------------------------------
@rem INCLUDE
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE%
if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE%
@rem --------------------------------------------------
@rem LIB
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIB=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIB%
if exist "%VCINSTALLDIR%LIB\amd64" set LIB=%VCINSTALLDIR%LIB\amd64;%LIB%
@rem --------------------------------------------------
@rem LIBPATH
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIBPATH%
if exist "%VCINSTALLDIR%LIB\amd64" set LIBPATH=%VCINSTALLDIR%LIB\amd64;%LIBPATH%
@rem set the environment variables for the cuDNN v4 (Feb 10, 2016) for CUDA 7.0 and later.
set CUDNN_PATH=%ToolkitFolderDriver%\Shared_Toolkits\Theano\CUDA\cudnn-4.0.7\cuda
set INCLUDE=%CUDNN_PATH%\include;%INCLUDE%
set LIB=%CUDNN_PATH%\lib\x64;%LIB%
set PATH=%CUDNN_PATH%\bin;%PATH%
set Platform=X64
set CommandPromptType=Native
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem connect to your scratch storage \\gcr\Scratch\<location>\<alias>
@rem Note: Please copy ANACONDA2 to \\gcr\Scratch\<location>\<alias>\Anaconda2
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
pushd \\gcr\scratch\RR1\v-yixia
set CONDANETDRIVE=%cd:~0,2%
@rem set the environment variable for the Anaconda2
set ANACONDA2=%CONDANETDRIVE%\RR1\v-yirwan\Anaconda2
set ANACONDA2_SCRIPTS=%ANACONDA2%\Scripts
set ANACONDA2_BIN=%ANACONDA2%\Library\bin
@rem add Anaconda2 folders to the PATH system variable
set PATH=^
%ANACONDA2%;^
%ANACONDA2_BIN%;^
%ANACONDA2_SCRIPTS%;^
%PATH%
@echo %PATH%
@rem example files from DeepLearningTutorials are available at \\gcr\Tools\Shared_Toolkits\Theano\Examples
set PROJDRIVE=%CONDANETDRIVE%
set MYHOME=%PROJDRIVE%\RR1\v-yixia
set PROJHOME=%MYHOME%\%working_sub_dir%
%PROJDRIVE%
cd %PROJHOME%
@rem setup theano env (generate .theanorc.txt)
call python gen_theanorc.py %ANACONDA2% .theanorc.txt
del %userprofile%\.theanorc.txt /Q /F
copy .theanorc.txt %userprofile% /Y
call python write_script.py %1
call worker.bat
@echo delete theano env
del %userprofile%\.theanorc.txt /Q /F
popd
popd


@ -0,0 +1,39 @@
import re, os, numpy, sys
filename = r'.\gpu_usage_draft'
def GrabGPU():
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename
os.system(cmdstr)
def GetGPUUSage():
pattern = re.compile(r'(?P<num>[0-9]+)MiB[\s]+/')
mem = []
fo = open(filename, 'r')
for line in fo:
result = pattern.search(line)
if result:
mem.append(int(result.group('num')))
fo.close()
return numpy.array(mem).argsort()[0]
def print_script(cmd):
GrabGPU()
with open('worker.bat', 'w') as f:
f.write('@echo off\nsetlocal ENABLEDELAYEDEXPANSION\n')
if len(cmd) == 1:
f.write('set THEANO_FLAGS=device=gpu%d\n' % GetGPUUSage())
f.write('python ' + cmd[0])
elif len(cmd) == 2:
f.write('set THEANO_FLAGS=device=gpu' + cmd[1] + '\n')
f.write('python ' + cmd[0])
if __name__ == '__main__':
print_script(sys.argv[1:])
# os.system('del /q ' + filename + rank)


@ -0,0 +1,40 @@
import sys
mapper_machine_freecard = {}
mapper_machine_rank = {}
def MapIDs(m_machine):
for i in range(m_machine):
fo = open('record' + str(i))
id = 0
m_line = 0
machine_name = ''
for line in fo:
if id == 0:
machine_name = line[:-1]
mapper_machine_freecard[machine_name] = []
if mapper_machine_rank.has_key(machine_name):
mapper_machine_rank[machine_name].append(i)
else:
mapper_machine_rank[machine_name] = [i]
elif id > 1:
mapper_machine_freecard[machine_name].append(int(line))
id = id + 1
fo.close()
def Map_Rank_Card(m_machine):
MapIDs(m_machine)
allocations = range(m_machine)
for k in mapper_machine_rank.keys():
ranks = mapper_machine_rank[k]
cards = mapper_machine_freecard[k]
#if len(ranks) == len(cards):
for i in range(len(ranks)):
allocations[ranks[i]] = cards[i]
for l in allocations:
print l
if __name__ == '__main__':
Map_Rank_Card(int(sys.argv[1]))


@ -0,0 +1,369 @@
"""
data loading and minibatch generation
"""
__author__ = 'v-yirwan'
import cPickle as pkl
import gzip
import os
import numpy
from theano import config
def get_dataset_file(dataset, default_dataset, origin):
'''
Look for it as if it was a full path, if not, try local file,
if not try in the data directory.
Download dataset if it is not present
'''
data_dir, data_file = os.path.split(dataset)
if data_dir == "" and not os.path.isfile(dataset):
# Check if dataset is in the data directory.
new_path = os.path.join(
os.path.split(__file__)[0],
"..",
"data",
dataset
)
if os.path.isfile(new_path) or data_file == default_dataset:
dataset = new_path
if (not os.path.isfile(dataset)) and data_file == default_dataset:
from six.moves import urllib
print('Downloading data from %s' % origin)
urllib.request.urlretrieve(origin, dataset)
return dataset
def load_data(path="imdb.pkl", n_words=100000, maxlen=None,
sort_by_len=True, fixed_valid=True, valid_portion=0.1):
'''
Loads the dataset
:type path: String
:param path: The path to the dataset (here IMDB)
:type n_words: int
    :param n_words: The number of words to keep in the vocabulary.
        All extra words are set to unknown (1).
:type maxlen: None or positive int
:param maxlen: the max sequence length we use in the train/valid set.
:type sort_by_len: bool
    :param sort_by_len: Sort by the sequence length for the train,
        valid and test set. This allows faster execution as it causes
        less padding per minibatch. Another mechanism must be used to
shuffle the train set at each epoch.
:type fixed_valid: bool
:param fixed_valid: load fixed validation set from the corpus file,
which would otherwise be picked randomly from the training set with
proportion [valid_portion]
:type valid_portion: float
:param valid_portion: The proportion of the full train set used for
the validation set.
'''
# Load the dataset
path = get_dataset_file(
path, "imdb.pkl",
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
if path.endswith(".gz"):
f = gzip.open(path, 'rb')
else:
f = open(path, 'rb')
train_set = pkl.load(f)
if fixed_valid:
valid_set = pkl.load(f)
test_set = pkl.load(f)
f.close()
def _truncate_data(train_set):
'''
        truncate sequences whose lengths exceed the max-len threshold
:param train_set: a list of sequences list and corresponding labels list
:return: truncated train_set
'''
new_train_set_x = []
new_train_set_y = []
for x, y in zip(train_set[0], train_set[1]):
if len(x) < maxlen:
new_train_set_x.append(x)
new_train_set_y.append(y)
train_set = (new_train_set_x, new_train_set_y)
del new_train_set_x, new_train_set_y
return train_set
def _set_valid(train_set, valid_portion):
'''
set validation with [valid_portion] proportion of training set
'''
train_set_x, train_set_y = train_set
n_samples = len(train_set_x)
sidx = numpy.random.permutation(n_samples) # shuffle data
n_train = int(numpy.round(n_samples * (1. - valid_portion)))
valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
train_set_x = [train_set_x[s] for s in sidx[:n_train]]
train_set_y = [train_set_y[s] for s in sidx[:n_train]]
train_set = (train_set_x, train_set_y)
valid_set = (valid_set_x, valid_set_y)
del train_set_x, train_set_y, valid_set_x, valid_set_y
return train_set, valid_set
if maxlen:
train_set = _truncate_data(train_set)
if fixed_valid:
print 'Loading with fixed validation set...',
valid_set = _truncate_data(valid_set)
else:
print 'Setting validation set with proportion:', valid_portion, '...',
train_set, valid_set = _set_valid(train_set, valid_portion)
test_set = _truncate_data(test_set)
if maxlen is None and not fixed_valid:
train_set, valid_set = _set_valid(train_set, valid_portion)
def remove_unk(x):
return [[1 if w >= n_words else w for w in sen] for sen in x]
test_set_x, test_set_y = test_set
valid_set_x, valid_set_y = valid_set
train_set_x, train_set_y = train_set
# remove unk from dataset
train_set_x = remove_unk(train_set_x) # use 1 if unk
valid_set_x = remove_unk(valid_set_x)
test_set_x = remove_unk(test_set_x)
def len_argsort(seq):
return sorted(range(len(seq)), key=lambda x: len(seq[x]))
if sort_by_len:
sorted_index = len_argsort(test_set_x)
# ranked from shortest to longest
test_set_x = [test_set_x[i] for i in sorted_index]
test_set_y = [test_set_y[i] for i in sorted_index]
sorted_index = len_argsort(valid_set_x)
valid_set_x = [valid_set_x[i] for i in sorted_index]
valid_set_y = [valid_set_y[i] for i in sorted_index]
sorted_index = len_argsort(train_set_x)
train_set_x = [train_set_x[i] for i in sorted_index]
train_set_y = [train_set_y[i] for i in sorted_index]
train = (train_set_x, train_set_y)
valid = (valid_set_x, valid_set_y)
test = (test_set_x, test_set_y)
return train, valid, test
def load_mnist(path='mnist.pkl', fixed_permute=True, rand_permute=False):
f = open(path, 'rb')
train = pkl.load(f)
valid = pkl.load(f)
test = pkl.load(f)
f.close()
def _permute(data, perm):
x, y = data
x_new = []
for xx in x:
xx_new = [xx[pp] for pp in perm]
x_new.append(xx_new)
return (x_new, y)
def _trans2list(data):
x, y = data
x = [list(xx) for xx in x]
return (x, y)
if rand_permute:
print 'Using a fixed random permutation of pixels...',
perm = numpy.random.permutation(range(784))
train = _permute(train, perm)
valid = _permute(valid, perm)
test = _permute(test, perm)
elif fixed_permute:
print 'Using permuted dataset...',
        train = _trans2list(train)
        valid = _trans2list(valid)
        test = _trans2list(test)
return train, valid, test
def get_minibatches_idx(n, minibatch_size, shuffle=False):
"""
Used to shuffle the dataset at each iteration.
"""
idx_list = numpy.arange(n, dtype="int32")
if shuffle:
numpy.random.shuffle(idx_list)
minibatches = []
minibatch_start = 0
for i in range(n // minibatch_size):
minibatches.append(idx_list[minibatch_start:
minibatch_start + minibatch_size])
minibatch_start += minibatch_size
if (minibatch_start != n):
# Make a minibatch out of what is left
minibatches.append(idx_list[minibatch_start:])
return zip(range(len(minibatches)), minibatches)
def get_minibatches_idx_bucket(dataset, minibatch_size, shuffle=False):
"""
    divide the data into buckets according to sequence length and use a
    dynamic (per-bucket) batch size
"""
# divide into buckets
slen = [len(ss) for ss in dataset]
bucket1000 = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 0 and slen[sidx] <= 1000]
bucket3000 = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 1000 and slen[sidx] <= 3000]
bucket_long = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 3000]
# shuffle each bucket
if shuffle:
numpy.random.shuffle(bucket1000)
numpy.random.shuffle(bucket3000)
numpy.random.shuffle(bucket_long)
# make minibatches
def _make_batch(minibatches, bucket, minibatch_size):
minibatch_start = 0
n = len(bucket)
for i in range(n // minibatch_size):
minibatches.append(bucket[minibatch_start : minibatch_start + minibatch_size])
minibatch_start += minibatch_size
if (minibatch_start != n):
# Make a minibatch out of what is left
minibatches.append(bucket[minibatch_start:])
return minibatches
minibatches = []
_make_batch(minibatches, bucket1000, minibatch_size=minibatch_size)
_make_batch(minibatches, bucket3000, minibatch_size=minibatch_size//2)
_make_batch(minibatches, bucket_long, minibatch_size=minibatch_size//8)
# shuffle minibatches
numpy.random.shuffle(minibatches)
return zip(range(len(minibatches)), minibatches)
def prepare_data(seqs, labels, maxlen=None, dataset='text'):
"""Create the matrices from the datasets.
    This pads each sequence to the same length: the length of the
    longest sequence or maxlen.
    If maxlen is set, sequences longer than maxlen are dropped from
    the batch.
    This swaps the axes!
"""
# x: a list of sentences
lengths = [len(s) for s in seqs]
if maxlen is not None:
new_seqs = []
new_labels = []
new_lengths = []
for l, s, y in zip(lengths, seqs, labels):
if l < maxlen:
new_seqs.append(s)
new_labels.append(y)
new_lengths.append(l)
lengths = new_lengths
labels = new_labels
seqs = new_seqs
if len(lengths) < 1:
return None, None, None
n_samples = len(seqs)
maxlen = numpy.max(lengths)
if dataset == 'mnist':
x = numpy.zeros((maxlen, n_samples)).astype('float32')
else:
x = numpy.zeros((maxlen, n_samples)).astype('int64')
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
for idx, s in enumerate(seqs):
x[:lengths[idx], idx] = s
x_mask[:lengths[idx], idx] = 1.
return x, x_mask, labels
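# Illustrative sketch (hypothetical toy input): prepare_data pads to the longest
# sequence and returns time-major matrices, so two sequences of lengths 2 and 3
# give x and x_mask of shape (maxlen, n_samples) = (3, 2).
def _demo_prepare_data():
    x, x_mask, labels = prepare_data([[3, 4], [5, 6, 7]], [0, 1])
    print x.shape, x_mask.sum(axis=0)   # -> (3, 2) [ 2.  3.]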
def prepare_data_hier(seqs, labels, hier_len, maxlen=None, dataset='text'):
'''
prepare minibatch for hierarchical model
'''
# sort (long->short)
sorted_idx = sorted(range(len(seqs)), key=lambda x: len(seqs[x]), reverse=True)
seqs = [seqs[i] for i in sorted_idx]
labels = [labels[i] for i in sorted_idx]
# truncate data
lengths = [len(s) for s in seqs]
if maxlen is not None:
new_seqs = []
new_labels = []
new_lengths = []
for l, s, y in zip(lengths, seqs, labels):
            if l < maxlen:
new_seqs.append(s)
new_labels.append(y)
new_lengths.append(l)
lengths = new_lengths
labels = new_labels
seqs = new_seqs
if len(lengths) < 1:
return None, None, None
# set batch size
n_samples = len(seqs)
maxlen = numpy.max(lengths)
if maxlen % hier_len == 0:
n_batch = maxlen/hier_len
else:
n_batch = maxlen//hier_len + 1
maxlen = n_batch * hier_len
# padding whole batch
if dataset == 'mnist':
x = numpy.zeros((maxlen, n_samples)).astype('float32')
else:
x = numpy.zeros((maxlen, n_samples)).astype('int64')
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
for idx, s in enumerate(seqs):
x[:lengths[idx], idx] = s
x_mask[:lengths[idx], idx] = 1
# slice to mini-batches
x_batch = [x[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
if dataset == 'mnist':
x_batch = numpy.array(x_batch).astype('float32')
else:
x_batch = numpy.array(x_batch).astype('int64')
mask_batch = [x_mask[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
mask_batch = numpy.array(mask_batch).astype(config.floatX)
# mask for hier-level
mask_hier = numpy.ones((n_batch, n_samples)).astype(config.floatX)
for idx in range(n_samples):
mpos = numpy.where(x_mask[:, idx]==0)[0]
if len(mpos) == 0:
continue
bidx = min(mpos[0]//hier_len+1, n_batch)
if mpos[0] % hier_len == 0:
bidx -= 1 # bug fixed TODO: more elegant solution?
mask_hier[bidx:, idx] = 0
return x_batch, mask_batch, mask_hier, labels
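# Illustrative sketch (hypothetical toy input): the hierarchical variant
# re-sorts sequences long->short, pads the batch to a multiple of hier_len and
# slices it into hier_len-sized chunks, so lengths (5, 3) with hier_len=2 give
# x_batch of shape (n_chunks, hier_len, n_samples) = (3, 2, 2).
def _demo_prepare_data_hier():
    x_b, m_b, m_h, y = prepare_data_hier([[1, 2, 3], [4, 5, 6, 7, 8]], [0, 1], hier_len=2)
    print x_b.shape, m_b.shape, m_h.shape   # -> (3, 2, 2) (3, 2, 2) (3, 2)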


@ -0,0 +1,38 @@
import re
import os
import socket
import sys
filename = r'.\gpu_usage_draft_'
default_gpu = 58 + 30
def GrabGPU(rank):
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename + rank
os.system(cmdstr)
def GetGPUUSage(rank):
pattern = re.compile(r'(?P<num>[0-9]{1,5})MiB[\s]+/')
id = 0
GPUs = []
fo = open(filename + rank, 'r')
for line in fo:
result = pattern.search(line)
if result:
if int(result.group("num")) < default_gpu:
GPUs.append(id)
id = id + 1
fo.close()
print len(GPUs)
for gpu in GPUs:
print gpu
if __name__ == '__main__':
rank = sys.argv[1]
GrabGPU(rank)
print socket.gethostname()
GetGPUUSage(rank)
#os.system('del /q ' + filename + rank)


@ -0,0 +1,838 @@
"""
supports simple-rnn, lstm, hierarchical lstm
supports lstm with identity skip-connections(soft), parametric skip-connections(soft)
supports resnet, resnet with identity skip-connections(full and soft), parametric skip connections(soft)
supports hybrid structure (lstm+resnet)
"""
__author__ = 'v-yirwan'
import theano.tensor as tensor
from Util import *
layers = {'lstm': ('param_init_lstm', 'lstm_layer'),
'lstm_skip': ('param_init_lstm', 'lstm_skip_layer'),
'lstm_pskip': ('param_init_lstm_pskip', 'lstm_pskip_layer'),
'residual': ('param_init_residual', 'residual_layer'),
'residual_full_skip': ('param_init_residual', 'residual_full_skip_layer'),
'residual_skip': ('param_init_residual', 'residual_skip_layer'),
'residual_pskip': ('param_init_residual_pskip', 'residual_pskip_layer'),
'rnn': ('param_init_rnn', 'rnn_layer'),
'rnn_pskip': ('param_init_rnn_pskip', 'rnn_pskip_layer'),
# modules for ResNet Modifications
'presidual': ('param_init_presidual', 'presidual_layer'),
'pxresidual': ('param_init_pxresidual', 'pxresidual_layer'),
'residual_pskip_mod': ('param_init_residual_pskip', 'residual_pskip_mod_layer')
}
def _p(pp, name):
return '%s_%s' % (pp, name)
def get_layer(name):
fns = layers[name]
return (eval(fns[0]), eval(fns[1]))
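# Illustrative sketch: the registry above resolves an encoder name to its
# (parameter initializer, layer builder) pair; 'options', 'params', 'tparams',
# 'emb' and 'mask' below stand for the usual model-building variables.
#   init_fn, layer_fn = get_layer('lstm')
#   params = init_fn(options, params, prefix='lstm')
#   proj = layer_fn(tparams, emb, options, prefix='lstm', mask=mask)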
# ===========================
# LSTM-related layers
# LSTM, LSTM with identity and parametric skip connections (soft)
# ===========================
def param_init_lstm(options, params, prefix='lstm', hier_level=False):
"""
Init the LSTM parameter
Support hierarchical architecture
"""
if hier_level:
# bug fixed: dimension matching for hier-mode
W = numpy.concatenate([ortho_weight(options['dim_proj']),
ortho_weight(options['dim_proj']),
ortho_weight(options['dim_proj']),
ortho_weight(options['dim_proj'])], axis=1)
else:
# bug fixed: different dim for embedding and hidden state
W = numpy.concatenate([norm_weight(options['dim_word'], options['dim_proj']),
norm_weight(options['dim_word'], options['dim_proj']),
norm_weight(options['dim_word'], options['dim_proj']),
norm_weight(options['dim_word'], options['dim_proj'])], axis=1)
params[_p(prefix, 'W')] = W
U = numpy.concatenate([ortho_weight(options['dim_proj']),
ortho_weight(options['dim_proj']),
ortho_weight(options['dim_proj']),
ortho_weight(options['dim_proj'])], axis=1)
params[_p(prefix, 'U')] = U
b = numpy.zeros((4 * options['dim_proj'],))
params[_p(prefix, 'b')] = b.astype(config.floatX)
return params
def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
nsteps = state_below.shape[0]
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
def _step(m_, x_, h_, c_):
preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
preact += x_
i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
c = tensor.tanh(_slice(preact, 3, options['dim_proj']))
c = f * c_ + i * c
c = m_[:, None] * c + (1. - m_)[:, None] * c_
h = o * tensor.tanh(c)
h = m_[:, None] * h + (1. - m_)[:, None] * h_
return h, c
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
dim_proj = options['dim_proj']
rval, updates = theano.scan(_step,
sequences=[mask, state_below],
outputs_info=[tensor.alloc(numpy_floatX(0.),
n_samples,
dim_proj),
tensor.alloc(numpy_floatX(0.),
n_samples,
dim_proj)],
name=_p(prefix, '_layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval[0]
def lstm_skip_layer(tparams, state_below, options, prefix='lstm_skip', mask=None):
'''
lstm layer with soft identity skip connections
'''
nsteps = state_below.shape[0]
n_skip = options['skip_steps']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
def _lstm_unit(m_, x_, h_, c_, h_skip, hcnt):
skip_flag = tensor.eq(hcnt % n_skip, 0)
preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
preact += x_
# gates
i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
c = tensor.tanh(_slice(preact, 3, options['dim_proj']))
# cell state
c = f * c_ + i * c
c = m_[:, None] * c + (1. - m_)[:, None] * c_
        # new hidden state
h = o * tensor.tanh(c) + h_skip * skip_flag
h = m_[:, None] * h + (1. - m_)[:, None] * h_
# update h_skip
h_skip = h_skip * (1-skip_flag) + h * skip_flag
hcnt += 1
return h, c, h_skip, hcnt
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
dim_proj = options['dim_proj']
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
c = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
rval, updates = theano.scan(_lstm_unit,
sequences=[mask, state_below],
outputs_info=[h, c, h_skip, hcnt],
name=_p(prefix, 'layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
# return all hidden states h(t)
return rval[0]
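# Note on the skip schedule (added for clarity): skip_flag equals 1 exactly
# when hcnt % skip_steps == 0, so with skip_steps=3 the stored state h_skip is
# injected into h(t) and then refreshed at steps t = 0, 3, 6, ..., giving a
# soft identity connection across every block of skip_steps time steps.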
def param_init_lstm_pskip(options, params, prefix='lstm_pskip', hier_level=False):
"""
Init the LSTM-pskip parameter
"""
# same as vanilla lstm layer
params = param_init_lstm(options, params, prefix=prefix, hier_level=hier_level)
# weight for skip connection
params[_p(prefix, 'W_skip')] = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
# random value in (0,1)
return params
def lstm_pskip_layer(tparams, state_below, options, prefix='lstm_pskip', mask=None):
'''
lstm layer with soft parametric weighted skip connections
'''
nsteps = state_below.shape[0]
n_skip = options['skip_steps']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
def _lstm_unit(m_, x_, h_, c_, h_skip, hcnt):
'''
lstm_soft_pskip unit at each time step
:param m_: mask
:param x_: x(t) input
:param h_: h(t-1) recurrent hidden state
:param c_: c(t-1) cell state
:param h_skip: h(t-n_skip) for skip connection
:param hcnt: mark current time stamp (to determine whether skip connection exists)
:return: h(t), c(t), h_skip, hcnt
'''
skip_flag = tensor.eq(hcnt % n_skip, 0)
preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
preact += x_
# gates
i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
c = tensor.tanh(_slice(preact, 3, options['dim_proj']))
# cell state
c = f * c_ + i * c
c = m_[:, None] * c + (1. - m_)[:, None] * c_
        # new hidden state
h = o * tensor.tanh(c) + h_skip * skip_flag * tparams[_p(prefix, 'W_skip')]
h = m_[:, None] * h + (1. - m_)[:, None] * h_
# update h_skip
h_skip = h_skip * (1-skip_flag) + h * skip_flag
        hcnt += 1 # bug fixed: advance the time-step counter
return h, c, h_skip, hcnt
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
dim_proj = options['dim_proj']
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
c = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
rval, updates = theano.scan(_lstm_unit,
sequences=[mask, state_below],
outputs_info=[h, c, h_skip, hcnt],
name=_p(prefix, 'layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
# return all hidden states h(t)
return rval[0]
# ===========================
# ResNet-related layers
# ResNet, ResNet with identity skip connections (full and soft),
# ResNet with parametric skip connections(soft)
# ===========================
def param_init_residual(options, params, prefix='residual'):
"""
Init the residual_network parameter:
"""
# weight for input x
depth = options['unit_depth']
Wx = dict()
for idx in xrange(depth):
Wx[idx] = norm_weight(options['dim_word'], options['dim_proj'])
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
params[_p(prefix, 'W')] = W
b = numpy.zeros((depth * options['dim_proj'],))
params[_p(prefix, 'b')] = b.astype(config.floatX)
# weight for identity connection
'''
w_res = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
params[_p(prefix, 'w_res')] = w_res.astype(config.floatX)
'''
# weight for inter-states
for idx in xrange(depth):
U = ortho_weight(options['dim_proj'])
params[_p(prefix, 'U'+str(idx+1))] = U
return params
def residual_layer(tparams, state_below, options, prefix='residual', mask=None):
'''
vanilla residual layer (recurrent depth adjustable)
'''
# here state_below in x_emb
nsteps = state_below.shape[0]
depth = options['unit_depth']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
def _resblock(m_, x_, h_):
y = h_
for idx in xrange(depth):
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
h = tensor.tanh(h_ + y)
h = m_[:, None] * h + (1. - m_)[:, None] * h_
return h
# state_below = W*x(t)+b (for all inter_state y)
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
h = tensor.alloc(numpy_floatX(0.), n_samples, options['dim_proj'])
rval, updates = theano.scan(_resblock,
sequences=[mask, state_below],
outputs_info=[h],
name=_p(prefix, 'layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval # bug fixed: not rval[0], attention here
def residual_full_skip_layer(tparams, state_below, options, prefix='residual_full_skip', mask=None):
'''
residual layer with full skip connections (direct link without weight)
'''
# here state_below in x_emb
nsteps = state_below.shape[0]
depth = options['unit_depth']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
# input mask, x(t), h(t-1), H(t-1)
def _resblock(m_, x_, h_, H_):
y = h_
for idx in xrange(depth):
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
# new hidden state
h = tensor.tanh(h_ + y + H_[:,:,0])
h = m_[:, None] * h + (1. - m_)[:, None] * h_ # mask
# update skip hidden matrix
H = tensor.zeros_like(H_)
H = tensor.set_subtensor(H[:,:,:-1], H_[:,:,1:])
H = tensor.set_subtensor(H[:,:,-1], h)
return h, H
# state_below = W*x(t)+b (for all inter_state y)
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
dim_proj = options['dim_proj']
n_skip = options['skip_steps']
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
H = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj, n_skip)
rval, updates = theano.scan(_resblock,
sequences=[mask, state_below],
outputs_info=[h, H],
name=_p(prefix, '_layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval[0] # return all hidden states h
def residual_skip_layer(tparams, state_below, options, prefix='residual_skip', mask=None):
'''
residual layer with (soft) skip connections (direct link without weight)
'''
# here state_below in x_emb
nsteps = state_below.shape[0]
depth = options['unit_depth']
dim_proj = options['dim_proj']
n_skip = options['skip_steps']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
# input mask, x(t), h(t-1), h(skip), time_idx
def _resblock(m_, x_, h_, h_skip, hcnt):
y = h_
skip_flag = theano.tensor.eq(hcnt%n_skip, 0)
for idx in xrange(depth):
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
# new hidden state
h = tensor.tanh(h_ + y + h_skip*skip_flag)
h = m_[:, None] * h + (1. - m_)[:, None] * h_ # mask
# update h(skip)
h_skip = h_skip*(1-skip_flag) + h*skip_flag
hcnt += 1
return h, h_skip, hcnt
# state_below = W*x(t)+b (for all inter_state y)
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
# fixme: 0-dim init
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
rval, updates = theano.scan(_resblock,
sequences=[mask, state_below],
outputs_info=[h, h_skip, hcnt],
name=_p(prefix, '_layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval[0] # return all hidden states h
def param_init_residual_pskip(options, params, prefix='residual_pskip'):
"""
Init the residual network with parametric weighted skip connections:
"""
# weight for input x
depth = options['unit_depth']
Wx = dict()
for idx in xrange(depth):
Wx[idx] = norm_weight(options['dim_word'], options['dim_proj'])
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
params[_p(prefix, 'W')] = W
b = numpy.zeros((depth * options['dim_proj'],))
params[_p(prefix, 'b')] = b.astype(config.floatX)
# weight for skip connection
params[_p(prefix, 'W_skip')] = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
# random value in (0,1)
# weight for inter-states
for idx in xrange(depth):
U = ortho_weight(options['dim_proj'])
params[_p(prefix, 'U'+str(idx+1))] = U
return params
def residual_pskip_layer(tparams, state_below, options, prefix='residual_pskip', mask=None):
'''
residual layer with soft parametric weighted skip connections
'''
# here state_below in x_emb
nsteps = state_below.shape[0]
depth = options['unit_depth']
dim_proj = options['dim_proj']
n_skip = options['skip_steps']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
# input mask, x(t), h(t-1), h(skip), time_idx
def _resblock(m_, x_, h_, h_skip, hcnt):
y = h_
skip_flag = theano.tensor.eq(hcnt%n_skip, 0)
for idx in xrange(depth):
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
# new hidden state
h = tensor.tanh(h_ + y + h_skip * skip_flag * tparams[_p(prefix, 'W_skip')])
h = m_[:, None] * h + (1. - m_)[:, None] * h_ # mask
# update h(skip)
h_skip = h_skip*(1-skip_flag) + h*skip_flag
hcnt += 1
return h, h_skip, hcnt
# state_below = W*x(t)+b (for all inter_state y)
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
# fixme: 0-dim init
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
rval, updates = theano.scan(_resblock,
sequences=[mask, state_below],
outputs_info=[h, h_skip, hcnt],
name=_p(prefix, '_layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval[0] # return all hidden states h
# ===========================
# RNN-related layers
# simple rnn and rnn with parametric skip connections (soft)
# ===========================
def param_init_rnn(options, params, prefix='rnn', hier_level=False):
'''
Initialize parameters for simple rnn unit
Support hierarchical architecture
'''
if hier_level:
W = ortho_weight(options['dim_proj'])
else:
W = norm_weight(options['dim_word'], options['dim_proj'])
params[_p(prefix, 'W')] = W
U = ortho_weight(options['dim_proj'])
params[_p(prefix, 'U')] = U
b = numpy.zeros((options['dim_proj']))
params[_p(prefix, 'b')] = b.astype(config.floatX)
return params
def rnn_layer(tparams, state_below, options, prefix='rnn', mask=None):
nsteps = state_below.shape[0]
dim_proj = options['dim_proj']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
# input: mask, x(t), h(t-1)
def _rnn_unit(m_, x_, h_):
h = tensor.tanh(tensor.dot(x_, tparams[_p(prefix, 'W')]) +
tensor.dot(h_, tparams[_p(prefix, 'U')]) +
tparams[_p(prefix, 'b')])
h = m_[:, None] * h + (1.-m_)[:, None] * h_ # mask
return h
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
rval, updates = theano.scan(_rnn_unit,
sequences=[mask, state_below],
outputs_info=[h],
name=_p(prefix, 'layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval
def param_init_rnn_pskip(options, params, prefix='rnn_pskip', hier_level=False):
'''
Initialize parameters for simple-rnn unit with parametric soft skip connections
'''
# weight for vanilla simple-rnn
params = param_init_rnn(options, params, prefix=prefix, hier_level=hier_level)
# weight for skip connection
params[_p(prefix, 'W_skip')] = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
return params
def rnn_pskip_layer(tparams, state_below, options, prefix='rnn_pskip', mask=None):
nsteps = state_below.shape[0]
n_skip = options['skip_steps']
dim_proj = options['dim_proj']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _rnn_pskip(m_, x_, h_, h_skip, hcnt):
skip_flag = tensor.eq(hcnt % n_skip, 0)
h = tensor.tanh(tensor.dot(x_, tparams[_p(prefix, 'W')]) +
tensor.dot(h_, tparams[_p(prefix, 'U')]) +
tparams[_p(prefix, 'b')] +
skip_flag * h_skip * tparams[_p(prefix, 'W_skip')])
h = m_[:, None] * h + (1.-m_)[:, None] * h_
h_skip = skip_flag * h + (1-skip_flag) * h_skip
hcnt += 1
return h, h_skip, hcnt
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
rval, updates = theano.scan(_rnn_pskip,
sequences=[mask, state_below],
outputs_info=[h, h_skip, hcnt],
name=_p(prefix, 'layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval[0]
# ===========================
# ResNet modifications
# ===========================
def residual_pskip_mod_layer(tparams, state_below, options, prefix='residual_pskip_mod', mask=None):
'''
residual layer with soft parametric weighted skip connections
modifications on original pskip model
'''
# here state_below in x_emb
nsteps = state_below.shape[0]
depth = options['unit_depth']
dim_proj = options['dim_proj']
n_skip = options['skip_steps']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
assert mask is not None
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
# input mask, x(t), h(t-1), h(skip), time_idx
def _resblock_mod(m_, x_, h_, h_skip, hcnt):
y = h_
skip_flag = theano.tensor.eq(hcnt % n_skip, 0)
for idx in xrange(depth):
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
# y(i) = sigmoid(Wx(t)+b + Uy(i-1))
y = tensor.nnet.sigmoid(_slice(x_, idx, options['dim_proj']) + hy)
# modification: skip connection after activation
h = tensor.tanh(h_ + y) + h_skip * skip_flag * tparams[_p(prefix, 'W_skip')]
h = m_[:, None] * h + (1. - m_)[:, None] * h_
h_skip = h_skip*(1-skip_flag) + h*skip_flag
hcnt += 1
return h, h_skip, hcnt
# state_below = W*x(t)+b (for all inter_state y)
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
h_skip = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
# fixme: 0-dim init
hcnt = tensor.zeros_like(theano.shared(10.).astype('float32'))
rval, updates = theano.scan(_resblock_mod,
sequences=[mask, state_below],
outputs_info=[h, h_skip, hcnt],
name=_p(prefix, '_layers'),
n_steps=nsteps,
truncate_gradient=options['truncate_grad'])
return rval[0] # return all hidden states h
def param_init_presidual(options, params, prefix='presidual', nin=None, dim=None):
"""
Init the parametric_residual_network parameter:
"""
if nin is None:
nin = options['dim_word']
if dim is None:
dim = options['dim_proj']
# weight for input x
depth = options['unit_depth']
Wx = dict()
for idx in xrange(depth):
Wx[idx] = norm_weight(nin, dim)
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
params[_p(prefix, 'W')] = W
b = numpy.zeros((depth * dim,))
params[_p(prefix, 'b')] = b.astype(config.floatX)
w_res = rand_weight(dim, 1)
params[_p(prefix, 'w_res')] = w_res.astype(config.floatX)
b_res = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
params[_p(prefix, 'b_res')] = b_res
# weight for inter-states
for idx in xrange(depth):
U = ortho_weight(dim)
params[_p(prefix, 'U'+str(idx+1))] = U
return params
def presidual_layer(tparams, state_below, options, prefix='presidual', mask=None,
one_step=False, init_state=None, **kwargs):
'''
parametric residual layer (recurrent depth adjustable)
parametric vector on identity connection
'''
if one_step:
assert init_state, 'previous state must be provided'
# here state_below in x_emb
nsteps = state_below.shape[0]
depth = options['unit_depth']
dim = options['dim_proj']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
if mask is None:
mask = tensor.alloc(1., state_below.shape[0], 1)
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
# input mask, x(t), h(t-1)
def _presblock(m_, x_, h_):
y = h_
for idx in xrange(depth):
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
y = tensor.nnet.sigmoid(_slice(x_, idx, dim) + hy)
# p = 2*sigmoid(wh(t-1)+b)-1
p = 2 * tensor.nnet.sigmoid(tensor.dot(h_, tparams[_p(prefix, 'w_res')]) + tparams[_p(prefix, 'b_res')]) - 1
p_vec = p.reshape(p.shape[0], 1)
# h(t) = tanh(ph(t-1)+y)
h = tensor.tanh(tensor.dot(tensor.nlinalg.alloc_diag(p_vec), h_) + y)
h = m_[:, None] * h + (1. - m_)[:, None] * h_
return h
# state_below = W*x(t)+b (for all inter_state y)
state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
tparams[_p(prefix, 'b')])
if init_state is None:
init_state = tensor.alloc(numpy_floatX(0.), n_samples, dim)
if one_step:
rval = _presblock(mask, state_below, init_state)
else:
rval, updates = theano.scan(_presblock,
sequences=[mask, state_below],
outputs_info=[init_state],
name=_p(prefix, 'layers'),
n_steps=nsteps)
# rval = [rval] # note: for consistency among model layers
return rval
def param_init_pxresidual(options, params, prefix='pxresidual', nin=None, dim=None):
"""
Init the parametric (with respect to input) residual network parameter:
"""
if nin is None:
nin = options['dim_word']
if dim is None:
dim = options['dim_proj']
# weight for input x
depth = options['unit_depth']
Wx = dict()
for idx in xrange(depth):
Wx[idx] = norm_weight(nin, dim)
W = numpy.concatenate([ww for kk, ww in Wx.iteritems()], axis=1)
params[_p(prefix, 'W')] = W
b = numpy.zeros((depth * dim,))
params[_p(prefix, 'b')] = b.astype(config.floatX)
w_res = rand_weight(dim, 1)
params[_p(prefix, 'w_res')] = w_res.astype(config.floatX)
u_res = rand_weight(nin, 1)
params[_p(prefix, 'u_res')] = u_res.astype(config.floatX)
b_res = numpy.array([numpy.random.random_sample()]).astype('float32')[0]
params[_p(prefix, 'b_res')] = b_res
# weight for inter-states
for idx in xrange(depth):
U = ortho_weight(dim)
params[_p(prefix, 'U'+str(idx+1))] = U
return params
def pxresidual_layer(tparams, state_below, options, prefix='pxresidual', mask=None,
one_step=False, init_state=None, **kwargs):
'''
parametric (with respect to input) residual layer (recurrent depth adjustable)
parametric vector on identity connection
'''
if one_step:
assert init_state, 'previous state must be provided'
# here state_below in x_emb
nsteps = state_below.shape[0]
depth = options['unit_depth']
dim = options['dim_proj']
if state_below.ndim == 3:
n_samples = state_below.shape[1]
else:
n_samples = 1
if mask is None:
mask = tensor.alloc(1., state_below.shape[0], 1)
def _slice(_x, n, dim):
if _x.ndim == 3:
return _x[:, :, n * dim:(n + 1) * dim]
return _x[:, n * dim:(n + 1) * dim]
# input mask, x(t), h(t-1)
def _presblock(m_, x_, px_, h_):
y = h_
for idx in xrange(depth):
hy = tensor.dot(y, tparams[_p(prefix, 'U'+str(idx+1))])
y = tensor.nnet.sigmoid(_slice(x_, idx, dim) + hy)
# p = 2 * sigmoid(wh(t-1) + (ux(t)+b)) - 1
p = 2 * tensor.nnet.sigmoid(tensor.dot(h_, tparams[_p(prefix, 'w_res')]) + px_) - 1
p_vec = p.reshape(p.shape[0], 1)
# h(t) = tanh(p*h(t-1) + y)
h = tensor.tanh(tensor.dot(tensor.nlinalg.alloc_diag(p_vec), h_) + y)
h = m_[:, None] * h + (1. - m_)[:, None] * h_
return h
# state_below_x = W*x(t)+b (for all inter_state y)
state_below_x = tensor.dot(state_below, tparams[_p(prefix, 'W')]) \
+ tparams[_p(prefix, 'b')]
# state_below_px = u_res*x(t)+b_res (for parametric weight on identity connection)
state_below_px = tensor.dot(state_below, tparams[_p(prefix, 'u_res')]) \
+ tparams[_p(prefix, 'b_res')]
if init_state is None:
init_state = tensor.alloc(numpy_floatX(0.), n_samples, dim)
if one_step:
rval = _presblock(mask, state_below_x, state_below_px, init_state)
else:
rval, updates = theano.scan(_presblock,
sequences=[mask, state_below_x, state_below_px],
outputs_info=[init_state],
name=_p(prefix, 'layers'),
n_steps=nsteps)
# rval = [rval] # note: for consistency among model layers
return rval


@ -0,0 +1,17 @@
import os
def MapDeviceIds(comm):
rank = comm.Get_rank()
num_machine = comm.Get_size()
os.system('python GPU_Usage.py ' + str(rank) + ' > record' + str(rank))
comm.Barrier()
if rank == 0:
os.system('python AllocateGPU.py ' + str(num_machine) + ' > DirtyRecord')
comm.Barrier()
cardid = str(0)
with open('DirtyRecord', 'r') as f:
for idx, line in enumerate(f):
if idx == rank:
cardid = line.strip()
break
return cardid
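# Hedged usage sketch (assumes mpi4py is available; not part of the original code):
#   from mpi4py import MPI
#   cardid = MapDeviceIds(MPI.COMM_WORLD)
#   os.environ['THEANO_FLAGS'] = 'device=gpu' + cardid   # pin this rank to its card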


@ -0,0 +1,379 @@
"""
model for classification task
supports simple-rnn, lstm, hierarchical lstm
supports lstm with identity skip-connections(soft), parametric skip-connections(soft)
supports resnet, resnet with identity skip-connections(hard and soft), parametric skip connections(soft)
supports hybrid structure (lstm+resnet)
supports dropout on non-recurrent layers, gradient clipping, L2-regularization
"""
__author__ = 'v-yirwan'
import sys
import time
import numpy
import cPickle as pkl
import theano
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from Layers import get_layer
from Data import *
from Util import *
# Set the random number generators' seeds for consistency
SEED = 123
numpy.random.seed(SEED)
def _p(pp, name):
return '%s_%s' % (pp, name)
def init_params(options):
"""
Global (not LSTM) parameter. For the embedding and the classifier.
"""
params = OrderedDict()
# embedding
if options['dataset'] != 'mnist':
randn = rand_weight(options['n_words'], options['dim_word'])
params['Wemb'] = randn.astype(config.floatX)
# encoder layer
params = get_layer(options['encoder'])[0](options, params,
prefix=options['encoder'])
# classifier
if options['lastHiddenLayer'] is not None:
params['U'] = 0.01 * numpy.random.randn(options['lastHiddenLayer'],
options['ydim']).astype(config.floatX)
params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX)
params['ToLastHidden_W'] = 0.01 * numpy.random.randn(options['dim_proj'],
options['lastHiddenLayer']).astype(config.floatX)
params['ToLastHidden_b'] = numpy.zeros((options['lastHiddenLayer'],)).astype(config.floatX)
else:
params['U'] = 0.01 * numpy.random.randn(options['dim_proj'],
options['ydim']).astype(config.floatX)
params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX)
return params
def load_params(path, params):
    failed = 0
    pp = numpy.load(path)
    for kk, vv in params.items():
        if kk not in pp:
            # keep the random initialization for missing keys and report below
            failed += 1
            print 'Warning: %s is not in the archive' % kk
            continue
        params[kk] = pp[kk]
    print failed, 'parameters failed to load out of', len(params)
return params
def init_tparams(params):
tparams = OrderedDict()
for kk, pp in params.items():
tparams[kk] = theano.shared(params[kk], name=kk)
return tparams
def encoder_word_layer(tparams, state_below, options, mask=None):
'''
word(bottom)-level encoder for hierarchical architecture
'''
def _encode(x_sub, mask_sub, proj_sub):
n_timesteps = x_sub.shape[0]
n_samples = x_sub.shape[1]
emb_sub = tparams['Wemb'][x_sub.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
proj_sub = get_layer(options['encoder'])[1](tparams, emb_sub, options,
prefix=options['encoder']+'_word',
mask=mask_sub)
return proj_sub[-1]
proj_sub = tensor.alloc(numpy_floatX(0.), state_below.shape[2], options['dim_proj'])
rval, update = theano.scan(_encode,
sequences=[state_below, mask],
outputs_info=[proj_sub],
name='word_encoder_layer',
n_steps=state_below.shape[0])
return rval
def build_model(tparams, options):
trng = RandomStreams(SEED)
# Used for dropout.
use_noise = theano.shared(numpy_floatX(0.))
if options['dataset'] == 'mnist':
print 'Using mnist dataset with single number input'
x = tensor.matrix('x', dtype='float32')
else:
print 'Using text dataset with embedding input'
x = tensor.matrix('x', dtype='int64')
mask = tensor.matrix('mask', dtype=config.floatX)
y = tensor.vector('y', dtype='int64')
n_timesteps = x.shape[0]
n_samples = x.shape[1]
# input word embedding
if options['dataset'] == 'mnist':
emb = x.reshape([n_timesteps, n_samples, options['dim_word']])
else:
emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
# dropout on embedding
if options['dropout_input'] > 0:
print 'Applying drop-out on input embedding (dropout_input:', options['dropout_input'], ')'
emb = dropout_layer(emb, options['dropout_input'], use_noise, trng)
# encoder information
print 'Using', options['encoder'], 'unit'
if options['truncate_grad'] is not None and options['truncate_grad'] > 0:
print 'Using gradient truncation to', options['truncate_grad'], 'steps'
else:
options['truncate_grad'] = -1
# encoding layer
proj = get_layer(options['encoder'])[1](tparams, emb, options,
prefix=options['encoder'],
mask=mask)
# pooling
if options['mean_pooling']:
print 'Using mean_pooling'
proj = (proj * mask[:, :, None]).sum(axis=0) # mean pooling
proj = proj / mask.sum(axis=0)[:, None]
else:
print 'Using last hidden state'
proj = proj[-1] # last hidden state
sys.stdout.flush()
# dropout on hidden states
if options['lastHiddenLayer'] is not None:
lastH = tensor.dot(proj, tparams['ToLastHidden_W']) + tparams['ToLastHidden_b']
lastH = tensor.nnet.sigmoid(lastH)
if options['dropout_output'] > 0:
lastH = dropout_layer(lastH, options['dropout_output'], use_noise, trng)
pred = tensor.nnet.softmax(tensor.dot(lastH, tparams['U']) + tparams['b'])
else:
if options['dropout_output'] > 0:
print 'Applying drop-out on hidden states (dropout_output:', options['dropout_output'], ")"
proj = dropout_layer(proj, options['dropout_output'], use_noise, trng)
pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])
# for training
f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred') # sample by argmax
off = 1e-8
if pred.dtype == 'float16':
off = 1e-6
nlls = -tensor.log(pred[tensor.arange(n_samples), y] + off)
return use_noise, x, mask, y, f_pred_prob, f_pred, nlls
class Model:
def __init__(self,
                 dim_word=500, # word embedding dimension
dim_proj=1024, # LSTM number of hidden units
patience=10, # Number of epoch to wait before early stop if no progress
max_epochs=5000, # The maximum number of epoch to run
decay_c=-1., # Weight decay (for L2-regularization)
clip_c=-1., # gradient clipping threshold
lrate=1., # Learning rate for sgd (not used for adadelta and rmsprop)
n_words=10000, # Vocabulary size
optimizer='adadelta',
encoder='lstm', # name of encoder unit, refer to 'layers'
encoder2=None, # only used in hybrid mode
hierarchical=False, # whether use hierarchical structure
hier_len=None, # length of bottom (word-level) encoder
hybrid=False, # whether use hybrid model
mean_pooling=False, # use last hidden state if false
unit_depth=-1, # recurrent depth of residual unit
skip_steps=-1, # skip connection length (h(t) -> h(t+skip_steps))
skip_steps2=-1, # only used in hybrid mode
                 truncate_grad=-1, # number of steps to use in truncated BPTT; set to -1 to disable
saveto='model.npz', # The best model will be saved there
dispFreq=50, # Display the training progress after this number of updates
validFreq=300, # Compute the validation error after this number of updates
newDumpFreq=5000000, # Dump model into a new file after this number of updates
                 maxlen=None, # Sequences longer than this get ignored
batch_size=16, # The batch size during training.
batch_len_threshold=None, # use dynamic batch size if sequence lengths exceed this threshold
valid_batch_size=16, # The batch size used for validation/test set.
                 dataset='text', # dataset type
corpus='imdb.pkl', # path to load training data
start_iter=0,
start_epoch=0,
noise_std=0.,
lastHiddenLayer=None,
dropout_output=None, # Dropout on output hidden states (before softmax layer)
dropout_input=None, # Dropout on input embeddings
reload_options=None, # Path to a saved model options we want to start from
reload_model=None, # Path to a saved model we want to start from.
embedding=None, # Path to the word embedding file (otherwise randomized)
warm_LM=None,
test_size=None, # If >0, we keep only this number of test example.
monitor_grad=False, # Print gradient norm to log file at each iteration if set True
logFile='log.txt' # Path to log file
):
# Model options
self.model_options = locals().copy()
self.model_options['self'] = None
# log files
self.F_log = open(logFile, "a")
if start_iter == 0:
self.F_log.write("model options:\n")
for kk, vv in self.model_options.iteritems():
self.F_log.write("\t"+kk+":\t"+str(vv)+"\n")
self.F_log.write("-----------------------------------------\n")
pkl.dump(self.model_options, open('%s.pkl' % saveto, 'wb'))
print 'Loading data...',
if dataset == 'mnist':
self.trainSet, self.validSet, self.testSet = load_mnist(path=corpus,
fixed_permute=True,
rand_permute=False)
else:
self.trainSet, self.validSet, self.testSet = load_data(path=corpus,
n_words=n_words,
maxlen=maxlen,
sort_by_len=True,
fixed_valid=True)
print 'Done! '
print 'Training', len(self.trainSet[0]), 'Valid', len(self.validSet[0]), 'Test', len(self.testSet[0])
sys.stdout.flush()
if test_size > 0:
test_size = min(test_size, len(self.testSet[0]))
idx = numpy.arange(len(self.testSet[0]))
numpy.random.shuffle(idx)
idx = idx[:test_size]
self.testSet = ([self.testSet[0][n] for n in idx], [self.testSet[1][n] for n in idx])
# number of classes
ydim = numpy.max(self.trainSet[1]) + 1
self.model_options['ydim'] = ydim
print 'Initializing model parameters...',
params = init_params(self.model_options)
print 'Done'
print 'Model size:', self.model_options['dim_word'], '*', self.model_options['dim_proj']
sys.stdout.flush()
# load pre-trained word embedding
if embedding is not None and os.path.exists(embedding):
Wemb = numpy.array(numpy.load(open(embedding, "rb")))
if Wemb.shape[0] == self.model_options['n_words'] and \
Wemb.shape[1] == self.model_options['dim_word']:
print 'Using pre-trained word embedding'
params['Wemb'] = Wemb.astype(numpy.float32) # bug fixed
print 'vocab size', params['Wemb'].shape[0], ', dim', params['Wemb'].shape[1]
# reload options
if reload_options is not None and os.path.exists(reload_options):
print "Reloading model options...",
with open(reload_options, 'rb') as f:
self.model_options = pkl.load(f)
print "Done"
# reload parameters
self.start_iter = 0
self.start_epoch = 0
self.history_errs = []
if reload_model is not None and os.path.exists(reload_model): # bug fixed
print 'Reloading model parameters...',
load_params(reload_model, params)
self.start_iter = start_iter
self.start_epoch = start_epoch
#self.history_errs = list(numpy.load(self.model_options['reload_model'])['history_errs'])
print 'Done'
sys.stdout.flush()
if warm_LM is not None:
            print 'Warm-starting encoder parameters from a pre-trained language model'
warmLM_ = numpy.load(warm_LM)
assert params['lstm_W'].shape == warmLM_['encoder_W'].shape
assert params['lstm_b'].shape == warmLM_['encoder_b'].shape
assert params['lstm_U'].shape == warmLM_['encoder_U'].shape
assert params['Wemb'].shape == warmLM_['Wemb'].shape
params['lstm_W'] = warmLM_['encoder_W']
params['lstm_b'] = warmLM_['encoder_b']
params['lstm_U'] = warmLM_['encoder_U']
params['Wemb'] = warmLM_['Wemb']
self.tparams = init_tparams(params)
# build model
mask_proj = None
# vanilla structure
def GetNll(self):
print 'Using vanilla structure'
self.use_noise, x, mask, y, \
self.f_pred_prob, self.f_pred, nlls = \
build_model(self.tparams, self.model_options)
#inps = [x, mask, y]
return x, mask, y, nlls
def get_accu(self, data, iterator, hier_len=None):
"""
        Compute the classification accuracy on the given dataset.
        Modified to support hierarchical mode.
"""
valid_acc = 0
for _, valid_index in iterator:
if hier_len is not None:
x, mask, mask_proj, y = prepare_data_hier([data[0][t] for t in valid_index],
numpy.array(data[1])[valid_index],
hier_len=hier_len)
preds = self.f_pred(x, mask, mask_proj)
else:
x, mask, y = prepare_data([data[0][t] for t in valid_index],
numpy.array(data[1])[valid_index],
maxlen=None,
dataset=self.model_options['dataset'])
preds = self.f_pred(x, mask) # result obtained by argmax
valid_acc += (preds == y).sum() # note that batch is sorted in hier-mode
valid_acc = numpy_floatX(valid_acc) / numpy_floatX(len(data[0])) # accuracy
return valid_acc
def save_model(self, savefile, best_p=None):
if best_p is not None: # save the best model so far
params = best_p
else:
params = unzip(self.tparams)
numpy.savez(savefile, history_errs=self.history_errs, **params)
pkl.dump(self.model_options, open('%s.pkl' % self.model_options['saveto'], 'wb'))
def valid(self):
train_acc = self.get_accu(self.trainSet, self.kf_train)
#hier_len=self.model_options['hier_len'])
valid_acc = self.get_accu(self.validSet, self.kf_valid)
#hier_len=self.model_options['hier_len'])
test_acc = self.get_accu(self.testSet, self.kf_test)
#hier_len=self.model_options['hier_len'])
return train_acc, valid_acc, test_acc
def evaluate(self, *dataset):
acc = []
for k in xrange(len(dataset)):
data = dataset[k]
idx = get_minibatches_idx(len(data[0]), 16)
acc.append(self.get_accu(data, idx))
return acc
if __name__ == '__main__':
pass


@ -0,0 +1,257 @@
import numpy
from collections import OrderedDict
import theano
import theano.tensor as tensor
from theano import config
# ==========================
# some operations with hyper-parameters
# supports non-recurrent layer dropout, L2-regularization, gradient clipping
# ==========================
def l2_regularization(tparams, cost, decay_c):
decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
weight_decay = 0.
weight_decay += (tparams['U'] ** 2).sum()
weight_decay *= decay_c
cost += weight_decay
return cost
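# Note (added for clarity): only the classifier matrix U is penalized above;
# embedding and recurrent weights are left unregularized.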
def grad_clipping(grads, clip_c):
g2 = 0.
for g in grads:
g2 += (g**2).sum()
new_grads = []
for g in grads:
new_grads.append(tensor.switch(g2 > (clip_c**2), g/tensor.sqrt(g2) * clip_c, g))
grads = new_grads
return grads
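# Illustrative pure-numpy sketch of the global-norm rescaling above
# (the real function operates on symbolic Theano gradients):
def _demo_grad_clipping():
    g = [numpy.array([3., 4.])]                # global norm is 5
    clip_c = 1.
    g2 = sum((gg ** 2).sum() for gg in g)
    print [gg / numpy.sqrt(g2) * clip_c if g2 > clip_c ** 2 else gg for gg in g]
    # -> [array([ 0.6,  0.8])]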
def dropout_layer(state_before, dropout, use_noise, trng):
proj = tensor.switch(use_noise,
(state_before *
trng.binomial(state_before.shape,
p=(1-dropout), n=1,
dtype=state_before.dtype)),
state_before * (1-dropout))
return proj
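# Note (added for clarity): this is the classic, non-inverted dropout scheme.
# When use_noise is 1 (training) each unit is kept with probability 1 - dropout
# and left unscaled; when use_noise is 0 (evaluation) the activations are
# scaled by 1 - dropout so their expected magnitude matches the training phase.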
# ==========================
# optimizers
# supports sgd, adadelta and rmsprop
# only adadelta supports hierarchical structure
# ==========================
def sgd(lr, tparams, grads, x, mask, y, cost):
""" Stochastic Gradient Descent
    :note: A more complicated version of sgd than strictly needed; it is
    structured this way to match the adadelta and rmsprop interfaces.
"""
# New set of shared variable that will contain the gradient
# for a mini-batch.
gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
for k, p in tparams.items()]
gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    # Function that computes gradients for a mini-batch, but does not
    # update the weights.
f_grad_shared = theano.function([x, mask, y], cost, updates=gsup,
name='sgd_f_grad_shared')
pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)]
# Function that updates the weights from the previously computed
# gradient.
f_update = theano.function([lr], [], updates=pup,
name='sgd_f_update')
return f_grad_shared, f_update
def adadelta(lr, tparams, grads, x, mask, y, cost, mask_hier=None):
"""
An adaptive learning rate optimizer
# modified to support hierarchical mode
Parameters
----------
lr : Theano SharedVariable
Initial learning rate
    tparams: Theano SharedVariable
Model parameters
grads: Theano variable
        Gradients of cost w.r.t. parameters
x: Theano variable
Model inputs
mask: Theano variable
Sequence mask
y: Theano variable
Targets
cost: Theano variable
        Objective function to minimize
Notes
-----
For more information, see [ADADELTA]_.
.. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
Rate Method*, arXiv:1212.5701.
"""
zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
name='%s_grad' % k)
for k, p in tparams.items()]
running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
name='%s_rup2' % k)
for k, p in tparams.items()]
running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
name='%s_rgrad2' % k)
for k, p in tparams.items()]
zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
for rg2, g in zip(running_grads2, grads)]
if mask_hier is not None:
f_grad_shared = theano.function([x, mask, mask_hier, y], cost, updates=zgup + rg2up,
name='adadelta_f_grad_shared')
else:
f_grad_shared = theano.function([x, mask, y], cost, updates=zgup + rg2up,
name='adadelta_f_grad_shared')
updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
for zg, ru2, rg2 in zip(zipped_grads,
running_up2,
running_grads2)]
ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
for ru2, ud in zip(running_up2, updir)]
param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]
f_update = theano.function([lr], [], updates=ru2up + param_up,
on_unused_input='ignore',
name='adadelta_f_update')
return f_grad_shared, f_update
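# Hedged usage sketch of the two-function optimizer interface returned above
# (x/mask/y/cost/grads are the symbolic variables built elsewhere; the *_batch
# names are placeholders):
#   lr = tensor.scalar(name='lr')
#   f_grad_shared, f_update = adadelta(lr, tparams, grads, x, mask, y, cost)
#   cost_val = f_grad_shared(x_batch, mask_batch, y_batch)  # accumulate statistics
#   f_update(1.)                                            # apply the adadelta step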
def rmsprop(lr, tparams, grads, x, mask, y, cost):
"""
A variant of SGD that scales the step size by running average of the
recent step norms.
Parameters
----------
lr : Theano SharedVariable
Initial learning rate
    tparams: Theano SharedVariable
Model parameters
grads: Theano variable
        Gradients of cost w.r.t. parameters
x: Theano variable
Model inputs
mask: Theano variable
Sequence mask
y: Theano variable
Targets
cost: Theano variable
        Objective function to minimize
Notes
-----
For more information, see [Hint2014]_.
.. [Hint2014] Geoff Hinton, *Neural Networks for Machine Learning*,
lecture 6a,
http://cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
"""
zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
name='%s_grad' % k)
for k, p in tparams.items()]
running_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
name='%s_rgrad' % k)
for k, p in tparams.items()]
running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
name='%s_rgrad2' % k)
for k, p in tparams.items()]
zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
for rg2, g in zip(running_grads2, grads)]
f_grad_shared = theano.function([x, mask, y], cost,
updates=zgup + rgup + rg2up,
name='rmsprop_f_grad_shared')
updir = [theano.shared(p.get_value() * numpy_floatX(0.),
name='%s_updir' % k)
for k, p in tparams.items()]
updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads,
running_grads2)]
param_up = [(p, p + udn[1])
for p, udn in zip(tparams.values(), updir_new)]
f_update = theano.function([lr], [], updates=updir_new + param_up,
on_unused_input='ignore',
name='rmsprop_f_update')
return f_grad_shared, f_update
# ==========================
# matrix initializations
# supports normalized, orthogonal and randomized
# ==========================
def ortho_weight(ndim):
W = numpy.random.randn(ndim, ndim)
u, s, v = numpy.linalg.svd(W)
return u.astype(config.floatX)
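# Illustrative check (added for clarity): the SVD above yields an orthogonal
# matrix, i.e. W^T W is numerically the identity, which keeps recurrent
# activations well-scaled at initialization.
def _demo_ortho_weight():
    W = ortho_weight(4)
    print numpy.allclose(numpy.dot(W.T, W), numpy.eye(4), atol=1e-5)   # -> True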
def norm_weight(nin, nout=None, scale=0.01, ortho=True):
if nout is None:
nout = nin
if nout == nin and ortho:
W = ortho_weight(nin)
else:
# bug fixed: set to be ortho_init
# W = scale * numpy.random.randn(nin, nout)
W = numpy.random.randn(nin, nout)
u, s, v = numpy.linalg.svd(W)
if nin > nout:
W = u[:, :nout]
else:
W = v[:nin, :]
return W.astype('float32')
def rand_weight(nin, nout=None, scale=0.01, ortho=True):
if nout is None:
nout = nin
if nout == nin and ortho:
W = ortho_weight(nin)
else:
W = scale * numpy.random.randn(nin, nout)
return W.astype('float32')
# ==========================
# some utility functions
# ==========================
def zipp(params, tparams):
"""
When we reload the model. Needed for the GPU stuff.
"""
for kk, vv in params.items():
tparams[kk].set_value(vv)
def unzip(zipped):
"""
When we pickle the model. Needed for the GPU stuff.
"""
new_params = OrderedDict()
for kk, vv in zipped.items():
new_params[kk] = vv.get_value()
return new_params
def numpy_floatX(data):
return numpy.asarray(data, dtype=config.floatX)


@ -0,0 +1,32 @@
import sys
import codecs
if len(sys.argv) < 3:
raise Exception('Not enough argv')
theano_rc = r"""
[global]
mode = FAST_RUN
device = gpu
floatX = float32
on_unused_input = warn
optimizer = fast_run
#allow_gc=False
cuda.disable_gcc_cudnn_check=True
[lib]
cnmem = 0.75
[nvcc]
flags=-L{0}\libs
root=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
fast_math = True
"""
theano_rc = theano_rc.format(sys.argv[1])
print(theano_rc)
with codecs.open(sys.argv[2], 'w', 'utf-8') as f:
f.write(theano_rc)
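# Example invocation (mirrors the call in the GCR batch script; the Anaconda
# path is a placeholder):
#   python gen_theanorc.py C:\Anaconda2 .theanorc.txt
# writes a .theanorc.txt whose nvcc flags point at the given Python root.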


@ -0,0 +1,4 @@
@echo off
setlocal ENABLEDELAYEDEXPANSION
set THEANO_FLAGS=device=gpu5
python train_classifier_LM_NoDrop_google_sgd0.2.py


@ -0,0 +1,148 @@
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem Windows batch file to use Theano on GCR
@rem
@rem Updated: April 7, 2016
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem set the PATH system variable
@rem Start from the 26th letter
set working_sub_dir=%cd:~26%
set PATH=^
C:\Windows\system32;^
C:\Windows\System32\Wbem;^
C:\Windows\System32\WindowsPowerShell\v1.0\;^
C:\Windows;^
C:\Program Files\Microsoft HPC Pack 2012\Bin\;^
C:\Program Files\Microsoft MPI\Bin\;^
C:\Program Files (x86)\Windows Kits\8.1\Windows Performance Toolkit\
pushd \\gcr\Scratch\RR1\v-yixia\Theano
set ToolkitFolderDriver=%cd%
@rem set the environment variable for the CUDA 7.5 Toolkit
rem set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 rem the old version
set CUDA_HOME=%ToolkitFolderDriver%\CUDA\v7.0+cudnn4008
set CUDA_BIN=%CUDA_HOME%\bin
set CUDA_INCLUDE=%CUDA_HOME%\include
set CUDA_LIB=%CUDA_HOME%\lib\x64
set CUDA_LIBNVVP=%CUDA_HOME%\libnvvp
@rem add all CUDA Toolkit folders to the PATH system variable
set PATH=^
%CUDA_HOME%;^
%CUDA_BIN%;^
%CUDA_INCLUDE%;^
%CUDA_LIB%;^
%CUDA_LIBNVVP%;^
%PATH%
@echo %PATH%
@rem setting up VC complier
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem connect to the shared toolkit folder \\gcr\Tools\Shared_Toolkits\Theano
rem pushd \\gcr\Tools\Shared_Toolkits\Theano
@rem unset these variables
@set Framework40Version=
@set FrameworkDIR32=
@set FrameworkVersion32=
@set FSHARPINSTALLDIR=
@set VSINSTALLDIR=
@set WindowsSDK_ExecutablePath_x64=
@set WindowsSDK_ExecutablePath_x86=
@set VCINSTALLDIR=%ToolkitFolderDriver%\VS_portable\Microsoft Visual Studio 12.0v2\VC\
@set WindowsSdkDir=%ToolkitFolderDriver%\Windows Kits\8.1v2\
:amd64
@rem set Windows SDK include/lib path
@rem --------------------------------------------------
if not "%WindowsSdkDir%" == "" @set PATH=%WindowsSdkDir%bin\x64;%WindowsSdkDir%bin\x86;%PATH%
if not "%WindowsSdkDir%" == "" @set INCLUDE=%WindowsSdkDir%include\shared;%WindowsSdkDir%include\um;%WindowsSdkDir%include\winrt;%INCLUDE%
if not "%WindowsSdkDir%" == "" @set LIB=%WindowsSdkDir%lib\winv6.3\um\x64;%LIB%
if not "%WindowsSdkDir%" == "" @set LIBPATH=%WindowsLibPath%;%ExtensionSDKDir%\Microsoft.VCLibs\14.0\References\CommonConfiguration\neutral;%LIBPATH%
@rem set the environment variables for Microsoft Visual Studio
@rem --------------------------------------------------
@rem PATH
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%VCPackages" set PATH=%VCINSTALLDIR%VCPackages;%PATH%
if exist "%VCINSTALLDIR%BIN\amd64" set PATH=%VCINSTALLDIR%BIN\amd64;%PATH%
@rem --------------------------------------------------
@rem INCLUDE
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\INCLUDE" set INCLUDE=%VCINSTALLDIR%ATLMFC\INCLUDE;%INCLUDE%
if exist "%VCINSTALLDIR%INCLUDE" set INCLUDE=%VCINSTALLDIR%INCLUDE;%INCLUDE%
@rem --------------------------------------------------
@rem LIB
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIB=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIB%
if exist "%VCINSTALLDIR%LIB\amd64" set LIB=%VCINSTALLDIR%LIB\amd64;%LIB%
@rem --------------------------------------------------
@rem LIBPATH
@rem --------------------------------------------------
if exist "%VCINSTALLDIR%ATLMFC\LIB\amd64" set LIBPATH=%VCINSTALLDIR%ATLMFC\LIB\amd64;%LIBPATH%
if exist "%VCINSTALLDIR%LIB\amd64" set LIBPATH=%VCINSTALLDIR%LIB\amd64;%LIBPATH%
@rem set the environment variables for the cuDNN v4 (Feb 10, 2016) for CUDA 7.0 and later.
rem set CUDNN_PATH=%ToolkitFolderDriver%\Shared_Toolkits\Theano\CUDA\cudnn-4.0.7\cuda
rem set INCLUDE=%CUDNN_PATH%\include;%INCLUDE%
rem set LIB=%CUDNN_PATH%\lib\x64;%LIB%
rem set PATH=%CUDNN_PATH%\bin;%PATH%
set Platform=X64
set CommandPromptType=Native
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
@rem connect to your scratch storage \\gcr\Scratch\<location>\<alias>
@rem Note: Please copy ANACONDA2 to \\gcr\Scratch\<location>\<alias>\Anaconda2
@rem =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
pushd \\gcr\scratch\RR1\v-yixia
set CONDANETDRIVE=%cd:~0,2%
@rem set the environment variable for the Anaconda2
set ANACONDA2=%CONDANETDRIVE%\RR1\taoqin\Anaconda-gpu002
set ANACONDA2_SCRIPTS=%ANACONDA2%\Scripts
set ANACONDA2_BIN=%ANACONDA2%\Library\bin
@rem add Anaconda2 folders to the PATH system variable
set PATH=^
%ANACONDA2%;^
%ANACONDA2_BIN%;^
%ANACONDA2_SCRIPTS%;^
%PATH%
@echo %PATH%
@rem example files from DeepLearningTutorials are available at \\gcr\Tools\Shared_Toolkits\Theano\Examples
set PROJDRIVE=%CONDANETDRIVE%
set MYHOME=%PROJDRIVE%\RR1\v-yixia
set PROJHOME=%MYHOME%\%working_sub_dir%
%PROJDRIVE%
cd %PROJHOME%
@rem setup theano env (generate .theanorc.txt)
call python gen_theanorc.py %ANACONDA2% .theanorc.txt
del %userprofile%\.theanorc.txt /Q /F
copy .theanorc.txt %userprofile% /Y
call python write_script.py %*
call worker.bat
@echo delete theano env
del %userprofile%\.theanorc.txt /Q /F
popd
popd


@ -0,0 +1,39 @@
import re, os, numpy, sys
filename = r'.\gpu_usage_draft'
def GrabGPU():
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename
os.system(cmdstr)
def GetGPUUSage():
pattern = re.compile(r'(?P<num>[0-9]+)MiB[\s]+/')
mem = []
fo = open(filename, 'r')
for line in fo:
result = pattern.search(line)
if result:
mem.append(int(result.group('num')))
fo.close()
return numpy.array(mem).argsort()[0]
def print_script(cmd):
GrabGPU()
with open('worker.bat', 'w') as f:
f.write('@echo off\nsetlocal ENABLEDELAYEDEXPANSION\n')
if len(cmd) == 1:
f.write('set THEANO_FLAGS=device=gpu%d\n' % GetGPUUSage())
f.write('python ' + cmd[0])
elif len(cmd) == 2:
f.write('set THEANO_FLAGS=device=gpu' + cmd[1] + '\n')
f.write('python ' + cmd[0])
if __name__ == '__main__':
print_script(sys.argv[1:])
# os.system('del /q ' + filename + rank)


@ -0,0 +1,369 @@
"""
data loading and minibatch generation
"""
__author__ = 'v-yirwan'
import cPickle as pkl
import gzip
import os
import numpy
from theano import config
def get_dataset_file(dataset, default_dataset, origin):
'''
Look for it as if it was a full path, if not, try local file,
if not try in the data directory.
Download dataset if it is not present
'''
data_dir, data_file = os.path.split(dataset)
if data_dir == "" and not os.path.isfile(dataset):
# Check if dataset is in the data directory.
new_path = os.path.join(
os.path.split(__file__)[0],
"..",
"data",
dataset
)
if os.path.isfile(new_path) or data_file == default_dataset:
dataset = new_path
if (not os.path.isfile(dataset)) and data_file == default_dataset:
from six.moves import urllib
print('Downloading data from %s' % origin)
urllib.request.urlretrieve(origin, dataset)
return dataset
def load_data(path="imdb.pkl", n_words=100000, maxlen=None,
sort_by_len=True, fixed_valid=True, valid_portion=0.1):
'''
Loads the dataset
:type path: String
:param path: The path to the dataset (here IMDB)
:type n_words: int
    :param n_words: The number of words to keep in the vocabulary.
        All extra words are set to unknown (1).
:type maxlen: None or positive int
:param maxlen: the max sequence length we use in the train/valid set.
:type sort_by_len: bool
    :param sort_by_len: Sort by sequence length for the train,
        valid and test sets. This allows faster execution as it causes
        less padding per minibatch. Another mechanism must be used to
shuffle the train set at each epoch.
:type fixed_valid: bool
:param fixed_valid: load fixed validation set from the corpus file,
which would otherwise be picked randomly from the training set with
proportion [valid_portion]
:type valid_portion: float
:param valid_portion: The proportion of the full train set used for
the validation set.
'''
# Load the dataset
path = get_dataset_file(
path, "imdb.pkl",
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
if path.endswith(".gz"):
f = gzip.open(path, 'rb')
else:
f = open(path, 'rb')
train_set = pkl.load(f)
if fixed_valid:
valid_set = pkl.load(f)
test_set = pkl.load(f)
f.close()
def _truncate_data(train_set):
'''
drop sequences whose lengths exceed the maxlen threshold
:param train_set: a list of sequences list and corresponding labels list
:return: truncated train_set
'''
new_train_set_x = []
new_train_set_y = []
for x, y in zip(train_set[0], train_set[1]):
if len(x) < maxlen:
new_train_set_x.append(x)
new_train_set_y.append(y)
train_set = (new_train_set_x, new_train_set_y)
del new_train_set_x, new_train_set_y
return train_set
def _set_valid(train_set, valid_portion):
'''
set validation with [valid_portion] proportion of training set
'''
train_set_x, train_set_y = train_set
n_samples = len(train_set_x)
sidx = numpy.random.permutation(n_samples) # shuffle data
n_train = int(numpy.round(n_samples * (1. - valid_portion)))
valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
train_set_x = [train_set_x[s] for s in sidx[:n_train]]
train_set_y = [train_set_y[s] for s in sidx[:n_train]]
train_set = (train_set_x, train_set_y)
valid_set = (valid_set_x, valid_set_y)
del train_set_x, train_set_y, valid_set_x, valid_set_y
return train_set, valid_set
if maxlen:
train_set = _truncate_data(train_set)
if fixed_valid:
print 'Loading with fixed validation set...',
valid_set = _truncate_data(valid_set)
else:
print 'Setting validation set with proportion:', valid_portion, '...',
train_set, valid_set = _set_valid(train_set, valid_portion)
test_set = _truncate_data(test_set)
if maxlen is None and not fixed_valid:
train_set, valid_set = _set_valid(train_set, valid_portion)
def remove_unk(x):
return [[1 if w >= n_words else w for w in sen] for sen in x]
test_set_x, test_set_y = test_set
valid_set_x, valid_set_y = valid_set
train_set_x, train_set_y = train_set
# remove unk from dataset
train_set_x = remove_unk(train_set_x) # use 1 if unk
valid_set_x = remove_unk(valid_set_x)
test_set_x = remove_unk(test_set_x)
def len_argsort(seq):
return sorted(range(len(seq)), key=lambda x: len(seq[x]))
if sort_by_len:
sorted_index = len_argsort(test_set_x)
# ranked from shortest to longest
test_set_x = [test_set_x[i] for i in sorted_index]
test_set_y = [test_set_y[i] for i in sorted_index]
sorted_index = len_argsort(valid_set_x)
valid_set_x = [valid_set_x[i] for i in sorted_index]
valid_set_y = [valid_set_y[i] for i in sorted_index]
sorted_index = len_argsort(train_set_x)
train_set_x = [train_set_x[i] for i in sorted_index]
train_set_y = [train_set_y[i] for i in sorted_index]
train = (train_set_x, train_set_y)
valid = (valid_set_x, valid_set_y)
test = (test_set_x, test_set_y)
return train, valid, test
def load_mnist(path='mnist.pkl', fixed_permute=True, rand_permute=False):
f = open(path, 'rb')
train = pkl.load(f)
valid = pkl.load(f)
test = pkl.load(f)
f.close()
def _permute(data, perm):
x, y = data
x_new = []
for xx in x:
xx_new = [xx[pp] for pp in perm]
x_new.append(xx_new)
return (x_new, y)
def _trans2list(data):
x, y = data
x = [list(xx) for xx in x]
return (x, y)
if rand_permute:
print 'Using a fixed random permutation of pixels...',
perm = numpy.random.permutation(range(784))
train = _permute(train, perm)
valid = _permute(valid, perm)
test = _permute(test, perm)
elif fixed_permute:
print 'Using permuted dataset...',
train = _trans2list(train)
valid = _trans2list(valid)
test = _trans2list(test)
return train, valid, test
def get_minibatches_idx(n, minibatch_size, shuffle=False):
"""
Used to shuffle the dataset at each iteration.
"""
idx_list = numpy.arange(n, dtype="int32")
if shuffle:
numpy.random.shuffle(idx_list)
minibatches = []
minibatch_start = 0
for i in range(n // minibatch_size):
minibatches.append(idx_list[minibatch_start:
minibatch_start + minibatch_size])
minibatch_start += minibatch_size
if (minibatch_start != n):
# Make a minibatch out of what is left
minibatches.append(idx_list[minibatch_start:])
return zip(range(len(minibatches)), minibatches)
def get_minibatches_idx_bucket(dataset, minibatch_size, shuffle=False):
"""
divide into different buckets according to sequence lengths
dynamic batch size
"""
# divide into buckets
slen = [len(ss) for ss in dataset]
bucket1000 = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 0 and slen[sidx] <= 1000]
bucket3000 = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 1000 and slen[sidx] <= 3000]
bucket_long = [sidx for sidx in xrange(len(dataset))
if slen[sidx] > 3000]
# shuffle each bucket
if shuffle:
numpy.random.shuffle(bucket1000)
numpy.random.shuffle(bucket3000)
numpy.random.shuffle(bucket_long)
# make minibatches
def _make_batch(minibatches, bucket, minibatch_size):
minibatch_start = 0
n = len(bucket)
for i in range(n // minibatch_size):
minibatches.append(bucket[minibatch_start : minibatch_start + minibatch_size])
minibatch_start += minibatch_size
if (minibatch_start != n):
# Make a minibatch out of what is left
minibatches.append(bucket[minibatch_start:])
return minibatches
minibatches = []
_make_batch(minibatches, bucket1000, minibatch_size=minibatch_size)
_make_batch(minibatches, bucket3000, minibatch_size=minibatch_size//2)
_make_batch(minibatches, bucket_long, minibatch_size=minibatch_size//8)
# shuffle minibatches
numpy.random.shuffle(minibatches)
return zip(range(len(minibatches)), minibatches)
def prepare_data(seqs, labels, maxlen=None, dataset='text'):
"""Create the matrices from the datasets.
This pad each sequence to the same lenght: the lenght of the
longuest sequence or maxlen.
if maxlen is set, we will cut all sequence to this maximum
lenght.
This swap the axis!
"""
# x: a list of sentences
lengths = [len(s) for s in seqs]
if maxlen is not None:
new_seqs = []
new_labels = []
new_lengths = []
for l, s, y in zip(lengths, seqs, labels):
if l < maxlen:
new_seqs.append(s)
new_labels.append(y)
new_lengths.append(l)
lengths = new_lengths
labels = new_labels
seqs = new_seqs
if len(lengths) < 1:
return None, None, None
n_samples = len(seqs)
maxlen = numpy.max(lengths)
if dataset == 'mnist':
x = numpy.zeros((maxlen, n_samples)).astype('float32')
else:
x = numpy.zeros((maxlen, n_samples)).astype('int64')
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
for idx, s in enumerate(seqs):
x[:lengths[idx], idx] = s
x_mask[:lengths[idx], idx] = 1.
return x, x_mask, labels
def prepare_data_hier(seqs, labels, hier_len, maxlen=None, dataset='text'):
'''
prepare minibatch for hierarchical model
'''
# sort (long->short)
sorted_idx = sorted(range(len(seqs)), key=lambda x: len(seqs[x]), reverse=True)
seqs = [seqs[i] for i in sorted_idx]
labels = [labels[i] for i in sorted_idx]
# truncate data
lengths = [len(s) for s in seqs]
if maxlen is not None:
new_seqs = []
new_labels = []
new_lengths = []
for l, s, y in zip(lengths, seqs, labels):
if l < maxlen:
new_seqs.append(s)
new_labels.append(y)
new_lengths.append(l)
lengths = new_lengths
labels = new_labels
seqs = new_seqs
if len(lengths) < 1:
return None, None, None
# set batch size
n_samples = len(seqs)
maxlen = numpy.max(lengths)
if maxlen % hier_len == 0:
n_batch = maxlen/hier_len
else:
n_batch = maxlen//hier_len + 1
maxlen = n_batch * hier_len
# padding whole batch
if dataset == 'mnist':
x = numpy.zeros((maxlen, n_samples)).astype('float32')
else:
x = numpy.zeros((maxlen, n_samples)).astype('int64')
x_mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
for idx, s in enumerate(seqs):
x[:lengths[idx], idx] = s
x_mask[:lengths[idx], idx] = 1
# slice to mini-batches
x_batch = [x[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
if dataset == 'mnist':
x_batch = numpy.array(x_batch).astype('float32')
else:
x_batch = numpy.array(x_batch).astype('int64')
mask_batch = [x_mask[bidx*hier_len:(bidx+1)*hier_len, :] for bidx in range(n_batch)]
mask_batch = numpy.array(mask_batch).astype(config.floatX)
# mask for hier-level
mask_hier = numpy.ones((n_batch, n_samples)).astype(config.floatX)
for idx in range(n_samples):
mpos = numpy.where(x_mask[:, idx]==0)[0]
if len(mpos) == 0:
continue
bidx = min(mpos[0]//hier_len+1, n_batch)
if mpos[0] % hier_len == 0:
bidx -= 1 # bug fixed TODO: more elegant solution?
mask_hier[bidx:, idx] = 0
return x_batch, mask_batch, mask_hier, labels


@ -0,0 +1,38 @@
import re
import os
import socket
import sys
filename = r'.\gpu_usage_draft_'
default_gpu = 58 + 30
def GrabGPU(rank):
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" > ' + filename + rank
os.system(cmdstr)
def GetGPUUSage(rank):
pattern = re.compile(r'(?P<num>[0-9]{1,5})MiB[\s]+/')
id = 0
GPUs = []
fo = open(filename + rank, 'r')
for line in fo:
result = pattern.search(line)
if result:
if int(result.group("num")) < default_gpu:
GPUs.append(id)
id = id + 1
fo.close()
print len(GPUs)
for gpu in GPUs:
print gpu
if __name__ == '__main__':
rank = sys.argv[1]
GrabGPU(rank)
print socket.gethostname()
GetGPUUSage(rank)
#os.system('del /q ' + filename + rank)
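# Output sketch (inferred from the code above): the hostname, then the number of GPUs
# whose used memory is below default_gpu MiB, then one free GPU id per line; the MPI
# helper in the next file redirects this output into a per-rank record file, presumably
# for AllocateGPU.py (not shown in this excerpt) to consume.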


@ -0,0 +1,17 @@
import os
def MapDeviceIds(comm):
rank = comm.Get_rank()
num_machine = comm.Get_size()
os.system('python GPU_Usage.py ' + str(rank) + ' > record' + str(rank))
comm.Barrier()
if rank == 0:
os.system('python AllocateGPU.py ' + str(num_machine) + ' > DirtyRecord')
comm.Barrier()
cardid = str(0)
with open('DirtyRecord', 'r') as f:
for idx, line in enumerate(f):
if idx == rank:
cardid = line.strip()
break
return cardid
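# Usage sketch (assumption: comm is an mpi4py communicator):
#   from mpi4py import MPI
#   cardid = MapDeviceIds(MPI.COMM_WORLD)  # GPU id (as a string) assigned to this rank
# Rank 0 aggregates the per-rank GPU records via AllocateGPU.py, and every rank then
# reads its own line from DirtyRecord.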


@ -0,0 +1,6 @@
Dual supervised learning for sentiment analysis.
The models are at:
https://www.dropbox.com/sh/sbl9lv6q0agsrrz/AADIYiS_4stp36X2waW2Wfiaa?dl=0
You can refer to "train.bat/train_linux.sh" and "valid.bat/valid_linux.sh" for how to run our code.
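For example (the exact commands are reproduced from the train/valid scripts included later in this commit):
Training: python monitor.py --classifier_drop_in=0.8 --classifier_drop_out=0.5 --clip_L2S=5.0 --trade_off_S2L=5.0 --trade_off_L2S=5.0 --validFreq=5000 --model_dir=Sentiment_model --lrS2L=0.1 --lrL2S=0.1 --lrate=0.1 --bias=0.0 --dual_style=all --optim=adadelta
Inference: python inference.py --model_dir=winner --model_S2L=warmClassifier.npz --model_L2S=warmCLM.npz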

Diff for one file is not shown because of its large size.


@ -0,0 +1,45 @@
import argparse
parser = argparse.ArgumentParser()
# data I/O
parser.add_argument('--data_dir', type=str, default='./data/imdb.pkl', help='Location for the dataset')
parser.add_argument('--LMScoreFile', type=str, default='./data/LMScore.npz', help='Location for the LMScoreFile')
parser.add_argument('--GCRmode', dest='GCRmode', action='store_true', help='GCRmode')
parser.add_argument('--gpu', type=int, default=0, help='')
# optimization parameters
parser.add_argument('--model_dir', type=str, default=None)
parser.add_argument('--model_S2L', type=str, default='warmClassifier.npz')
parser.add_argument('--model_S2L_pkl', type=str, default=None)
parser.add_argument('--model_L2S', type=str, default='warmCLM.npz')
parser.add_argument('--model_L2S_pkl', type=str, default=None)
parser.add_argument('--dual_style', type=str, default='all', help='all | S2L | L2S ')
parser.add_argument('--optim', type=str, default='adadelta')
parser.add_argument('--minibatch', type=int, default=16, help='')
parser.add_argument('--trade_off_S2L', type=float, default=5e-3, help='the consistency trade-off for S2L')
parser.add_argument('--trade_off_L2S', type=float, default=5e-3, help='the consistency trade-off for L2S')
parser.add_argument('--clip_S2L', type=float, default=-1., help='gradient clip S2L')
parser.add_argument('--clip_L2S', type=float, default=5., help='gradient clip L2S')
parser.add_argument('--bias', type=float, default=0.02, help='the bias')
parser.add_argument('--FreezeEmb', dest='FreezeEmb', action='store_true', help='FreezeEmb')
parser.add_argument('--lrS2L', type=float, default=0.1, help='learning rate for the S2L model')
parser.add_argument('--lrL2S', type=float, default=0.1, help='learning rate for the L2S model')
parser.add_argument('--lrate', type=float, default=0.1, help='learning rate')
parser.add_argument('--maxEpoch', type=int, default=100, help='')
parser.add_argument('--validFreq', type=int, default=2000, help='')
parser.add_argument('--classifier_drop_in', type=float, default=0.8, help='classifier_drop_in')
parser.add_argument('--classifier_drop_out', type=float, default=0.5, help='classifier_drop_out')
parser.add_argument('--CLM_drop_in', type=float, default=0.5, help='CLM_drop_in')
parser.add_argument('--CLM_drop_out', type=float, default=0.5, help='CLM_drop_out')
config_params = parser.parse_args()


@ -0,0 +1,2 @@
Please download the files from
https://www.dropbox.com/sh/j9l5hhnjsyhtd02/AABMk8m6b_8tS8fuURqk66zCa?dl=0


@ -0,0 +1,6 @@
from monitor import *
runner = monitor()
print 'valid classifier', runner.valid_S2L()
print 'valid CLM:', runner.valid_L2S()
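# Note (assumption): this script appears to be the inference entry point referred to as
# inference.py in valid.bat/valid_linux.sh; a typical invocation from those scripts is
#   python inference.py --model_dir=winner --model_S2L=warmClassifier.npz --model_L2S=warmCLM.npz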


@ -0,0 +1,24 @@
# Copyright (c) 2007, 2010, 2011, 2012 Godefroid Chapelle
#
# This file is part of ipdb.
# GNU package is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 2 of the License, or (at your option)
# any later version.
#
# GNU package is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
from __main__ import set_trace, post_mortem, pm, run, runcall, runeval, launch_ipdb_on_exception
pm # please pyflakes
post_mortem # please pyflakes
run # please pyflakes
runcall # please pyflakes
runeval # please pyflakes
set_trace # please pyflakes
launch_ipdb_on_exception # please pyflakes


@ -0,0 +1,184 @@
# Copyright (c) 2011, 2012 Godefroid Chapelle
#
# This file is part of ipdb.
# GNU package is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 2 of the License, or (at your option)
# any later version.
#
# GNU package is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
from __future__ import print_function
import sys
import os
import traceback
from contextlib import contextmanager
try:
from pdb import Restart
except ImportError:
class Restart(Exception):
pass
import IPython
if IPython.__version__ > '0.10.2':
from IPython.core.debugger import Pdb, BdbQuit_excepthook
try:
get_ipython
except NameError:
# Make it more resilient to different versions of IPython and try to
# find a module.
possible_modules = ['IPython.terminal.embed', # Newer IPython
'IPython.frontend.terminal.embed'] # Older IPython
count = len(possible_modules)
for module in possible_modules:
try:
embed = __import__(module, fromlist=["InteractiveShellEmbed"])
InteractiveShellEmbed = embed.InteractiveShellEmbed
except ImportError:
count -= 1
if count == 0:
raise
else:
break
ipshell = InteractiveShellEmbed()
def_colors = ipshell.colors
else:
def_colors = get_ipython.im_self.colors
from IPython.utils import io
if 'nose' in sys.modules.keys():
def update_stdout():
# setup stdout to ensure output is available with nose
io.stdout = sys.stdout = sys.__stdout__
else:
def update_stdout():
pass
else:
from IPython.Debugger import Pdb, BdbQuit_excepthook
from IPython.Shell import IPShell
from IPython import ipapi
ip = ipapi.get()
if ip is None:
IPShell(argv=[''])
ip = ipapi.get()
def_colors = ip.options.colors
from IPython.Shell import Term
if 'nose' in sys.modules.keys():
def update_stdout():
# setup stdout to ensure output is available with nose
Term.cout = sys.stdout = sys.__stdout__
else:
def update_stdout():
pass
def wrap_sys_excepthook():
# make sure we wrap it only once or we would end up with a cycle
# BdbQuit_excepthook.excepthook_ori == BdbQuit_excepthook
if sys.excepthook != BdbQuit_excepthook:
BdbQuit_excepthook.excepthook_ori = sys.excepthook
sys.excepthook = BdbQuit_excepthook
def set_trace(frame=None):
update_stdout()
wrap_sys_excepthook()
if frame is None:
frame = sys._getframe().f_back
Pdb(def_colors).set_trace(frame)
def post_mortem(tb):
update_stdout()
wrap_sys_excepthook()
p = Pdb(def_colors)
p.reset()
if tb is None:
return
p.interaction(None, tb)
def pm():
post_mortem(sys.last_traceback)
def run(statement, globals=None, locals=None):
Pdb(def_colors).run(statement, globals, locals)
def runcall(*args, **kwargs):
return Pdb(def_colors).runcall(*args, **kwargs)
def runeval(expression, globals=None, locals=None):
return Pdb(def_colors).runeval(expression, globals, locals)
@contextmanager
def launch_ipdb_on_exception():
try:
yield
except Exception:
e, m, tb = sys.exc_info()
print(m.__repr__(), file=sys.stderr)
post_mortem(tb)
finally:
pass
def main():
if not sys.argv[1:] or sys.argv[1] in ("--help", "-h"):
print("usage: ipdb.py scriptfile [arg] ...")
sys.exit(2)
mainpyfile = sys.argv[1] # Get script filename
if not os.path.exists(mainpyfile):
print('Error:', mainpyfile, 'does not exist')
sys.exit(1)
del sys.argv[0] # Hide "pdb.py" from argument list
# Replace pdb's dir with script's dir in front of module search path.
sys.path[0] = os.path.dirname(mainpyfile)
# Note on saving/restoring sys.argv: it's a good idea when sys.argv was
# modified by the script being debugged. It's a bad idea when it was
# changed by the user from the command line. There is a "restart" command
# which allows explicit specification of command line arguments.
pdb = Pdb(def_colors)
while 1:
try:
pdb._runscript(mainpyfile)
if pdb._user_requested_quit:
break
print("The program finished and will be restarted")
except Restart:
print("Restarting", mainpyfile, "with arguments:")
print("\t" + " ".join(sys.argv[1:]))
except SystemExit:
# In most cases SystemExit does not warrant a post-mortem session.
print("The program exited via sys.exit(). Exit status: ", end='')
print(sys.exc_info()[1])
except:
traceback.print_exc()
print("Uncaught exception. Entering post mortem debugging")
print("Running 'cont' or 'step' will restart the program")
t = sys.exc_info()[2]
pdb.interaction(None, t)
print("Post mortem debugger finished. The " + mainpyfile +
" will be restarted")
if __name__ == '__main__':
main()


@ -0,0 +1,128 @@
(dp1
S'monitor_grad'
p2
I00
sS'dropout_output'
p3
F0.5
sS'n_words'
p4
I10000
sS'start_epoch'
p5
I0
sS'dataset'
p6
S'text'
p7
sS'patience'
p8
I10
sS'skip_steps2'
p9
I-1
sS'hier_len'
p10
NsS'max_epochs'
p11
I5000
sS'dispFreq'
p12
I50
sS'newDumpFreq'
p13
I5000000
sS'self'
p14
NsS'hybrid'
p15
I00
sS'clip_c'
p16
F-1
sS'dim_proj'
p17
I1024
sS'saveto'
p18
S'model.npz'
p19
sS'start_iter'
p20
I0
sS'lastHiddenLayer'
p21
NsS'noise_std'
p22
F0
sS'batch_len_threshold'
p23
NsS'valid_batch_size'
p24
I16
sS'corpus'
p25
S'imdb.pkl'
p26
sS'reload_options'
p27
NsS'optimizer'
p28
S'adadelta'
p29
sS'validFreq'
p30
I2000
sS'dropout_input'
p31
F0.80000000000000004
sS'warm_LM'
p32
NsS'batch_size'
p33
I16
sS'encoder'
p34
S'lstm'
p35
sS'hierarchical'
p36
I00
sS'reload_model'
p37
S'winner/warmClassifier.npz'
p38
sS'lrate'
p39
F1
sS'truncate_grad'
p40
I-1
sS'decay_c'
p41
F-1
sS'encoder2'
p42
NsS'test_size'
p43
NsS'dim_word'
p44
I500
sS'unit_depth'
p45
I-1
sS'maxlen'
p46
NsS'skip_steps'
p47
I-1
sS'embedding'
p48
NsS'logFile'
p49
S'log2'
p50
sS'mean_pooling'
p51
I00
s.


@ -0,0 +1,209 @@
from config import config_params
import os
os.environ['THEANO_FLAGS']='floatX=float32,device=cuda%d' % (config_params.gpu)
if os.name == 'nt':
cmdstr = '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\" '
os.system(cmdstr)
else:
os.system(r'nvidia-smi')
from CLM.CLM import CLM_worker
from Classifier.Models import Model as Classifier
import theano
import theano.tensor as tensor
import numpy
from Util_basic import sgd_joint, prepare_data_x, unzip, itemlist_NoEmb, adadelta_joint, Optim
from Data import load_data, get_minibatches_idx, get_minibatches_idx_bucket
from collections import OrderedDict
def grad_clipping(grads, clip_c):
g2 = 0.
for g in grads:
g2 += (g**2).sum()
new_grads = []
for g in grads:
new_grads.append(tensor.switch(g2 > (clip_c**2), g/tensor.sqrt(g2) * clip_c, g))
return new_grads, tensor.sqrt(g2)
class monitor(object):
def __init__(self):
print config_params
self.CLM = CLM_worker(lrate=1.,
optimizer='adadelta',
batch_size=config_params.minibatch,
saveto='model.npz',
validFreq=2000,
dispFreq=100,
dropout_input=config_params.CLM_drop_in,
dropout_output=config_params.CLM_drop_out,
reload_model=config_params.model_dir + '/' + config_params.model_L2S,
reload_option=None,
log='log1'
)
self.classifier = Classifier(lrate=1., # Learning rate for sgd (not used for adadelta and rmsprop)
optimizer='adadelta',
saveto='model.npz', # The best model will be saved there
dispFreq=50, # Display the training progress after this number of updates
validFreq=2000, # Compute the validation error after this number of updates
batch_size=config_params.minibatch, # The batch size during training.
batch_len_threshold=None, # use dynamic batch size if sequence lengths exceed this threshold
valid_batch_size=config_params.minibatch, # The batch size used for validation/test set.
lastHiddenLayer=None,
dropout_output=config_params.classifier_drop_out,
dropout_input=config_params.classifier_drop_in,
reload_options=None, # Path to a saved model options we want to start from
reload_model=config_params.model_dir + '/' + config_params.model_S2L,
embedding=None, # Path to the word embedding file (otherwise randomized)
warm_LM=None,
logFile='log2' # Path to log file
)
self.trainSet, self.validSet, self.testSet = \
load_data(path=config_params.data_dir, n_words=10000, maxlen=None, sort_by_len=True, fixed_valid=True)
self.LMscore = numpy.load(config_params.LMScoreFile)
self.LMscore = self.LMscore[self.LMscore.files[0]]
self.build()
def build(self):
LMsores = tensor.vector('LMScore', dtype='float32')
lrate = tensor.scalar(dtype='float32')
CLM_srcx, CLM_srcx_mask, CLM_ctx_, CLM_cost, CLM_sentenceLen = self.CLM.GetNll()
classifier_x, classifier_mask, classifier_y, classifier_nlls = self.classifier.GetNll()
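# Note (assumption): consistent_loss below penalizes violation of the probabilistic
# duality constraint log P(x) + log P(y|x) = log P(y) + log P(x|y), with a uniform
# label prior P(y) = 0.5 and per-word normalization by CLM_sentenceLen; reading
# LMScore as a per-word language-model negative log-likelihood is an assumption here.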
consistent_loss = (((classifier_nlls + numpy.log(0.5))/CLM_sentenceLen + LMsores - CLM_cost) ** 2).mean()
CLM_cost_avg = CLM_cost.mean()
overall_L2S = CLM_cost_avg + config_params.trade_off_L2S * config_params.trade_off_L2S * consistent_loss
classifier_nlls_avg = classifier_nlls.mean()
overall_S2L = classifier_nlls_avg + config_params.trade_off_S2L * config_params.trade_off_S2L * consistent_loss
if config_params.FreezeEmb:
grads_L2S = tensor.grad(overall_L2S, wrt=itemlist_NoEmb(self.CLM.tparams))
else:
grads_L2S = tensor.grad(overall_L2S, wrt=self.CLM.tparams.values())
if config_params.clip_L2S > 0.:
grads_L2S, norm_grads_L2S = grad_clipping(grads_L2S, config_params.clip_L2S)
else:
norm_grads_L2S = tensor.alloc(-1.)
if config_params.FreezeEmb:
grads_S2L = tensor.grad(overall_S2L, wrt=itemlist_NoEmb(self.classifier.tparams))
else:
grads_S2L = tensor.grad(overall_S2L, wrt=self.classifier.tparams.values())
if config_params.clip_S2L > 0.:
grads_S2L, norm_grads_S2L = grad_clipping(grads_S2L, config_params.clip_S2L)
else:
norm_grads_S2L = tensor.alloc(-1.)
if config_params.dual_style == 'all':
merged_var_dic = OrderedDict()
if config_params.FreezeEmb:
merged_var_dic.update(OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems() if 'Wemb' not in k ))
merged_var_dic.update(OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems() if 'Wemb' not in k ))
else:
merged_var_dic.update(OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems()))
merged_var_dic.update(OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems()))
inps = [CLM_srcx, CLM_srcx_mask, CLM_ctx_, classifier_x, classifier_mask, classifier_y, LMsores]
outs = [CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L]
self.f_grad_shared, self.f_update = Optim[config_params.optim](lrate, merged_var_dic, grads_S2L + grads_L2S, inps, outs)
elif config_params.dual_style == 'S2L':
if config_params.FreezeEmb:
merged_var_dic = OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems() if 'Wemb' not in k )
else:
merged_var_dic = OrderedDict((k + '_S2L',v) for (k,v) in self.classifier.tparams.iteritems())
inps = [CLM_srcx, CLM_srcx_mask, CLM_ctx_, classifier_x, classifier_mask, classifier_y, LMsores]
norm_grads_L2S = tensor.alloc(-1.)
outs = [CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L]
self.f_grad_shared, self.f_update = Optim[config_params.optim](lrate, merged_var_dic, grads_S2L, inps, outs)
elif config_params.dual_style == 'L2S':
if config_params.FreezeEmb:
merged_var_dic = OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems() if 'Wemb' not in k )
else:
merged_var_dic = OrderedDict((k + '_L2S',v) for (k,v) in self.CLM.tparams.iteritems())
inps = [CLM_srcx, CLM_srcx_mask, CLM_ctx_, classifier_x, classifier_mask, classifier_y, LMsores]
norm_grads_S2L = tensor.alloc(-1.)
outs = [CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L]
self.f_grad_shared, self.f_update = Optim[config_params.optim](lrate, merged_var_dic, grads_L2S, inps, outs)
else:
raise Exception('Unsupported dual_style: {}'.format(config_params.dual_style))
def train_one_minibatch(self, seqx, seqy, LMscore):
CLM_x, CLM_xmask = prepare_data_x(seqx, pad_eos=True)
labels = numpy.array(seqy).astype('int64')
classifier_x, classifier_xmask = prepare_data_x(seqx, pad_eos=False)
CLM_cost_avg, classifier_nlls_avg, consistent_loss, overall_L2S, overall_S2L, norm_grads_L2S, norm_grads_S2L = self.f_grad_shared(
CLM_x, CLM_xmask, labels, classifier_x, classifier_xmask, labels, LMscore
)
print 'CLM_cost_avg=%f, classifier_nlls_avg=%f, norm_grads_L2S=%f, norm_grads_S2L=%f, consistent_loss=%f,' \
' overall_L2S=%f, overall_S2L=%f' % (
CLM_cost_avg, classifier_nlls_avg, norm_grads_L2S, norm_grads_S2L, consistent_loss, overall_L2S, overall_S2L )
self.f_update(config_params.lrate)
def train(self):
uidx = 0
for eidx in xrange(0, config_params.maxEpoch):
n_samples = 0
self.kf_train = get_minibatches_idx_bucket(self.trainSet[0],config_params.minibatch,shuffle=True)
for _, train_index in self.kf_train:
uidx += 1
self.classifier.use_noise.set_value(1.)
self.CLM.use_noise.set_value(1.)
# Select the random examples for this minibatch
seqx = [self.trainSet[0][t] for t in train_index]
seqy = [self.trainSet[1][t] for t in train_index]
LMscore = [self.LMscore[t] for t in train_index]
self.train_one_minibatch(seqx, seqy, numpy.array(LMscore).astype('float32'))
if uidx % config_params.validFreq == 0:
self.classifier.use_noise.set_value(0.)
self.CLM.use_noise.set_value(0.)
if config_params.dual_style == 'all':
suffix_S2L = self.valid_S2L()
suffix_L2S = self.valid_L2S()
S2Lpath = config_params.model_dir + '/model_S2L_' + suffix_S2L + '_uidx' + str(uidx)
L2Spath = config_params.model_dir + '/model_L2S_' + suffix_L2S + '_uidx' + str(uidx)
numpy.savez(S2Lpath, history_errs=[], **unzip(self.classifier.tparams) )
numpy.savez(L2Spath, history_errs=[], **unzip(self.CLM.tparams) )
elif config_params.dual_style == 'S2L':
suffix_S2L = self.valid_S2L()
S2Lpath = config_params.model_dir + '/model_S2L_' + suffix_S2L + '_uidx' + str(uidx)
numpy.savez(S2Lpath, history_errs=[], **unzip(self.classifier.tparams) )
elif config_params.dual_style == 'L2S':
suffix_L2S = self.valid_L2S()
L2Spath = config_params.model_dir + '/model_L2S_' + suffix_L2S + '_uidx' + str(uidx)
numpy.savez(L2Spath, history_errs=[], **unzip(self.CLM.tparams) )
def valid_S2L(self):
acc = self.classifier.evaluate(self.trainSet, self.validSet, self.testSet)
print 'TrainAcc=%f, ValidAcc=%f, TestAcc=%f' % (acc[0], acc[1], acc[2])
return 'train_{}_valid_{}_test_{}'.format(acc[0], acc[1], acc[2])
def valid_L2S(self):
valid_ppl, test_ppl = self.CLM.evaluate(self.validSet, self.testSet)
print 'Valid_PPL=%f, Test_PPL=%f' % (valid_ppl, test_ppl)
return 'valid_{}_test_{}'.format(valid_ppl, test_ppl)
if __name__ == '__main__':
runner = monitor()
runner.train()
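# Typical training invocation is the one-line command in train.bat/train_linux.sh elsewhere
# in this commit; inference.py reuses this monitor class and only calls valid_S2L()/valid_L2S().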


@ -0,0 +1 @@
python monitor.py --classifier_drop_in=0.8 --classifier_drop_out=0.5 --clip_L2S=5.0 --trade_off_S2L=5.0 --trade_off_L2S=5.0 --validFreq=5000 --model_dir=your_model_folder --lrS2L=0.1 --lrL2S=0.1 --lrate=0.1 --bias=0.0 --dual_style=all --optim=adadelta


@ -0,0 +1 @@
python monitor.py --classifier_drop_in=0.8 --classifier_drop_out=0.5 --clip_L2S=5.0 --trade_off_S2L=5.0 --trade_off_L2S=5.0 --validFreq=5000 --model_dir=Sentiment_model --lrS2L=0.1 --lrL2S=0.1 --lrate=0.1 --bias=0.0 --dual_style=all --optim=adadelta


@ -0,0 +1 @@
python inference.py --model_dir=winner --model_S2L=warmClassifier.npz --model_L2S=warmCLM.npz


@ -0,0 +1 @@
python inference.py --model_dir=winner --model_S2L=warmClassifier.npz --model_L2S=warmCLM.npz --gpu=3


@ -12,3 +12,7 @@ provided by the bot. You will only need to do this once across all repos using o
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
The code consists of two parts:
(1) dual supervised learning for image processing: DSL_ImgProcess
(2) dual supervised learning for sentiment analysis: DSL_SentimentAnalysis