updated some files to master copies

Nikos Karampatziakis 2016-10-24 20:46:18 -07:00
Parent 50b1510cfc
Commit 9ee35dd057
8 changed files with 54 additions and 92 deletions

View file

@ -118,7 +118,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
the next minibatch. Must be > 0.
minibatch_size_in_sequences (`int`, defaults to `None`): number of
samples to retrieve for the next minibatch. Must be > 0.
input_map (`dict`): mapping of :class:`cntk.ops.variables.Variable`
input_map (`dict`): mapping of :class:`cntk.ops.variabls.Variable`
to :class:`StreamInformation` which will be used to convert the
returned data.
device (`DeviceDescriptor`, defaults to `None`): CNTK DeviceDescriptor
@ -126,7 +126,7 @@ class MinibatchSource(cntk_py.MinibatchSource):
Returns:
A mapping of :class:`StramInformation` to :class:`MinibatchData` if
``input_map`` was not specified. Otherwise, the returned value will
be a mapping of :class:`cntk.ops.variables.Variable` to class:`MinibatchData`.
be a mapping of :class:`cntk.ops.variabls.Variable` to class:`MinibatchData`.
'''
if device is None:
device = use_default_device()
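For reference, a minimal sketch of the input_map usage described in this docstring (the variable and stream names are illustrative, and the trainer call assumes the usual training setup):

    # hypothetical names; 'features' and 'labels' are Variables, mb_source a MinibatchSource
    input_map = {
        features : mb_source.streams.features,   # Variable -> StreamInformation
        labels   : mb_source.streams.labels
    }
    data = mb_source.next_minibatch(64, input_map=input_map)  # returns {Variable: MinibatchData}
    trainer.train_minibatch(data)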
@ -223,8 +223,7 @@ class ReaderConfig(dict):
deserializers ('list', default is empty): list of deserializers
(:class:`ImageDeserializer` for now).
randomize (`bool`, default True): randomize images before every epoch
epoch_size (`int`): epoch size. FULL_DATA_SWEEP means one pass;
INFINITELY_REPEAT means duplicate infinitely (with different randomization each time).
epoch_size (`int`): epoch size
'''
def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT):
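For illustration, a hedged sketch of the two epoch_size settings mentioned above ('img_deserializer' is a placeholder for an already configured ImageDeserializer):

    from cntk.io import ReaderConfig, INFINITELY_REPEAT, FULL_DATA_SWEEP
    train_source = ReaderConfig([img_deserializer], randomize=True,  epoch_size=INFINITELY_REPEAT).minibatch_source()
    test_source  = ReaderConfig([img_deserializer], randomize=False, epoch_size=FULL_DATA_SWEEP).minibatch_source()  # one pass over the data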
@ -235,7 +234,6 @@ class ReaderConfig(dict):
self['deserializers'] = self.deserializers = deserializers or []
self['randomize'] = randomize
# TODO: This should be deleted; do it the other way round and create MinibatchSource() directly with the same parameter
@typemap
def minibatch_source(self):
'''
@ -417,9 +415,9 @@ class ImageDeserializer(Deserializer):
class CTFDeserializer(Deserializer):
'''
This class configures the text reader that reads text-encoded files from a file with lines of the form
[Sequence_Id](Sample)+
[Sequence_Id](Sample)+
where
Sample=|Input_Name (Value )*
Sample=|Input_Name (Value )*
Args:
filename (`str`): file name containing the text input
See also:
@ -440,17 +438,17 @@ class CTFDeserializer(Deserializer):
# TODO: should be a private method; use constructor only
def map_input(self, node, dim, format="dense", alias=None):
'''
Maps node (either node instance or node name) to a part of the text input,
Maps node (either node instance or node name) to a part of the text input,
either specified by the node name or the alias in the text file.
Example: for node name 'Apples' an input line could look like this:
|Apples 0 1 2 3 4 5 6 7 8 9
Args:
node (`str` or input node): node or its name
dim (`int`): specifies the dimension of the input value vector
(for dense input this directly corresponds to the number of values in each sample,
dim (`int`): specifies the dimension of the input value vector
(for dense input this directly corresponds to the number of values in each sample,
for sparse this represents the upper bound on the range of possible index values).
format (`str`, default 'dense'): 'dense' or 'sparse'. Specifies the input type.
alias (`str`, default None): None or alias name. Optional abbreviated name that
format (`str`, default 'dense'): 'dense' or 'sparse'. Specifies the input type.
alias (`str`, default None): None or alias name. Optional abbreviated name that
is used in the text file to avoid repeating long input names. For details please
see https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader
'''
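As a hedged illustration of this mapping (the file name and dimension are made up; the one-argument constructor follows the Args list above):

    # a line in 'apples.ctf' could read:  |Apples 0 1 2 3 4 5 6 7 8 9
    ctf = CTFDeserializer("apples.ctf")
    ctf.map_input('Apples', dim=10, format="dense")   # ten dense values per sample, no alias needed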
@ -504,6 +502,22 @@ class StreamConfiguration(cntk_py.StreamConfiguration):
return super(StreamConfiguration, self).__init__(name, dim, is_sparse, stream_alias)
# wrapper around text_format_minibatch_source() that attaches a record of streams
# TODO: This should not exist; use MinibatchSource(CTFDeserializer(...))
def _unused_CNTKTextFormatMinibatchSource(path, streams, epoch_size=None): # TODO: delete this
from cntk.utils import _ClassFromDict
# convert streams into StreamConfiguration format
# TODO: stream_alias should default to 'key'
stream_configs = [ StreamConfiguration(key, dim=value.dim, is_sparse=value.is_sparse, stream_alias=value.stream_alias) for (key, value) in streams.items() ]
if epoch_size is not None: # TODO: use MAX_UI64, now that we have access
source = text_format_minibatch_source(path, stream_configs, epoch_size)
else:
source = text_format_minibatch_source(path, stream_configs)
# attach a dictionary of the streams
source.streams = _ClassFromDict({ name : source.stream_info(name) for name in streams.keys() })
return source
# stream definition for use in StreamDefs
# returns a record { stream_alias, is_sparse, optional dim, optional transforms }
from cntk.utils import Record
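For reference, a minimal sketch of the StreamDefs/StreamDef record in use, following the pattern in the language-understanding example further down (path, field names, and dimensions are illustrative):

    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    source = MinibatchSource(CTFDeserializer("atis.train.ctf", StreamDefs(
        query       = StreamDef(field='S0', shape=943, is_sparse=True),
        slot_labels = StreamDef(field='S2', shape=129, is_sparse=True))))
    info = source.streams.query   # per-stream StreamInformation record, as used in the examples below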

View file

@ -147,7 +147,6 @@ class ProgressPrinter:
else:
print(' Minibatch[{:4d}-{:4d}]: loss = {:0.6f} * {:d}'.format(
first_mb, self.updates, avg_loss, samples))
return self.samples_since_start # so that we can trigger on this
def update_with_trainer(self, trainer, with_metric=False):
'''
@ -158,7 +157,7 @@ class ProgressPrinter:
trainer (:class:`cntk.trainer.Trainer`): trainer from which information is gathered
with_metric (`bool`): whether to update the metric accumulators
'''
self.update(trainer.previous_minibatch_loss_average, trainer.previous_minibatch_sample_count, trainer.previous_minibatch_evaluation_average if with_metric else None)
self.update(trainer.previous_minibatch_loss_average,trainer.previous_minibatch_sample_count, trainer.previous_minibatch_evaluation_average if with_metric else None)
# print the total number of parameters to log
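A hedged sketch of how update_with_trainer is typically combined with epoch_summary, mirroring the training loops in the examples below (reader, trainer, and input_map are assumed to be set up as there):

    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')
    for _ in range(num_minibatches):                                        # placeholder loop bound
        data = reader.next_minibatch(minibatch_size, input_map=input_map)   # fetch minibatch
        trainer.train_minibatch(data)                                       # update the model
        progress_printer.update_with_trainer(trainer, with_metric=True)     # log loss/metric/sample count
    loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)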

View file

@ -5,7 +5,7 @@
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{1a078fc2-21c0-4f42-9a5b-0e84e944bc74}</ProjectGuid>
<ProjectHome />
<StartupFile>CifarConvNet\CifarConvNet.py</StartupFile>
<StartupFile>test\language_understanding_test.py</StartupFile>
<SearchPath>$(RepoRootPath)$(Platform)\$(Configuration);$(RepoRootPath)bindings\python;$(RepoRootPath)bindings\python\examples</SearchPath>
<WorkingDirectory>.</WorkingDirectory>
<OutputPath>.</OutputPath>

View file

@ -17,11 +17,9 @@ from cntk import Trainer
from cntk.learner import momentum_sgd, learning_rate_schedule
from cntk.ops import cross_entropy_with_softmax, classification_error, relu, convolution, pooling, PoolingType_Max
########################
# variables and paths #
########################
# paths (are relative to current python file)
#
# Paths relative to current python file.
#
abs_path = os.path.dirname(os.path.abspath(__file__))
cntk_path = os.path.normpath(os.path.join(abs_path, "..", "..", "..", ".."))
data_path = os.path.join(cntk_path, "Examples", "Image", "Datasets", "CIFAR-10")
@ -44,9 +42,9 @@ def create_reader(map_file, mean_file, train):
# transformation pipeline for the features has jitter/crop only when training
transforms = []
if is_training:
if train:
transforms += [
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # is_training uses jitter
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
@ -56,11 +54,7 @@ def create_reader(map_file, mean_file, train):
return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes) # and second as 'label'
)), randomize=is_training, epoch_size = INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
########################
# define the model #
########################
)))
#
# Define a VGG like network for Cifar dataset.
@ -122,12 +116,11 @@ def train_and_evaluate(reader_train, reader_test, max_epochs):
# loss and metric
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error (z, label_var)
pe = classification_error(z, label_var)
# training config
epoch_size = 50000
minibatch_size = 64
epoch_size = 1000 ; max_epochs = 1 # for faster testing
# For basic model
lr_per_sample = [0.00015625]*10+[0.000046875]*10+[0.0000156]
@ -147,8 +140,7 @@ def train_and_evaluate(reader_train, reader_test, max_epochs):
}
log_number_of_parameters(z) ; print()
progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')
#progress_printer = ProgressPrinter(tag='Training')
progress_printer = ProgressPrinter(tag='Training')
# perform model training
for epoch in range(max_epochs): # loop over epochs
@ -159,14 +151,13 @@ def train_and_evaluate(reader_train, reader_test, max_epochs):
sample_count += data[label_var].num_samples # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
#return loss, metric # return values from last epoch
progress_printer.epoch_summary(with_metric=True)
#
# Evaluation action
#
minibatch_size = 1000
epoch_size = 10000
minibatch_size = 16
# process minibatches and evaluate the model
metric_numer = 0

View file

@ -94,9 +94,10 @@ def create_resnet_model(input, num_classes):
bn_time_const = 4096
c_map1 = 16
feat_scale = 0.00390625
# feat_scale = 0.00390625
# input_norm = element_times(feat_scale, input)
input_norm = element_times(feat_scale, input)
conv = conv_bn_relu_layer(input, c_map1, [3, 3], [1, 1], bn_time_const)
r1_1 = resnet_basic_stack3(conv, c_map1, bn_time_const)
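For context (not part of the commit): the constant 0.00390625 is 1/256, so element_times(feat_scale, input) rescales 8-bit pixel values into roughly the [0, 1) range before the first convolution:

    assert 1.0 / 256 == 0.00390625   # feat_scale is exactly 1/256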

View file

@ -10,16 +10,15 @@ from cntk.blocks import * # non-layer like building blocks such as LSTM()
from cntk.layers import * # layer-like stuff such as Linear()
from cntk.models import * # higher abstraction level, e.g. entire standard models and also operators like Sequential()
from cntk.utils import *
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
from cntk import Trainer
from cntk.learner import adam_sgd, learning_rate_schedule, momentum_schedule
from cntk.ops import cross_entropy_with_softmax, classification_error
########################
# variables and paths #
# variables and stuff #
########################
# paths
cntk_dir = os.path.dirname(os.path.abspath(__file__)) + "/../../../.." # data resides in the CNTK folder
data_dir = cntk_dir + "/Examples/Tutorials/SLUHandsOn" # under Examples/Tutorials
vocab_size = 943 ; num_labels = 129 ; num_intents = 26 # number of words in vocab, slot labels, and intent labels
@ -36,12 +35,12 @@ hidden_dim = 300
# define the reader #
########################
def create_reader(path, is_training):
def create_reader(path):
return MinibatchSource(CTFDeserializer(path, StreamDefs(
query = StreamDef(field='S0', shape=input_dim, is_sparse=True),
intent_unused = StreamDef(field='S1', shape=num_intents, is_sparse=True), # BUGBUG: unused, and should infer dim
slot_labels = StreamDef(field='S2', shape=label_dim, is_sparse=True)
)), randomize=is_training, epoch_size = INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
)))
########################
# define the model #
@ -74,7 +73,6 @@ def train(reader, model, max_epochs):
# training config
epoch_size = 36000
minibatch_size = 70
epoch_size = 1000 ; max_epochs = 1 # for faster testing
num_mbs_to_show_result = 100
momentum_as_time_constant = minibatch_size / -math.log(0.9) # TODO: Change to round number. This is 664.39. 700?
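A quick check of the arithmetic referenced in the TODO above (not part of the commit):

    import math
    print(minibatch_size / -math.log(0.9))   # with minibatch_size = 70 this is ~664.39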
@ -117,45 +115,6 @@ def train(reader, model, max_epochs):
#trace_node('stabilizer_param')
loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
return loss, metric # return values from last epoch
########################
# eval action #
########################
def evaluate(reader, model):
# Input variables denoting the features and label data
query = Input(input_dim, is_sparse=False)
slot_labels = Input(num_labels, is_sparse=True) # TODO: make sparse once it works
# apply model to input
z = model(query)
# loss and metric
ce = cross_entropy_with_softmax(z, slot_labels)
pe = classification_error (z, slot_labels)
# define mapping from reader streams to network inputs
input_map = {
query : reader.streams.query,
slot_labels : reader.streams.slot_labels
}
# process minibatches and perform evaluation
dummy_learner = adam_sgd(z.parameters, lr_per_sample=1, momentum_time_constant=0, low_memory=True) # BUGBUG: should not be needed
evaluator = Trainer(z, ce, pe, [dummy_learner])
progress_printer = ProgressPrinter(freq=100, first=10, tag='Evaluation') # more detailed logging
#progress_printer = ProgressPrinter(tag='Evaluation')
while True:
minibatch_size = 1000
data = reader.next_minibatch(minibatch_size, input_map=input_map) # fetch minibatch
if not data: # until we hit the end
break
metric = evaluator.test_minibatch(data) # evaluate minibatch
progress_printer.update(0, data[slot_labels].num_samples, metric) # log progress
loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
return loss, metric
#############################
@ -169,13 +128,10 @@ if __name__=='__main__':
set_fixed_random_seed(1) # BUGBUG: has no effect at present # TODO: remove debugging facilities once this all works
force_deterministic_algorithms()
# create the model
reader = create_reader(data_dir + "/atis.train.ctf")
model = create_model()
# train
reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
train(reader, model, max_epochs=8)
# test
reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
evaluate(reader, model)
# test (TODO)
reader = create_reader(data_dir + "/atis.test.ctf")
#test(reader, model_dir + "/slu.cmf") # TODO: what is the correct pattern here?

View file

@ -10,6 +10,7 @@ import sys
from cntk.utils import cntk_device
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import set_default_device
from cntk.io import ReaderConfig, ImageDeserializer
import pytest
from examples.CifarResNet.CifarResNet import train_and_evaluate, create_reader

View file

@ -37,7 +37,7 @@ def test_seq_classification_error(device_id):
# test of the example itself
# this emulates the main code in the PY file
reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
reader = create_reader(data_dir + "/atis.train.ctf")
model = create_model()
loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
expected_avg = [0.15570838301766451, 0.7846451368305728]
@ -45,7 +45,7 @@ def test_seq_classification_error(device_id):
# test of a config like in the example but with additions to test many code paths
if device_id >= 0: # BatchNormalization currently does not run on CPU
reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
reader = create_reader(data_dir + "/atis.train.ctf")
model = create_test_model()
loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
log_number_of_parameters(model, trace_level=1) ; print()