Merge branch 'blis/ops14todo' of https://github.com/Microsoft/CNTK into blis/ops14todo

William Darling 2016-05-04 13:24:32 +02:00
Parent 14f5628950 46254d4b22
Commit f904d224c2
7 changed files with 119 additions and 100 deletions

View file

@@ -109,7 +109,7 @@ def seqcla():
# setup embedding matrix
embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0,
init='fromFile', init_from_file_path=embedding_file)
init_from_file_path=embedding_file)
# get the vector representing the word
sequence = C.times(embedding, features, name='sequence')
@@ -146,7 +146,7 @@ def seqcla():
acc = calc_accuracy(train_file, ctx.output_filename_base)
# and test for the same number...
TOLERANCE_ABSOLUTE = 1E-06
TOLERANCE_ABSOLUTE = 1E-06
assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
"""

View file

@@ -65,7 +65,7 @@ def train_eval_logistic_regression_from_file(criterion_name=None, eval_name=None
def test_logistic_regression_from_file():
result = train_eval_logistic_regression_from_file('crit_node', 'eval_node')
TOLERANCE_ABSOLUTE = 1E-06
TOLERANCE_ABSOLUTE = 1E-02
assert result['SamplesSeen'] == 500
assert np.allclose(result['Perplexity'], 1.5584637, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['crit_node'], 0.4437005, atol=TOLERANCE_ABSOLUTE)

View file

@@ -59,7 +59,7 @@ def train_eval_logistic_regression_with_numpy(criterion_name=None, eval_name=Non
def test_logistic_regression_with_numpy():
result = train_eval_logistic_regression_with_numpy('crit_node', 'eval_node')
TOLERANCE_ABSOLUTE = 1E-06
TOLERANCE_ABSOLUTE = 1E-02
assert result['SamplesSeen'] == 500
assert np.allclose(result['Perplexity'], 1.5575403, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['crit_node'], 0.44310782, atol=TOLERANCE_ABSOLUTE)
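Both regression tests above loosen the absolute tolerance from 1E-06 to 1E-02, so small numerical drift between builds no longer fails them. A quick illustration of how `numpy.allclose` treats the `atol` argument (plain NumPy, independent of CNTK; the drifted value is hypothetical):

    import numpy as np

    expected = 1.5575403          # reference value from the test above
    measured = 1.5581             # hypothetical result that drifted in the 4th decimal

    # fails with the old tight tolerance, passes with the relaxed one
    print(np.allclose(measured, expected, atol=1e-06))   # False
    print(np.allclose(measured, expected, atol=1e-02))   # True
    # note: np.allclose also applies a relative tolerance (rtol=1e-05 by default),
    # which adds rtol*|expected| to the allowed difference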

View file

@@ -4,75 +4,99 @@
# for full license information.
# ==============================================================================
# TODO: re-write the example using the new facade
"""
MNIST Example, one hidden layer neural network
MNIST Example, one hidden layer neural network using training and testing data
converted to the CNTKTextFormatReader format with `uci_to_cntk_text_format_converter.py
<https://github.com/Microsoft/CNTK/blob/master/Source/Readers/CNTKTextFormatReader/uci_to_cntk_text_format_converter.py>`_.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', '..'))
from cntk import *
import numpy as np
import cntk as C
def add_dnn_sigmoid_layer(in_dim, out_dim, x, param_scale):
W = LearnableParameter(out_dim, in_dim, initValueScale=param_scale)
b = LearnableParameter(out_dim, 1, initValueScale=param_scale)
t = Times(W, x)
z = Plus(t, b)
return Sigmoid(z)
W = C.parameter((out_dim, in_dim)) * param_scale
b = C.parameter((out_dim, 1)) * param_scale
t = C.times(W, x)
z = C.plus(t, b)
return C.sigmoid(z)
def add_dnn_layer(in_dim, out_dim, x, param_scale):
W = LearnableParameter(out_dim, in_dim, initValueScale=param_scale)
b = LearnableParameter(out_dim, 1, initValueScale=param_scale)
t = Times(W, x)
return Plus(t, b)
W = C.parameter((out_dim, in_dim)) * param_scale
b = C.parameter((out_dim, 1)) * param_scale
t = C.times(W, x)
return C.plus(t, b)
if (__name__ == "__main__"):
def train_eval_mnist_onelayer_from_file(criterion_name=None, eval_name=None):
# Network definition
feat_dim = 784
label_dim = 10
hidden_dim = 200
cur_dir = os.path.dirname(__file__)
training_filename = os.path.join("Data", "Train-28x28.txt")
test_filename = os.path.join("Data", "Test-28x28.txt")
training_filename = os.path.join(cur_dir, "Data", "Train-28x28_text.txt")
test_filename = os.path.join(cur_dir, "Data", "Test-28x28_text.txt")
features = Input(feat_dim)
features = C.input(feat_dim)
features.name = 'features'
feat_scale = Constant(0.00390625)
feats_scaled = Scale(feat_scale, features)
feat_scale = C.constant(0.00390625)
feats_scaled = C.element_times(features, feat_scale)
labels = Input(label_dim)
labels = C.input(label_dim)
labels.tag = 'label'
labels.name = 'labels'
f_reader = UCIFastReader(training_filename, 1, feat_dim)
l_reader = UCIFastReader(training_filename, 0, 1, label_dim,
os.path.join("Data", "labelsmap.txt"))
f_reader_t = UCIFastReader(test_filename, 1, feat_dim)
l_reader_t = UCIFastReader(test_filename, 0, 1, label_dim,
os.path.join("Data", "labelsmap.txt"))
training_reader = C.CNTKTextFormatReader(training_filename)
test_reader = C.CNTKTextFormatReader(test_filename)
h1 = add_dnn_sigmoid_layer(feat_dim, hidden_dim, feats_scaled, 1)
out = add_dnn_layer(hidden_dim, label_dim, h1, 1)
out.tag = 'output'
ec = CrossEntropyWithSoftmax(labels, out)
ec = C.cross_entropy_with_softmax(labels, out)
ec.name = criterion_name
ec.tag = 'criterion'
eval = C.ops.square_error(labels, out)
eval.name = eval_name
eval.tag = 'eval'
# Specify the training parameters (settings are scaled down)
my_sgd = SGDParams(epoch_size=600, minibatch_size=32,
learning_ratesPerMB=0.1, max_epochs=5, momentum_per_mb=0)
my_sgd = C.SGDParams(epoch_size=600, minibatch_size=32,
learning_rates_per_mb=0.1, max_epochs=5, momentum_per_mb=0)
# Create a context or re-use if already there
with LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
with C.LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
# CNTK actions
ctx.train(ec, my_sgd, {features: f_reader, labels: l_reader})
ctx.write({features: f_reader_t, labels: l_reader_t})
print(ctx.test({features: f_reader_t, labels: l_reader_t}))
ctx.train(
root_nodes=[ec, eval],
training_params=my_sgd,
input_map=training_reader.map(labels, alias='labels', dim=label_dim).map(features, alias='features', dim=feat_dim))
result = ctx.test(
root_nodes=[ec, eval],
input_map=test_reader.map(labels, alias='labels', dim=label_dim).map(features, alias='features', dim=feat_dim))
return result
def _test_mnist_onelayer_from_file():
result = train_eval_mnist_onelayer_from_file('crit_node', 'eval_node')
TOLERANCE_ABSOLUTE = 1E-06
assert result['SamplesSeen'] == 10000
assert np.allclose(result['Perplexity'], 7.6323031, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['crit_node'], 2.0323896, atol=TOLERANCE_ABSOLUTE)
assert np.allclose(result['eval_node'], 1.9882504, atol=TOLERANCE_ABSOLUTE)
if __name__ == "__main__":
print(train_eval_mnist_onelayer_from_file('crit_node', 'eval_node'))
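The largest change in this file is the switch from per-column UCIFastReader instances to a single CNTKTextFormatReader whose chained .map() calls bind each network input to a named stream in the text file. A minimal sketch of that mapping pattern, assuming the reader API used above (aliases and dimensions are the ones from this example):

    import cntk as C

    # the two input nodes of the network, as declared in the example above
    features = C.input(784)
    labels = C.input(10)

    reader = C.CNTKTextFormatReader('Train-28x28_text.txt')

    # one .map() call per input: 'alias' names the stream inside the text file,
    # 'dim' is the sample dimension of that stream; the calls chain into one map
    input_map = reader.map(labels, alias='labels', dim=10) \
                      .map(features, alias='features', dim=784)

    # the resulting map is then handed to the execution context, e.g.
    # ctx.train(root_nodes=[ec, eval], training_params=my_sgd, input_map=input_map)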

View file

@@ -665,70 +665,69 @@ def input(shape, dynamic_axis='', name=None):
return Input(shape, dynamicAxis=dynamic_axis, name=name)
def parameter(shape=None, value=0, learning_rate_multiplier=1.0, init='uniform',
init_value_scale=1, init_from_file_path='', init_from_literal=None,
random_seed=-1, name=None):
def parameter(shape=None, value=None, learning_rate_multiplier=1.0,
init_from_file_path=None, name=None):
"""
It creates a parameter tensor.
Args:
shape (tuple or int): the shape of the input tensor. If `init='fromLiteral'`, shape is not needed as it will be inferred from the literal.
value: a scalar initial value that would be replicated for every element in the tensor
learning_rate_multiplier (float):
init (str): 'uniform', 'fromFile' or 'fromLiteral'
init_value_scale (float): a scaling factor for the initial value
init_from_file_path (str): the file that contains the initial tensor value
init_from_literal (ndarray): the numpy array used to initialize the tensor parameter
random_seed (float): the seed used for initialization
shape (tuple or int, optional): the shape of the input tensor. If not provided, it will be inferred from ``value``.
value (scalar or NumPy array, optional): a scalar initial value that is replicated for every element in the tensor, or a NumPy array providing the full initial tensor. If ``None``, the tensor is initialized with uniform random values.
learning_rate_multiplier (float): set to control the learning rate on this particular node
init_from_file_path (str): the file that contains the initial tensor value. Used only if ``value=None``.
name (str, optional): the name of the node in the network
Returns:
:class:`cntk.graph.ComputationNode`
"""
from . import cntk1
# if the parameter is initialized from a literal value
if (init == 'fromLiteral'):
"""
To be as generic as possible, we
- flatten the data
- initialize a ParameterTensor operator with it
- ensure that the graph does not backprop to it.
- finally, we reshape it.
"""
if value is None:
if shape is None:
raise ValueError('you need to specify at least shape or value')
value = init_from_literal
if init_from_file_path:
return cntk1.ParameterTensor(shape, init='fromFile',
learningRateMultiplier=learning_rate_multiplier,
initFromFilePath=init_from_file_path, name=name)
else:
return cntk1.ParameterTensor(shape,
learningRateMultiplier=learning_rate_multiplier,
name=name)
"""
To be as generic as possible, we
- flatten the data
- initialize a ParameterTensor operator with it
- ensure that the graph does not backprop to it.
- finally, we reshape it.
"""
from .. import utils
if not (np.isscalar(value) or utils.is_tensor(value)):
raise ValueError('value type is not supported: %s' % type(value))
from .. import utils
if not (np.isscalar(value) or utils.is_tensor(value)):
raise ValueError('value type is not supported: %s' % type(value))
if isinstance(value, list) or np.isscalar(value):
value = np.asarray(value)
if isinstance(value, list) or np.isscalar(value):
value = np.asarray(value)
import scipy.sparse
if scipy.sparse.issparse(value):
raise ValueError('only dense data is supported')
import scipy.sparse
if scipy.sparse.issparse(value):
raise ValueError('only dense data is supported')
param_shape = value.shape if value.shape else (1,)
literal_shape = (param_shape[0], np.multiply.reduce(param_shape[1:]))
param_shape = value.shape if value.shape else (1,)
literal_shape = (param_shape[0], np.multiply.reduce(param_shape[1:]))
literal_array = np.reshape(value, literal_shape)
literal_array = np.reshape(value, literal_shape)
from io import BytesIO
s = BytesIO()
np.savetxt(s, literal_array, '%.4f')
from io import BytesIO
s = BytesIO()
np.savetxt(s, literal_array, '%.4f')
return cntk1.ParameterTensor(
dims=param_shape,
learningRateMultiplier=learning_rate_multiplier,
init='fromLiteral',
initFromLiteral=s.getvalue().decode())
else:
return cntk1.ParameterTensor(shape, learning_rate_multiplier, init,
init_value_scale, value, init_from_file_path,
randomSeed=random_seed, name=name)
return cntk1.ParameterTensor(
dims=param_shape,
learningRateMultiplier=learning_rate_multiplier,
init='fromLiteral',
initFromLiteral=s.getvalue().decode())
def constant(value, name=None):
@@ -742,8 +741,7 @@ def constant(value, name=None):
:class:`cntk.graph.ComputationNode`
"""
return parameter(name=name, init='fromLiteral', init_from_literal=value,
learning_rate_multiplier=0.0)
return parameter(value=value, learning_rate_multiplier=0.0, name=name)
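To make the new dispatch concrete, here is a hedged usage sketch of the ways `parameter()` (and `constant()`, which now delegates to it) can be called under the signature introduced above; the shapes and the file name are made up for illustration:

    import numpy as np
    import cntk as C

    # 1. shape only: no value given, so CNTK initializes the tensor itself
    W = C.parameter(shape=(256, 784))

    # 2. shape plus a file path: the initial tensor is read from disk
    #    ('weights.txt' is a hypothetical file name)
    E = C.parameter(shape=(256, 784), init_from_file_path='weights.txt',
                    learning_rate_multiplier=0.0)

    # 3. a literal value: a scalar or NumPy array is flattened and embedded
    #    into the graph via init='fromLiteral'
    b = C.parameter(value=np.zeros((256, 1)))

    # constant() is now just a frozen parameter built from a literal value
    half = C.constant(0.5)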
def dynamic_axis(name=None):

View file

@@ -1,5 +1,4 @@
# Copyright (c) Microsoft. All rights reserved.
#Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
@@ -27,8 +26,7 @@ class SGDParams:
* **momentum_per_sample**: momentum per sample. Useful when you want to keep the momentum per sample constant, i.e., automatically scales the effective momentum for the minibatch when the minibatch size is changed. Can use syntax such as 0.9996*10:0.998, which means using the per-sample momentum 0.9996 for 10 epochs and then 0.998 for the rest. momentumPerSample may be missing, for example, when momentumPerMB is provided.
* **momentum_as_time_constant**: number of samples after which the contribution is decayed to e^-1
* **auto_adjust parameters**: they represent information related to the automatic learning rate control.
* **auto_adjust_lr**: the automatic learning rate adjustment algorithm to use. Valid values are None (default, don’t auto adjust learning rate), AdjustAfterEpoch (check the training criterion after each epoch using the development set of the training set and decide whether to adjust the learning rate), and SearchBeforeEpoch (search the learning rate based on a small portion of the training set before each epoch starts).
* **auto_adjust_lr**: the automatic learning rate adjustment algorithm to use. Valid values are None (default, don't auto adjust learning rate), AdjustAfterEpoch (check the training criterion after each epoch using the development set of the training set and decide whether to adjust the learning rate), and SearchBeforeEpoch (search the learning rate based on a small portion of the training set before each epoch starts).
* When used in the AdjustAfterEpoch mode
* **reduce_learn_rate_if_improve_less_than**: reduce the learning rate if the improvement is less than this value. Default is 0.
* **learn_rate_decrease_factor**: the learning rate decrease factor. Default value is 0.618.
@@ -38,12 +36,12 @@ class SGDParams:
* **learn_rate_adjust_interval**: determine the frequency of applying the learning rate adjustment check. Default is 1 epoch. If this value is set to a value larger than 1 the learning rate adjustment will be based on the average criterion computed from the last learnRateAdjustInterval epochs.
* When used in the SearchBeforeEpoch mode.
* **numMiniBatch4LRSearch**: the number of minibatches used to search the learning rate. Default value is 500. It’s typically set to 10-20% of the total minibatches in an epoch.
* **numMiniBatch4LRSearch**: the number of minibatches used to search the learning rate. Default value is 500. It's typically set to 10-20% of the total minibatches in an epoch.
* **num_prev_learn_rate**: number of previous learning rates used as a hint to the search range. Default value is 5.
* **num_best_search_epoch**: number of epochs in which we use the best learning rate instead of the sufficient learning rate. Default value is 1.
* When used in the 'AdaptiveMinibatchSizing' mode
* **num_minibatch_for_lr_search**: the number of minibatches used to search the minibatch size when in adaptive minibatch size mode. Default value is 500. It’s typically set to 10-20% of the total minibatches in an epoch; this is shared with the search for learning rate in SearchBeforeEpoch mode.
* **num_minibatch_for_lr_search**: the number of minibatches used to search the minibatch size when in adaptive minibatch size mode. Default value is 500. It's typically set to 10-20% of the total minibatches in an epoch; this is shared with the search for learning rate in SearchBeforeEpoch mode.
* **auto_adjust_minibatch**: enable or disable adaptive adjustment of the minibatch size. Default value is false. Adaptive minibatch sizing begins after the epochs for which the user explicitly specified minibatch sizes are complete. For example, if the user specified minibatchSize=256:1024, then 256 and 1024 are used in the first 2 epochs and adaptive minibatch sizing is used afterwards.
* **minibatch_size_tuning_frequency**: The number of epochs to skip, on a periodic basis, before dynamically adjusting the minibatch size. Default value is 1.
* **minibatch_size_tuning_max**: The maximum size allowed for an adaptively adjusted minibatch size. Default value is 1048576.
@@ -85,9 +83,7 @@ class SGDParams:
* Gradient Check
* **gradient_check**: determines whether to use the gradient checker. The default value is false. When using the gradient checker you need to use a minibatch size that is larger than the sequence length for RNNs due to the truncated backpropagation through time (BPTT) algorithm used to train RNNs, and a smaller learning rate to prevent numerical issues caused by divergence. In addition, precision should be set to double.
"""
def __init__(self,
model_path=None,
train_criterion_node_name=None,
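As a usage illustration of the options documented above, the scaled-down configuration used by the MNIST example earlier in this commit is built like this (values are illustrative; see the docstring for the full option list):

    import cntk as C

    my_sgd = C.SGDParams(epoch_size=600, minibatch_size=32,
                         learning_rates_per_mb=0.1, max_epochs=5,
                         momentum_per_mb=0)
    # learning-rate auto adjustment could be requested through the auto_adjust
    # options described above, e.g. auto_adjust_lr='AdjustAfterEpoch'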

View file

@@ -19,9 +19,10 @@ def eval(node):
# [array([[[-150., 200.], [5., 10.]]])]
Args:
node (cntk.graph.ComputationNode): the node to evaluate
node (:class:`cntk.graph.ComputationNode`): the node to evaluate
Returns:
numpy array containing the result
NumPy array containing the result
"""
from cntk.context import get_context
@@ -31,15 +32,15 @@ def eval(node):
ctx = get_context()
first = True
# the params are passed as arrays e.g. plus([1,2], [3,4]), we need to
# wrap them with input and parameter nodes
# The params are passed as arrays, e.g. plus([1,2], [3,4]), and we need to
# wrap them with input and parameter nodes.
if node.params:
for p in node.params:
if p in node.inputs:
val = getattr(node, p)
# one param needs to be an Input() node. This is being fixed in
#CNTK we will remove this workaround once we can evaluate a
#network with no inputs
# One param needs to be an Input() node. This will be fixed in
# CNTK soon, so that we can remove this workaround and evaluate a
# network with no inputs.
if first:
if not isinstance(val, list):
# inputs have the outermost dimension for sequences
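Finally, a hedged sketch of how the `eval()` helper above is typically used for one-off evaluation of a small expression, mirroring the docstring example at the top of this hunk (assuming `eval` is re-exported at package level alongside the other ops, as `C.eval`):

    import cntk as C

    # plus() receives plain Python lists; eval() wraps them in input and
    # parameter nodes behind the scenes, as the comments above describe
    result = C.eval(C.plus([1, 2], [3, 4]))
    print(result)   # roughly [array([[ 4.,  6.]])]; the exact shape depends on
                    # how the sequence/batch axes are reported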