This commit is contained in:
jeanfad 2016-09-06 17:08:33 +02:00
Parent ea012e808a
Commit 5f9778b841
3 changed files with 71 additions and 32 deletions

View file

@@ -17,11 +17,11 @@ import numpy as np
DATATYPE = np.float32

class Trainer(Trainer):
    """
    '''
    Trainer to train the specified `model` with the specified `loss_function` as the training
    criterion, the specified `eval_function` as the criterion for evaluating the trained
    model's quality, and the specified set of `parameters` for updating the model's
    parameters from computed gradients.
    """
    '''
    def __init__(self, model, loss_function, eval_function, parameters):
        if isinstance(model, Variable):
            model = model.owner
@@ -30,3 +30,35 @@ class Trainer(Trainer):
        if isinstance(eval_function, Variable):
            eval_function = eval_function.owner
        super(Trainer, self).__init__(model, loss_function, eval_function, parameters)
    def train_minibatch(self, arguments, device=None):
        '''
        Optimize model parameters using the specified minibatch of training samples in `arguments`.
        Returns `False` if all parameter learners indicate end of learning (through their update
        method's return value).
        Args:
            arguments (dict): maps input variables to the data; the data should be either NumPy
              arrays or cntk.Value instances returned by a minibatch source
            device (:class:`cntk.DeviceDescriptor`): the device descriptor that contains the type
              and id of the device
        Returns:
            bool
        '''
        if not device:
            device = DeviceDescriptor.use_default_device()
        arguments = sanitize_var_map(arguments, add_batch_axis=True)
        return super(Trainer, self).train_minibatch(arguments, device)
    def test_minibatch(self, arguments, device=None):
        '''
        Test the model on the specified minibatch of samples using the evaluation Function
        specified during construction of the Trainer.
        Returns the average evaluation criterion value per sample for the tested minibatch.
        Args:
            arguments (dict): maps input variables to the data; the data should be either NumPy
              arrays or cntk.Value instances returned by a minibatch source
            device (:class:`cntk.DeviceDescriptor`): the device descriptor that contains the type
              and id of the device
        Returns:
            float
        '''
        if not device:
            device = DeviceDescriptor.use_default_device()
        arguments = sanitize_var_map(arguments, add_batch_axis=True)
        return super(Trainer, self).test_minibatch(arguments, device)
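For orientation, here is a minimal usage sketch of the new dict-based methods; the names (`trainer`, `input`, `label`) are assumed to be set up as in the feedforward example further down, and the shapes are purely illustrative:

import numpy as np
# Hypothetical usage sketch: plain NumPy arrays keyed by input variables
features = np.random.randn(25, 2).astype(np.float32)                 # 25 samples, 2 features
labels = np.eye(2, dtype=np.float32)[np.random.randint(0, 2, 25)]    # one-hot labels
trainer.train_minibatch({input : features, label : labels})          # False once all learners signal end of learning
avg_error = trainer.test_minibatch({input : features, label : labels})  # average criterion value per sample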

View file

@@ -307,7 +307,7 @@ def pad_to_dense(batch):
        Z[idx, :len(seq)] += seq
    return Z

def sanitize_batch(batch, data_type, dev):
def sanitize_batch(batch, data_type=None, dev=None):
    """
    Convert to Value with `data_type`. If the samples in `batch` have different
    sequence lengths, pad them to max sequence length and create a mask.
@@ -347,7 +347,7 @@ def sanitize_batch(batch, data_type, dev):
        batch = pad_to_dense(batch)
    # If it still is not a NumPy array, try brute force...
    if not isinstance(batch, np.ndarray) or batch.dtype != data_type:
    if not isinstance(batch, np.ndarray):
        batch = np.asarray(batch, dtype=data_type)
    '''
@@ -371,7 +371,7 @@ def sanitize_batch(batch, data_type, dev):
    return value

def sanitize_var_map(input_map, precision_numpy, device):
def sanitize_var_map(input_map, precision_numpy=None, device=None, add_batch_axis=False):
    '''
    Sanitizes a dictionary mapping `Variable`s to input data such that it can be
    handed off to the `Forward` method.
@@ -380,6 +380,7 @@ def sanitize_var_map(input_map, precision_numpy, device):
        input_map (`dict`): maps `Variable`s to input data (NumPy array or simple list of lists)
        precision_numpy: np.float32 or np.float64
        device: CNTK DeviceDescriptor
        add_batch_axis (bool): if `True` and the data does not have a batch axis, add one before creating the NDArrayView
    Returns:
        `dict` that maps variables to sanitized batches
@@ -387,7 +388,13 @@ def sanitize_var_map(input_map, precision_numpy, device):
    var_map = {}
    if input_map:
        for var, batch in input_map.items():
            from ..cntk_py import Value
            if not isinstance(batch, Value):
                if add_batch_axis:
                    batch = [batch]
                if isinstance(batch, np.ndarray):
                    if batch.dtype == np.int:
                        batch = batch.astype(np.float32)
                    if batch.dtype not in (np.float32, np.float64):
                        raise ValueError('only float32 and float64 are supported')
                batch = sanitize_batch(batch, precision_numpy, device)
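To make the `add_batch_axis` behavior concrete, here is a standalone NumPy-only sketch of the wrapping step; the helper name is made up for illustration, and the real conversion is done by `sanitize_batch` and the NDArrayView machinery:

import numpy as np

def wrap_single_sample(sample, dtype=np.float32):
    # A lone sample has no batch axis; listing it adds one,
    # mirroring the `batch = [batch]` step above.
    return np.asarray([sample], dtype=dtype)

wrap_single_sample([1.0, 2.0]).shape   # (1, 2): a batch of one two-feature sample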

View file

@@ -11,6 +11,19 @@ from cntk import learning_rates_per_sample, DeviceDescriptor, Trainer, sgd_learner
from cntk.ops import input_variable, cross_entropy_with_softmax, combine, classification_error, sigmoid
from examples.common.nn import fully_connected_classifier_net, print_training_progress
def generate_random_data(sample_size, feature_dim, num_classes):
    # Create synthetic data using NumPy.
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    # Make sure that the data is separable
    X = (np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)
    X = X.astype(np.float32)
    # Convert class 0 into the vector "1 0 0",
    # class 1 into the vector "0 1 0", ... (one-hot encoding)
    class_ind = [Y == class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y
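As a quick check of the one-hot conversion, with `num_classes=3` the stacked boolean masks yield one row of one-hot floats per sample:

Y = np.array([[0], [2], [1]])
class_ind = [Y == c for c in range(3)]              # three (3, 1) boolean columns
np.asarray(np.hstack(class_ind), dtype=np.float32)
# -> [[1., 0., 0.],
#     [0., 0., 1.],
#     [0., 1., 0.]]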
# Creates and trains a feedforward classification model
def ffnet():
    input_dim = 2
@@ -28,17 +41,6 @@ def ffnet():
    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)
    rel_path = r"../../../../Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'
    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)
    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.02)
    trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)])
@@ -50,11 +52,9 @@ def ffnet():
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20
    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)
        features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {input : mb[features_si].m_data, label : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)
        trainer.train_minibatch({input : features, label : labels})
        print_training_progress(trainer, i, training_progress_output_freq)
if __name__ == '__main__':
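Since training data is now generated on the fly, the same helper could feed the new `test_minibatch` method once the loop finishes; this is an illustrative follow-up inside ffnet, not part of the commit:

# Hypothetical evaluation on a fresh random batch (after the training loop)
test_features, test_labels = generate_random_data(1024, input_dim, num_output_classes)
avg_error = trainer.test_minibatch({input : test_features, label : test_labels})
print('Average error on a held-out batch: {}'.format(avg_error))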