This commit is contained in:
William Darling 2016-05-03 15:39:20 +02:00
Parents: 9201ebf6fd 2e922ac1af
Commit: 26f5a43917
2 changed files with 42 additions and 36 deletions

View file

@ -15,21 +15,27 @@ import numpy as np
def cross_entropy_with_softmax(target_vector, output_vector, name=None):
    r"""
    This operation computes the cross entropy over the softmax of the
    `output_vector`. It expects the `output_vector` as unscaled, and it
    computes softmax over the `output_vector` internally. Any `output_vector`
    input over which softmax is already computed before passing to this
    operator will be incorrect.

    :math:`cross\_entropy\_with\_softmax(t, o) = {-{\sum_{i \in \{1,len(t)\}} t_i \log(softmax(o_i)) }}`

    See also :func:`cntk.ops.softmax`.

    Example:
        >>> C.eval(C.cross_entropy_with_softmax([0., 0., 0., 1.], [1., 1., 1., 50.]))
        #[0.]

        >>> C.eval(C.cross_entropy_with_softmax([0.35, 0.15, 0.05, 0.45], [1., 2., 3., 4.]))
        #[1.84]

    Args:
        target_vector: usually it is one-hot vector where the hot bit
         corresponds to the label index. But it can be any probability
         distribution over the labels.
        output_vector: the unscaled computed output values from the network
        name: the name of the node in the network
    Returns:
        :class:`cntk.graph.ComputationNode`
    """
    # Deferred import avoids a circular dependency at module load time.
    from cntk.ops.cntk1 import CrossEntropyWithSoftmax
    return CrossEntropyWithSoftmax(target_vector, output_vector, name = name)
def square_error(target_matrix, output_matrix, name=None):
    """
    This operation computes the sum of the squared difference between elements
    in the two input matrices. The result is a scalar (i.e., one by one
    matrix). This is often used as a training criterion node.

    Example:
        >>> C.eval(C.square_error([4., 6.], [2., 1.]))
        #[29.]

        >>> C.eval(C.square_error([1., 2.], [1., 2.]))
        #[0.]

    Args:
        target_matrix: target matrix, it is usually a one-hot vector where the
         hot bit corresponds to the label index
        output_matrix: the output values from the network
        name: the name of the node in the network
    Returns:
        :class:`cntk.graph.ComputationNode`
    """
    # Deferred import avoids a circular dependency at module load time.
    from cntk.ops.cntk1 import SquareError
    return SquareError(target_matrix, output_matrix, name = name)
def error_prediction(target_vector, output_vector, name=None):
"""
This operator computes the prediction error.It finds the index of the highest
value for each column in the input matrix
and compares it to the actual ground truth label. The result is a scalar
This operation computes the prediction error. It finds the index of the highest
value in the output_vector and compares it to the actual ground truth label
(the index of the hot bit in the target vector). The result is a scalar
(i.e., one by one matrix). This is often used as an evaluation criterion.
It cannot be used as a training criterion though since the gradient is not
defined for this operation.
It cannot be used as a training criterion though since the gradient is not
defined for it.
Example:
>>> C.eval(C.error_prediction([0., 0., 0., 1.], [1., 1., 1., 1.]))
#[1.3862]
>>> C.eval(C.error_prediction([0., 0., 0., 1.], [1., 2., 3., 4.]))
#[0.]
>>> C.eval(C.error_prediction([0.35, 0.15, 0.05, 0.45], [1, 2., 3., 4.]))
#[1.840]
>>> C.eval(C.error_prediction([0., 0., 1., 0.], [1., 2., 3., 4.]))
#[1.]
Args:
target_vector: the target valid probability distribution
output_vector: the unscaled computed values from the network
target_vector: it is one-hot vector where the hot bit corresponds to the
label index
output_vector: the output values from the network
name: the name of the node in the network
Returns:
:class:`cntk.graph.ComputationNode`

View file

@ -37,10 +37,10 @@ def test_op_crossentropywithsoftmax(target_vector, output_vector, device_id, pre
def numpy_op(label, softmax):
return -np.sum(label * np.log(softmax, dtype=PRECISION_TO_TYPE[precision]), dtype=PRECISION_TO_TYPE[precision])
input_target = I([target_vector], has_dynamic_axis=True)
input_features = I([output_vector], has_dynamic_axis=True)
target = I([target_vector], has_dynamic_axis=True)
output = I([output_vector], has_dynamic_axis=True)
op_node = cross_entropy_with_softmax(input_target, input_features)
op_node = cross_entropy_with_softmax(target, output)
#Forward pass test
#==================
@ -63,7 +63,6 @@ def test_op_crossentropywithsoftmax(target_vector, output_vector, device_id, pre
unittest_helper(op_node, None, expected,
device_id=device_id,
precision=precision, clean_up=True, backward_pass=True,
input_node=input_features)
input_node=output)