CNTK v2 library: Add binary CrossEntropy operator

Author: Amit Agarwal
Date: 2016-11-08 14:33:07 -08:00
Parent: 2ca454bd84
Commit: e2cf02a609
8 changed files with 112 additions and 28 deletions

View file

@@ -2699,6 +2699,16 @@ namespace CNTK
         return TransposeTimes(leftOperand, rightOperand, /*outputRank =*/ 1, name);
     }
 
+    ///
+    /// Create an instance of the CNTK built-in operation to compute binary cross-entropy for specified input operands.
+    ///
+    CNTK_API FunctionPtr BinaryCrossEntropy(const Variable& prediction, const Variable& targets, const std::wstring& name = L"");
+
+    ///
+    /// Create an instance of the CNTK built-in operation to compute weighted binary cross-entropy for specified input operands.
+    ///
+    CNTK_API FunctionPtr WeightedBinaryCrossEntropy(const Variable& prediction, const Variable& targets, const Variable& weights, const std::wstring& name = L"");
+
     ///
     /// Create an instance of the CNTK built-in operation to compute squared-error for specified input operands.
     ///
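For reference, the quantity these declarations expose is the standard (weighted) binary cross-entropy over the prediction p, targets t, and weights w named in the signatures above; how the result is reduced over samples is decided by the underlying LogisticNode, not by this header:

\[
\mathrm{BCE}(p, t) = -\sum_i \bigl( t_i \log p_i + (1 - t_i) \log(1 - p_i) \bigr),
\qquad
\mathrm{WBCE}(p, t, w) = -\sum_i w_i \bigl( t_i \log p_i + (1 - t_i) \log(1 - p_i) \bigr)
\]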

View file

@@ -232,6 +232,8 @@ namespace CNTK
             primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOffset] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
             opType = PrimitiveOpType::FutureValue;
         }
+        else if (node->OperationName() == OperationNameOf(LogisticNode))
+            opType = PrimitiveOpType::Logistic;
         else if (node->OperationName() == OperationNameOf(SquareErrorNode))
             opType = PrimitiveOpType::SquaredError;
         else if (node->OperationName() == OperationNameOf(CrossEntropyWithSoftmaxNode))

View file

@@ -634,10 +634,16 @@ namespace CNTK
         if (outputDataType == DataType::Unknown)
             outputDataType = firstKnownInputDataType;
 
-        // We currently require that the inputs' dynamic axes if any match
+        // We currently require that the inputs' dynamic axes, if any, match
         std::vector<Axis> outputDynamicAxes;
-        if ((op == PrimitiveOpType::SumAll) || (op == PrimitiveOpType::SquaredError) || (op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::ClassificationError))
+        if ((op == PrimitiveOpType::SumAll) ||
+            (op == PrimitiveOpType::SquaredError) ||
+            (op == PrimitiveOpType::CrossEntropyWithSoftmax) ||
+            (op == PrimitiveOpType::ClassificationError) ||
+            (op == PrimitiveOpType::Logistic))
+        {
             outputDynamicAxes = std::vector<Axis>({});
+        }
         else if (op == PrimitiveOpType::Where)
         {
             if (functionConfig.Contains(PrimitiveFunction::AttributeNameNewDynamicAxes))
@@ -889,9 +895,9 @@
             case PrimitiveOpType::Convolution:
             {
                 assert(inputs.size() == 2);
-                auto& strides = functionConfig[PrimitiveFunction::AttributeNameStrides].Value<NDShape>();
-                auto& lowerPad = functionConfig[PrimitiveFunction::AttributeNameLowerPad].Value<NDShape>();
-                auto& upperPad = functionConfig[PrimitiveFunction::AttributeNameUpperPad].Value<NDShape>();
+                auto& strides = functionConfig[PrimitiveFunction::AttributeNameStrides].Value<NDShape>();
+                auto& lowerPad = functionConfig[PrimitiveFunction::AttributeNameLowerPad].Value<NDShape>();
+                auto& upperPad = functionConfig[PrimitiveFunction::AttributeNameUpperPad].Value<NDShape>();
                 auto sharing = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameSharing].Value<std::vector<DictionaryValue>>());
                 auto autoPadding = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameAutoPadding].Value<std::vector<DictionaryValue>>());
                 bool transpose = functionConfig[PrimitiveFunction::AttributeNameTranspose].Value<bool>();
@@ -900,23 +906,24 @@
                 NDShape outputMapCount, kernelShape;
                 std::tie(outputMapCount, kernelShape) = GetConvolutionOutputMapCountAndKernelShape(inputs[0].Shape(), inputs[1].Shape());
-                auto originalKernelShape = kernelShape;
-                outputShape = ConvolutionOpOutputShape(op, inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, inferDimensions);
-                if (originalKernelShape != kernelShape)
-                {
-                    for (size_t i = 0; i < kernelShape.Rank(); ++i)
-                        inputs[0].m_dataFields->m_shape[i] = kernelShape[i];
-                }
+                auto originalKernelShape = kernelShape;
+                outputShape = ConvolutionOpOutputShape(op, inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, inferDimensions);
+                if (originalKernelShape != kernelShape)
+                {
+                    for (size_t i = 0; i < kernelShape.Rank(); ++i)
+                        inputs[0].m_dataFields->m_shape[i] = kernelShape[i];
+                }
 
-                functionConfig[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
-                functionConfig[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
+                functionConfig[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
+                functionConfig[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
 
                 break;
             }
+            case PrimitiveOpType::Logistic:
             case PrimitiveOpType::SquaredError:
             case PrimitiveOpType::CrossEntropyWithSoftmax:
             case PrimitiveOpType::ClassificationError:
             {
-                if (op == PrimitiveOpType::ClassificationError)
+                if ((op == PrimitiveOpType::ClassificationError) || (op == PrimitiveOpType::Logistic))
                     assert(inputs.size() >= 2);
                 else
                     assert(inputs.size() == 2);
@@ -929,9 +936,9 @@
                 if (predictionShape != labelsShape)
                     RuntimeError("Prediction output operand's shape %S is incompatible with label operand's shape %S for the %S operation", AsStringForErrorReporting(predictionShape).c_str(), AsStringForErrorReporting(labelsShape).c_str(), PrimitiveOpTypeName(op).c_str());
 
-                std::vector<int> reductionAxes;
-                for (int i = 0; i < (int)inputs[0].Shape().Rank(); ++i)
-                    reductionAxes.push_back(i);
+                std::vector<int> reductionAxes;
+                for (int i = 0; i < (int)inputs[0].Shape().Rank(); ++i)
+                    reductionAxes.push_back(i);
 
                 outputShape = ReductionOpOutputShape(op, predictionShape, reductionAxes, /*preserveReductionAxes =*/ false);
                 break;
@@ -1613,6 +1620,9 @@
                 computationNodePtr = New<ConvolutionNode<ElementType>>(network->GetDeviceId(), internalNodeName, AsTensorShape(kernelShape), AsTensorShape(outputMapCount), AsTensorShape(strides), sharing, autoPadding, AsTensorShape(lowerPad), AsTensorShape(upperPad), transpose, ImageLayoutKind::CHW, maxTempMemSizeInSamples);
                 break;
             }
+            case PrimitiveOpType::Logistic:
+                computationNodePtr = New<LogisticNode<ElementType>>(network->GetDeviceId(), internalNodeName);
+                break;
             case PrimitiveOpType::SquaredError:
                 computationNodePtr = New<SquareErrorNode<ElementType>>(network->GetDeviceId(), internalNodeName);
                 break;
@@ -2792,6 +2802,18 @@
         return BinaryOp(PrimitiveOpType::TransposeTimes, leftOperand, rightOperand, std::move(additionalProperties), name);
     }
 
+    FunctionPtr BinaryCrossEntropy(const Variable& prediction, const Variable& targets, const std::wstring& name)
+    {
+        std::vector<Variable> operands = { prediction, targets };
+        return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Logistic, operands, Dictionary(), name), name);
+    }
+
+    FunctionPtr WeightedBinaryCrossEntropy(const Variable& prediction, const Variable& targets, const Variable& weights, const std::wstring& name)
+    {
+        std::vector<Variable> operands = { prediction, targets, weights };
+        return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Logistic, operands, Dictionary(), name), name);
+    }
+
     FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name)
     {
         auto difference = Minus(prediction, targets);
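Both factory functions lower to the same Logistic primitive; the weighted variant only adds a third operand. As a reading aid, here is a minimal NumPy sketch of the computation under the standard definition given earlier; it is an illustration, not code from this commit, and the actual LogisticNode may differ in its reduction and numerical guarding:

import numpy as np

def binary_cross_entropy_ref(prediction, targets, weights=None, eps=1e-7):
    # Clip predictions away from 0 and 1 to avoid log(0); the epsilon
    # guard is an assumption made for this sketch.
    p = np.clip(prediction, eps, 1.0 - eps)
    per_element = -(targets * np.log(p) + (1.0 - targets) * np.log(1.0 - p))
    if weights is not None:
        per_element = weights * per_element  # the weighted variant's third operand
    return per_element.sum()

p = np.array([0.9, 0.2, 0.7])
t = np.array([1.0, 0.0, 1.0])
print(binary_cross_entropy_ref(p, t))                             # unweighted
print(binary_cross_entropy_ref(p, t, np.array([1.0, 2.0, 1.0])))  # weighted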

View file

@@ -65,7 +65,7 @@
         {PrimitiveOpType::Times, L"Times"},
         {PrimitiveOpType::TransposeTimes, L"TransposeTimes"},
         {PrimitiveOpType::Convolution, L"Convolution"},
-        {PrimitiveOpType::SquaredError, L"SquaredError"},
+        { PrimitiveOpType::SquaredError, L"SquaredError" },
         {PrimitiveOpType::CrossEntropyWithSoftmax, L"CrossEntropyWithSoftmax"},
         {PrimitiveOpType::ClassificationError, L"ClassificationError"},
         {PrimitiveOpType::PastValue, L"PastValue"},
@@ -79,6 +79,7 @@
         {PrimitiveOpType::RandomSample, L"RandomSample"},
         {PrimitiveOpType::RandomSampleInclusionFrequency, L"RandomSampleInclusionFrequency"},
         {PrimitiveOpType::ROIPooling, L"ROIPooling"},
+        { PrimitiveOpType::Logistic, L"Logistic" },
     };
 
     inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
@@ -103,7 +104,15 @@
             if (numFunctionInputs > 2)
                 indexMap.insert({2, 2});
         }
-        else if ((op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::GatherPacked))
+        else if (op == PrimitiveOpType::Logistic)
+        {
+            indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
+            if (numFunctionInputs > 2)
+                indexMap.insert({ 2, 2 });
+        }
+        else if (op == PrimitiveOpType::CrossEntropyWithSoftmax)
+            indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
+        else if (op == PrimitiveOpType::GatherPacked)
             indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
         else if (op == PrimitiveOpType::ScatterPacked)
             indexMap = std::unordered_map<size_t, size_t>({ { 0, 2 }, { 1, 1 }, { 2, 0 } });

View file

@@ -57,6 +57,7 @@
         RandomSample = 45,
         RandomSampleInclusionFrequency = 46,
         ROIPooling = 47,
+        Logistic = 48,
         // New op types should only be appended to the end of this list.
     };
 }

View file

@@ -395,7 +395,7 @@
         // If input data is sparse, then gradient is block sparse.
         if (InputRef(1).Value().GetMatrixType() == SPARSE && InputRef(0).Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
         {
-            // We need a sparse matrix for the gradient. However, we should allocate a new one instead of switching the type in place
+            // We need a sparse matrix for the gradient. We allocate a new one instead of switching the type in place
             // since switching in place may affect other nodes who share this matrix due to memory sharing
             auto& currentInput0GradientMatrixRef = InputRef(0).Gradient();
             auto newInput0SparseGradientMatrix = std::make_shared<Matrix<ElemType>>(currentInput0GradientMatrixRef.GetNumRows(),
@@ -556,7 +556,7 @@
         {
             Input(0)->CreateGradientMatrixIfNull();
 
-            // We need a sparse matrix for the gradient. However, we should allocate a new one instead of switching the type in place
+            // We need a sparse matrix for the gradient. We allocate a new one instead of switching the type in place
             // since switching in place may affect other nodes who share this matrix due to memory sharing
             auto& currentInput0GradientMatrixRef = InputRef(0).Gradient();
             if (currentInput0GradientMatrixRef.GetMatrixType() != SPARSE)

View file

@@ -126,7 +126,7 @@ void RandomSampleNode<ElemType>::ForwardPropNonLooping()
     if (ValueAsMatrix().GetMatrixType() != SPARSE)
     {
         // BUGBUG: matrix type should be configured during validation
-        // We should allocate a new one instead of switching the type in place since switching in place may
+        // Note: We allocate a new one instead of switching the type in place since switching in place may
         // affect other nodes who share this matrix due to memory sharing
         auto newSparseValueMatrix = std::make_shared<Matrix<ElemType>>(ValueAsMatrix().GetNumRows(), ValueAsMatrix().GetNumCols(), CPUDEVICE, SPARSE, matrixFormatSparseCSC);
 #ifdef _MSC_VER
@@ -140,10 +140,7 @@ void RandomSampleNode<ElemType>::ForwardPropNonLooping()
     // TODO: Should we prepare the CSC data directly on the CPU and move it in one go?
     // Currently the reader will place the data onto the GPU. It will then be pulled on-demand to the CPU once (and cached there).
-    valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ false);
-    // BUGUBUG: This is a no-op; was the intent to change the preferred device to CPU?
-    valueMatrix.SetDevice(CPUDEVICE);
+    valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ true);
     valueMatrix.Reset();
 
     // Get vector with indices of randomly sampled classes

View file

@@ -59,9 +59,52 @@ def alias(x, name=''):
     return alias(x, name)
 
 ##########################################################################
-# evaluation ops
+# loss and evaluation ops
 ##########################################################################
 
+@typemap
+def binary_cross_entropy(output, target, name=''):
+    r'''
+    This operation computes the binary cross entropy between the ``output`` and ``target``.
+
+    Example:
+        TBA
+
+    Args:
+        output: the computed posterior probability from the network
+        target: ground-truth label, 0 or 1
+        name (`str`, optional): the name of the Function instance in the network
+    Returns:
+        :class:`cntk.ops.functions.Function`
+    '''
+    from cntk.cntk_py import binary_cross_entropy
+    dtype = get_data_type(output, target)
+    output = sanitize_input(output, dtype)
+    target = sanitize_input(target, dtype)
+    return binary_cross_entropy(output, target, name)
+
+@typemap
+def weighted_binary_cross_entropy(output, target, weight, name=''):
+    r'''
+    This operation computes the weighted binary cross entropy between the ``output`` and ``target``.
+
+    Example:
+        TBA
+
+    Args:
+        output: the computed posterior probability from the network
+        target: ground-truth label, 0 or 1
+        weight: weight of each example
+        name (`str`, optional): the name of the Function instance in the network
+    Returns:
+        :class:`cntk.ops.functions.Function`
+    '''
+    from cntk.cntk_py import weighted_binary_cross_entropy
+    dtype = get_data_type(output, target, weight)
+    output = sanitize_input(output, dtype)
+    target = sanitize_input(target, dtype)
+    weight = sanitize_input(weight, dtype)
+    return weighted_binary_cross_entropy(output, target, weight, name)
+
 @typemap
 def cross_entropy_with_softmax(output_vector, target_vector, axis=-1, name=''):
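Since the docstring examples above are still TBA, here is a hypothetical usage sketch; input_variable and its availability in cntk.ops follow the broader CNTK v2 Python API and are assumptions, not part of this diff:

from cntk.ops import input_variable, binary_cross_entropy, weighted_binary_cross_entropy

p = input_variable(3)  # computed posterior probabilities, expected in (0, 1)
t = input_variable(3)  # ground-truth labels, 0 or 1
w = input_variable(3)  # per-example weights

bce = binary_cross_entropy(p, t)
wbce = weighted_binary_cross_entropy(p, t, w)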