CNTK v2 library: Add binary CrossEntropy operator

Author: Amit Agarwal
Date: 2016-11-08 14:33:07 -08:00
Parent: 2ca454bd84
Commit: e2cf02a609
8 changed files with 112 additions and 28 deletions

View file

@@ -2699,6 +2699,16 @@ namespace CNTK
         return TransposeTimes(leftOperand, rightOperand, /*outputRank =*/ 1, name);
     }
 
+    ///
+    /// Create an instance of the CNTK built-in operation to compute binary cross-entropy for specified input operands.
+    ///
+    CNTK_API FunctionPtr BinaryCrossEntropy(const Variable& prediction, const Variable& targets, const std::wstring& name = L"");
+
+    ///
+    /// Create an instance of the CNTK built-in operation to compute weighted binary cross-entropy for specified input operands.
+    ///
+    CNTK_API FunctionPtr WeightedBinaryCrossEntropy(const Variable& prediction, const Variable& targets, const Variable& weights, const std::wstring& name = L"");
+
     ///
     /// Create an instance of the CNTK built-in operation to compute squared-error for specified input operands.
     ///
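For reference, the quantity these declarations expose is the standard (weighted) binary cross-entropy over the prediction p, targets t, and weights w named in the signatures above; how the result is reduced over samples is decided by the underlying LogisticNode, not by this header:

\[
\mathrm{BCE}(p, t) = -\sum_i \bigl( t_i \log p_i + (1 - t_i) \log(1 - p_i) \bigr),
\qquad
\mathrm{WBCE}(p, t, w) = -\sum_i w_i \bigl( t_i \log p_i + (1 - t_i) \log(1 - p_i) \bigr)
\]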

View file

@@ -232,6 +232,8 @@ namespace CNTK
             primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOffset] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
             opType = PrimitiveOpType::FutureValue;
         }
+        else if (node->OperationName() == OperationNameOf(LogisticNode))
+            opType = PrimitiveOpType::Logistic;
         else if (node->OperationName() == OperationNameOf(SquareErrorNode))
             opType = PrimitiveOpType::SquaredError;
         else if (node->OperationName() == OperationNameOf(CrossEntropyWithSoftmaxNode))

View file

@@ -634,10 +634,16 @@ namespace CNTK
         if (outputDataType == DataType::Unknown)
             outputDataType = firstKnownInputDataType;
 
-        // We currently require that the inputs' dynamic axes if any match
+        // We currently require that the inputs' dynamic axes, if any, match
         std::vector<Axis> outputDynamicAxes;
-        if ((op == PrimitiveOpType::SumAll) || (op == PrimitiveOpType::SquaredError) || (op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::ClassificationError))
+        if ((op == PrimitiveOpType::SumAll) ||
+            (op == PrimitiveOpType::SquaredError) ||
+            (op == PrimitiveOpType::CrossEntropyWithSoftmax) ||
+            (op == PrimitiveOpType::ClassificationError) ||
+            (op == PrimitiveOpType::Logistic))
+        {
             outputDynamicAxes = std::vector<Axis>({});
+        }
         else if (op == PrimitiveOpType::Where)
         {
             if (functionConfig.Contains(PrimitiveFunction::AttributeNameNewDynamicAxes))
@@ -889,9 +895,9 @@
             case PrimitiveOpType::Convolution:
             {
                 assert(inputs.size() == 2);
-                auto& strides = functionConfig[PrimitiveFunction::AttributeNameStrides].Value<NDShape>();
-                auto& lowerPad = functionConfig[PrimitiveFunction::AttributeNameLowerPad].Value<NDShape>();
-                auto& upperPad = functionConfig[PrimitiveFunction::AttributeNameUpperPad].Value<NDShape>();
+                auto& strides = functionConfig[PrimitiveFunction::AttributeNameStrides].Value<NDShape>();
+                auto& lowerPad = functionConfig[PrimitiveFunction::AttributeNameLowerPad].Value<NDShape>();
+                auto& upperPad = functionConfig[PrimitiveFunction::AttributeNameUpperPad].Value<NDShape>();
                 auto sharing = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameSharing].Value<std::vector<DictionaryValue>>());
                 auto autoPadding = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameAutoPadding].Value<std::vector<DictionaryValue>>());
                 bool transpose = functionConfig[PrimitiveFunction::AttributeNameTranspose].Value<bool>();
@@ -900,23 +906,24 @@
                 NDShape outputMapCount, kernelShape;
                 std::tie(outputMapCount, kernelShape) = GetConvolutionOutputMapCountAndKernelShape(inputs[0].Shape(), inputs[1].Shape());
-                auto originalKernelShape = kernelShape;
-                outputShape = ConvolutionOpOutputShape(op, inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, inferDimensions);
-                if (originalKernelShape != kernelShape)
-                {
-                    for (size_t i = 0; i < kernelShape.Rank(); ++i)
-                        inputs[0].m_dataFields->m_shape[i] = kernelShape[i];
-                }
+                auto originalKernelShape = kernelShape;
+                outputShape = ConvolutionOpOutputShape(op, inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, inferDimensions);
+                if (originalKernelShape != kernelShape)
+                {
+                    for (size_t i = 0; i < kernelShape.Rank(); ++i)
+                        inputs[0].m_dataFields->m_shape[i] = kernelShape[i];
+                }
 
-                functionConfig[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
-                functionConfig[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
+                functionConfig[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
+                functionConfig[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
 
                 break;
             }
+            case PrimitiveOpType::Logistic:
             case PrimitiveOpType::SquaredError:
             case PrimitiveOpType::CrossEntropyWithSoftmax:
             case PrimitiveOpType::ClassificationError:
             {
-                if (op == PrimitiveOpType::ClassificationError)
+                if ((op == PrimitiveOpType::ClassificationError) || (op == PrimitiveOpType::Logistic))
                     assert(inputs.size() >= 2);
                 else
                     assert(inputs.size() == 2);
@@ -929,9 +936,9 @@
                 if (predictionShape != labelsShape)
                     RuntimeError("Prediction output operand's shape %S is incompatible with label operand's shape %S for the %S operation", AsStringForErrorReporting(predictionShape).c_str(), AsStringForErrorReporting(labelsShape).c_str(), PrimitiveOpTypeName(op).c_str());
 
-                std::vector<int> reductionAxes;
-                for (int i = 0; i < (int)inputs[0].Shape().Rank(); ++i)
-                    reductionAxes.push_back(i);
+                std::vector<int> reductionAxes;
+                for (int i = 0; i < (int)inputs[0].Shape().Rank(); ++i)
+                    reductionAxes.push_back(i);
 
                 outputShape = ReductionOpOutputShape(op, predictionShape, reductionAxes, /*preserveReductionAxes =*/ false);
                 break;
@@ -1613,6 +1620,9 @@
                 computationNodePtr = New<ConvolutionNode<ElementType>>(network->GetDeviceId(), internalNodeName, AsTensorShape(kernelShape), AsTensorShape(outputMapCount), AsTensorShape(strides), sharing, autoPadding, AsTensorShape(lowerPad), AsTensorShape(upperPad), transpose, ImageLayoutKind::CHW, maxTempMemSizeInSamples);
                 break;
             }
+            case PrimitiveOpType::Logistic:
+                computationNodePtr = New<LogisticNode<ElementType>>(network->GetDeviceId(), internalNodeName);
+                break;
             case PrimitiveOpType::SquaredError:
                 computationNodePtr = New<SquareErrorNode<ElementType>>(network->GetDeviceId(), internalNodeName);
                 break;
@@ -2792,6 +2802,18 @@
         return BinaryOp(PrimitiveOpType::TransposeTimes, leftOperand, rightOperand, std::move(additionalProperties), name);
     }
 
+    FunctionPtr BinaryCrossEntropy(const Variable& prediction, const Variable& targets, const std::wstring& name)
+    {
+        std::vector<Variable> operands = { prediction, targets };
+        return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Logistic, operands, Dictionary(), name), name);
+    }
+
+    FunctionPtr WeightedBinaryCrossEntropy(const Variable& prediction, const Variable& targets, const Variable& weights, const std::wstring& name)
+    {
+        std::vector<Variable> operands = { prediction, targets, weights };
+        return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Logistic, operands, Dictionary(), name), name);
+    }
+
     FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name)
     {
         auto difference = Minus(prediction, targets);
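Both factory functions lower to the same Logistic primitive; the weighted variant only adds a third operand. As a reading aid, here is a minimal NumPy sketch of the computation under the standard definition given earlier; it is an illustration, not code from this commit, and the actual LogisticNode may differ in its reduction and numerical guarding:

import numpy as np

def binary_cross_entropy_ref(prediction, targets, weights=None, eps=1e-7):
    # Clip predictions away from 0 and 1 to avoid log(0); the epsilon
    # guard is an assumption made for this sketch.
    p = np.clip(prediction, eps, 1.0 - eps)
    per_element = -(targets * np.log(p) + (1.0 - targets) * np.log(1.0 - p))
    if weights is not None:
        per_element = weights * per_element  # the weighted variant's third operand
    return per_element.sum()

p = np.array([0.9, 0.2, 0.7])
t = np.array([1.0, 0.0, 1.0])
print(binary_cross_entropy_ref(p, t))                             # unweighted
print(binary_cross_entropy_ref(p, t, np.array([1.0, 2.0, 1.0])))  # weighted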

View file

@@ -65,7 +65,7 @@
         {PrimitiveOpType::Times, L"Times"},
         {PrimitiveOpType::TransposeTimes, L"TransposeTimes"},
         {PrimitiveOpType::Convolution, L"Convolution"},
-        {PrimitiveOpType::SquaredError, L"SquaredError"},
+        { PrimitiveOpType::SquaredError, L"SquaredError" },
         {PrimitiveOpType::CrossEntropyWithSoftmax, L"CrossEntropyWithSoftmax"},
         {PrimitiveOpType::ClassificationError, L"ClassificationError"},
         {PrimitiveOpType::PastValue, L"PastValue"},
@@ -79,6 +79,7 @@
         {PrimitiveOpType::RandomSample, L"RandomSample"},
         {PrimitiveOpType::RandomSampleInclusionFrequency, L"RandomSampleInclusionFrequency"},
         {PrimitiveOpType::ROIPooling, L"ROIPooling"},
+        { PrimitiveOpType::Logistic, L"Logistic" },
     };
 
     inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
@@ -103,7 +104,15 @@
             if (numFunctionInputs > 2)
                 indexMap.insert({2, 2});
         }
-        else if ((op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::GatherPacked))
+        else if (op == PrimitiveOpType::Logistic)
+        {
+            indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
+            if (numFunctionInputs > 2)
+                indexMap.insert({ 2, 2 });
+        }
+        else if (op == PrimitiveOpType::CrossEntropyWithSoftmax)
+            indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
+        else if (op == PrimitiveOpType::GatherPacked)
             indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
         else if (op == PrimitiveOpType::ScatterPacked)
             indexMap = std::unordered_map<size_t, size_t>({ { 0, 2 }, { 1, 1 }, { 2, 0 } });

View file

@@ -57,6 +57,7 @@
         RandomSample = 45,
         RandomSampleInclusionFrequency = 46,
         ROIPooling = 47,
+        Logistic = 48,
         // New op types should only be appended to the end of this list.
     };
 }

View file

@@ -395,7 +395,7 @@
         // If input data is sparse, then gradient is block sparse.
         if (InputRef(1).Value().GetMatrixType() == SPARSE && InputRef(0).Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
         {
-            // We need a sparse matrix for the gradient. However, we should allocate a new one instead of switching the type in place
+            // We need a sparse matrix for the gradient. We allocate a new one instead of switching the type in place
             // since switching in place may affect other nodes who share this matrix due to memory sharing
             auto& currentInput0GradientMatrixRef = InputRef(0).Gradient();
             auto newInput0SparseGradientMatrix = std::make_shared<Matrix<ElemType>>(currentInput0GradientMatrixRef.GetNumRows(),
@@ -556,7 +556,7 @@
         {
             Input(0)->CreateGradientMatrixIfNull();
 
-            // We need a sparse matrix for the gradient. However, we should allocate a new one instead of switching the type in place
+            // We need a sparse matrix for the gradient. We allocate a new one instead of switching the type in place
             // since switching in place may affect other nodes who share this matrix due to memory sharing
             auto& currentInput0GradientMatrixRef = InputRef(0).Gradient();
             if (currentInput0GradientMatrixRef.GetMatrixType() != SPARSE)

View file

@@ -126,7 +126,7 @@ void RandomSampleNode<ElemType>::ForwardPropNonLooping()
     if (ValueAsMatrix().GetMatrixType() != SPARSE)
     {
         // BUGBUG: matrix type should be configured during validation
-        // We should allocate a new one instead of switching the type in place since switching in place may
+        // Note: We allocate a new one instead of switching the type in place since switching in place may
         // affect other nodes who share this matrix due to memory sharing
         auto newSparseValueMatrix = std::make_shared<Matrix<ElemType>>(ValueAsMatrix().GetNumRows(), ValueAsMatrix().GetNumCols(), CPUDEVICE, SPARSE, matrixFormatSparseCSC);
 #ifdef _MSC_VER
@@ -140,10 +140,7 @@ void RandomSampleNode<ElemType>::ForwardPropNonLooping()
     // TODO: Should we prepare the CSC data directly on the CPU and move it in one go?
     // Currently the reader will place the data onto the GPU. It will then be pulled on-demand to the CPU once (and cached there).
-    valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ false);
-    // BUGUBUG: This is a no-op; was the intent to change the preferred device to CPU?
-    valueMatrix.SetDevice(CPUDEVICE);
+    valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ true);
     valueMatrix.Reset();
 
     // Get vector with indices of randomly sampled classes

View file

@@ -59,9 +59,52 @@ def alias(x, name=''):
     return alias(x, name)
 
 ##########################################################################
-# evaluation ops
+# loss and evaluation ops
 ##########################################################################
 
+@typemap
+def binary_cross_entropy(output, target, name=''):
+    r'''
+    This operation computes the binary cross entropy between the ``output`` and ``target``.
+
+    Example:
+        TBA
+
+    Args:
+        output: the computed posterior probability from the network
+        target: ground-truth label, 0 or 1
+        name (`str`, optional): the name of the Function instance in the network
+    Returns:
+        :class:`cntk.ops.functions.Function`
+    '''
+    from cntk.cntk_py import binary_cross_entropy
+    dtype = get_data_type(output, target)
+    output = sanitize_input(output, dtype)
+    target = sanitize_input(target, dtype)
+    return binary_cross_entropy(output, target, name)
+
+@typemap
+def weighted_binary_cross_entropy(output, target, weight, name=''):
+    r'''
+    This operation computes the weighted binary cross entropy between the ``output`` and ``target``.
+
+    Example:
+        TBA
+
+    Args:
+        output: the computed posterior probability from the network
+        target: ground-truth label, 0 or 1
+        weight: weight of each example
+        name (`str`, optional): the name of the Function instance in the network
+    Returns:
+        :class:`cntk.ops.functions.Function`
+    '''
+    from cntk.cntk_py import weighted_binary_cross_entropy
+    dtype = get_data_type(output, target, weight)
+    output = sanitize_input(output, dtype)
+    target = sanitize_input(target, dtype)
+    weight = sanitize_input(weight, dtype)
+    return weighted_binary_cross_entropy(output, target, weight, name)
+
 @typemap
 def cross_entropy_with_softmax(output_vector, target_vector, axis=-1, name=''):
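Since the docstring examples above are still TBA, here is a hypothetical usage sketch; input_variable and its availability in cntk.ops follow the broader CNTK v2 Python API and are assumptions, not part of this diff:

from cntk.ops import input_variable, binary_cross_entropy, weighted_binary_cross_entropy

p = input_variable(3)  # computed posterior probabilities, expected in (0, 1)
t = input_variable(3)  # ground-truth labels, 0 or 1
w = input_variable(3)  # per-example weights

bce = binary_cross_entropy(p, t)
wbce = weighted_binary_cross_entropy(p, t, w)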