// CNTK/Source/ComputationNetworkLib/PreComputeNodes.h

//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "Basics.h"
#include "ComputationNode.h"
#include "InputAndParamNodes.h"
#include "Matrix.h"

#include <cstdio>
#include <iostream>
#include <list>
#include <map>
#include <stdexcept>
#include <string>
// this file will contain computation nodes that require several atomic computation.
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// PreComputedNodeBase
// base class for nodes requiring pre-computation
// -----------------------------------------------------------------------
template <class ElemType>
class PreComputedNodeBase : public ComputationNodeNonLooping /*ComputationNode*/<ElemType>, public IPreComputeNode
{
    typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembers;
    using Base::OperationName;

public:
    PreComputedNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name), m_hasComputed(false)
    {
        MarkValueNonSharable(); // the precomputed value is node state and must not be shared
    }

    // interface through which this node is operated on are these two functions

    // check whether node has already undergone precomputation
    virtual bool /*IPreComputeNode::*/ HasComputed() const override { return m_hasComputed; }

    // call this with 'false' at start and with 'true' at end
    // This is used for resetting and updating from accumulators.
    virtual void /*IPreComputeNode::*/ MarkComputed(const bool hasComputed) override
    {
        m_hasComputed = hasComputed;
    }

    virtual bool RequiresPreCompute() const override { return true; }

    virtual void Save(File& fstream) const override
    {
        Base::Save(fstream);
        fstream << m_hasComputed;
        fstream << Value(); // the precomputed result is part of the model
    }

    virtual void Load(File& fstream, size_t modelVersion) override
    {
        Base::Load(fstream, modelVersion);
        fstream >> m_hasComputed;
        LoadValue(fstream);
        // Note: This loses the sample layout, but that is recovered by Validate().
    }

    virtual void DumpNodeInfo(const bool printValues, const bool printMetadata, File& fstream) const override
    {
        Base::DumpNodeInfo(printValues, printMetadata, fstream);
        if (printMetadata)
        {
            // Use snprintf (not sprintf): the sample-layout string is of unbounded
            // length, so an unchecked sprintf could overflow the fixed buffer.
            char str[4096];
            snprintf(str, sizeof(str), "[%s] ", string(GetSampleLayout()).c_str());
            fstream << string(str);
            snprintf(str, sizeof(str), "HasComputed=%ls", HasComputed() ? L"true" : L"false");
            fstream << string(str);
        }
        PrintNodeValuesToFile(printValues, printMetadata, fstream);
    }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);
        if (!Input(0)->HasMBLayout())
            InvalidArgument("%ls %ls operation requires its input to come in minibatches of samples.", NodeName().c_str(), OperationName().c_str());
        m_pMBLayout = nullptr; // this node does not hold mini-batch data
        //if (!m_hasComputed) // this node retains state, and state gets destroyed by Resize(), so we must be careful
        SetDims(Input(0)->GetSampleLayout(), false);
        //else if (!GetSampleLayout().IsElementwiseCompatibleWith(Input(0)->GetSampleLayout()))
        //    InvalidArgument("%ls %ls operation: Precomputed parameter does not match input dimensions.", NodeName().c_str(), OperationName().c_str());
        // BUGBUG: Above is a workaround, which may be OK since m_hasComputed getting set requires Validate() to have passed.
        //         This workaround won't guard against corrupt files.
    }

    virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
    {
        Base::CopyTo(nodeP, newName, flags);
        if (flags & CopyNodeFlags::copyNodeValue)
        {
            auto node = dynamic_pointer_cast<PreComputedNodeBase<ElemType>>(nodeP);
            node->m_hasComputed = m_hasComputed;
        }
    }

    // this is for the special case: convertDBN needs this; because we initialize values directly from another well-trained model
    virtual void SideLoadFromMatrix(const Matrix<ElemType>& value)
    {
        if (value.GetNumCols() != 1)
            InvalidArgument("SideLoadFromMatrix: Side-loading is only supported for column vectors.");
        m_value->SetValue(value);
        m_hasComputed = true;
        SetDims(TensorShape(value.GetNumRows()), false);
    }

public:
    bool m_hasComputed; // true once precomputation has completed (also persisted in the model file)
};

#define UsingPreComputedNodeMembers \
    UsingComputationNodeMembers;    \
    using Base::m_hasComputed;      \
    using Base::OperationName
// -----------------------------------------------------------------------
// MeanInvStdDevNodeBase (features) -- common base class for Mean and InvStdDev
// -----------------------------------------------------------------------
template <class ElemType>
class MeanInvStdDevNodeBase : public PreComputedNodeBase<ElemType>, public NumInputs<1>
{
    typedef PreComputedNodeBase<ElemType> Base; UsingPreComputedNodeMembers;
    // static const std::wstring TypeName() { return L"MeanInvStdDev (base)"; }

public:
    // DeclareConstructorFromConfigWithNumInputs(MeanInvStdDevNodeBase);
    MeanInvStdDevNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
        : PreComputedNodeBase<ElemType>(deviceId, name), m_numSamples(SIZE_MAX)
    {
    }

    virtual void Load(File& fstream, size_t modelVersion) override
    {
        Base::Load(fstream, modelVersion);
        m_numSamples = SIZE_MAX; // a freshly loaded model is never mid-accumulation
    }

    // this is used by the special-purpose command "convertdbn".
    virtual void SideLoadFromMatrix(const Matrix<ElemType>& m)
    {
        Base::SideLoadFromMatrix(m);
        m_numSamples = SIZE_MAX; // side-loaded values are final; no accumulation in progress
    }

    virtual void /*PreComputedNodeBase::*/ MarkComputed(const bool hasComputed, size_t numSamples = 0)
    {
        Base::MarkComputed(hasComputed);
        if (m_hasComputed)
        {
            // finalize: leave the accumulation state
            if (!IsAccumulating())
                LogicError("%ls %ls operation: MarkComputed(true) has been called without MarkComputed(false) first.", NodeName().c_str(), OperationName().c_str());
            if (m_numSamples == 0)
                LogicError("%ls %ls operation: No data accumulated during precomputation.", NodeName().c_str(), OperationName().c_str());
            m_numSamples = SIZE_MAX;
        }
        else
        {
            // initialize: enter the accumulation state with an empty count
            if (IsAccumulating())
                LogicError("%ls %ls operation: MarkComputed(false) has been called while accumulating.", NodeName().c_str(), OperationName().c_str());
            m_numSamples = 0;
        }
    }

    virtual void BackpropToNonLooping(size_t /*inputIndex*/) override
    {
        // LogicError("Mean operation should not be involved in the gradient calculation.");
    }

    virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
    {
        Base::CopyTo(nodeP, newName, flags);
        if (flags & CopyNodeFlags::copyNodeValue)
        {
            // copying is only meaningful outside of accumulation
            if (m_numSamples != SIZE_MAX)
                LogicError("%ls %ls operation: CopyTo() called while accumulating.", NodeName().c_str(), OperationName().c_str());
            auto node = dynamic_pointer_cast<MeanInvStdDevNodeBase<ElemType>>(nodeP);
            node->m_numSamples = SIZE_MAX;
        }
    }

protected:
    size_t m_numSamples; // (SIZE_MAX while outside accumulation state)
    bool IsAccumulating() const { return m_numSamples != SIZE_MAX; }
};

#define UsingMeanInvStdDevNodeBaseNodeMembers \
    ComputationNodeBoilerplate;               \
    UsingPreComputedNodeMembers;              \
    using Base::m_numSamples;                 \
    using Base::IsAccumulating
// -----------------------------------------------------------------------
// MeanNode (features)
// -----------------------------------------------------------------------
template <class ElemType>
class MeanNode : public MeanInvStdDevNodeBase<ElemType>
{
    typedef MeanInvStdDevNodeBase<ElemType> Base; UsingMeanInvStdDevNodeBaseNodeMembers;
    static const std::wstring TypeName() { return L"Mean"; }

public:
    DeclareConstructorFromConfigWithNumInputs(MeanNode);
    MeanNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }
    // extra size_t argument is accepted (and ignored) for construction-API compatibility
    MeanNode(DEVICEID_TYPE deviceId, const wstring& name, size_t)
        : Base(deviceId, name)
    {
    }

    // MarkComputed(false) starts accumulation (resets m_value to 0);
    // MarkComputed(true) finalizes it.
    // 'override' added: this overrides PreComputedNodeBase::MarkComputed(bool),
    // consistent with InvStdDevNode::MarkComputed() which already declares it.
    virtual void /*PreComputedNodeBase::*/ MarkComputed(const bool hasComputed) override
    {
        Base::MarkComputed(hasComputed);
        if (!m_hasComputed) // initialize accumulation
        {
            UpdateFunctionValuesSize();
            Value().SetValue(0);
        }
        // no else branch because ForwardPropNonLooping() already leaves a valid mean in m_value
    }

    // While accumulating, each call folds the current minibatch into the running
    // per-dimension mean held directly in m_value:
    //   newMean = beta * oldMean + alpha * sum(input), with
    //   alpha = 1/totalNumSamples and beta = oldNumSamples/totalNumSamples.
    virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
    {
        FrameRange fr(Input(0)->GetMBLayout());
        if (m_hasComputed)
            return; // not accumulating
        if (!IsAccumulating())
            LogicError("%ls %ls operation: MarkComputed(false) has not been called.", NodeName().c_str(), OperationName().c_str());

        // set gaps to zero, since we are reducing in time
        Input(0)->MaskMissingValueColumnsToZero(fr);

        size_t numNewSamples = Input(0)->GetMBLayout()->GetActualNumSamples();
        size_t totalNumSamples = m_numSamples + numNewSamples;
        if (totalNumSamples == 0)
            totalNumSamples = 1; // 0/0=1 in this context
        ElemType alpha = 1.0f / totalNumSamples;
        ElemType beta = (ElemType) m_numSamples / totalNumSamples;

        size_t rank = DetermineElementwiseTensorRank();
        auto mean  = ValueTensorFor(rank, FrameRange()); // mean is formed directly in our m_value
        auto input = Input(0)->ValueTensorFor(rank, fr);

        mean.DoCopyOf(beta, input, alpha);
        // Note: We leverage that TensorView allows "broadcasting" the output,
        // which really means a reduction.

        m_numSamples += numNewSamples;
    }
};

template class MeanNode<float>;
template class MeanNode<double>;
// -----------------------------------------------------------------------
// InvStdDevNode (features)
// TODO: share stuff with MeanNode
// -----------------------------------------------------------------------
// InvStdDevNode accumulates per-dimension mean and variance of its input over
// an epoch (between MarkComputed(false) and MarkComputed(true)), then stores
// 1/stddev per dimension in m_value for use in mean/variance normalization.
template <class ElemType>
class InvStdDevNode : public MeanInvStdDevNodeBase<ElemType>
{
typedef MeanInvStdDevNodeBase<ElemType> Base; UsingMeanInvStdDevNodeBaseNodeMembers;
static const std::wstring TypeName() { return L"InvStdDev"; }
public:
DeclareConstructorFromConfigWithNumInputs(InvStdDevNode);
InvStdDevNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name),
m_mean(deviceId),
m_var(deviceId),
m_temp(deviceId)
{
}
// MarkComputed(false) resets the accumulators; MarkComputed(true) converts the
// accumulated variance into 1/stddev and writes it into m_value.
virtual void /*PreComputedNodeBase::*/ MarkComputed(const bool hasComputed) override
{
Base::MarkComputed(hasComputed);
if (!m_hasComputed) // initialize
{
// reset accumulators
UpdateFunctionValuesSize();
m_mean.Resize(Value()); // mean accumulator normalized by #samples in it
m_var .Resize(Value()); // likewise the variance
m_temp.Resize(Value()); // and a temp
m_mean.SetValue(0); // reset the mean and var accumulators
m_var .SetValue(0);
Value().SetValue(0); // and clear m_value as well: We must do this here already to avoid a NaN check to flag while this is being estimated.
}
else // finalize
{
// m_value <- 1/stddev
ElemType sqrtFloor = 1e-10f;
m_var.InplaceTruncateBottom(sqrtFloor); // prevent too small variance (and negative square roots due to numeric inaccuracy)
m_var.InplaceSqrt();
m_var.ElementInverse();
Value().SetValue(m_var);
}
}
// Online (incremental) update of mean and variance with each minibatch.
// Statement order matters: the old mean must be saved before the mean update,
// and the variance correction uses the difference between old and new mean.
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
{
FrameRange fr(Input(0)->GetMBLayout());
if (m_hasComputed)
return; // not accumulating
if (!IsAccumulating())
LogicError("%ls %ls operation: MarkComputed(false) has not been called.", NodeName().c_str(), OperationName().c_str());
// set gaps to zero, since we are reducing in time
Input(0)->MaskMissingValueColumnsToZero(fr);
size_t numNewSamples = Input(0)->GetMBLayout()->GetActualNumSamples();
size_t totalNumSamples = m_numSamples + numNewSamples;
if (totalNumSamples == 0)
totalNumSamples = 1; // 0/0=1 in this context
ElemType alpha = 1.0f / totalNumSamples;
ElemType beta = (ElemType)m_numSamples / totalNumSamples;
size_t rank = DetermineElementwiseTensorRank();
auto input = Input(0)->ValueTensorFor( rank, fr);
auto mean = DataTensorFor(m_mean, rank, FrameRange());
auto temp = DataTensorFor(m_temp, rank, FrameRange());
auto var = DataTensorFor(m_var, rank, FrameRange());
// preserve the old mean value for the next step
temp.AssignCopyOf(mean);
// accumulate the mean
mean.DoCopyOf(beta, input, alpha); // Note: This reduces over samples.
// compute the correction term
// var += (oldMean - newMean)^2
temp.AddCopyOf(mean, -1.0f); // subtract new 'mean' from the old one
var.AddSqrOf(temp); // add the square
// var += (input - mean)^2
var.DoSqrOfDifferenceOf(beta, input, mean, alpha); // this reduces as well
#if 0 // BUGBUG: This is the correct version, but it will break test cases, so do this later. MeanNode does it right already.
m_numSamples += Input(0)->GetMBLayout()->GetActualNumSamples();
#else
m_numSamples += Input(0)->Value().GetNumCols(); // BUGBUG: Should be -> GetActualNumSamples().
#endif
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<InvStdDevNode<ElemType>>(nodeP);
node->m_mean = m_mean;
node->m_var = m_var;
node->m_temp = m_temp;
}
}
private:
// accumulators (kept per device); m_temp is scratch space for the mean-shift correction
Matrix<ElemType> m_mean;
Matrix<ElemType> m_var;
Matrix<ElemType> m_temp;
};
template class InvStdDevNode<float>;
template class InvStdDevNode<double>;
// -----------------------------------------------------------------------
// PerDimMeanVarNormalizationNode (feature, mean, invStdDev)
// Computes
// output = (feature - mean) .* invStdDev
// where mean and invStdDev are meant to be single elements while features
// is minibatch data.
// TODO: Why do we need this? Why not use Plus and ElementTimes?
// -----------------------------------------------------------------------
template <class ElemType>
class PerDimMeanVarNormalizationNode : public ComputationNode<ElemType>, public NumInputs<3>
{
    typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"PerDimMeanVarNormalization"; }

public:
    DeclareConstructorFromConfigWithNumInputs(PerDimMeanVarNormalizationNode);
    PerDimMeanVarNormalizationNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    // evaluation-only node: gradients must never be requested through it
    virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange&) override
    {
        InvalidArgument("PerDimMeanVarNormalizationNode should only be called in the evaluation stage. Is any of its descendents a learnable parameter that requires gradient?");
    }

    // output = (feature - mean) .* invStdDev, with mean/invStdDev broadcast over samples
    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        size_t rank = DetermineElementwiseTensorRank();
        auto result    = ValueTensorFor(rank, fr);
        auto feature   = Input(0)->ValueTensorFor(rank, fr);
        auto mean      = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast());
        auto invStdDev = Input(2)->ValueTensorFor(rank, fr.AllowBroadcast());

        result.AssignDifferenceOf(feature, mean);             // result = feature - mean
        result.AssignElementwiseProductOf(result, invStdDev); // result .*= invStdDev
    }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);
        InferMBLayoutFromInputsForStandardCase();

        // mean and invStdDev dimensions may be inferred from the feature input
        Input(1)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());
        Input(2)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());

        if (isFinalValidationPass)
        {
            const auto& featureLayout = Input(0)->GetSampleLayout();
            bool meanCompatible      = featureLayout.IsElementwiseCompatibleWith(Input(1)->GetSampleLayout());
            bool invStdDevCompatible = featureLayout.IsElementwiseCompatibleWith(Input(2)->GetSampleLayout());
            if (!meanCompatible || !invStdDevCompatible)
                InvalidArgument("PerDimMeanVarNormalizationNode: All inputs should have same sample layout.");
        }

        SetDims(Input(0));
    }
};

template class PerDimMeanVarNormalizationNode<float>;
template class PerDimMeanVarNormalizationNode<double>;
// -----------------------------------------------------------------------
// PerDimMeanVarDeNormalizationNode (feature, mean, invStdDev)
// Computes
// output = feature ./ invStdDev + mean
// with parameters the same as PerDimMeanVarNormalizationNode.
// TODO: Why do we need this? Why not use Plus and ElementDividedBy?
// -----------------------------------------------------------------------
template <class ElemType>
class PerDimMeanVarDeNormalizationNode : public ComputationNode<ElemType>, public NumInputs<3>
{
    typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"PerDimMeanVarDeNormalization"; }

public:
    DeclareConstructorFromConfigWithNumInputs(PerDimMeanVarDeNormalizationNode);
    PerDimMeanVarDeNormalizationNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    // evaluation-only node: gradients must never be requested through it
    virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange&) override
    {
        InvalidArgument("PerDimMeanVarDeNormalizationNode should only be called in the evaluation stage. Is any of its descendents a learnable parameter that requires gradient?");
    }

    // output = feature ./ invStdDev + mean  (inverse of PerDimMeanVarNormalization)
    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        size_t rank = DetermineElementwiseTensorRank();
        auto result    = ValueTensorFor(rank, fr);
        auto feature   = Input(0)->ValueTensorFor(rank, fr);
        auto mean      = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast());
        auto invStdDev = Input(2)->ValueTensorFor(rank, fr.AllowBroadcast());

        result.AssignElementwiseQuotientOf(feature, invStdDev); // result = feature / invStdDev
        result.AddCopyOf(mean);                                 // result += mean
    }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);
        InferMBLayoutFromInputsForStandardCase();

        // mean and invStdDev dimensions may be inferred from the feature input
        Input(1)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());
        Input(2)->ValidateInferInputDimsFrom(Input(0)->GetSampleLayout());

        if (isFinalValidationPass)
        {
            const auto& featureLayout = Input(0)->GetSampleLayout();
            bool meanCompatible      = featureLayout.IsElementwiseCompatibleWith(Input(1)->GetSampleLayout());
            bool invStdDevCompatible = featureLayout.IsElementwiseCompatibleWith(Input(2)->GetSampleLayout());
            if (!meanCompatible || !invStdDevCompatible)
                InvalidArgument("PerDimMeanVarDeNormalizationNode: All inputs should have same sample layout.");
        }

        SetDims(Input(0));
    }
};

template class PerDimMeanVarDeNormalizationNode<float>;
template class PerDimMeanVarDeNormalizationNode<double>;
}}}