Cleaned up state control of MeanNode and InvStdDevNode; they no longer fail the NaN check on m_functionValues. Also factored the shared logic out into a common base class, MeanInvStdDevNodeBase;
changed the pre-compute protocol: callers must now call MarkComputed(false) up front to signal initialization of the accumulators; bug fix in GetNodesRequiringX(): this should be a dynamic cast, not a static cast; BuildAndValidateSubNetwork() is no longer called on demand by Evaluate(), but must have been called beforehand, which is done through a new method StartEvaluateMinibatchLoop(); some (hopefully all) places that call Evaluate() now call StartEvaluateMinibatchLoop() beforehand, e.g. SGD and SimpleEvaluator; new method Evaluate(set of nodes);
This commit is contained in:
Parent: e5a6da9622
Commit: 8dae07f0ac
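In sketch form, the new per-epoch protocol looks like this (a minimal sketch using only calls introduced or touched in this commit; reader, node-set, and matrix names are placeholders):

    dataReader->StartMinibatchLoop(mbSize, 0, epochSize);   // reader side, as before
    net.StartEvaluateMinibatchLoop(evalNodes);              // network side, new: runs BuildAndValidateSubNetwork()
    while (dataReader->GetMinibatch(inputMatrices))
        net.Evaluate(evalNodes);                            // new set-of-nodes overload; now asserts the subnetwork was built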
@@ -122,4 +122,4 @@ void Eval<ElemType>::ResetState()
 template class Eval<double>;
 template class Eval<float>;

 }}}
@@ -204,15 +204,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembers;
 public:
     //virtual ComputationNodeBase * NewThis(DEVICEID_TYPE deviceId, const wstring & name) = 0;
-    PreComputedNode(DEVICEID_TYPE deviceId, const wstring & name) : Base(deviceId, name)
-    {
-        // further initializations
-        m_hasComputed = false;
-    }
+    PreComputedNode(DEVICEID_TYPE deviceId, const wstring & name) :
+        Base(deviceId, name),
+        m_hasComputed(false)
+    { }

     // interface through which this node is operated on are these two functions

+    // check whether node has already undergone precomputation
     virtual bool HasComputed() const { return m_hasComputed; }
-    virtual void MarkComputed(const bool hasComputed)   // override this for further finalizing operation
+
+    // call this with 'false' at start and with 'true' at end
+    // This is used for resetting and updating from accumulators.
+    virtual void MarkComputed(const bool hasComputed)
     {
         m_hasComputed = hasComputed;
     }
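Derived pre-compute nodes are expected to override MarkComputed() and key their behavior off the flag: the 'false' call resets the accumulators, the 'true' call finalizes them into m_functionValues. A rough sketch of the override pattern (the concrete versions are in the MeanInvStdDevNodeBase and MeanNode hunks below):

    virtual void MarkComputed(const bool hasComputed)
    {
        Base::MarkComputed(hasComputed);
        if (!m_hasComputed)     // MarkComputed(false): entering the accumulation phase
            ;                   // reset accumulators here
        else                    // MarkComputed(true): done accumulating
            ;                   // finalize m_functionValues here
    }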
@@ -223,7 +227,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     {
         Base::SaveToFile(fstream);
         fstream << m_hasComputed;
-        fstream << m_functionValues;
+        fstream << m_functionValues;    // TODO: why serialize if not yet computed?
     }

     virtual void LoadFromFile(File& fstream, size_t modelVersion) override
@@ -271,34 +275,46 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     bool m_hasComputed;
 };

-#define UsingPreComputedNodeMembers UsingComputationNodeMembersBoilerplate; using Base::m_hasComputed
+#define UsingPreComputedNodeMembers UsingComputationNodeMembers; using Base::m_hasComputed

 // -----------------------------------------------------------------------
-// MeanNode (features)
+// MeanInvStdDevNodeBase (features) -- common base class for Mean and InvStdDev
 // -----------------------------------------------------------------------

 template<class ElemType>
-class MeanNode : public PreComputedNode<ElemType>, public NumInputs<1>
+class MeanInvStdDevNodeBase : public PreComputedNode<ElemType>, public NumInputs<1>
 {
     typedef PreComputedNode<ElemType> Base; UsingPreComputedNodeMembers;
-    static const std::wstring TypeName() { return L"Mean"; }
+    //static const std::wstring TypeName() { return L"MeanInvStdDev (base)"; }
 public:
-    MeanNode(DEVICEID_TYPE deviceId, const wstring & name) :
+    MeanInvStdDevNodeBase(DEVICEID_TYPE deviceId, const wstring & name) :
         PreComputedNode<ElemType>(deviceId, name),
-        m_numSamples(0)
+        m_numSamples(SIZE_MAX)
     { }

     virtual void LoadFromFile(File& fstream, size_t modelVersion) override
     {
         Base::LoadFromFile(fstream, modelVersion);
-        m_numSamples = 0;   // TODO: intended? Not loaded from file?
+        m_numSamples = SIZE_MAX;
     }

     virtual void /*PreComputedNode::*/MarkComputed(const bool hasComputed)
     {
         Base::MarkComputed(hasComputed);
-        if (m_hasComputed)
+        if (!m_hasComputed)     // initialize
+        {
+            if (IsAccumulating())
+                LogicError("%ls %ls operation: MarkComputed(false) has been called while accumulating.", NodeName().c_str(), OperationName().c_str());
             m_numSamples = 0;
+        }
+        else                    // finalize
+        {
+            if (!IsAccumulating())
+                LogicError("%ls %ls operation: MarkComputed(true) has been called without MarkComputed(false) first.", NodeName().c_str(), OperationName().c_str());
+            if (m_numSamples == 0)
+                LogicError("%ls %ls operation: No data accumulated during precomputation.", NodeName().c_str(), OperationName().c_str());
+            m_numSamples = SIZE_MAX;
+        }
     }

     virtual void ComputeInputPartial(const size_t /*inputIndex*/)
@@ -306,51 +322,68 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         LogicError("Mean operation should not be involved in the gradient calculation.");
     }

-    virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
-    {
-        if (!m_hasComputed)
-        {
-            Matrix<ElemType> &samples = Inputs(0)->FunctionValues();
-            Matrix<ElemType> &avg = FunctionValues();
-#if 1//NANCHECK
-            samples.HasNan("Mean-Samples");
-#endif
-
-            size_t numNewSamples = samples.GetNumCols();
-            Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / (m_numSamples + samples.GetNumCols()), samples, false,
-                                                     ConstOnes(numNewSamples, 1, samples.GetDeviceId()),
-                                                     false, (ElemType)m_numSamples / (m_numSamples + numNewSamples), avg);
-
-#if 1//NANCHECK
-            avg.HasNan("Mean-avg");
-            //ones.HasNan("Mean-ones");
-#endif
-
-            m_numSamples += numNewSamples;
-        }
-    }
-
-    virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
-    {
-        Base::Validate(isFinalValidationPass);
-        if (!m_hasComputed)
-        {
-            FunctionValues().SetValue(0);   // reset accumulator
-            fprintf(stderr, "Mean: SetValue(0)\n");
-        }
-    }
-
     virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
     {
         Base::CopyTo(nodeP, newName, flags);
         if (flags & CopyNodeFlags::copyNodeValue)
         {
-            auto node = dynamic_pointer_cast<MeanNode<ElemType>>(nodeP);
-            node->m_numSamples = m_numSamples;
+            if (m_numSamples != SIZE_MAX)
+                LogicError("%ls %ls operation: CopyTo() called while accumulating.", NodeName().c_str(), OperationName().c_str());
+            auto node = dynamic_pointer_cast<MeanInvStdDevNodeBase<ElemType>>(nodeP);
+            node->m_numSamples = SIZE_MAX;
         }
     }
-private:
-    size_t m_numSamples;    // TODO: move to base class?
+protected:
+    size_t m_numSamples;    // (SIZE_MAX while outside accumulation state)
+    bool IsAccumulating() const { return m_numSamples != SIZE_MAX; }
+};
+
+// -----------------------------------------------------------------------
+// MeanNode (features)
+// -----------------------------------------------------------------------
+
+template<class ElemType>
+class MeanNode : public MeanInvStdDevNodeBase<ElemType>
+{
+    typedef MeanInvStdDevNodeBase<ElemType> Base; ComputationNodeBoilerplate; UsingPreComputedNodeMembers;
+    static const std::wstring TypeName() { return L"Mean"; }
+public:
+    MeanNode(DEVICEID_TYPE deviceId, const wstring & name) :
+        Base(deviceId, name)
+    { }
+
+    virtual void /*PreComputedNode::*/MarkComputed(const bool hasComputed)
+    {
+        Base::MarkComputed(hasComputed);
+        if (!m_hasComputed)     // initialize accumulation
+            FunctionValues().SetValue(0);
+        // no else branch because EvaluateThisNodeNonLooping() already leaves a valid mean in m_functionValues
+    }
+
+    virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+    {
+        if (m_hasComputed)
+            return;     // not accumulating
+
+        if (!IsAccumulating())
+            LogicError("%ls %ls operation: MarkComputed(false) has not been called.", NodeName().c_str(), OperationName().c_str());
+
+        Matrix<ElemType> &samples = Inputs(0)->FunctionValues();
+        Matrix<ElemType> &avg = FunctionValues();
+#if 1//NANCHECK
+        samples.HasNan("Mean-Samples");
+#endif
+        size_t numNewSamples = samples.GetNumCols();
+        Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / (m_numSamples + samples.GetNumCols()), samples, false,
+                                                 ConstOnes(numNewSamples, 1, samples.GetDeviceId()),
+                                                 false, (ElemType)m_numSamples / (m_numSamples + numNewSamples), avg);
+#if 1//NANCHECK
+        avg.HasNan("Mean-avg");
+#endif
+
+        m_numSamples += numNewSamples;
+    }
 };

 template class MeanNode<float>;
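The MultiplyAndWeightedAdd() call above folds a minibatch of k new columns into a running mean over N prior samples: avg = (1/(N+k)) * samples * ones + (N/(N+k)) * avg. A self-contained scalar version of the same update, to make the weighting explicit (plain C++, not CNTK API):

    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<double> data = { 1, 2, 3, 4, 5, 6 };
        double avg = 0; size_t N = 0;
        const size_t k = 2;                         // minibatch size
        for (size_t i = 0; i < data.size(); i += k)
        {
            double chunkSum = 0;                    // plays the role of samples * ConstOnes
            for (size_t j = i; j < i + k; j++)
                chunkSum += data[j];
            avg = chunkSum / (N + k) + avg * double(N) / (N + k);
            N += k;
        }
        printf("%g\n", avg);                        // prints 3.5, the mean of 1..6
        return 0;
    }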
@@ -362,32 +395,34 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // -----------------------------------------------------------------------

 template<class ElemType>
-class InvStdDevNode : public PreComputedNode<ElemType>, public NumInputs<1>
+class InvStdDevNode : public MeanInvStdDevNodeBase<ElemType>
 {
-    typedef PreComputedNode<ElemType> Base; UsingPreComputedNodeMembers;
+    typedef MeanInvStdDevNodeBase<ElemType> Base; ComputationNodeBoilerplate; UsingPreComputedNodeMembers;
     static const std::wstring TypeName() { return L"InvStdDev"; }
 public:
     InvStdDevNode(DEVICEID_TYPE deviceId, const wstring & name) :
-        PreComputedNode<ElemType>(deviceId, name),
-        m_mean(deviceId), m_var(deviceId), m_temp(deviceId),
-        m_numSamples(0)
+        Base(deviceId, name),
+        m_mean(deviceId), m_var(deviceId), m_temp(deviceId)
     { }

-    virtual void LoadFromFile(File& fstream, size_t modelVersion) override
-    {
-        Base::LoadFromFile(fstream, modelVersion);
-        m_numSamples = 0;   // TODO: intended? not loading from file?
-    }
-
     virtual void /*PreComputedNode::*/MarkComputed(const bool hasComputed) override
     {
         Base::MarkComputed(hasComputed);

-        if (m_hasComputed && m_numSamples > 0)  //m_numSamples>0 means it's not called from model loading
+        if (!m_hasComputed)     // initialize
+        {
+            // reset accumulators
+            size_t inputDim = Inputs(0)->GetNumRows();
+            m_mean.Resize(inputDim, 1);
+            m_var.Resize(inputDim, 1);
+            m_mean.SetValue(0);
+            m_var.SetValue(0);
+            FunctionValues().SetValue(0);   // also set this because not doing it may flag during debugging; avoids special-casing this
+        }
+        else                    // finalize
         {
             ElemType sqrtFloor = 1e-10f;
-            m_var.InplaceTruncateBottom(sqrtFloor);     //prevent too small variance (and negative square roots)
+            m_var.InplaceTruncateBottom(sqrtFloor);     // prevent too small variance (and negative square roots due to numeric inaccuracy)
 #if 1//NANCHECK
             m_var.HasNan("MarkComputed-InplaceTruncateBottom");
 #endif
@@ -402,63 +437,43 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             m_var.HasNan("MarkComputed-ElementInverse()");
 #endif
             FunctionValues().SetValue(m_var);
-
-            m_numSamples = 0;
         }
     }

-    virtual void ComputeInputPartial(const size_t /*inputIndex*/) override
-    {
-        LogicError("InvStdDev operation should not be involved in the gradient calculation.");
-    }
-
     virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
     {
-        if (!m_hasComputed)
-        {
-            Matrix<ElemType> &samples = Inputs(0)->FunctionValues();
+        if (m_hasComputed)
+            return;     // not accumulating
+
+        if (!IsAccumulating())
+            LogicError("%ls %ls operation: MarkComputed(false) has not been called.", NodeName().c_str(), OperationName().c_str());
+
+        Matrix<ElemType> &samples = Inputs(0)->FunctionValues();
 #if 1//NANCHECK
         samples.HasNan("InvStdDev-Samples");
 #endif
         m_temp.SetValue(m_mean);
         size_t numNewSample = samples.GetNumCols();
         Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / (m_numSamples + numNewSample), samples, false,
                                                  ConstOnes(numNewSample, 1, samples.GetDeviceId()),
                                                  false, (ElemType)m_numSamples / (m_numSamples + numNewSample), m_mean);

         m_temp -= m_mean;
         m_temp.AssignElementPowerOf(m_temp, 2);
         m_var += m_temp;

         m_temp.AssignDifferenceOf(samples, m_mean);
         m_temp.AssignElementPowerOf(m_temp, 2);

         Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / (m_numSamples + numNewSample), m_temp, false,
                                                  ConstOnes(numNewSample, 1, samples.GetDeviceId()),
                                                  false, (ElemType)m_numSamples / (m_numSamples + numNewSample), m_var);

 #if 1//NANCHECK
         m_var.HasNan("InvStdDev-m_var");
 #endif

         m_numSamples += samples.GetNumCols();
-        }
-    }
-
-    virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
-    {
-        Base::Validate(isFinalValidationPass);
-
-        if (!m_hasComputed)
-        {
-            size_t inputDim = Inputs(0)->GetNumRows();
-            m_mean.Resize(inputDim, 1);
-            m_var.Resize(inputDim, 1);
-            // reset accumulators
-            m_mean.SetValue(0);
-            m_var.SetValue(0);
-            fprintf(stderr, "InvStdDev: SetValue(0)\n");
-        }
     }

     virtual void MoveMatricesToDevice(const DEVICEID_TYPE deviceId) override
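InvStdDevNode's accumulation relies on the shift-of-mean identity: if the mean moves from m_old to m_new, the sum over the old samples of (x - m_new)^2 equals the sum of (x - m_old)^2 plus N * (m_old - m_new)^2. That is why the code saves the old mean in m_temp, adds the squared mean shift to m_var, and then folds in the new minibatch's squared deviations with the same N/(N+k) vs. 1/(N+k) weights as the mean update. A self-contained scalar check (plain C++, not CNTK API):

    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<double> data = { 1, 2, 3, 4, 5, 6 };
        double mean = 0, var = 0; size_t N = 0;
        const size_t k = 2;                              // minibatch size
        for (size_t i = 0; i < data.size(); i += k)
        {
            double oldMean = mean;                       // m_temp.SetValue(m_mean)
            double chunkSum = 0;
            for (size_t j = i; j < i + k; j++)
                chunkSum += data[j];
            mean = chunkSum / (N + k) + oldMean * double(N) / (N + k);
            double delta = oldMean - mean;               // m_temp -= m_mean
            var += delta * delta;                        // m_var += m_temp^2
            double chunkSqSum = 0;                       // squared deviations from the new mean
            for (size_t j = i; j < i + k; j++)
                chunkSqSum += (data[j] - mean) * (data[j] - mean);
            var = chunkSqSum / (N + k) + var * double(N) / (N + k);
            N += k;
        }
        printf("%g\n", var);                             // prints 2.91667, the population variance of 1..6
        return 0;
    }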
@@ -475,15 +490,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         if (flags & CopyNodeFlags::copyNodeValue)
         {
             auto node = dynamic_pointer_cast<InvStdDevNode<ElemType>>(nodeP);
-            node->m_numSamples = m_numSamples;
-
             node->m_mean = m_mean;
             node->m_var = m_var;
-            node-> m_temp = m_temp;
+            node->m_temp = m_temp;
         }
     }
 private:
-    size_t m_numSamples;
     Matrix<ElemType> m_mean;
     Matrix<ElemType> m_var;
     Matrix<ElemType> m_temp;
@@ -430,7 +430,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     }
 }

-// prepares the network for computation
+bool ComputationNetwork::BuiltAndValidatedSubNetwork(const ComputationNodeBasePtr & rootNode)
+{
+    return m_built.find(rootNode) != m_built.end();
+}
+
+// prepare to compute with the subnetwork that this rootNode depends on, including
+//  - auto-detecting recurrent loops
+//  - collect input and learnable nodes
+//  - calling Validate() on all nodes lazily, which sizes all matrices (column dimensions get updated to MB size)
 // Done lazily, called for every minibatch's invocation of EvaluateNode(), but memoizing which nodes were done already.
 // BUGBUG? Lazy triggers on the root node. I.e. for two different root nodes (training, eval), it validates twice.
 void ComputationNetwork::BuildAndValidateSubNetwork(const ComputationNodeBasePtr rootNode)
@@ -558,53 +566,53 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 }
 #endif

-template<class N> void ComputationNetwork::GetNodesRequiringX(std::list<ComputationNodeBasePtr> & nodesRequirePreComputation, const ComputationNodeBasePtr rootNode, bool checkComputed)
+template<class N> void ComputationNetwork::GetNodesRequiringX(std::list<ComputationNodeBasePtr> & nodesRequiringX, const ComputationNodeBasePtr rootNode, bool checkComputed)
 {
-    if (rootNode == nullptr)    // find nodes from all available nodes
+    if (!rootNode)              // find nodes from all available nodes
     {
-        for (auto nodeIter = m_nameToNodeMap.begin(); nodeIter != m_nameToNodeMap.end(); nodeIter++)
+        for (const auto & nodep : m_nameToNodeMap)
         {
-            ComputationNodeBasePtr node = nodeIter->second;
-            if (node->RequiresPreCompute())     // TODO: why not check directly for the type with a dynamic_cast?
+            auto node = dynamic_pointer_cast<N>(nodep.second);
+            if (node)
             {
-                auto preComputedNode = static_pointer_cast<N>(node);
-                if (!checkComputed || !preComputedNode->HasComputed())
-                    nodesRequirePreComputation.push_back(node);
+                assert(node->RequiresPreCompute());
+                if (!checkComputed || !node->HasComputed())
+                    nodesRequiringX.push_back(node);
             }
         }
     }
     else                        // or for calculating a specific node
     {
-        const auto & nodes = GetEvalOrder(rootNode, false);
-        for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
+        for (const auto & nodei : GetEvalOrder(rootNode, false))
         {
-            ComputationNodeBasePtr node = *nodeIter;
-            if (node->RequiresPreCompute())     // TODO: why not check directly for the type with a dynamic_cast?
+            auto node = dynamic_pointer_cast<N>(nodei);
+            if (node)
             {
-                auto preComputedNode = static_pointer_cast<N>(node);
-                if (!checkComputed || !preComputedNode->HasComputed())
-                    nodesRequirePreComputation.push_back(node);
+                assert(node->RequiresPreCompute());
+                if (!checkComputed || !node->HasComputed())
+                    nodesRequiringX.push_back(node);
             }
         }
     }
+    nodesRequiringX.unique();
 }

 //return list of nodes that require precomputation and not precomputed yet.
 std::list<ComputationNodeBasePtr> ComputationNetwork::GetNodesRequiringPreComputation(const ComputationNodeBasePtr rootNode, bool checkComputed)
 {
-    std::list<ComputationNodeBasePtr> nodesRequirePreComputation;
-    GetNodesRequiringX<PreComputedNode<float>>(nodesRequirePreComputation, rootNode, checkComputed);
-    GetNodesRequiringX<PreComputedNode<double>>(nodesRequirePreComputation, rootNode, checkComputed);
-    return nodesRequirePreComputation;
+    std::list<ComputationNodeBasePtr> nodesRequiringX;
+    GetNodesRequiringX<PreComputedNode<float>>(nodesRequiringX, rootNode, checkComputed);
+    GetNodesRequiringX<PreComputedNode<double>>(nodesRequiringX, rootNode, checkComputed);
+    return nodesRequiringX;
 }

 //return list of nodes that require batch mode and not precomputed yet.
 std::list<ComputationNodeBasePtr> ComputationNetwork::GetNodesRequiringBatchMode(const ComputationNodeBasePtr rootNode, bool checkComputed)
 {
-    std::list<ComputationNodeBasePtr> nodesRequirePreComputation;
-    GetNodesRequiringX<BatchModeNode<float>>(nodesRequirePreComputation, rootNode, checkComputed);
-    GetNodesRequiringX<BatchModeNode<double>>(nodesRequirePreComputation, rootNode, checkComputed);
-    return nodesRequirePreComputation;
+    std::list<ComputationNodeBasePtr> nodesRequiringX;
+    GetNodesRequiringX<BatchModeNode<float>>(nodesRequiringX, rootNode, checkComputed);
+    GetNodesRequiringX<BatchModeNode<double>>(nodesRequiringX, rootNode, checkComputed);
+    return nodesRequiringX;
 }

 // The methods below determine evaluation order, which is tricky in presence of recurrent loops.
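The cast change above is the bug fix mentioned in the commit message: the old code called static_pointer_cast<N> on any node whose RequiresPreCompute() returned true, which silently produces a bogus pointer if the node is not actually an N. With dynamic_pointer_cast, the cast itself doubles as the type test. A minimal self-contained illustration of the idiom (types are made up for the example):

    #include <memory>

    struct Node { virtual ~Node() { } };
    struct PreComputeLike : Node { bool HasComputed() const { return false; } };

    void Probe(const std::shared_ptr<Node> & p)
    {
        // returns nullptr unless p really points to a PreComputeLike;
        // static_pointer_cast would "succeed" unconditionally and invite undefined behavior
        if (auto n = std::dynamic_pointer_cast<PreComputeLike>(p))
            n->HasComputed();
    }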
@@ -565,13 +565,12 @@ public:
     // - these must be executed frame by frame rather than as a map
     // - such a loop is treated as if they were a little nested network; this is done inside here
     // - these little nested networks are defined in m_recurrentInfo[]
-    void Evaluate(const ComputationNodeBasePtr rootNode)
+    void Evaluate(const ComputationNodeBasePtr & rootNode)
     {
-        // prepare to compute with the subnetwork that this rootNode depends on, including
-        // - auto-detecting recurrent loops
-        // - collect input and learnable nodes
-        // - calling Validate() on all nodes lazily, which sizes all matrices (column dimensions get updated to MB size)
-        BuildAndValidateSubNetwork(rootNode);
+        // caller must call BuildAndValidateSubNetwork() before
+        // TODO: Some places are hard to fix, e.g. encoder-decoder best-path functions. Those may be broken; this message will tell you.
+        if (!BuiltAndValidatedSubNetwork(rootNode))
+            LogicError("Evaluate for node %ls %ls: BuildAndValidateSubNetwork() has not been called on this node.");

         // determines order of evaluation, such that children get evaluated before their parent nodes
         std::list<ComputationNodeBasePtr>& allNodes = GetEvalOrder(rootNode, false);
@@ -692,6 +691,12 @@ public:
             }
         }
     }
+    template<class NODESET>
+    void Evaluate(const NODESET & nodes)
+    {
+        for (auto & node : nodes)
+            Evaluate(node);
+    }

     // propagate the features' MB size to all nodes of the network
     // TODO: This function should go. Resizing is now part of Validate() and EvaluateThisNode().
@@ -1233,6 +1238,24 @@ private:
 public:
     // prepares the network for computation
     void BuildAndValidateSubNetwork(const ComputationNodeBasePtr rootNode);
+    // and for a set of nodes
+    void StartEvaluateMinibatchLoop(const ComputationNodeBasePtr & rootNode)    // (ugly name; meant to be unique so we can rename if needed)
+    {
+        BuildAndValidateSubNetwork(rootNode);
+    }
+    template<class NODESET>
+    void StartEvaluateMinibatchLoop(const NODESET & nodes)                      // (ugly name; meant to be unique so we can rename if needed)
+    {
+        for (auto & node : nodes)
+            StartEvaluateMinibatchLoop(node);
+    }
+    template<class NODESET>
+    void StartEvaluateMinibatchLoop(const NODESET & nodes1, const NODESET & nodes2)     // often needed for two sets (training & evaluation criteria)
+    {
+        StartEvaluateMinibatchLoop(nodes1);
+        StartEvaluateMinibatchLoop(nodes2);
+    }
+    bool BuiltAndValidatedSubNetwork(const ComputationNodeBasePtr & rootNode);

     //this function will need to be called before actual validation and execution to
     //predetermine how to share matrices to reduce memory usage.
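With these overloads, a caller prepares every root node it will evaluate in the upcoming loop in one place, e.g. (sketch; the variable names follow the SGD and SimpleEvaluator hunks further down):

    net.StartEvaluateMinibatchLoop(evaluationNodes);    // one set
    net.StartEvaluateMinibatchLoop(criterionNodes);     // another set
    // or both at once via the two-set overload:
    m_net.StartEvaluateMinibatchLoop(criterionNodes, evaluationNodes);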
@@ -503,9 +503,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {

     virtual void /*IComputationNode::*/OnEvaluateBeginIteration()   // called before first iteration step of EvaluateThisNode()
     {
-        fprintf(stderr, "Trace: %ls %ls operation\n", NodeName().c_str(), OperationName().c_str());
+        fprintf(stderr, "OnEvaluateBeginIteration: %ls %ls operation\n", NodeName().c_str(), OperationName().c_str());
+    }
+    virtual void /*IComputationNode::*/OnEvaluateEndIteration()     // called after last iteration step of EvaluateThisNode()
+    {
+        fprintf(stderr, "OnEvaluateEndIteration: %ls %ls operation\n", NodeName().c_str(), OperationName().c_str());
     }
-    virtual void /*IComputationNode::*/OnEvaluateEndIteration() { } // called after last iteration step of EvaluateThisNode()

 protected:
@@ -823,6 +826,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 template<class ElemType>
 class ComputationNode : public ComputationNodeBase  // abstract class that cannot be instantiated
 {
+    typedef ComputationNodeBase Base;
 protected:
     //std containers such as list and map does not support class reference so we need to use pointer
     typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
@@ -1249,6 +1253,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #ifdef _DEBUG
     virtual void /*IComputationNode::*/OnEvaluateEndIteration()     // called after last iteration step of EvaluateThisNode()
     {
+        Base::OnEvaluateEndIteration();
         MaskMissingValuesColumnsToZero();
         if (m_functionValues.HasNan("OnEvaluateEndIteration"))
             LogicError("%ls %ls operation unexpectedly produced NaN values.", NodeName().c_str(), OperationName().c_str());
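After the macro split in the next hunk, ComputationNodeBoilerplate contributes only the OperationName()/NewThis() overrides, while UsingComputationNodeMembersBoilerplate keeps its old meaning by chaining the two pieces. This lets a class that already inherits the member-using declarations through another macro combine the parts itself, as the new MeanNode and InvStdDevNode do above:

    typedef MeanInvStdDevNodeBase<ElemType> Base; ComputationNodeBoilerplate; UsingPreComputedNodeMembers;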
@@ -1487,11 +1492,13 @@ public: \
     using Base::SaveToFile; using Base::UpdateFunctionAndGradientMBSize; using Base::SetInput; \
     using Base::Validate; using Base::ValidateUnaryMap; using Base::ValidateBinaryZip; using Base::ValidateUnaryReduce; using Base::ValidateBinaryReduce; using Base::ValidateInferBinaryChildren; using Base::ValidateInferInputSize

-#define UsingComputationNodeMembersBoilerplate \
+#define ComputationNodeBoilerplate \
 protected:    /* some boilerplate goes here */ \
     virtual const std::wstring OperationName() const override { return TypeName(); } \
-    virtual ComputationNodeBase * NewThis(DEVICEID_TYPE deviceId, const wstring & name) override { return new typename std::remove_reference<decltype(*this)>::type(deviceId, name); } \
-    UsingComputationNodeMembers
+    virtual ComputationNodeBase * NewThis(DEVICEID_TYPE deviceId, const wstring & name) override { return new typename std::remove_reference<decltype(*this)>::type(deviceId, name); }
+
+#define UsingComputationNodeMembersBoilerplate \
+    ComputationNodeBoilerplate; UsingComputationNodeMembers

 #pragma endregion base computation class
@@ -49,6 +49,7 @@ void DoCommand(const ConfigParameters& configRoot)
     DataReader<ElemType>* dataReader = new DataReader<ElemType>(readerConfig);
     eval.LoadModel(modelPath);
     dataReader->StartMinibatchLoop(mbSize, 0, epochSize);
+    eval.StartEvaluateMinibatchLoop(outputName);
     while (dataReader->GetMinibatch(inputMatrices))
     {
         void* data = (void*)arr->data();
@@ -1145,7 +1145,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                                    std::vector<ComputationNodeBasePtr> & labelNodes,
                                    std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
 {
-    std::list<ComputationNodeBasePtr> nodes = net.GetNodesRequiringPreComputation();
+    std::list<ComputationNodeBasePtr> nodes = net.GetNodesRequiringPreComputation();    // this tests all HasComputed() flags

     if (nodes.size() == 0)
     {
@@ -1169,39 +1169,28 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0);
     else                                // using only one epoch
         trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0, m_epochSize);
-#if 1
-    size_t actualMBSize;
-    while (DataReaderHelpers::GetMinibatchIntoNetwork(*trainSetDataReader, net, nullptr, false, false, *inputMatrices, actualMBSize))
+    net.StartEvaluateMinibatchLoop(nodes);
+
+    // initialize
+    for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
+    {
+        auto node = static_pointer_cast<PreComputedNode<ElemType>>(*nodeIter);
+        node->MarkComputed(false/*begin accumulating*/);
+    }
+    size_t actualMBSizeDummy;
+    while (DataReaderHelpers::GetMinibatchIntoNetwork(*trainSetDataReader, net, nullptr, false, false, *inputMatrices, actualMBSizeDummy))
     {
         // TODO: move these into GetMinibatchIntoNetwork() --but those are passed around; necessary? Can't we get them from 'net'?
         ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
         ComputationNetwork::UpdateEvalTimeStamps(labelNodes);

-        for (auto & node : nodes)       // this loops over all pertinent PreComputeNodes
-            net.Evaluate(node);
+        net.Evaluate(nodes);
     }
-#else
-    while (trainSetDataReader->GetMinibatch(*inputMatrices))
-    {
-        // TODO: use GetMinibatchIntoNetwork(), should be easy
-        ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
-        ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
-
-        net.SetActualMiniBatchSizeFromFeatures();
-        trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr());
-        net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
-
-        // TODO: Exactly this loop should be INSIDE ComputationNetwork--pass the nodes array instead!
-        for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
-            net.Evaluate(*nodeIter);
-    }
-#endif
+    // finalize

-    // mark done
     for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
     {
         auto node = static_pointer_cast<PreComputedNode<ElemType>>(*nodeIter);
-        node->MarkComputed(true);
+        node->MarkComputed(true/*done accumulating*/);
     }

     return true;
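Condensed, the rewritten PreCompute() driver now brackets accumulation explicitly (sketch; castToPreComputed() abbreviates the static_pointer_cast loops above and is not a real helper in the code):

    net.StartEvaluateMinibatchLoop(nodes);
    for (auto & n : nodes) castToPreComputed(n)->MarkComputed(false);   // reset accumulators
    while (DataReaderHelpers::GetMinibatchIntoNetwork(...))             // one pass over the data
        net.Evaluate(nodes);                                            // accumulate
    for (auto & n : nodes) castToPreComputed(n)->MarkComputed(true);    // finalize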
@@ -1714,13 +1703,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {

     int numMBsRun = 0;

-    size_t numEvalNodes = epochEvalErrors.size();
-
     // NOTE: the following two local matrices are not used in distGradAgg path
     // assume only one training criterion node for each epoch.
     // The criterion values are accumulated here over the minibatches (without having to pull them off the GPU).
     Matrix<ElemType> localEpochCriterion(1, 1, net.GetDeviceId());
-    Matrix<ElemType> localEpochEvalErrors(1, numEvalNodes, net.GetDeviceId());
+    Matrix<ElemType> localEpochEvalErrors(1, epochEvalErrors.size(), net.GetDeviceId());

     localEpochCriterion.SetValue(0);
     localEpochEvalErrors.SetValue(0);
@@ -1740,7 +1727,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     if (useGradientAggregation)
     {
         epochCriterion = double(0.0);
-        epochEvalErrors.assign(numEvalNodes, double(0.0));
+        epochEvalErrors.assign(epochEvalErrors.size(), double(0.0));
     }

     Profiler profiler(m_numMBsToCUDAProfile);
@@ -1752,13 +1739,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                                     m_enableDistributedMBReading &&
                                     trainSetDataReader->SupportsDistributedMBRead();
     if (useDistributedMBReading)
-    {
         trainSetDataReader->StartDistributedMinibatchLoop(tunedMBSize, epochNumber, g_mpi->CurrentNodeRank(), g_mpi->NumNodesInUse(), m_epochSize);
-    }
     else
-    {
         trainSetDataReader->StartMinibatchLoop(tunedMBSize, epochNumber, m_epochSize);
-    }
+    net.StartEvaluateMinibatchLoop(evaluationNodes);
+    net.StartEvaluateMinibatchLoop(criterionNodes);
+    if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode)
+        refNet.StartEvaluateMinibatchLoop(refNode);

     // TODO: what is this??
     AttemptUtteranceDerivativeFeatures(net, trainSetDataReader, featureNodes, inputMatrices);
@@ -1829,10 +1816,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

         //compute eval node first since when gradient is computed the forward function values
         //may be changed and need to be recomputed when gradient and function value share the same matrix
-        for (size_t i = 0; i < numEvalNodes; i++)
-        {
-            net.Evaluate(evaluationNodes[i]);
-        }
+        net.Evaluate(evaluationNodes);

         // only compute gradient when learning rate is large enough
         if (learnRatePerSample > m_minLearnRate * 0.01)
@@ -1872,7 +1856,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             // criteria are in FunctionValues()(0,0), we accumulate into another 1x1 Matrix (to avoid having to pull the values off the GPU)
             Matrix<ElemType>::AddElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(criterionNodes[0])->FunctionValues(),
                                                   0, 0, localEpochCriterion, 0, 0);
-            for (size_t i = 0; i < numEvalNodes; i++)
+            for (size_t i = 0; i < evaluationNodes.size(); i++)
             {
                 Matrix<ElemType>::AddElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(evaluationNodes[i])->FunctionValues(),
                                                       0, 0, localEpochEvalErrors, 0, i);
@@ -1882,14 +1866,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         else
         {
             //distributed gradient aggregation
-            LazyInitDistGradAgg(learnableNodes, numEvalNodes, m_traceLevel);
+            LazyInitDistGradAgg(learnableNodes, evaluationNodes.size(), m_traceLevel);

             //prepare the header
-            m_gradHeader->numEvalNode = numEvalNodes;
+            m_gradHeader->numEvalNode = evaluationNodes.size();
             m_gradHeader->numSamples = actualMBSize;
             m_gradHeader->numSamplesWithLabel = numSamplesWithLabel;
             m_gradHeader->criterion = actualMBSize > 0 ? criterionNodes[0]->Get00Element() : 0.0;
-            for (size_t i = 0; i < numEvalNodes; i++)
+            for (size_t i = 0; i < evaluationNodes.size(); i++)
                 m_gradHeader->evalErrors[i] = actualMBSize > 0 ? evaluationNodes[i]->Get00Element() : 0.0;

             m_distGradAgg->AggregateGradients(m_gradHeader, epochNumber);
@@ -1897,7 +1881,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             aggregateNumSamples = m_gradHeader->numSamples;
             aggregateNumSamplesWithLabel = m_gradHeader->numSamplesWithLabel;
             epochCriterion += m_gradHeader->criterion;
-            for (size_t i = 0; i<numEvalNodes; i++)
+            for (size_t i = 0; i<epochEvalErrors.size(); i++)
                 epochEvalErrors[i] += m_gradHeader->evalErrors[i];
         }

@@ -1963,7 +1947,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         {
             timer.Restart();
             epochCriterion = localEpochCriterion.Get00Element();
-            for (size_t i = 0; i < numEvalNodes; i++)
+            for (size_t i = 0; i < epochEvalErrors.size(); i++)
                 epochEvalErrors[i] = localEpochEvalErrors(0, i);
             timer.Stop();

@@ -1991,7 +1975,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 m_maxComputedEpochSize = numMBsRun * numSamplesLastMBs / m_numMBsToShowResult;
             }

-            for (size_t i = 0; i < numEvalNodes; i++)
+            for (size_t i = 0; i < epochEvalErrors.size(); i++)
             {
                 double evalError = (epochEvalErrors[i] - epochEvalErrorsLastMBs[i]) / numSamplesLastMBs;
                 string formatString = "EvalErr[%lu]PerSample = " + GeneratePaddedFloatOrExpFormat(0, 8, evalError) + "; ";
@@ -2012,7 +1996,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             numSamplesLastMBs = 0;

             epochCriterionLastMBs = epochCriterion;
-            for (size_t i = 0; i < numEvalNodes; i++)
+            for (size_t i = 0; i < epochEvalErrorsLastMBs.size(); i++)
                 epochEvalErrorsLastMBs[i] = epochEvalErrors[i];

             if (std::isnan(epochCriterion))
@@ -2057,7 +2041,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     {
         // with parallelization, we have them in regular variables
         epochCriterion /= float(totalEpochSamples);
-        for (size_t i = 0; i< numEvalNodes; i++)
+        for (size_t i = 0; i< epochEvalErrors.size(); i++)
             epochEvalErrors[i] /= totalEpochSamples;
     }
     else
@@ -2067,7 +2051,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         localEpochEvalErrors /= float(totalEpochSamples);

         epochCriterion = localEpochCriterion.Get00Element();
-        for (size_t i = 0; i < numEvalNodes; i++)
+        for (size_t i = 0; i < epochEvalErrors.size(); i++)
             epochEvalErrors[i] = localEpochEvalErrors(0, i);
     }

@@ -2495,6 +2479,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

 #define EPSILON 1e-5

+// this probes the automatic gradient computation with random inputs
 template<class ElemType>
 bool SGD<ElemType>::GradientCheck(ComputationNetwork& net,
                                   const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
@ -2503,6 +2488,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
{
|
{
|
||||||
vector<string> errMsgs;
|
vector<string> errMsgs;
|
||||||
|
|
||||||
|
net.StartEvaluateMinibatchLoop(criterionNodes[npos]);
|
||||||
|
|
||||||
// gradient checking
|
// gradient checking
|
||||||
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
|
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
|
||||||
{
|
{
|
||||||
|
@@ -2524,7 +2511,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {

         node->UpdateEvalTimeStamp();

-        // use only the first criterion. Is
         net.ComputeGradient<ElemType>(criterionNodes[npos]);

         if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE)
@@ -122,6 +122,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     evalResultsLastMBs.push_back((ElemType)0);

     dataReader->StartMinibatchLoop(mbSize, 0, testSize);
+    m_net.StartEvaluateMinibatchLoop(evalNodes);

     while (DataReaderHelpers::GetMinibatchIntoNetwork(*dataReader, m_net, nullptr, false, false, inputMatrices, actualMBSize))
     {
@@ -191,6 +192,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     }

     //returns error rate
+    // TODO: What does this function do?
     double EvaluateUnroll(IDataReader<ElemType>* dataReader, const size_t mbSize, double &evalSetCrossEntropy, const wchar_t* output = nullptr, const size_t testSize = requestDataSize)
     {
         std::vector<ComputationNodeBasePtr> & featureNodes = m_net.FeatureNodes();
@@ -211,6 +213,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         inputMatrices[L"numberobs"] = new Matrix<ElemType>(1, 1, m_net.GetDeviceId());

         dataReader->StartMinibatchLoop(mbSize, 0, testSize);
+        m_net.StartEvaluateMinibatchLoop(criterionNodes, evaluationNodes);

         double epochEvalError = 0;
         double epochCrossEntropy = 0;
@@ -415,9 +418,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         double evalResultsLastMBs = (double)0;

         for (auto ptr = dataReaders.begin(); ptr != dataReaders.end(); ptr++)
-        {
             (*ptr)->StartMinibatchLoop(mbSize, 0, testSize);
-        }
+        // BUGBUG: Code below will fail because we now must call StartMinibatchLoop(), but I can't tell from below which nodes to call it for.
+        //for (auto & ptr : nets)
+        //    ptr->StartMinibatchLoop(xxx);

         bool bContinueDecoding = true;
         while (bContinueDecoding)
@@ -743,7 +747,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         }
     }

-    //return true if precomputation is executed.
+    // (only called by FindBestPath...())
     void ResetPreCompute()
     {
         //mark false
@@ -767,6 +771,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {

     ComputationNetwork::UpdateEvalTimeStamps(featureNodes);

+    net.StartEvaluateMinibatchLoop(batchComputeNodes);  // TODO: Is this correct? There is no StartMinibatchLoop() for a reader.
+
     net.SetActualMiniBatchSizeFromFeatures();
     for (auto nodeIter = batchComputeNodes.begin(); nodeIter != batchComputeNodes.end(); nodeIter++)
         net.Evaluate(*nodeIter);
|
@ -61,10 +61,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
//Matrix<ElemType> endOfFile = Matrix<ElemType>((size_t)1,(size_t)1);
|
//Matrix<ElemType> endOfFile = Matrix<ElemType>((size_t)1,(size_t)1);
|
||||||
//endOfFile(0,0)=0;
|
//endOfFile(0,0)=0;
|
||||||
|
|
||||||
//evaluate with minibatches
|
// evaluate with minibatches
|
||||||
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
|
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
|
||||||
dataReader.SetNumParallelSequences(1);
|
dataReader.SetNumParallelSequences(1);
|
||||||
|
|
||||||
|
m_net.StartEvaluateMinibatchLoop(outputNodes);
|
||||||
|
|
||||||
size_t totalEpochSamples = 0;
|
size_t totalEpochSamples = 0;
|
||||||
std::map<std::wstring, void *, nocase_compare> outputMatrices;
|
std::map<std::wstring, void *, nocase_compare> outputMatrices;
|
||||||
|
|
||||||
|
@@ -107,7 +109,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         //clean up
-
     }

     void WriteOutput(IDataReader<ElemType>& dataReader, size_t mbSize, std::wstring outputPath, const std::vector<std::wstring>& outputNodeNames, size_t numOutputSamples=requestDataSize)
     {
@@ -142,10 +143,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
         for (size_t i=0; i<featureNodes.size(); i++)
             inputMatrices[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();

-        //evaluate with minibatches
+        // evaluate with minibatches
         dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);

+        m_net.StartEvaluateMinibatchLoop(outputNodes);
+
         size_t totalEpochSamples = 0;
         size_t numMBsRun = 0;
         size_t tempArraySize = 0;