Renamed several methods/names to more familiar or concise names (a usage sketch follows the list):

EvaluateThisNode() -> ForwardProp();
ComputeInputPartial() -> BackpropTo();
SaveToFile() -> Save() (where else to save? In the bank?);
LoadFromFile() -> Load();
ImageLayout -> TensorShape;
m_imageLayout -> m_sampleLayout.
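
For illustration, a minimal node skeleton under the new names (a hypothetical node, not part of this commit; it assumes the ComputationNode<ElemType> base and its ValueSlice/GradientSlice helpers from this codebase):

template<class ElemType>
class MyIdentityNode : public ComputationNode<ElemType>    // illustrative pass-through node
{
    typedef ComputationNode<ElemType> Base;
public:
    virtual void ForwardProp(const FrameRange & frameRange) override                            // was EvaluateThisNode()
    {
        this->ValueSlice(frameRange).SetValue(this->Inputs(0)->ValueSlice(frameRange));         // copy input activations
    }
    virtual void BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override    // was ComputeInputPartial()
    {
        this->Inputs(inputIndex)->GradientSlice(frameRange) += this->GradientSlice(frameRange); // pass the gradient through unchanged
    }
    virtual void Save(File& fstream) const override { Base::Save(fstream); }                    // was SaveToFile(); no extra state here
    virtual void Load(File& fstream, size_t modelVersion) override { Base::Load(fstream, modelVersion); } // was LoadFromFile()
};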
Frank Seide 2015-12-04 16:08:30 -08:00
Parent ca8d153eba
Commit d3765547f9
36 changed files: 649 additions and 708 deletions

View file

@ -13,13 +13,13 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// ImageLayout -- tensor descriptor to describe the inner layout of a data vector that holds a tensor
// TensorShape -- tensor descriptor to describe the inner layout of a data vector that holds a tensor
//
// Minibatches are stored as Matrices. While the column dimension represents multiple data vectors, and may have
// an inner structure (time, parallel sequences) described by the MBLayout, the row dimension represents data
// vectors that hold tensors of data.
//
// The ImageLayout describes the inner tensor structure of these vectors, as a column-major tensor of arbitrary number of dimensions.
// The TensorShape describes the inner tensor structure of these vectors, as a column-major tensor of arbitrary number of dimensions.
//
// Specifically, when the data is an image, then this is a 3-dimensional tensor with dimensions ( channels, width, height ),
// which represents the column-major interpretation of a transposed row-by-row-scanned image where each pixel stores (R,G,B) as a float3.
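
As a concrete example of this ordering (using the helper functions defined further down in this header):

TensorShape image = ImageLayoutWHC(640, 480, 3); // a 640 x 480 RGB image, stored as ( channels, width, height ) = ( 3, 640, 480 )
TensorShape vec   = ImageLayoutVector(42);       // a plain 42-dimensional vector, stored as ( 1, 1, 42 ) for now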
@ -29,25 +29,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: really support lengths other than 3, e.g. fix serialization code to handle variable-length descriptors
// TODO: rename to DataLayout
// TODO: must match ComputationNode::m_numRows; or, rather, the ImageLayout is how m_numRows is stored??
// TODO: must match ComputationNode::m_numRows; or, rather, the TensorShape is how m_numRows is stored??
// TODO: move this elsewhere, maybe a separate header Tensors.h?
struct ImageLayout
struct TensorShape
{
public:
// BUGBUG: This initialization is not correct. This must match GetNumRows(). We probably cannot have all three members here.
// Idea: We could construct this thing with a ref to the enclosing ComputationNode, and replace 'width' by an expression.
ImageLayout() : m_tensorDims(3, 1) { }
TensorShape() : m_tensorDims(3, 1) { }
template<class VEC>
ImageLayout(const VEC & dims) { m_tensorDims.reserve(dims.size()); m_tensorDims.assign(dims.begin(), dims.end()); }
ImageLayout(std::vector<size_t> && dims) : m_tensorDims(std::move(dims)) { }
TensorShape(const VEC & dims) { m_tensorDims.reserve(dims.size()); m_tensorDims.assign(dims.begin(), dims.end()); }
TensorShape(std::vector<size_t> && dims) : m_tensorDims(std::move(dims)) { }
void Invalidate() { m_tensorDims.assign(3, SIZE_MAX); } // TODO: clean up the valid/invalid situation (this is currently done inconsistently)
// TODO: need move constructor/assignment?
bool operator==(const ImageLayout & other) const { return m_tensorDims == other.m_tensorDims; }
bool operator==(const TensorShape & other) const { return m_tensorDims == other.m_tensorDims; }
void SaveToFile(File& fstream) const
void Save(File& fstream) const
{
#if 1
// saving as 32-bit ints. This allows to continue to support the old format (size_t W, H, C)
@ -55,7 +55,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto dim : m_tensorDims)
{
if (dim > UINT32_MAX)
LogicError("ImageLayout::SaveToFile(): Tensor dimension out of bounds (> 4G).");
LogicError("TensorShape::Save(): Tensor dimension out of bounds (> 4G).");
fstream << (uint32_t)dim;
}
#else
@ -64,7 +64,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream << m_tensorDims[1] << m_tensorDims[2] << m_tensorDims[0]; // currently stored in order W, H, C. TODO: general tensor format will be different
#endif
}
void LoadFromFile(File& fstream)
void Load(File& fstream)
{
#if 1
// format: uint32_t n, dim[0], dim[1], ..., dim[n-1]
@ -113,14 +113,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// When constructing an image tensor with the usual W, H, C format, use the following function instead.
// This will sort the three parameters into the correct order.
static inline ImageLayout ImageLayoutWHC(size_t width, size_t height, size_t channels)
static inline TensorShape ImageLayoutWHC(size_t width, size_t height, size_t channels)
{
return ImageLayout(std::vector<size_t> { channels, width, height });
return TensorShape(std::vector<size_t> { channels, width, height });
}
// and use this one when the data is a plain vector
static inline ImageLayout ImageLayoutVector(size_t n)
static inline TensorShape ImageLayoutVector(size_t n)
{
return ImageLayout(std::vector<size_t> { 1, 1, n }); // for now storing it as a 3D object as well --TODO: fix this
return TensorShape(std::vector<size_t> { 1, 1, n }); // for now storing it as a 3D object as well --TODO: fix this
}
// TODO: we need a constructor from config; that will generalize
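
A hedged sketch of a standalone reader for the wire format above, assuming the File stream class from this codebase and that the dimension count precedes the dimensions, as the format comment in Load() states (ReadTensorDims is an illustrative helper, not part of this commit):

static std::vector<size_t> ReadTensorDims(File& fstream)
{
    // format: uint32_t n, dim[0], dim[1], ..., dim[n-1], each written as a 32-bit int
    uint32_t n;
    fstream >> n;
    std::vector<size_t> dims(n);
    for (uint32_t i = 0; i < n; i++)
    {
        uint32_t dim;
        fstream >> dim;
        dims[i] = dim;
    }
    return dims;
}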

View file

@ -63,7 +63,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// In the special case of frame randomization, every frame is stored as a single-frame sequence.
//
// If we describe this in terms of tensors, a data matrix with sample layout (I,J,K) and
// MBLayout (S,T) can be interpreted as ImageLayout(I,J,K,T,S) (note that S is last, not T).
// MBLayout (S,T) can be interpreted as TensorShape(I,J,K,T,S) (note that S is last, not T).
//
// Sequences can also be concatenated, to fill the space better. For this case,
// this object stores about every frame whether it is at the start or end of a sequence.
@ -91,7 +91,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Contract between ComputationNode, ComputationNetwork, and MBLayout:
// - if a node has no MBLayout, m_{function,gradient}Values are not samples (they are not activations or input data), but e.g. model parameters
// - ComputationNode::GetNumCols() == MBLayout::GetNumTimeSteps() * MBLayout::GetNumParallelSequences()
// - ComputationNetwork ensures that m_{function,gradient}Values are allocated correctly before calling EvaluateThisNode() on a node
// - ComputationNetwork ensures that m_{function,gradient}Values are allocated correctly before calling ForwardProp() on a node
// NOTE: This class represents an ongoing abstraction of an originally distributed/code-duped way of defining and accessing the MB layout.
// Some code below represents the actual use cases I encountered. Not all are, I believe, needed to be as they are; this class could be simplified/streamlined much further.
// Some wackiness below is explained by this.
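
A quick worked example of the size contract above, with illustrative numbers: given a sample layout (I, J, K) = (3, 4, 2) and an MBLayout with S = 2 parallel sequences over T = 5 time steps, the minibatch Matrix has I*J*K = 24 rows and GetNumTimeSteps() * GetNumParallelSequences() = 5 * 2 = 10 columns, and is interpreted as TensorShape(3, 4, 2, 5, 2), with S last.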

View file

@ -133,7 +133,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
assert(outputData.GetNumRows()==dim); dim;
SaveToFile(outFile,outputData);
Save(outFile,outputData);
}
outputFileIndex++;
@ -142,7 +142,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());

View file

@ -25,7 +25,7 @@ private:
std::map<std::wstring,size_t> outputNameToTypeMap;
unsigned int sampPeriod;
size_t outputFileIndex;
void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
ElemType * m_tempArray;
size_t m_tempArraySize;

View file

@ -11,16 +11,7 @@
#include "htkfeatio.h" // for reading HTK features
//#ifndef __unix__
#include "ssematrix.h"
//#endif
//#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
//#include "rollingwindowsource.h" // minibatch sources
//#include "utterancesource.h"
//#include "readaheadsource.h"
//#include "chunkevalsource.h"
//#include "minibatchiterator.h"
#define DATAWRITER_EXPORTS // creating the exports here
#include "DataWriter.h"
@ -224,7 +215,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());

View file

@ -25,7 +25,7 @@ private:
std::map<std::wstring,size_t> outputNameToTypeMap;
unsigned int sampPeriod;
size_t outputFileIndex;
void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
void SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
ElemType * m_tempArray;
size_t m_tempArraySize;

View file

@ -208,7 +208,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
{
msra::dbn::matrix output;
output.resize(outputData.GetNumRows(),outputData.GetNumCols());

View file

@ -25,7 +25,7 @@ private:
std::map<std::wstring,size_t> outputNameToTypeMap;
unsigned int sampPeriod;
size_t outputFileIndex;
void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
void SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
ElemType * m_tempArray;
size_t m_tempArraySize;

View file

@ -116,14 +116,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
wstring outFile = outputFiles[outputName];
SaveToFile(outFile, outputData, idx4word[iter->first], nBests[outputName]);
Save(outFile, outputData, idx4word[iter->first], nBests[outputName]);
}
return true;
}
template<class ElemType>
void LMSequenceWriter<ElemType>::SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
void LMSequenceWriter<ElemType>::Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
{
size_t nT = outputData.GetNumCols();
size_t nD = min(idx2wrd.size(), outputData.GetNumRows());

View file

@ -52,7 +52,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
map<wstring, int> nBests;
bool compare_val(const ElemType& first, const ElemType& second);
void SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
void Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
void ReadLabelInfo(const wstring & vocfile,
map<string, int> & word4idx,

View file

@ -102,14 +102,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
wstring outFile = outputFiles[outputName];
SaveToFile(outFile,outputData, idx4word[outputName], nBests[outputName]);
Save(outFile,outputData, idx4word[outputName], nBests[outputName]);
}
return true;
}
template<class ElemType>
void LUSequenceWriter<ElemType>::SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
void LUSequenceWriter<ElemType>::Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
{
size_t nT = outputData.GetNumCols();
size_t nD = min(idx2wrd.size(), outputData.GetNumRows());

View file

@ -27,7 +27,7 @@ private:
map<wstring, int> nBests;
bool compare_val(const ElemType& first, const ElemType& second);
void SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
void Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
void ReadLabelInfo(const wstring & vocfile,
map<string, int> & word4idx,

View file

@ -2794,7 +2794,7 @@ CEMATRIX, const std::wstring name = L"")
\begin_layout Plain Layout
LoadFromFile(fstream, modelVersion, deviceId);
Load(fstream, modelVersion, deviceId);
\end_layout
\begin_layout Plain Layout
@ -2852,10 +2852,10 @@ The first constructor creates the node based on a deviceId and a node name.
The second constructor creates a node from a file.
It passes in a file stream to read data from and a modelVersion value to
control how to load the file, in addition to the deviceId and node name.
In this example, the actual code to load the node is in the LoadFromFile(fstrea
In this example, the actual code to load the node is in the Load(fstrea
m, modelVersion, deviceId) function implemented in the base class.
For some complicated nodes with additional node states, you need to implement
your own LoadFromFile function for your newly added node.
your own Load function for your newly added node.
\end_layout
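
For such stateful nodes, the override follows the same pattern as the PreComputedNode overrides elsewhere in this commit; a minimal sketch (m_myState is a hypothetical member):

virtual void Load(File& fstream, size_t modelVersion) override
{
    Base::Load(fstream, modelVersion); // the base class consumes the shared fields first
    fstream >> m_myState;              // then read any additional node state, in the order Save() wrote it
}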
\begin_layout Standard
@ -3241,7 +3241,7 @@ Forward Evaluation
\begin_layout Standard
For each node type you need to implement two forward computation functions
EvaluateThisNode(), which evaluate the whole minibatch, and EvaluateThisNode(co
ForwardProp(), which evaluate the whole minibatch, and ForwardProp(co
nst size_t timeIdxInSeq), which is used in the recurrent networks to evaluate
the timeIdxInSeq-th sample for all the sequences in the minibatch.
\end_layout
@ -3253,7 +3253,7 @@ status open
\begin_layout Plain Layout
virtual void EvaluateThisNode()
virtual void ForwardProp()
\end_layout
\begin_layout Plain Layout
@ -3263,7 +3263,7 @@ virtual void EvaluateThisNode()
\begin_layout Plain Layout
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->
ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->
FunctionValues());
\end_layout
@ -3278,7 +3278,7 @@ FunctionValues());
\begin_layout Plain Layout
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
virtual void ForwardProp(const size_t timeIdxInSeq)
\end_layout
\begin_layout Plain Layout
@ -3301,7 +3301,7 @@ imeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
\begin_layout Plain Layout
EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1
ForwardPropS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1
Value);
\end_layout
@ -3316,7 +3316,7 @@ Value);
\begin_layout Plain Layout
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const
static void WINAPI ForwardPropS(Matrix<ElemType>& functionValues, const
Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
\end_layout
@ -3351,14 +3351,14 @@ status open
\begin_layout Plain Layout
EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>&
ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>&
input0, const Matrix<ElemType>& input1)
\end_layout
\end_inset
which contains the actual evaluation code.
In the EvaluateThisNode(const size_t timeIdxInSeq) function you will notice
In the ForwardProp(const size_t timeIdxInSeq) function you will notice
the calls to the ColumnSlice function.
As the name suggests, this function returns a column slice of a matrix.
\end_layout
@ -3379,9 +3379,9 @@ Gradient Computation
\begin_layout Standard
Similar to the forward computation, for each node type you need to implement
two gradient computation functions ComputeInputPartial(const size_t inputIndex)
two gradient computation functions BackpropTo(const size_t inputIndex)
, which computes the gradient for the whole minibatch with regard to the
inputIndex-th input, and ComputeInputPartial(const size_t inputIndex, const
inputIndex-th input, and BackpropTo(const size_t inputIndex, const
size_t timeIdxInSeq), which is used in the recurrent networks to compute
the gradient of the timeIdxInSeq-th sample for all the sequences in the
minibatch.
@ -3394,7 +3394,7 @@ status open
\begin_layout Plain Layout
virtual void ComputeInputPartial(const size_t inputIndex)
virtual void BackpropTo(const size_t inputIndex)
\end_layout
\begin_layout Plain Layout
@ -3435,7 +3435,7 @@ virtual void ComputeInputPartial(const size_t inputIndex)
\begin_layout Plain Layout
ComputeInputPartialLeft(Inputs(1)->FunctionValues(), Inputs(0)->Gradient
BackpropToLeft(Inputs(1)->FunctionValues(), Inputs(0)->Gradient
Values(), GradientValues());
\end_layout
@ -3456,7 +3456,7 @@ Values(), GradientValues());
\begin_layout Plain Layout
ComputeInputPartialRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientVal
BackpropToRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientVal
ues(), GradientValues());
\end_layout
@ -3477,7 +3477,7 @@ ues(), GradientValues());
\begin_layout Plain Layout
virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxIn
virtual void BackpropTo(const size_t inputIndex, const size_t timeIdxIn
Seq)
\end_layout
@ -3531,7 +3531,7 @@ e(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
\begin_layout Plain Layout
ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(),
BackpropToLeft(sliceInput1Value, Inputs(0)->GradientValues(),
sliceOutputGrad);
\end_layout
@ -3564,7 +3564,7 @@ Seq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
\begin_layout Plain Layout
ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad,
BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad,
sliceOutputGrad);
\end_layout
@ -3585,7 +3585,7 @@ Seq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
\begin_layout Plain Layout
static void WINAPI ComputeInputPartialLeft(const Matrix<ElemType>& inputFunction
static void WINAPI BackpropToLeft(const Matrix<ElemType>& inputFunction
Values, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientV
alues)
\end_layout
@ -3613,7 +3613,7 @@ es, inputFunctionValues);
\begin_layout Plain Layout
static void WINAPI ComputeInputPartialRight(const Matrix<ElemType>& inputFunctio
static void WINAPI BackpropToRight(const Matrix<ElemType>& inputFunctio
nValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>&
gradientValues)
\end_layout
@ -3650,7 +3650,7 @@ status open
\begin_layout Plain Layout
ComputeInputPartialLeft(const Matrix<ElemType>& inputFunctionValues, Matrix<Elem
BackpropToLeft(const Matrix<ElemType>& inputFunctionValues, Matrix<Elem
Type>& inputGradientValues, const Matrix<ElemType>& gradientValues)
\end_layout
@ -3666,7 +3666,7 @@ status open
\begin_layout Plain Layout
ComputeInputPartialRight(const Matrix<ElemType>& inputFunctionValues, Matrix<Ele
BackpropToRight(const Matrix<ElemType>& inputFunctionValues, Matrix<Ele
mType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
\end_layout
@ -4488,7 +4488,7 @@ if (PreCompute(net,trainSetDataReader, FeatureNodes,labelNodes,inputMatrices)
\begin_layout Plain Layout
net.SaveToFile(GetModelNameForEpoch(int(startEpoch)-1));
net.Save(GetModelNameForEpoch(int(startEpoch)-1));
\end_layout

View file

@ -108,7 +108,7 @@ void DumpNodeInfo(const ConfigParameters& config)
bool printValues = config(L"printValues", true);
ComputationNetwork net(-1); //always use CPU
net.LoadFromFile<ElemType>(modelPath);
net.Load<ElemType>(modelPath);
net.DumpNodeInfoToFile(nodeName, printValues, outputFile, nodeNameRegexStr);
}
@ -530,11 +530,11 @@ void DoParameterSVD(const ConfigParameters& config)
ComputationNetwork net(deviceID);
net.LoadFromFile<ElemType>(modelPath);
net.Load<ElemType>(modelPath);
net.PerformSVDecomposition<ElemType>(svdconfig, AlignedSize);
if (!outputmodelPath.empty())
net.SaveToFile(outputmodelPath);
net.Save(outputmodelPath);
}
@ -813,7 +813,7 @@ public:
if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load --TODO: why all these options?
{
auto net = make_shared<ComputationNetwork>(m_deviceId);
net->LoadFromFile<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
net->Load<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
m_net = net;
}
m_net->ResetEvalTimeStamp();
@ -1372,7 +1372,7 @@ void DoConvertFromDbn(const ConfigParameters& config)
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
net->SaveToFile(modelPath);
net->Save(modelPath);
}
// do topological plot of computation network
@ -1407,7 +1407,7 @@ void DoTopologyPlot(const ConfigParameters& config)
}
ComputationNetwork net(-1);
net.LoadFromFile<ElemType>(modelPath);
net.Load<ElemType>(modelPath);
net.PlotNetworkTopology(outdot);
fprintf(stderr, "Output network description in dot language to %S\n", outdot.c_str());

View file

@ -125,7 +125,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
cn->LoadFromFile<ElemType>(params[0]);
cn->Load<ElemType>(params[0]);
OverrideModelNameAndSetDefaultModel(cn);
}
else if (EqualInsensitive(name, "LoadModelWithName"))
@ -137,7 +137,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
cn->LoadFromFile<ElemType>(params[1]);
cn->Load<ElemType>(params[1]);
OverrideModelNameAndSetDefaultModel(cn, params[0]);
}
else if (EqualInsensitive(name, "LoadNDLSnippet"))
@ -188,7 +188,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
// validate the network before we save it out
ProcessNDLScript(m_netNdlDefault, ndlPassAll, true);
cn->SaveToFile(fileName);
cn->Save(fileName);
}
else if (EqualInsensitive(name, "SaveModel"))
{
@ -207,7 +207,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
// validate and finish the second pass through NDL if any in-line NDL was defined
ProcessNDLScript(netNdl, ndlPassAll, true);
netNdl->cn->SaveToFile(fileName);
netNdl->cn->Save(fileName);
}
else if (EqualInsensitive(name, "SetDefaultModel"))
{

View file

@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
if (inputIndex > 1)
InvalidArgument("Parallel operation only takes two input.");
@ -60,20 +60,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> tmpMat(m_deviceId);
tmpMat.AssignRowSliceValuesOf(GradientValues(), startidx, nrows);
ComputeInputPartialS(tmpMat, child->GradientValues());
BackpropToS(tmpMat, child->GradientValues());
}
/*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType>& gradientValues, Matrix<ElemType>& inputGradientValues)
/*TODO: merge with call site*/void BackpropToS(Matrix<ElemType>& gradientValues, Matrix<ElemType>& inputGradientValues)
{
inputGradientValues += gradientValues;
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
}
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, Matrix<ElemType>& inputFunctionValues0, Matrix<ElemType>& inputFunctionValues1)
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, Matrix<ElemType>& inputFunctionValues0, Matrix<ElemType>& inputFunctionValues1)
{
size_t rows0 = inputFunctionValues0.GetNumRows(), cols0 = inputFunctionValues0.GetNumCols();
size_t rows1 = inputFunctionValues1.GetNumRows(), cols1 = inputFunctionValues1.GetNumCols();
@ -142,7 +142,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SetDims(nInput0 + nInput1, nT);
UpdateFunctionValuesSize();
EvaluateThisNode(FrameRange(m_pMBLayout));
ForwardProp(FrameRange(m_pMBLayout));
/// check with expected values
if (!ISCLOSE(FunctionValues()(0, 0), 1, EPSILON) ||
@ -167,8 +167,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GradientValues()(3, 1) = 5;
GradientValues()(3, 2) = 6;
ComputeInputPartial(0, FrameRange(m_pMBLayout));
ComputeInputPartial(1, FrameRange(m_pMBLayout));
BackpropTo(0, FrameRange(m_pMBLayout));
BackpropTo(1, FrameRange(m_pMBLayout));
/// check with expected values
if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 1, EPSILON)
@ -222,16 +222,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual bool RequiresPreCompute() const override { return true; }
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_hasComputed;
fstream << FunctionValues(); // TODO: why serialize if not yet computed?
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_hasComputed;
LoadFunctionValues(fstream);
}
@ -302,9 +302,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_numSamples(SIZE_MAX)
{ }
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
m_numSamples = SIZE_MAX;
}
@ -334,7 +334,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
virtual void BackpropToNonLooping(size_t /*inputIndex*/) override
{
//LogicError("Mean operation should not be involved in the gradient calculation.");
}
@ -384,10 +384,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
UpdateFunctionValuesSize();
FunctionValues().SetValue(0);
}
// no else branch because EvaluateThisNodeNonLooping() already leaves a valid mean in m_functionValues
// no else branch because ForwardPropNonLooping() already leaves a valid mean in m_functionValues
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
if (m_hasComputed)
@ -475,7 +475,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
if (m_hasComputed)
@ -552,21 +552,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) override
{
InvalidArgument("PerDimMeanVarNormalizationNode should only be called in the evaluation stage.");
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//only feature (input0) and output needs to be sliced
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
ForwardPropS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
}
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
const Matrix<ElemType>& input1, const Matrix<ElemType>& input2)
{
#if DUMPOUTPUT
@ -662,22 +662,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) override
{
InvalidArgument("PerDimMeanVarDeNormalizationNode should only be called in the evaluation stage.");
}
//(feature-mean).*InvStdDev
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//only feature (input0) and output needs to be sliced
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
ForwardPropS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
}
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
const Matrix<ElemType>& input1, const Matrix<ElemType>& input2)
{
#if DUMPOUTPUT
@ -795,16 +795,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual bool HasComputed() const = 0;
virtual void MarkComputed(const bool hasComputed) = 0;
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_hasComputed;
fstream << FunctionValues();
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_hasComputed;
LoadFunctionValues(fstream);
}
@ -874,7 +874,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual bool HasComputed() const { return m_hasComputed; }
virtual void MarkComputed(const bool hasComputed) { m_hasComputed = hasComputed; }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
assert(inputIndex == 0); inputIndex;
VerifyDims(Inputs(0));
@ -888,7 +888,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
// BUGBUG: We must flip the layout, too.
if (GetNumParallelSequences() != 1)
@ -949,7 +949,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SetDims(nOutput, nT);
UpdateFunctionValuesSize();
Inputs(0)->FunctionValues().TransferToDeviceIfNotThere( m_deviceId, true);
EvaluateThisNode(FrameRange(m_pMBLayout));
ForwardProp(FrameRange(m_pMBLayout));
/// check with expected values
if (!ISCLOSE(FunctionValues()(0, 0), 3, EPSILON) ||
@ -970,7 +970,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GradientValues()(0, 2) = 3;
GradientValues().TransferToDeviceIfNotThere( m_deviceId, true);
ComputeInputPartial(0, FrameRange(m_pMBLayout));
BackpropTo(0, FrameRange(m_pMBLayout));
/// check with expected values
if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 4, EPSILON) ||

View file

@ -73,7 +73,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// serialization
// -----------------------------------------------------------------------
void ComputationNetwork::SaveToFile(const wstring& fileName, const FileOptions fileFormat) const
void ComputationNetwork::Save(const wstring& fileName, const FileOptions fileFormat) const
{
// In case of parallel training only the main node should be saving the model to prevent
// the parallel training nodes from colliding to write the same file
@ -106,7 +106,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto nodeIter = m_nameToNodeMap.begin(); nodeIter != m_nameToNodeMap.end(); nodeIter++)
{
ComputationNodeBasePtr nodePtr = nodeIter->second;
nodePtr->SaveToFile(fstream);
nodePtr->Save(fstream);
}
fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
@ -204,7 +204,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream >> opName >> nodeName;
ComputationNodeBasePtr nodePtr = GetNodeFromName(nodeName);
// TODO: don't we have a load constructor? Then when to call which? Document the calling sequence
nodePtr->LoadFromFile(fstream, modelVersion);
nodePtr->Load(fstream, modelVersion);
}
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
@ -217,7 +217,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
template<class ElemType> void ComputationNetwork::LoadFromFile(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork)
template<class ElemType> void ComputationNetwork::Load(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork)
{
ClearNet();
@ -250,7 +250,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fprintf(stderr, "Unknown ComputationNode type %ls (node name %ls)\n", opName.c_str(), nodeName.c_str());
InvalidArgument("Invalid node type.");
}
newNode->LoadFromFile(fstream, modelVersion);
newNode->Load(fstream, modelVersion);
AddNodeToNet(newNode);
}
fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
@ -1127,13 +1127,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template void ComputationNetwork::InitLearnableParameters<float>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const float initValueScale, bool initOnCPUOnly);
template void ComputationNetwork::LoadFromFile<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
template void ComputationNetwork::Load<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
template void ComputationNetwork::InitLearnableParameters<double>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const double initValueScale, bool initOnCPUOnly);
template void ComputationNetwork::LoadFromFile<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
template void ComputationNetwork::Load<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);

View file

@ -7,18 +7,16 @@
#pragma once
// TODOs:
// - need Matrix::RowSlice() (problem: currently has no 'lead' dimension separate from numRows) --> add stride to ImageLayout
// - need Matrix::RowSlice() (problem: currently has no 'lead' dimension separate from numRows) --> add stride to TensorShape
// - BUGBUG (in the future): Once we have > 1 layout in the system, all nodes must compare their actual layouts upon Evaluate().
// Example: TimeReverse must create a new layout. A second TimeReverse ideally would revert back, but can't know. Hence, all consumers of layouts must compare upon Evaluate().
// -> solve by including a layout in the FrameRange directly; then DataSlice() can compare compatibility
// - automatic inference of time window w.r.t. delay nodes (and related nodes such as a temporal pooling)
// - have overrides of RuntimeError etc. in ComputationNode, which prepend the error string with the node name and operation
// - code prettification:
// - sort all node implementations' methods into the same order; esp, EvaluateThisNode() comes before partial
// - sort all node implementations' methods into the same order; esp, ForwardProp() comes before partial
// - sort important nodes first; move unused/experimental nodes into source files named accordingly
// - renaming:
// EvaluateThisNode() -> ForwardProp() // the familiar names
// ComputeInputPartial() -> BackpropTo()
// OnEvaluateBeginIteration() -> BeginForwardProp() // and similar functions likewise
// Inputs() -> Input() // or In()? or GetInput()?
// Children() -> Inputs()
@ -29,15 +27,11 @@
// DataSlice(frameRange) -> DataFor(t) // also more lightweight; 'slice' is an implementation detail
// ValueSlice(.) -> OutputFor(t)
// GradientSlice(.) -> GradientFor(t)
// LoadFromFile() -> Load() // keep it simpler (where else would one load from?)
// SaveToFile() -> Save()
// ImageLayout -> TensorShape // general tensor descriptor
// m_imageLayout -> SampleLayout
// - finish the job:
// - everywhere complete folding EvaluateThisNodeS() into EvaluateThisNode(FrameRange()), same for partial
// - everywhere complete folding ForwardPropS() into ForwardProp(FrameRange()), same for partial
// - revise node constructors, merge by means of default parameters
// - known issues that need actual test cases to be fixed:
// - CRFNode::ComputeInputPartial() fails for >1 parallel sequence due to DataSlice() not being able to return whole sequences
// - CRFNode::BackpropTo() fails for >1 parallel sequence due to DataSlice() not being able to return whole sequences
// - implement reading of MB Layout in Binary, DSSM, and LivbSVM readers --is DSSM already done?
// The basic idea of this implementation is learned from Brian Guenter <bguenter@microsoft.com>
@ -77,7 +71,7 @@ protected:
// SEQTraversalFlowControlNode -- FlowControlNode to traverse a (sub-)network time step by time step
//
// This is to implement recurrent loops. All nodes inside a loop are listed
// inside this node. This node's EvaluateThisNode() function will execute
// inside this node. This node's ForwardProp() function will execute
// them inside a loop over all time steps of the recurrence.
// For every time step, the entire chain of nodes is called, with the time index
// passed as a FrameRange object.
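
Schematically, this mirrors the SEQTraversalFlowControlNode::ForwardProp() body shown further below; the FrameRange construction here is illustrative, since the real iteration object is elided by the hunks:

for (size_t t = 0; t < pMBLayout->GetNumTimeSteps(); t++) // SEQ: step through time
    for (auto & node : m_nestedNodes)                     // entire chain of nodes per time step
    {
        node->ForwardProp(FrameRange(pMBLayout, t));      // one frame across all parallel sequences
        node->UpdateEvalTimeStamp();
    }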
@ -92,10 +86,10 @@ protected:
// - change m_recurrentInfo to use shared_ptrs to ComputationNodeBase
virtual const std::wstring OperationName() const override { return L"SEQTraversalFlowControlNode"; }
virtual void OnEvaluateBeginIteration() override;
virtual void EvaluateThisNode(const FrameRange &) override;
virtual void ForwardProp(const FrameRange &) override;
virtual void OnEvaluateEndIteration() override;
virtual void OnComputeGradientBeginIteration() override;
virtual void ComputeInputPartial(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
virtual void BackpropTo(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
virtual void OnComputeGradientEndIteration() override;
virtual void ComputeGradientForChildren(const FrameRange & frameRange, bool childrenInThisLoop, bool childrenInOuterLoop) override;
virtual void RequestMatricesBeforeEval(MatrixPool& matrixPool);
@ -122,7 +116,7 @@ protected:
// PARTraversalFlowControlNode -- FlowControlNode that traverses a (sub-)network
//
// This node contains a list of nodes in a (sub-)network. This node's
// EvaluateThisNode() method will execute all those nodes once in PAR mode,
// ForwardProp() method will execute all those nodes once in PAR mode,
// that is, by passing a FrameRange object that represents to operate
// on all frames in the node simultaneously.
//
@ -135,10 +129,10 @@ protected:
public:
virtual const std::wstring OperationName() const override { return L"PARTraversalFlowControlNode"; }
virtual void OnEvaluateBeginIteration() override { }
virtual void EvaluateThisNode(const FrameRange &) override;
virtual void ForwardProp(const FrameRange &) override;
virtual void OnEvaluateEndIteration() override { }
virtual void OnComputeGradientBeginIteration() override { }
virtual void ComputeInputPartial(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
virtual void BackpropTo(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
virtual void OnComputeGradientEndIteration() override { }
virtual void ComputeGradientForChildren(const FrameRange & frameRange, bool childrenInThisLoop, bool childrenInOuterLoop) override;
virtual void RequestMatricesBeforeEval(MatrixPool& matrixPool);
@ -348,7 +342,7 @@ public:
// serialization
// -----------------------------------------------------------------------
void SaveToFile(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary) const;
void Save(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary) const;
private:
void SaveToFileImpl(const std::wstring& fileName, const FileOptions fileFormat) const;
public:
@ -358,7 +352,7 @@ public:
// design BUGBUG: binary files do not know whether they are float or double.
// TODO: modify file format to know this; then eliminate the <ElemType> dependency (and in some future, allow nodes to be different)
template<class ElemType>
void LoadFromFile(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
void Load(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr);
// static helper to instantiate a network from a file
@ -368,7 +362,7 @@ public:
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
{
auto net = make_shared<ComputationNetwork>(deviceId);
net->LoadFromFile<ElemType>(fileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
net->Load<ElemType>(fileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
return net;
}

View file

@ -24,19 +24,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// The methods below determine evaluation order, which is tricky in presence of recurrent loops.
// TODO: Can this be moved to a separate class?
// MAIN ENTRY POINT for network recurrent-loop analysis. All other functions below are called from this one.
// forms the recurrent loop that 'rootNode' participates in
// TODO: This function is not lazy, i.e. not cached. BuildAndValidateSubNetwork() caches, but others don't. Not sure why/how that's OK--won't we reassign loop ids?
// FormRecurrentLoops() -- MAIN ENTRY POINT for network recurrent-loop analysis. All other functions in this CPP are called only from this one.
// This function analyzes the network for recurrent loops present in the computation of 'rootNode'.
// This sets/updates:
// - m_recurrentInfo
// - ComputationNode::m_isPartOfLoop and m_loopId
// Is often called before ValidateNetwork() on a root; will be called from inside ValidateNetwork() as well.
// This function is called for multiple nodes, e.g. eval and training criterion. I.e. it must be able to add to a previous result. E.g. it does not clear the m_visited flags at start. This seems brittle.
// BUGBUG: m_visited is also used by ValidateSubNetwork(). Hence, it may be in unexpected state when calling into this multiple times.
// BUGBUG: This currently does not handle nested loops. To handle that:
// - loops are isolated by a ReconcileMBLayout--loop determination should see right through it, and then include everything inside
// - ...? Need to figure this out.
// This function is called for multiple nodes, e.g. eval and training criterion. I.e. it must be able to add to a previous result. E.g. it does not clear the m_visited flags at start.
// Note: This function is not lazy, i.e. not cached. BuildAndValidateSubNetwork() caches, but others don't.
void ComputationNetwork::FormRecurrentLoops(const ComputationNodeBasePtr& rootNode)
{
// determine the strongly connected cliques -> m_recurrentInfo[]

View file

@ -183,14 +183,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring & inputName,
const ImageLayout & imageLayout,
const TensorShape & imageLayout,
const size_t numImages)
{
return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, numImages));
}
template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring & inputName,
const ImageLayout & imageLayout,
const TensorShape & imageLayout,
const size_t numImages)
{
return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, numImages));
@ -547,7 +547,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Reshape(const ComputationNodePtr a,
const size_t numRows,
const ImageLayout & imageLayout,
const TensorShape & imageLayout,
const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<ReshapeNode<ElemType>>(net.GetDeviceId(), nodeName, numRows, imageLayout), a);

View file

@ -40,8 +40,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr CreateSparseLearnableParameter(const std::wstring & paramName, const size_t rows, const size_t cols, const size_t size = 0);
ComputationNodePtr CreateInputNode(const std::wstring & inputName, const size_t rows, const size_t cols);
ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const size_t rows, const size_t cols);
ComputationNodePtr CreateInputNode(const std::wstring & inputName, const ImageLayout & imageLayout, const size_t numImages);
ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const ImageLayout & imageLayout, const size_t numImages);
ComputationNodePtr CreateInputNode(const std::wstring & inputName, const TensorShape & imageLayout, const size_t numImages);
ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const TensorShape & imageLayout, const size_t numImages);
ComputationNodePtr CreatePairNetworkNode(const std::wstring & inputName, const size_t rows, const size_t cols);
ComputationNodePtr CreateConvolutionNode(const std::wstring & nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
ComputationNodePtr CreateMaxPoolingNode(const std::wstring & nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
@ -52,32 +52,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: These next three functions are wrappers around CreateXXXNode(). Remove these.
ComputationNodePtr Parameter(const size_t rows, size_t cols, const std::wstring nodeName = L"") { return CreateLearnableParameter(nodeName, rows, cols); } // TODO: remove
ComputationNodePtr Input(const size_t rows, const size_t cols, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, rows, cols); } // TODO: remove
ComputationNodePtr Input(const ImageLayout & imageLayout, const size_t numImages, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, imageLayout, numImages); } // TODO: remove
ComputationNodePtr Input(const TensorShape & imageLayout, const size_t numImages, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, imageLayout, numImages); } // TODO: remove
// The following functions create nodes and link them to the network and their inputs.
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
ComputationNodePtr PairNetwork(const ComputationNodePtr & a, const std::wstring nodeName = L"");
ComputationNodePtr Convolution(const ComputationNodePtr weight,
const ComputationNodePtr inputValues,
const size_t kernelWidth,
const size_t kernelHeight,
const size_t outputChannels,
const size_t horizontalSubsample,
const size_t verticalSubsample,
const bool zeroPadding = false,
const std::wstring nodeName = L"",
const size_t maxTempMemSizeInSamples = 0);
const ComputationNodePtr inputValues,
const size_t kernelWidth,
const size_t kernelHeight,
const size_t outputChannels,
const size_t horizontalSubsample,
const size_t verticalSubsample,
const bool zeroPadding = false,
const std::wstring nodeName = L"",
const size_t maxTempMemSizeInSamples = 0);
ComputationNodePtr MaxPooling(const ComputationNodePtr inputValues,
const size_t windowWidth,
const size_t windowHeight,
const size_t horizontalSubsample,
const size_t verticalSubsample,
const std::wstring nodeName = L"");
const size_t windowWidth,
const size_t windowHeight,
const size_t horizontalSubsample,
const size_t verticalSubsample,
const std::wstring nodeName = L"");
ComputationNodePtr AveragePooling(const ComputationNodePtr inputValues,
const size_t windowWidth,
const size_t windowHeight,
const size_t horizontalSubsample,
const size_t verticalSubsample,
const std::wstring nodeName = L"");
const size_t windowWidth,
const size_t windowHeight,
const size_t horizontalSubsample,
const size_t verticalSubsample,
const std::wstring nodeName = L"");
ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr PerDimMeanVarNormalization(const ComputationNodePtr feature, const ComputationNodePtr mean, const ComputationNodePtr InvStdDev, const std::wstring nodeName = L"");
ComputationNodePtr PerDimMeanVarDeNormalization(const ComputationNodePtr feature, const ComputationNodePtr mean, const ComputationNodePtr InvStdDev, const std::wstring nodeName = L"");
@ -122,7 +122,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr Plus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Minus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Dropout(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Reshape(const ComputationNodePtr a, const size_t num_rows, const ImageLayout & imageLayout, const std::wstring nodeName = L"");
ComputationNodePtr Reshape(const ComputationNodePtr a, const size_t num_rows, const TensorShape & imageLayout, const std::wstring nodeName = L"");
ComputationNodePtr RowRepeat(const ComputationNodePtr a, const size_t num_repeat, const std::wstring nodeName = L"");
ComputationNodePtr Diagonal(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr PastValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, const size_t col_size, size_t timeStep, const std::wstring nodeName = L"");

View file

@ -28,7 +28,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// MAIN ENTRY POINT for evaluating one minibatch (forward prop)
// TODO: pass a set of nodes instead of only one
// TODO: rename to ForwardProp()? To make it very clear?
// This calls EvaluateThisNode() on all nodes in order of data flow through the network.
// This calls ForwardProp() on all nodes in order of data flow through the network.
// By default, the network is applied concurrently on all frames in a minibatch in parallel (PAR mode, a "map" operation)
// Recurrent loops deviate:
// - a recurrent loop is the loop of nodes that make up computation for one time step (e.g. Times -> Plus -> Sigmoid -> Delay)
@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LogicError("Evaluate for node %ls %ls: BuildAndValidateSubNetwork() has not been called on this node.", rootNode->NodeName().c_str(), rootNode->OperationName().c_str());
// traverse all nodes in the pre-determined evaluation order
GetOuterLoopNode(rootNode)->EvaluateThisNode(FrameRange(nullptr));
GetOuterLoopNode(rootNode)->ForwardProp(FrameRange(nullptr));
}
// MAIN ENTRY POINT for evaluation followed by gradient computation (forward prop then back prop)
@ -125,7 +125,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
}
/*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::EvaluateThisNode(const FrameRange & frameRange) /*override*/
/*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::ForwardProp(const FrameRange & frameRange) /*override*/
{
for (auto & node : m_nestedNodes)
{
@ -136,7 +136,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
assert(recInfo->m_sourceNode->GetMBLayout() == node->GetMBLayout());
node->OnEvaluateBeginIteration();
node->EvaluateThisNode(frameRange.WithLayout(node->GetMBLayout()));
node->ForwardProp(frameRange.WithLayout(node->GetMBLayout()));
node->OnEvaluateEndIteration();
node->UpdateEvalTimeStamp();
@ -195,7 +195,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// This evaluates all nodes in this FlowControlNode in SEQ mode: process the loop frame by frame in a nested loop.
// This is where the time axis changes.
// TODO: Once we do nested loops, then the FrameRange argument to this will refer to the outer loop.
/*virtual*/ void ComputationNetwork::SEQTraversalFlowControlNode::EvaluateThisNode(const FrameRange &) /*override*/
/*virtual*/ void ComputationNetwork::SEQTraversalFlowControlNode::ForwardProp(const FrameRange &) /*override*/
{
// get layout associated with this loop
// All nodes share the same layout.
@ -209,7 +209,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
for (auto & node : m_nestedNodes)
{
node->EvaluateThisNode(t);
node->ForwardProp(t);
node->UpdateEvalTimeStamp();
}
}

View file

@ -79,11 +79,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void UpdateFunctionMBSize() = 0; // recalculate our column dimension from MBLayout
virtual void OnEvaluateBeginIteration() = 0;
virtual void EvaluateThisNode(const FrameRange &) = 0; // forward prop for one minibatch
virtual void OnEvaluateEndIteration() = 0; // called after last iteration step of EvaluateThisNode()
virtual void ForwardProp(const FrameRange &) = 0; // forward prop for one minibatch
virtual void OnEvaluateEndIteration() = 0; // called after last iteration step of ForwardProp()
virtual void OnComputeGradientBeginIteration() = 0; // called before first iteration step of ComputeGradient()
virtual void ComputeInputPartial(const size_t inputIndex, const FrameRange &) = 0;
virtual void BackpropTo(const size_t inputIndex, const FrameRange &) = 0;
virtual void OnComputeGradientEndIteration() = 0; // called after last iteration step of ComputeGradient()
// --- these are meant to be overridden by ControlFlowNodes
@ -103,8 +103,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void Validate(bool isFinalValidationPass) = 0; // main base validation function
virtual void InferImageDimsFromInputs() = 0;
virtual void SaveToFile(File& fstream) const = 0;
virtual void LoadFromFile(File& /*fstream*/, size_t /*modelVersion*/) = 0;
virtual void Save(File& fstream) const = 0;
virtual void Load(File& /*fstream*/, size_t /*modelVersion*/) = 0;
virtual void CopyTo(ComputationNodeBasePtr node, const std::wstring& newName, const CopyNodeFlags flags) const = 0;
// --- optional overrides that describe a feature or property of the node
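Taken together, the renames give the per-node interface this shape. A hedged sketch with stand-in FrameRange and File types (the real signatures take CNTK's own types):

#include <cstddef>

struct FrameRange {};
struct File {};

struct IComputationNodeSketch
{
    virtual void ForwardProp(const FrameRange&) = 0;                        // was EvaluateThisNode()
    virtual void BackpropTo(std::size_t inputIndex, const FrameRange&) = 0; // was ComputeInputPartial()
    virtual void Save(File&) const = 0;                                     // was SaveToFile()
    virtual void Load(File&, std::size_t modelVersion) = 0;                 // was LoadFromFile()
    virtual ~IComputationNodeSketch() = default;
};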
@ -271,7 +271,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
node->m_nodeName = newName;
node->m_inputImageLayout = m_inputImageLayout;
node->m_imageLayout = m_imageLayout;
node->m_sampleLayout = m_sampleLayout;
ComputationNetworkOwnedNodeState::CopyTo(*node);
TimeStamp::CopyTo(*node);
@ -283,12 +283,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: make sure this does not get implemented in any of the base classes
DEVICEID_TYPE GetDeviceId() const { return m_deviceId; } // TODO: remove, only used from copy constructor which will go away
virtual void SaveToFile(File& fstream) const
virtual void Save(File& fstream) const
{
fstream << OperationName() << NodeName();
}
virtual void LoadFromFile(File& /*fstream*/, size_t /*modelVersion*/)
virtual void Load(File& /*fstream*/, size_t /*modelVersion*/)
{
// it is assumed that OperationName and NodeName have already been consumed -- some asymmetry between Save and Load
// base class has nothing to load
@ -299,7 +299,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t GetNumRows() const { return m_numRows; }
size_t GetNumCols() const { return m_numCols; }
pair<size_t, size_t> GetDims() { return make_pair(GetNumRows(), GetNumCols()); }
// TODO: add an overload SetDims(ImageLayout, cols)
// TODO: add an overload SetDims(TensorShape, cols)
virtual // for now virtual as this still updates m_functionValues
void SetDims(size_t rows, size_t cols)
{
@ -485,7 +485,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const char * mbSizeMark = child->m_pMBLayout ? "MBSize " : "";
if (IsChildAnImage(i)) //image
fprintf(stderr, "%ls[%lu {W=%lu, H=%lu, C=%lu}, %s%lu]", child->NodeName().c_str(), child->GetNumRows(),
child->m_imageLayout.GetWidth(), child->m_imageLayout.GetHeight(), child->m_imageLayout.GetNumChannels(), mbSizeMark, child->GetNumCols());
child->m_sampleLayout.GetWidth(), child->m_sampleLayout.GetHeight(), child->m_sampleLayout.GetNumChannels(), mbSizeMark, child->GetNumCols());
else
fprintf(stderr, "%ls[%lu, %s%lu]", child->NodeName().c_str(), child->GetNumRows(), mbSizeMark, child->GetNumCols());
}
@ -522,12 +522,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
bool IsChildAnImage(const size_t index) const
{
return m_inputs[index]->m_imageLayout.GetWidth() != 1 || m_inputs[index]->m_imageLayout.GetNumChannels() != 1;
return m_inputs[index]->m_sampleLayout.GetWidth() != 1 || m_inputs[index]->m_sampleLayout.GetNumChannels() != 1;
}
const ImageLayout & GetImageLayout() const { return m_imageLayout; }
const TensorShape & GetImageLayout() const { return m_sampleLayout; }
pair<ImageLayout, ImageLayout> GetImageLayouts() const { return make_pair(m_inputImageLayout, m_imageLayout); } // helper for Validate()
pair<TensorShape, TensorShape> GetImageLayouts() const { return make_pair(m_inputImageLayout, m_sampleLayout); } // helper for Validate()
const size_t ChildrenSize() const { return m_inputs.size(); } // TODO: rename to NumChildren() or NumInputs(); and inside here where we use m_inputs, use m_inputs.size() as well
@ -542,13 +542,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void ClearGradientForChildren() = 0;
virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override // called before first iteration step of EvaluateThisNode()
virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override // called before first iteration step of ForwardProp()
{
#ifdef TRACK_GAP_NANS
fprintf(stderr, "OnEvaluateBeginIteration: %ls %ls operation\n", NodeName().c_str(), OperationName().c_str());
#endif
}
virtual void /*IComputationNode::*/OnEvaluateEndIteration() override // called after last iteration step of EvaluateThisNode()
virtual void /*IComputationNode::*/OnEvaluateEndIteration() override // called after last iteration step of ForwardProp()
{
#ifdef TRACK_GAP_NANS
fprintf(stderr, "OnEvaluateEndIteration: %ls %ls operation\n", NodeName().c_str(), OperationName().c_str());
@ -577,9 +577,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const auto & child = m_inputs[index];
if (child != nullptr)
m_inputImageLayout = child->m_imageLayout;
m_inputImageLayout = child->m_sampleLayout;
if (outputSameAsInput)
m_imageLayout = m_inputImageLayout;
m_sampleLayout = m_inputImageLayout;
}
void InferMBLayoutFromInputsForStandardCase();
@ -742,8 +742,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// If the matrix is minibatch data (inputs, activations, labels), then matrix columns are samples.
// Note that the actual matrix storage does not always exist.
size_t m_numRows, m_numCols; // matrix dimension of function values and gradients
ImageLayout m_inputImageLayout; // how to interpret each column in the input as an image
ImageLayout m_imageLayout; // and the output
TensorShape m_inputImageLayout; // how to interpret each column in the input as an image
TensorShape m_sampleLayout; // and the output
// TODO: Why is the input layout not just the layout of the input node?
MBLayoutPtr m_pMBLayout;
@ -950,7 +950,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// update size (#columns) of node to match MBLayout
// This must be called right before EvaluateThisNode() the first time for a given minibatch.
// This must be called right before ForwardProp() the first time for a given minibatch.
// Currently overridden by
// - InputValue, which verifies instead of resizing (since Resize() is specified to be destructive, it should not call it).
// - LearnableParameters
@ -1126,11 +1126,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
FunctionValues().Resize(m_numRows, m_numCols);
}
// this is called before a node's EvaluateThisNode() function is called (in loops: for the first time)
// this is called before a node's ForwardProp() function is called (in loops: for the first time)
// This is where we
// - update the node dimension based on actual MB size
// - (re-)allocate the m_functionValues matrix, which may be shared across nodes and thus have changed dimensions
virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override // called before first iteration step of EvaluateThisNode()
virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override // called before first iteration step of ForwardProp()
{
Base::OnEvaluateBeginIteration();
@ -1162,24 +1162,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void /*IComputationNode::*/OnComputeGradientBeginIteration() override
{
Base::OnComputeGradientBeginIteration();
#if 0 // TODO: If you get a NaN failure, feel free to put this back in
// many gradients are reduction operations
// They touch both in-flowing gradients and function values, so we must set both to 0.
// BUGBUG: This masks a bug: Nodes should do that by themselves, like in EvaluateThisNode(), but they currently don't.
if (m_needsGradient)
{
MaskMissingValuesColumnsToZero(FrameRange(m_pMBLayout));
if (m_gradientInitialized)
MaskMissingGradientColumnsToZero(FrameRange(m_pMBLayout));
}
bool anyChildNeedsGradient = false;
for (size_t i = 0; i < m_inputs.size(); i++)
anyChildNeedsGradient |= Inputs(i)->m_needsGradient;
if (anyChildNeedsGradient)
for (size_t i = 0; i < m_inputs.size(); i++)
Inputs(i)->MaskMissingValuesColumnsToZero(FrameRange(Inputs(i)->GetMBLayout()));
#endif
}
#ifdef _DEBUG
@ -1201,7 +1183,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
#endif
// this is the entry point from Network; it dispatches to the node's virtual ComputeInputPartial() implementation
// this is the entry point from Network; it dispatches to the node's virtual BackpropTo() implementation
// TODO: move to -Base (or -Network?)
void ComputeGradientForChildren(const FrameRange & frameRange, bool childrenInThisLoop, bool childrenInOuterLoop) override
{
@ -1243,8 +1225,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
//fprintf(stderr, "ComputeInputPartial %d %d %ls %ls\n", (int)frameRange.timeIdxInSeq, (int)i, NodeName().c_str(), OperationName().c_str());
ComputeInputPartial(i, frameRange); // this computes partial wrt to the child and sums the gradient value in the child
//fprintf(stderr, "BackpropTo %d %d %ls %ls\n", (int)frameRange.timeIdxInSeq, (int)i, NodeName().c_str(), OperationName().c_str());
BackpropTo(i, frameRange); // this computes partial wrt to the child and sums the gradient value in the child
}
#ifdef DISPLAY_DEBUG
else fprintf (stderr, " [%lu]: %s(%s) (no gradient needed so don't compute for)\n", i, child->OperationName().c_str(), child->NodeName().c_str());
@ -1406,24 +1388,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// these two implement the ComputationNode<> interface
void EvaluateThisNode(const FrameRange & frameRange) override final
void ForwardProp(const FrameRange & frameRange) override final
{
if (frameRange.IsAllFrames())
EvaluateThisNodeNonLooping();
ForwardPropNonLooping();
else
LogicError("%s node should never be in a loop.", typeid(*this).name());
}
void ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override final
void BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override final
{
if (frameRange.IsAllFrames())
ComputeInputPartialNonLooping(inputIndex);
BackpropToNonLooping(inputIndex);
else
LogicError("%s node should never be in a loop.", typeid(*this).name());
}
// non-looping node types instead implement these functions
virtual void EvaluateThisNodeNonLooping() = 0;
virtual void ComputeInputPartialNonLooping(size_t inputIndex) = 0;
virtual void ForwardPropNonLooping() = 0;
virtual void BackpropToNonLooping(size_t inputIndex) = 0;
};
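The wrapper above can be exercised in isolation. A small sketch (simplified stand-ins for FrameRange and the base class) of how a whole-minibatch node plugs into the ForwardProp()/ForwardPropNonLooping() dispatch:

#include <cstdio>
#include <stdexcept>

struct FrameRange
{
    bool all = true;
    bool IsAllFrames() const { return all; }
};

struct NonLoopingBase
{
    void ForwardProp(const FrameRange& fr) // final dispatch, mirroring the class above
    {
        if (fr.IsAllFrames())
            ForwardPropNonLooping();
        else
            throw std::logic_error("node should never be in a loop");
    }
    virtual void ForwardPropNonLooping() = 0;
    virtual ~NonLoopingBase() = default;
};

struct WholeBatchNode : NonLoopingBase
{
    void ForwardPropNonLooping() override { std::printf("process the full minibatch\n"); }
};

int main()
{
    WholeBatchNode node;
    node.ForwardProp(FrameRange{}); // all frames -> dispatches; a per-frame range would throw
}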
// =======================================================================
@ -1442,8 +1424,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual ComputationNodeBase * NewThis(DEVICEID_TYPE deviceId, const wstring & name) override { NOT_IMPLEMENTED; }
virtual void Validate(bool isFinalValidationPass) override { NOT_IMPLEMENTED; } // main base validation function
virtual void InferImageDimsFromInputs() override { NOT_IMPLEMENTED; }
virtual void SaveToFile(File& fstream) const override { NOT_IMPLEMENTED; }
virtual void LoadFromFile(File& /*fstream*/, size_t /*modelVersion*/) override { NOT_IMPLEMENTED; }
virtual void Save(File& fstream) const override { NOT_IMPLEMENTED; }
virtual void Load(File& /*fstream*/, size_t /*modelVersion*/) override { NOT_IMPLEMENTED; }
virtual void CopyTo(ComputationNodeBasePtr node, const std::wstring& newName, const CopyNodeFlags flags) const override { NOT_IMPLEMENTED; }
virtual ComputationNodeBasePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) override { NOT_IMPLEMENTED; }
//virtual void SetDims(size_t rows, size_t cols) override { NOT_IMPLEMENTED; }
@ -1515,9 +1497,9 @@ protected: \
using Base::m_pMBLayout; using Base::GetNumTimeSteps; using Base::GetNumParallelSequences; \
using Base::MaskMissingColumnsToZero; using Base::MaskMissingValuesColumnsToZero; using Base::MaskMissingGradientColumnsToZero; using Base::InvalidateMissingValuesColumns; using Base::InvalidateMissingGradientColumns; \
using Base::DataSlice; using Base::ValueSlice; using Base::GradientValues; using Base::GradientValuesPtr; using Base::GradientSlice; using Base::MaskedValueSlice; using Base::MaskedGradientSlice; \
using Base::EvaluateThisNode; using Base::ComputeInputPartial; \
using Base::ForwardProp; using Base::BackpropTo; \
using Base::m_inputs; using Base::m_deviceId; using Base::m_functionValues; using Base::m_gradientValues; \
using Base::m_inputImageLayout; using Base::m_imageLayout; \
using Base::m_inputImageLayout; using Base::m_sampleLayout; \
using Base::m_parameterUpdateRequired; using Base::m_nodeName; \
using Base::CreateMatrixIfNull; using Base::RequestMatrixFromPool; using Base::ReleaseMatrixToPool; \
using Base::CreateUniqId; \
@ -1529,9 +1511,9 @@ protected: \
using Base::HasMBLayout; using Base::GetMBLayout; using Base::LinkToMBLayout; \
using Base::Inputs; using Base::SetInput; \
using Base::IsChildAnImage; using Base::IsEqualTo; using Base::IsFuncValueOlderThanInputs; using Base::IsLeaf; using Base::SetParameterUpdateRequired; \
using Base::LoadFromFile; \
using Base::Load; \
using Base::PrintNodeValuesToFile; using Base::PrintSelfBeforeValidation; \
using Base::SaveToFile; using Base::UpdateFunctionMBSize; \
using Base::Save; using Base::UpdateFunctionMBSize; \
using Base::RequestMatricesBeforeEval; using Base::ReleaseMatricesAfterEval; \
using Base::RequestMatricesBeforeGradientComp; using Base::ReleaseMatricesAfterGradientComp; \
using Base::Validate; using Base::ValidateUnaryMap; using Base::ValidateBinaryZip; using Base::ValidateUnaryReduce; using Base::ValidateBinaryReduce; using Base::ValidateInferBinaryChildrenDims; using Base::ValidateInferChildDims; \


@ -45,7 +45,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_horizontalSubsample(SIZE_MAX), m_verticalSubsample(SIZE_MAX),
m_zeroPadding(false), m_maxTempMemSizeInSamples(SIZE_MAX)
{
m_imageLayout = ImageLayoutWHC(1, 1, 0); // TODO: what is this magic #channels == 0? Can this even be initialized at this time, or only inferred?
m_sampleLayout = ImageLayoutWHC(1, 1, 0); // TODO: what is this magic #channels == 0? Can this even be initialized at this time, or only inferred?
}
ConvolutionNode(DEVICEID_TYPE deviceId, const wstring & name, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0) :
Base(deviceId, name),
@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_horizontalSubsample(horizontalSubsample), m_verticalSubsample(verticalSubsample),
m_zeroPadding(zeroPadding), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
{
m_imageLayout = ImageLayoutWHC(1, 1, outputChannels);
m_sampleLayout = ImageLayoutWHC(1, 1, outputChannels);
}
ConvolutionNode(const ScriptableObjects::IConfigRecordPtr configp) :
ConvolutionNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"kernelWidth"), configp->Get(L"kernelHeight"), configp->Get(L"outputChannels"),
@ -64,21 +64,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
AttachInputs(configp, this->GetExpectedNumInputs());
}
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_kernelWidth << m_kernelHeight << m_horizontalSubsample << m_verticalSubsample;
fstream << m_imageLayout.GetNumChannels();
fstream << m_sampleLayout.GetNumChannels();
fstream << m_zeroPadding << m_maxTempMemSizeInSamples;
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_kernelWidth >> m_kernelHeight >> m_horizontalSubsample >> m_verticalSubsample;
size_t outputChannels;
fstream >> outputChannels;
m_imageLayout = ImageLayoutWHC(1, 1, outputChannels);
m_sampleLayout = ImageLayoutWHC(1, 1, outputChannels);
fstream >> m_zeroPadding >> m_maxTempMemSizeInSamples;
}
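The Save()/Load() pair above follows the usual field-order contract: Load() must read back exactly what Save() wrote, in the same order. A minimal sketch of that contract (std::stringstream standing in for CNTK's File; the field names are illustrative):

#include <iostream>
#include <sstream>
#include <cstddef>

struct ConvParamsSketch
{
    std::size_t kernelWidth = 3, kernelHeight = 3, outputChannels = 32;

    void Save(std::ostream& f) const
    {
        f << kernelWidth << ' ' << kernelHeight << ' ' << outputChannels;
    }
    void Load(std::istream& f, std::size_t /*modelVersion*/)
    {
        f >> kernelWidth >> kernelHeight >> outputChannels; // same order as Save()
    }
};

int main()
{
    ConvParamsSketch saved, loaded;
    std::stringstream f;
    saved.Save(f);
    loaded.Load(f, 1);
    std::cout << loaded.outputChannels << '\n'; // prints 32
}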
@ -102,38 +102,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
//void ComputeInputPartialMap(const size_t inputIndex)
//{
// if (inputIndex > 1)
// InvalidArgument("Convolution operation only takes two inputs.");
//
// if (inputIndex == 0) //derivative with regard to the weight matrix
// ComputeInputPartialOverWeight(GradientValues(), Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_tempMatrix, true);
// else // derivative with regard to the input feature
// ComputeInputPartialOverInputFeature(GradientValues(), Inputs(1)->GradientValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_tempMatrix);
//}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(Inputs(1)->GetNumParallelSequences(), m_pMBLayout));
if (inputIndex == 0) //derivative with regard to the weight matrix
ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix, !frameRange.IsAllFrames());
BackpropToOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix, !frameRange.IsAllFrames());
else if (inputIndex == 1) // derivative with regard to the input feature
{
Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
BackpropToOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
}
}
private:
void ComputeInputPartialOverWeight(Matrix<ElemType> &gradientValues,
void BackpropToOverWeight(Matrix<ElemType> &gradientValues,
Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &/*input0*/, const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix, const bool inLoop)
{
size_t packedInputRows = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();
size_t packedInputColsPerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight();
size_t packedInputColsPerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight();
size_t outputSizePerChannel = packedInputColsPerSample;
//size_t packedInputDim = packedInputRows * packedInputColsPerSample; // size of each packed input sample
//size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels(); //size of each input sample
@ -145,7 +133,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//const Matrix<ElemType> & weightMatrix = input0;
//inputGradientValues.Resize(weightMatrix.GetNumRows(), weightMatrix.GetNumCols()); //should have been resized when preparing gradient computation
gradientValues.Reshape(m_imageLayout.GetNumChannels(), outputSizePerChannel * batchSize); //reshape to match the internal operation
gradientValues.Reshape(m_sampleLayout.GetNumChannels(), outputSizePerChannel * batchSize); //reshape to match the internal operation
size_t subBatchSize = min(batchSize, maxTempMemSizeInSamples);
size_t numSubBatches = (batchSize + subBatchSize - 1) / subBatchSize;
@ -165,7 +153,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
inputSubBatch.SwitchToMatrixType(MatrixType::DENSE, inputSubBatch.GetFormat(), true);
tempMatrix.AssignPackedConvolutionInput(inputSubBatch,
m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputImageLayout.GetNumChannels(),
m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels(),
m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(),
m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample,
m_zeroPadding);
@ -174,14 +162,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
gradientValues.Reshape(m_imageLayout.GetNumChannels() * outputSizePerChannel, batchSize); //change back
gradientValues.Reshape(m_sampleLayout.GetNumChannels() * outputSizePerChannel, batchSize); //change back
}
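The packing arithmetic above is easier to follow with concrete numbers (all assumed, purely for illustration): a 3x3 kernel over an RGB input producing a 28x28 output map.

#include <cstdio>
#include <cstddef>

int main()
{
    const std::size_t kernelWidth = 3, kernelHeight = 3, inputChannels = 3;
    const std::size_t outputWidth = 28, outputHeight = 28;

    std::size_t packedInputRows          = kernelWidth * kernelHeight * inputChannels; // 27
    std::size_t packedInputColsPerSample = outputWidth * outputHeight;                 // 784

    // After packing, one sub-batch of the convolution is a single dense product:
    // weights [outputChannels x 27] times packed input [27 x 784*subBatchSize].
    std::printf("packed input: %zu x %zu per sample\n", packedInputRows, packedInputColsPerSample);
}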
//compute gradient over the packed input and then convert the result to the original input
void ComputeInputPartialOverInputFeature(Matrix<ElemType> &gradientValues, const Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix)
void BackpropToOverInputFeature(Matrix<ElemType> &gradientValues, const Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix)
{
size_t packedInputRows = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();
size_t packedInputColsPerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight();
size_t packedInputColsPerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight();
size_t outputSizePerChannel = packedInputColsPerSample;
//size_t packedInputDim = packedInputRows * packedInputColsPerSample; // size of each packed input sample
//size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels(); //size of each input sample
@ -192,7 +180,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const Matrix<ElemType> & weightMatrix = input0;
gradientValues.Reshape(m_imageLayout.GetNumChannels(), outputSizePerChannel * batchSize); //reshape to match the internal operation
gradientValues.Reshape(m_sampleLayout.GetNumChannels(), outputSizePerChannel * batchSize); //reshape to match the internal operation
size_t subBatchSize = min(batchSize, maxTempMemSizeInSamples);
size_t numSubBatches = (batchSize + subBatchSize - 1) / subBatchSize;
@ -210,25 +198,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> inputGradientSubBatch = inputGradientValues.ColumnSlice(startSampleID, smallBatchSize);
tempMatrix.UnpackConvolutionInput(inputGradientSubBatch,
m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputImageLayout.GetNumChannels(),
m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels(),
m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(),
m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample,
m_zeroPadding);
}
gradientValues.Reshape(m_imageLayout.GetNumChannels() * outputSizePerChannel, batchSize); //change back
gradientValues.Reshape(m_sampleLayout.GetNumChannels() * outputSizePerChannel, batchSize); //change back
}
public:
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
ForwardPropS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
}
private:
void EvaluateThisNodeS(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0,
void ForwardPropS(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0,
const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix)
{
#if NANCHECK
@ -236,7 +223,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
input1.HasNan("Convolution-input1");
#endif
size_t packedInputRows = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();
size_t packedInputColsPerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight();
size_t packedInputColsPerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight();
size_t outputSizePerChannel = packedInputColsPerSample;
//size_t packedInputDim = packedInputRows * packedInputColsPerSample; // size of each packed input sample
//size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels(); //size of each input sample
@ -246,7 +233,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t maxTempMemSizeInSamples = (m_maxTempMemSizeInSamples == 0? batchSize : m_maxTempMemSizeInSamples);
const Matrix<ElemType> & weightMatrix = input0;
assert(weightMatrix.GetNumCols() == packedInputRows && weightMatrix.GetNumRows() == m_imageLayout.GetNumChannels());
assert(weightMatrix.GetNumCols() == packedInputRows && weightMatrix.GetNumRows() == m_sampleLayout.GetNumChannels());
// GPU and 1-dimensional image
bool m_1DConvolutionOnGPUSparse = (m_inputImageLayout.GetHeight() == 1
@ -257,7 +244,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Reshaping is only necessary if we are going to use the unpacking trick
if (!m_1DConvolutionOnGPUSparse)
functionValues.Reshape(m_imageLayout.GetNumChannels(), outputSizePerChannel * batchSize);
functionValues.Reshape(m_sampleLayout.GetNumChannels(), outputSizePerChannel * batchSize);
size_t subBatchSize = min(batchSize, maxTempMemSizeInSamples);
size_t numSubBatches = (batchSize+subBatchSize-1)/subBatchSize;
@ -298,14 +285,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
tempMatrix.Resize(packedInputRows, packedInputColsPerSample * smallBatchSize);
tempMatrix.AssignPackedConvolutionInput(inputSubBatch,
m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputImageLayout.GetNumChannels(),
m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels(),
m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(),
m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample, m_zeroPadding);
Matrix<ElemType>::Multiply(weightMatrix, false, tempMatrix, false, outputSubBatch);
}
}
functionValues.Reshape(m_imageLayout.GetNumChannels() * outputSizePerChannel, batchSize); //each sample becomes a column
functionValues.Reshape(m_sampleLayout.GetNumChannels() * outputSizePerChannel, batchSize); //each sample becomes a column
#if NANCHECK
functionValues.HasNan("Convolution");
@ -331,10 +318,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t weightCols = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();
if (Inputs(0)->FunctionValues().HasNoElements())
ValidateInferChildDims(0, m_imageLayout.GetNumChannels(), weightCols);
ValidateInferChildDims(0, m_sampleLayout.GetNumChannels(), weightCols);
if (isFinalValidationPass && (Inputs(0)->GetNumCols() != weightCols || Inputs(0)->GetNumRows() != m_imageLayout.GetNumChannels()))
LogicError("convolutionWeight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", m_inputs[0]->NodeName().c_str(), (int)m_imageLayout.GetNumChannels(), (int)weightCols);
if (isFinalValidationPass && (Inputs(0)->GetNumCols() != weightCols || Inputs(0)->GetNumRows() != m_sampleLayout.GetNumChannels()))
LogicError("convolutionWeight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", m_inputs[0]->NodeName().c_str(), (int)m_sampleLayout.GetNumChannels(), (int)weightCols);
size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels();
if (Inputs(1)->GetNumRows() == 0)
@ -343,7 +330,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (isFinalValidationPass && Inputs(1)->GetNumRows() != inputDim)
LogicError("each column of input to the convolution node %ls is a sample and should have dimension %d, which is inputWidth * inputHeight * inputChannels", NodeName().c_str(), (int)inputDim);
size_t outputDim = m_imageLayout.GetWidth() * m_imageLayout.GetHeight() * m_imageLayout.GetNumChannels();
size_t outputDim = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight() * m_sampleLayout.GetNumChannels();
SetDims(outputDim, Inputs(1)->GetNumCols());
}
@ -358,15 +345,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
const int kernelWidthCenter = m_kernelWidth % 2;
const int kernelHeightCenter = m_kernelHeight % 2;
m_imageLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth() - kernelWidthCenter) / m_horizontalSubsample + 1,
m_sampleLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth() - kernelWidthCenter) / m_horizontalSubsample + 1,
(m_inputImageLayout.GetHeight() - kernelHeightCenter) / m_verticalSubsample + 1,
m_imageLayout.GetNumChannels());
m_sampleLayout.GetNumChannels());
}
else
{
m_imageLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth() - m_kernelWidth) / m_horizontalSubsample + 1,
m_sampleLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth() - m_kernelWidth) / m_horizontalSubsample + 1,
(m_inputImageLayout.GetHeight() - m_kernelHeight) / m_verticalSubsample + 1,
m_imageLayout.GetNumChannels());
m_sampleLayout.GetNumChannels());
}
}
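Plugging assumed numbers into the two branches above makes the difference visible: a 32x32 input with a 5x5 kernel and stride 2 in each direction.

#include <cstdio>

int main()
{
    const int inW = 32, kW = 5, stride = 2;

    // zeroPadding == true: only the kernel center has to stay inside the image
    int outPadded = (inW - kW % 2) / stride + 1; // (32 - 1) / 2 + 1 = 16

    // zeroPadding == false: the whole kernel has to fit ("valid" convolution)
    int outValid  = (inW - kW) / stride + 1;     // (32 - 5) / 2 + 1 = 14

    std::printf("padded: %d, valid: %d\n", outPadded, outValid);
}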
@ -379,7 +366,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream << string(str);
sprintf(str, "Kernel[Width:%lu, Height:%lu] SubSample[Horizontal:%lu, Vertical:%lu]\n", m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample);
fstream << string(str);
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels());
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels());
fstream << string(str);
sprintf(str, "ZeroPadding=%ls maxTempMemSizeInSamples=%lu\n", m_zeroPadding? L"true" : L"false", m_maxTempMemSizeInSamples);
fstream << string(str);
@ -445,15 +432,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
AttachInputs(configp, this->GetExpectedNumInputs());
}
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_windowWidth << m_windowHeight << m_horizontalSubsample << m_verticalSubsample;
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_windowWidth >> m_windowHeight >> m_horizontalSubsample >> m_verticalSubsample;
}
@ -475,31 +462,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue);
BackpropToV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue);
}
// this function must be overriden by Max or AveragePoolingNode
virtual void ComputeInputPartialV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) = 0;
virtual void BackpropToV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) = 0;
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
EvaluateThisNodeV(sliceOutputValue, sliceInput0Value);
ForwardPropV(sliceOutputValue, sliceInput0Value);
}
// this function must be overriden by Max or AveragePoolingNode
virtual void EvaluateThisNodeV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) = 0;
virtual void ForwardPropV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) = 0;
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
{
@ -512,7 +497,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InferImageDimsFromInputs();
m_inputSizePerSample = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels();
m_outputSizePerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight() * m_imageLayout.GetNumChannels();
m_outputSizePerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight() * m_sampleLayout.GetNumChannels();
if (Inputs(0)->GetNumRows() == 0)
ValidateInferChildDims(0, m_inputSizePerSample, Inputs(0)->GetNumCols());
@ -530,7 +515,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_inputImageLayout.GetWidth() < m_windowWidth || m_inputImageLayout.GetHeight() < m_windowHeight)
InvalidArgument("PoolingNodeBase: inputWidth must >= windowWidth and inputHeight must >= windowHeight.");
m_imageLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth() - m_windowWidth) / m_horizontalSubsample + 1,
m_sampleLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth() - m_windowWidth) / m_horizontalSubsample + 1,
(m_inputImageLayout.GetHeight() - m_windowHeight) / m_verticalSubsample + 1,
m_inputImageLayout.GetNumChannels());
}
@ -544,7 +529,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream << string(str);
sprintf(str, "PoolingWindow[Width:%lu, Height:%lu] SubSampling[Horizontal:%lu, Vertical:%lu]\n", m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
fstream << string(str);
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels());
sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu] \n", m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels());
fstream << string(str);
sprintf(str, "TotalSizePerSample[Input:%lu, Output:%lu] \n", m_inputSizePerSample, m_outputSizePerSample);
fstream << string(str);
@ -581,19 +566,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(configp)
{ }
virtual void ComputeInputPartialV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) override
virtual void BackpropToV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) override
{
inputGradientValues.AddMaxPoolingGradient(gradientValues, input0, functionValues, m_inputImageLayout.GetNumChannels(),
m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample,
m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample,
m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample,
m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
}
virtual void EvaluateThisNodeV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
virtual void ForwardPropV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
{
functionValues.AssignMaxPoolingResult(input0, m_inputImageLayout.GetNumChannels(),
m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample,
m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample,
m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample,
m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
}
};
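AddMaxPoolingGradient() above implements the standard winner-takes-all rule: each output's gradient flows back only to the input position that produced the maximum in the forward pass. A 1-D sketch of that rule in plain loops (assumed flat layout, not CNTK's Matrix API):

#include <vector>
#include <cstddef>

void MaxPoolBackward1D(const std::vector<float>& input,   // forward-pass input
                       const std::vector<float>& outGrad, // gradient at the output
                       std::vector<float>& inGrad,        // accumulated input gradient
                       std::size_t window, std::size_t stride)
{
    for (std::size_t o = 0; o < outGrad.size(); o++)
    {
        std::size_t begin = o * stride, argmax = begin;
        for (std::size_t i = begin + 1; i < begin + window && i < input.size(); i++)
            if (input[i] > input[argmax])
                argmax = i;
        inGrad[argmax] += outGrad[o]; // the argmax position takes the whole gradient
    }
}

int main()
{
    std::vector<float> x{1, 3, 2, 5}, dy{1, 1}, dx(4, 0);
    MaxPoolBackward1D(x, dy, dx, /*window=*/2, /*stride=*/2);
    // dx is now {0, 1, 0, 1}: the gradient went to the window maxima (3 and 5)
}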
@ -619,19 +604,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(configp)
{ }
virtual void ComputeInputPartialV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &/*input0*/, const Matrix<ElemType> &/*functionValues*/) override
virtual void BackpropToV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &/*input0*/, const Matrix<ElemType> &/*functionValues*/) override
{
inputGradientValues.AddAveragePoolingGradient(gradientValues, m_inputImageLayout.GetNumChannels(),
m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample,
m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample,
m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample,
m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
}
virtual void EvaluateThisNodeV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
virtual void ForwardPropV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
{
functionValues.AssignAveragePoolingResult(input0, m_inputImageLayout.GetNumChannels(),
m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample,
m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample,
m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample,
m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
}
};


@ -42,7 +42,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
if (inputIndex == 0)
@ -57,7 +57,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FunctionValues().VerifySize(1, 1);
Inputs(0)->FunctionValues().VerifySize(1, 1);
@ -97,7 +97,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
};
@ -160,21 +160,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
stp = lastLbl;
};
virtual void ComputeInputPartialNonLooping(size_t /*inputIndex*/) override //scaled by 2*number of elements in the Matrix<ElemType>
virtual void BackpropToNonLooping(size_t /*inputIndex*/) override //scaled by 2*number of elements in the Matrix<ElemType>
{
LogicError("SequenceDecoder is used for evaluation only.");
}
/// compute posterior probability of label y at position t
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
DecideStartEndingOutputLab(Inputs(0)->FunctionValues(), mStartLab, mEndLab);
EvaluateThisNodeS(mAlpha, mBacktrace, FunctionValues(), Inputs(1)->FunctionValues(),
ForwardPropS(mAlpha, mBacktrace, FunctionValues(), Inputs(1)->FunctionValues(),
Inputs(2)->FunctionValues(), mStartLab, mEndLab);
}
// compute forward backward algorithm
void EvaluateThisNodeS(Matrix<ElemType>& alpha, Matrix<ElemType>& backtrace, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const size_t stt, const size_t stp)
void ForwardPropS(Matrix<ElemType>& alpha, Matrix<ElemType>& backtrace, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const size_t stt, const size_t stp)
{
/// to-do, each slice is for one sentence
/// to-do, number of slices corresponds to number of frames
@ -283,7 +283,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
};
@ -334,7 +334,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// BUGBUG: This node needs to serialize and CopyTo m_stride
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { NOT_IMPLEMENTED; return; } // TODO: remove these one by one. And why is this not implemented?
if (inputIndex > 2)
@ -350,7 +350,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
//ComputeInputPartialLeft1(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
//BackpropToLeft1(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
size_t r = Inputs(0)->GetNumRows();
size_t T1 = Inputs(0)->GetNumCols() / GetNumParallelSequences(); // TODO: if T1 == GetNumTimeSteps() then we can simplify code below.
@ -363,7 +363,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto mTmp2 = sliceInput1Value.ColumnSlice(k, 1);
auto mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);
ComputeInputPartialLeft1(mTmp2, mTmp1, mTmp3);
BackpropToLeft1(mTmp2, mTmp1, mTmp3);
for (size_t t = 0; t < T1; t++)
{
@ -375,7 +375,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange);
//ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
//BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
// process sequence by sequence
for (size_t k = 0; k < GetNumParallelSequences(); k++)
@ -390,7 +390,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto mTmp2 = sliceInput1Grad.ColumnSlice(k, 1);
auto mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);
ComputeInputPartialRight(mTmp1, mTmp2, mTmp3);
BackpropToRight(mTmp1, mTmp2, mTmp3);
}
}
}
@ -408,7 +408,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
mTmp1.Resize(d, T1);
Matrix<ElemType> mTmp2 = sliceInput1Value.ColumnSlice(k, 1);
Matrix<ElemType> mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);
ComputeInputPartialLeft(mTmp2, mTmp1, mTmp3);
BackpropToLeft(mTmp2, mTmp1, mTmp3);
Matrix<ElemType> mTmp4(sliceInput1Value.GetDeviceId());
for (size_t t = 0; t < T1; t++)
@ -442,14 +442,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> mTmp2 = sliceInput1Grad.ColumnSlice(k, 1);
Matrix<ElemType> mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);
ComputeInputPartialRight(mTmp1, mTmp2, mTmp3);
BackpropToRight(mTmp1, mTmp2, mTmp3);
}
}
}
}
// TODO: the following two functions differ only in the order of arguments in the final MultiplyAndAdd() call -- is that intended??
static /*TODO: merge with call site*/void ComputeInputPartialLeft1(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
static /*TODO: merge with call site*/void BackpropToLeft1(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
#if DUMPOUTPUT
gradientValues.Print("Gradient-in");
@ -466,7 +466,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
static /*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
static /*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
#if DUMPOUTPUT
gradientValues.Print("Gradient-in");
@ -484,7 +484,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
static /*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
static /*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
#if DUMPOUTPUT
gradientValues.Print("Gradient-in");
@ -497,7 +497,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
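For context on the TODO above: for Y = W * X, the standard product-rule gradients are dW += dY * X^T and dX += W^T * dY, which is why the left and right helpers are near-mirror images of each other. A plain-loop sketch (row-major buffers, assumed notation, in place of CNTK's MultiplyAndAdd()):

#include <cstddef>

// Y = W * X with Y: m x n, W: m x k, X: k x n (row-major buffers)
void GradLeft(const float* dY, const float* X, float* dW,
              std::size_t m, std::size_t k, std::size_t n)
{
    // dW += dY * X^T
    for (std::size_t i = 0; i < m; i++)
        for (std::size_t j = 0; j < k; j++)
            for (std::size_t t = 0; t < n; t++)
                dW[i * k + j] += dY[i * n + t] * X[j * n + t];
}

void GradRight(const float* dY, const float* W, float* dX,
               std::size_t m, std::size_t k, std::size_t n)
{
    // dX += W^T * dY
    for (std::size_t j = 0; j < k; j++)
        for (std::size_t t = 0; t < n; t++)
            for (std::size_t i = 0; i < m; i++)
                dX[j * n + t] += W[i * k + j] * dY[i * n + t];
}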
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
size_t rows0 = Inputs(0)->GetNumRows(), cols1 = Inputs(1)->GetNumCols();
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
@ -632,7 +632,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InferImageDimsFromInput(1, false); // the second one is the input since it's column-wise
//after multiplication the structure is lost
m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
}
};


@ -31,12 +31,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
virtual void BackpropToNonLooping(size_t /*inputIndex*/) override
{
LogicError("%ls operation is used for evaluation only.", OperationName().c_str());
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
Inputs(0)->ValueSlice(frameRange).VectorMax(*m_maxIndexes0, *m_maxValues, true);
@ -96,7 +96,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override


@ -40,13 +40,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{
m_parameterUpdateRequired = true;
m_imageLayout = ImageLayoutWHC(1, SIZE_MAX, 1);
m_sampleLayout = ImageLayoutWHC(1, SIZE_MAX, 1);
}
LearnableParameter(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
Base(deviceId, name)
{
m_parameterUpdateRequired = true;
m_imageLayout = ImageLayoutWHC(1, rows, 1);
m_sampleLayout = ImageLayoutWHC(1, rows, 1);
// TODO: Is ^^ this a wise choice? These are often weight matrices, where rows, not columns, are multiplied with input vectors.
CreateMatrixIfNull(m_functionValues);
SetDims(rows, cols);
@ -81,17 +81,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
RuntimeError("init must be one of the values of [ uniform | gaussian | fixedValue | fromFile ]");
}
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_parameterUpdateRequired;
fstream << GetNumRows() << GetNumCols();
fstream << FunctionValues();
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
size_t rows, cols;
fstream >> m_parameterUpdateRequired;
@ -100,7 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SetDims(rows, cols);
LoadFunctionValues(fstream);
m_imageLayout = ImageLayoutWHC(1, rows, 1);
m_sampleLayout = ImageLayoutWHC(1, rows, 1);
}
// initialize with random numbers
@ -158,8 +158,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// computation functions don't do anything for parameter nodes
virtual void UpdateFunctionMBSize() override { }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) override { }
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange &) override { }
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) override { }
virtual void /*ComputationNode::*/ForwardProp(const FrameRange &) override { }
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
{
@ -208,9 +208,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_gradientValues->Resize(rows, cols, size);
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
LearnableParameter<ElemType>::LoadFromFile(fstream, modelVersion);
LearnableParameter<ElemType>::Load(fstream, modelVersion);
CreateMatrixIfNull(m_gradientValues);
m_gradientValues->Resize(GetNumRows(), GetNumCols());
}
@ -245,7 +245,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, bool isSparse) :
Base(deviceId, name)
{
m_imageLayout.Invalidate();
m_sampleLayout.Invalidate();
Init(0, 0, isSparse);
}
InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols, bool isSparse) :
@ -254,10 +254,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (rows * cols == 0)
LogicError("This InputValue dimension is 0.");
m_imageLayout = ImageLayoutVector(rows);
m_sampleLayout = ImageLayoutVector(rows);
Init(rows, cols, isSparse);
}
InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, const ImageLayout & imageLayout, size_t numImages, bool isSparse) :
InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages, bool isSparse) :
Base(deviceId, name)
{
size_t rows = imageLayout.GetNumElements();
@ -266,7 +266,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (rows * cols == 0)
LogicError("This InputValue dimension is 0.");
m_imageLayout = imageLayout;
m_sampleLayout = imageLayout;
Init(rows, cols, isSparse);
}
@ -279,37 +279,37 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
size_t rows = configp->Get(L"rows");
size_t cols = configp->Get(L"cols");
m_imageLayout = ImageLayoutVector(rows); // no tensor, just a vector
m_sampleLayout = ImageLayoutVector(rows); // no tensor, just a vector
Init(rows, cols, isSparse);
}
else
{
m_imageLayout = ImageLayoutWHC(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"));
size_t rows = m_imageLayout.GetNumElements();
m_sampleLayout = ImageLayoutWHC(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"));
size_t rows = m_sampleLayout.GetNumElements();
size_t cols = configp->Get(L"numImages"); // this is actually the MB size
Init(rows, cols, isSparse);
}
}
public:
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
size_t rows = GetNumRows(); // using explicitly typed variables to be 100% symmetrical to LoadFromFile()
Base::Save(fstream);
size_t rows = GetNumRows(); // using explicitly typed variables to be 100% symmetrical to Load()
size_t cols = m_pMBLayout ? 0 : GetNumCols(); // if this Input depends on MB size, we write it as having 0 dimensions
fstream << rows << cols;
m_imageLayout.SaveToFile(fstream);
m_sampleLayout.Save(fstream);
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
size_t rows, cols;
fstream >> rows >> cols;
if (m_pMBLayout) // some older files retained the #columns when saving, which is meaningless
cols = 0;
m_imageLayout.LoadFromFile(fstream);
m_sampleLayout.Load(fstream);
Init(rows, cols, m_isSparse);
}
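The rows written above relate to the sample layout in a simple way: the per-sample row dimension is the product of the tensor dimensions. A sketch with an assumed mini TensorShape (GetNumElements() mirrors what the constructor above relies on):

#include <cstdio>
#include <cstddef>
#include <vector>

struct TensorShapeSketch
{
    std::vector<std::size_t> dims;
    std::size_t GetNumElements() const
    {
        std::size_t n = 1;
        for (auto d : dims)
            n *= d;
        return n;
    }
};

int main()
{
    TensorShapeSketch imageLayout{{224, 224, 3}}; // W, H, C, as in ImageLayoutWHC
    std::printf("rows per sample = %zu\n", imageLayout.GetNumElements()); // 150528
}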
@ -320,8 +320,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
VerifyDims(GetNumRows(), m_pMBLayout->GetNumCols());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange &) override { }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) { }
virtual void /*ComputationNode::*/ForwardProp(const FrameRange &) override { }
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) { }
virtual void DumpNodeInfo(const bool printValues, File& fstream) const override
{
@ -358,7 +358,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
Base(deviceId, name, rows, cols, false)
{ }
InputValue(DEVICEID_TYPE deviceId, const wstring & name, const ImageLayout & imageLayout, size_t numImages) :
InputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages) :
Base(deviceId, name, imageLayout, numImages, false)
{ }
InputValue(const ScriptableObjects::IConfigRecordPtr configp) :
@ -387,7 +387,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
Base(deviceId, name, rows, cols, true)
{ }
SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, const ImageLayout & imageLayout, size_t numImages) :
SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages) :
Base(deviceId, name, imageLayout, numImages, true)
{ }
SparseInputValue(const ScriptableObjects::IConfigRecordPtr configp) :
@ -414,7 +414,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & t) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & t) override
{
if (inputIndex == 0) // left derivative (embedding matrix)
{
@ -422,18 +422,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceInput1Value = Inputs(1)->MaskedValueSlice(t);
Matrix<ElemType> sliceOutputGrad = MaskedGradientSlice(t);
ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
BackpropToLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
}
else if (inputIndex == 1) // right derivative (input)
{
Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(t);
Matrix<ElemType> sliceOutputGrad = GradientSlice(t);
ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
}
}
/*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)
/*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)
{
size_t rows1 = inputFunctionValues.GetNumRows(), cols1 = inputFunctionValues.GetNumCols();
size_t rowsp = gradientValues.GetNumRows(), colsp = gradientValues.GetNumCols();
@ -448,8 +448,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
gradientValues.Reshape(rowsp, colsp);
}
/*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)
{
/*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)
{
size_t rows1 = inputGradientValues.GetNumRows(), cols1 = inputGradientValues.GetNumCols();
size_t rowsp = gradientValues.GetNumRows(), colsp = gradientValues.GetNumCols();
int wordsInEachSample = rows1 / inputFunctionValues.GetNumCols();
@ -463,7 +463,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
gradientValues.Reshape(rowsp, colsp);
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & t) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & t) override
{
// input0 is the weight (each column is an embedding of one word), input 1 contains m_bnrLooked words in each column (sample)
Matrix<ElemType> functionValues = ValueSlice(t);
@ -522,7 +522,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SetDims(nInput, nOutput);
UpdateFunctionValuesSize();
EvaluateThisNode(FrameRange(m_pMBLayout));
ForwardProp(FrameRange(m_pMBLayout));
/// check with expected values
FunctionValues().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
@ -541,7 +541,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Inputs(i)->GradientValues().SetValue(0);
}
for (size_t i = 0; i < 2; i++)
ComputeInputPartial(i, FrameRange(m_pMBLayout));
BackpropTo(i, FrameRange(m_pMBLayout));
// check with expected values
if (!ISCLOSE(Inputs(1)->GradientValues()(0, 0), 2, EPSILON) /// bi
@ -599,14 +599,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_gradientValues->SetValue(0.0f);
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Init(1, 1); // TODO: this looks wrong; should the dimension not come from the loaded model data?
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
}
/// to-do: need to change to the new way of resetting state
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
if (inputIndex > 0)
InvalidArgument("PairNetwork operation only takes one input.");
@ -614,9 +614,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>::ScaleAndAdd(1.0, GradientValues(), Inputs(inputIndex)->GradientValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
assert(m_functionValues->GetNumRows() == GradientValues().GetNumRows()); // original used m_functionValues->GetNumRows() for loop dimension
assert(m_pMBLayout);
@ -624,14 +624,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>::ScaleAndAdd(1.0, GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout)), mTmp);
}
void EvaluateThisNodeMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
m_functionValues->SetValue(Inputs(0)->FunctionValues());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
Matrix<ElemType> mTmp = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
mTmp.SetValue(Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout)));
}

View file

@ -40,7 +40,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
Matrix<ElemType> gradientValues = GradientSlice(frameRange);
Matrix<ElemType> functionValues = ValueSlice(frameRange);
@ -100,7 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
Matrix<ElemType> functionValues = ValueSliceToDense(frameRange, false); // Switch to dense as a work-around because ColumnSlice doesn't support all the sparse formats
Matrix<ElemType> inputFunctionValues0 = Inputs(0)->ValueSlice(frameRange.AllowBroadcast());
@ -190,7 +190,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
Matrix<ElemType> gradientValues = GradientSlice(frameRange);
Matrix<ElemType> functionValues = ValueSlice(frameRange);
@ -232,7 +232,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LogicError("%ls %ls operation's Validate() function let invalid dimensions slip by.", NodeName().c_str(), OperationName().c_str());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
Matrix<ElemType> functionValues = ValueSlice(frameRange);
Matrix<ElemType> inputFunctionValues0 = Inputs(0)->ValueSlice(frameRange.AllowBroadcast());
@ -302,7 +302,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (inputIndex == 0) // left derivative
{
@ -317,7 +317,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//ValueSlice(frameRange).AssignProductOf(Inputs(0)->FunctionValues().Get00Element(), Inputs(1)->ValueSlice(frameRange));
ValueSlice(frameRange).Assign1x1ProductOf(Inputs(0)->FunctionValues()/*1x1*/, Inputs(1)->ValueSlice(frameRange));
@ -361,12 +361,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
Inputs(0)->GradientSlice(frameRange) -= GradientSlice(frameRange);
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
ValueSlice(frameRange).AssignDifferenceOf(0, Inputs(0)->ValueSlice(frameRange));
}
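Both directions of this Negate node are a pure sign flip:

y = -x, \qquad \frac{\partial E}{\partial x} = -\frac{\partial E}{\partial y}

which is why the backward pass can simply subtract the incoming gradient.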
@ -398,7 +398,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (inputIndex == 0) // left derivative
{
@ -421,7 +421,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
size_t rows0 = Inputs(0)->GetNumRows(), cols1 = Inputs(1)->GetNumCols();
VerifyDims(rows0, cols1);
@ -478,7 +478,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InferImageDimsFromInput(1, false); //the second one is the input since it's columnwise
//after multiplication the structure is lost
m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
}
virtual void AllocateGradientMatricesForChildren(MatrixPool& matrixPool) override
@ -516,7 +516,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (inputIndex == 0) //left derivative
{
@ -524,18 +524,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceOutputGrad = MaskedGradientSlice(frameRange);
Matrix<ElemType> sliceInput1Value = Inputs(1)->MaskedValueSlice(frameRange);
ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
BackpropToLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
}
else //right derivative
{
Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange);
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
}
}
/*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
/*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
#if DUMPOUTPUT
gradientValues.Print("Gradient-in");
@ -554,7 +554,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
/*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
/*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
#if DUMPOUTPUT
gradientValues.Print("Gradient-in");
@ -568,7 +568,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
@ -607,7 +607,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InferImageDimsFromInput(1, false); //the second one is the input since it's column-wise
//after multiplication the structure is lost
m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
}
};
@ -629,7 +629,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange);
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
@ -641,13 +641,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
sliceInput0Grad.AddElementProductOf(sliceOutputGrad, sliceInput1Value);
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
//EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
//ForwardPropS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
sliceOutputValue.AssignElementProductOf(sliceInput0Value, sliceInput1Value);
}
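The rule both branches above apply (one code path serves either input because the elementwise product is symmetric):

z = x \odot y \;\Rightarrow\; \frac{\partial E}{\partial x} = \frac{\partial E}{\partial z} \odot y, \qquad \frac{\partial E}{\partial y} = \frac{\partial E}{\partial z} \odot x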
@ -683,24 +683,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
if (inputIndex > 1)
InvalidArgument("RowElementTimes operation only takes two inputs.");
if (inputIndex == 0)
{
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
BackpropToLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
}
else
{
ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
BackpropToRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
}
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange);
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
@ -708,16 +708,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (inputIndex == 0)
{
ComputeInputPartialLeftS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
BackpropToLeftS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
}
else
{
ComputeInputPartialRightS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
BackpropToRightS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
}
}
//left (input 0) is a matrix
/*TODO: merge with call site*/void ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
/*TODO: merge with call site*/void BackpropToLeftS(Matrix<ElemType>& input1FunctionValues,
Matrix<ElemType>& input0GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
@ -732,7 +732,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//right (input 1) is a row vector
/*TODO: merge with call site*/void ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues,
/*TODO: merge with call site*/void BackpropToRightS(Matrix<ElemType>& input0FunctionValues,
Matrix<ElemType>& input1GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
@ -744,22 +744,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
input1GradientValues.HasNan("RowElementTimes");
#endif
}
void EvaluateThisNodeMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
void ForwardPropMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
{
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
//if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
ForwardPropS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
}
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
{
functionValues.SetValue(input0);
functionValues.RowElementMultiplyWith(input1);
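Here input1 is a row vector replicated across all rows; assuming output(i,j) = input0(i,j) * input1(0,j), a worked 2x2 example:

\begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix} \odot_{\text{row}} \begin{pmatrix} 10 & 100 \end{pmatrix} = \begin{pmatrix} 10 & 200 \\ 30 & 400 \end{pmatrix}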
@ -825,41 +825,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
if (inputIndex > 1)
InvalidArgument("ColumnElementTimes operation only takes two inputs.");
if (inputIndex == 0)
{
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
BackpropToLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
}
else
{
ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
BackpropToRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
}
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
if (inputIndex == 0)
{
Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange);
ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
BackpropToLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
}
else
{
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, *m_tempMatrix);
BackpropToRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, *m_tempMatrix);
}
}
//left (input 0) is a matrix
/*TODO: merge with call site*/void ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
/*TODO: merge with call site*/void BackpropToLeftS(Matrix<ElemType>& input1FunctionValues,
Matrix<ElemType>& input0GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
@ -874,7 +874,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//right (input 1) is a col vector
/*TODO: merge with call site*/void ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues,
/*TODO: merge with call site*/void BackpropToRightS(Matrix<ElemType>& input0FunctionValues,
Matrix<ElemType>& input1GradientValues,
const Matrix<ElemType>& gradientValues,
Matrix<ElemType>& tempMatrix)
@ -886,21 +886,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
input1GradientValues.HasNan("ColumnElementTimes");
#endif
}
void EvaluateThisNodeMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
void ForwardPropMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
{
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
//if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
ForwardPropS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
}
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
{
functionValues.SetValue(input0);
functionValues.ColumnElementMultiplyWith(input1);
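ColumnElementTimes is the dual: input1 is a column vector replicated across all columns, i.e. assuming output(i,j) = input0(i,j) * input1(i,0):

\begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix} \odot_{\text{col}} \begin{pmatrix} 10 \\ 100 \end{pmatrix} = \begin{pmatrix} 10 & 20 \\ 300 & 400 \end{pmatrix}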
@ -974,7 +974,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (inputIndex == 0) // left derivative
{
@ -993,20 +993,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
///*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
///*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
//{
// temp.AssignInnerProductOf(gradientValues, inputFunctionValues, false);
// inputGradientValues += temp;
//}
//
///*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
///*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
//{
// temp.SetValue(gradientValues);
// temp.ColumnElementMultiplyWith(inputFunctionValues);
// inputGradientValues += temp;
//}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
@ -1094,13 +1094,13 @@ private:
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
// BUGBUG: In the future we may want to allow this to operate on a scalar that is one step of an outer time loop.
Inputs(0)->GradientSlice(frameRange) += GradientValues(); // here the assumption is that gradientValues are 1x1 matrix
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
FunctionValues().AssignSumOfElements(Inputs(0)->MaskedValueSlice(frameRange)); // since we are reducing over frames, we must first mask gaps in input to zero
}
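Since the output is the scalar sum of all input elements (gaps masked to zero), every element receives the same gradient:

y = \sum_{i,j} x_{ij} \;\Rightarrow\; \frac{\partial E}{\partial x_{ij}} = \frac{\partial E}{\partial y}

which is exactly the 1x1 broadcast performed in BackpropTo above.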
@ -1118,7 +1118,7 @@ private:
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
};
@ -1141,7 +1141,7 @@ private:
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange);
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
@ -1149,12 +1149,12 @@ private:
sliceInputGrad += sliceOutputGrad; // here the assumption is that gradientValues is a row vector
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
//EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
//ForwardPropS(sliceOutputValue, sliceInputValue);
Matrix<ElemType>::VectorSum(sliceInputValue, sliceOutputValue, true);
}
@ -1171,7 +1171,7 @@ private:
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
};
@ -1194,7 +1194,7 @@ private:
Base(deviceId, name)
{ }
virtual void /*ComputationNodeNonLooping::*/ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
virtual void /*ComputationNodeNonLooping::*/BackpropToNonLooping(size_t /*inputIndex*/) override
{
Matrix<ElemType>& inputGradientValues = Inputs(0)->GradientValues();
const Matrix<ElemType>& gradientValues = GradientValues();
@ -1210,7 +1210,7 @@ private:
#endif
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
#if DUMPOUTPUT
Inputs(0)->FunctionValues().Print("TransposeNode- Input0");
@ -1242,7 +1242,7 @@ private:
InferImageDimsFromInput(0, false); // the second one is the input since it's column-wise
// after transposition, the structure is lost
m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumCols(), 1);
m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumCols(), 1);
}
};
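Transposition commutes with differentiation, so the backward pass of this TransposeNode is itself a transpose:

Y = X^\top \;\Rightarrow\; \frac{\partial E}{\partial X} = \left( \frac{\partial E}{\partial Y} \right)^{\top}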
@ -1277,7 +1277,7 @@ private:
{
InferImageDimsFromInput(0, true);
m_imageLayout = ImageLayoutWHC(1, m_imageLayout.GetHeight(), 1);
m_sampleLayout = ImageLayoutWHC(1, m_sampleLayout.GetHeight(), 1);
if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
fprintf(stderr, "WARNING: Diagonal operation cannot inherit image size information from its child. Image size info is lost.\n");
@ -1329,7 +1329,7 @@ private:
InferImageDimsFromInputs();
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
Inputs(0)->FunctionValues().AssignDiagonalValuesTo(FunctionValues());
#if NANCHECK
@ -1337,7 +1337,7 @@ private:
#endif
}
virtual void /*ComputationNodeNonLooping::*/ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
virtual void /*ComputationNodeNonLooping::*/BackpropToNonLooping(size_t /*inputIndex*/) override
{
Matrix<ElemType>& inputGradientValues = Inputs(0)->GradientValues();
const Matrix<ElemType>& gradientValues = GradientValues();
@ -1374,7 +1374,7 @@ private:
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
// functionValues, invNorm0, invNorm1 - output from the EvaluateNode() method
// temp, rightTerm, leftTerm - temporary matrices
@ -1397,7 +1397,7 @@ private:
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
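The cached m_invNorm0/m_invNorm1 hold the reciprocal column norms of the two inputs; in those terms the cosine and its input gradient are

c = \frac{x^\top y}{\lVert x \rVert \, \lVert y \rVert}, \qquad \frac{\partial c}{\partial x} = \frac{y}{\lVert x \rVert \, \lVert y \rVert} - c \, \frac{x}{\lVert x \rVert^{2}}

(symmetrically for y); presumably this is what the leftTerm/rightTerm temporaries assemble.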
@ -1433,7 +1433,7 @@ private:
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1477,7 +1477,7 @@ private:
ReleaseMatrixToPool(m_temp, matrixPool);
}
private:
// invNorm nodes transfer data between EvaluateThisNode and ComputeInputPartial
// invNorm nodes transfer data between ForwardProp and BackpropTo
shared_ptr<Matrix<ElemType>> m_invNorm0;
shared_ptr<Matrix<ElemType>> m_invNorm1;
// the rest are temporaries, values don't need to be maintained
@ -1504,7 +1504,7 @@ private:
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
@ -1524,7 +1524,7 @@ private:
}
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
ValueSlice(frameRange).AssignKhatriRaoProductOf(Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange));
}
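AssignKhatriRaoProductOf computes the column-wise Kronecker product: for inputs of size m x n and p x n,

(X \bullet Y)_{:,j} = X_{:,j} \otimes Y_{:,j}

so the output has m*p rows and the same column count, which is why only the row count survives into m_sampleLayout below.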
@ -1553,12 +1553,12 @@ private:
virtual void InferImageDimsFromInputs()
{
//since it's symmetrical, any one of the inputs may be the true input.
//since we don't use the input image size info in the operation, the input part doesn't matter.
// since it's symmetrical, any one of the inputs may be the true input.
// since we don't use the input image size info in the operation, the input part doesn't matter.
InferImageDimsFromInput(1, false);
//after KhatriRaoProduct the structure is lost
m_imageLayout = ImageLayoutWHC(1, m_functionValues->GetNumRows(), 1);
// after KhatriRaoProduct the structure is lost
m_sampleLayout = ImageLayoutWHC(1, m_functionValues->GetNumRows(), 1);
}
};
@ -1580,31 +1580,31 @@ private:
Base(deviceId, name)
{ }
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
if (inputIndex > 1)
InvalidArgument("CosDistanceWithNegativeSamples operation only takes grdients on the first two inputs.");
ComputeInputPartialS(inputIndex, *m_invNorm0, *m_invNorm1, FunctionValues(), *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues());
BackpropToS(inputIndex, *m_invNorm0, *m_invNorm1, FunctionValues(), *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange);
Matrix<ElemType> sliceThisGrad = GradientSlice(frameRange);
ComputeInputPartialS(inputIndex, *m_invNorm0, *m_invNorm1, sliceOutputValue, *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad);
BackpropToS(inputIndex, *m_invNorm0, *m_invNorm1, sliceOutputValue, *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad);
}
// functionValues, invNorm0, invNorm1 - output from the EvaluateNode() method
// temp, rightTerm, leftTerm - temporary matrices
// in0, in1, in2, in3 - input functionValues from other nodes
// inputGradientValues(x) - gradients to update, where x matches inputIndex
/*TODO: merge with call site*/void ComputeInputPartialS(const size_t inputIndex, const Matrix<ElemType>& invNorm0, const Matrix<ElemType>& invNorm1, const Matrix<ElemType>& functionValues,
/*TODO: merge with call site*/void BackpropToS(const size_t inputIndex, const Matrix<ElemType>& invNorm0, const Matrix<ElemType>& invNorm1, const Matrix<ElemType>& functionValues,
Matrix<ElemType>& temp, Matrix<ElemType>& rightTerm, Matrix<ElemType>& leftTerm, Matrix<ElemType>& invNormSquare, // the temporary variables
const Matrix<ElemType>& in0, const Matrix<ElemType>& in1, const Matrix<ElemType>& in2, const Matrix<ElemType>& in3,
Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& thisGradientValues)
@ -1701,22 +1701,22 @@ private:
}
}
void EvaluateThisNodeMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
void ForwardPropMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
{
EvaluateThisNodeS(*m_invNorm0, *m_invNorm1, FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
ForwardPropS(*m_invNorm0, *m_invNorm1, FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
//if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
EvaluateThisNodeS(*m_invNorm0, *m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
ForwardPropS(*m_invNorm0, *m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
}
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& invNorm0, Matrix<ElemType>& invNorm1, Matrix<ElemType>& functionValues, Matrix<ElemType>& in0, Matrix<ElemType>& in1, Matrix<ElemType>& in2, Matrix<ElemType>& in3, Matrix<ElemType>& leftTermTemp, Matrix<ElemType>& rightTermTemp)
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& invNorm0, Matrix<ElemType>& invNorm1, Matrix<ElemType>& functionValues, Matrix<ElemType>& in0, Matrix<ElemType>& in1, Matrix<ElemType>& in2, Matrix<ElemType>& in3, Matrix<ElemType>& leftTermTemp, Matrix<ElemType>& rightTermTemp)
{
invNorm0.AssignVectorNorm2Of(in0, true); // seems to modify input (in0)
invNorm0.AssignElementInverseOf(invNorm0);
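The two lines above compute, per column j, the reciprocal norm used to normalize the later inner products (assuming the `true` flag selects column-wise norms):

\mathrm{invNorm0}_{j} = \frac{1}{\lVert \mathrm{in0}_{:,j} \rVert_{2}}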
@ -1779,7 +1779,7 @@ private:
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1826,7 +1826,7 @@ private:
ReleaseMatrixToPool(m_temp, matrixPool);
}
private:
// invNorm nodes transfer data between EvaluateThisNode and ComputeInputPartial
// invNorm nodes transfer data between ForwardProp and BackpropTo
shared_ptr<Matrix<ElemType>> m_invNorm0;
shared_ptr<Matrix<ElemType>> m_invNorm1;
shared_ptr<Matrix<ElemType>> m_leftTerm;

View file

@ -43,24 +43,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// TODO: with FrameRange, this code has now been reduced so much that there is no need to have these overrides here; they can just be implemented in the derived classes directly.
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
assert(inputIndex == 0); inputIndex;
auto gradient = Inputs(0)->GradientSlice(frameRange);
ComputeInputPartialV(*m_gradient, Inputs(0)->ValueSlice(frameRange), gradient, GradientSlice(frameRange));
BackpropToV(*m_gradient, Inputs(0)->ValueSlice(frameRange), gradient, GradientSlice(frameRange));
}
// derived class implement the actual non-linear operation
virtual void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) = 0;
virtual void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) = 0;
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
auto values = ValueSlice(frameRange);
EvaluateThisNodeV(values, Inputs(0)->ValueSlice(frameRange));
ForwardPropV(values, Inputs(0)->ValueSlice(frameRange));
}
// derived class implement the actual non-linear operation
virtual void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) = 0;
virtual void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) = 0;
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
{
@ -111,7 +111,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
NonlinearityNodeBase<ElemType>(deviceId, name)
{ }
void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) override
void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) override
{
gradient.AssignLinearRectifierDerivativeOf(inputFunctionValues);
#if DUMPOUTPUT
@ -123,7 +123,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
{
functionValues.AssignTruncateBottomOf(inputFunctionValues, 0);
#if NANCHECK
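Written out, this rectifier pair implements

y = \max(0, x), \qquad \frac{\partial y}{\partial x} = \begin{cases} 1 & x > 0 \\ 0 & x \le 0 \end{cases}

with AssignTruncateBottomOf as the forward clamp and AssignLinearRectifierDerivativeOf as the 0/1 mask.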
@ -154,15 +154,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// we should get rid of this code dup, need to unify the -V functions
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
BackpropToS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
assert(inputIndex == 0); inputIndex;
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -170,19 +170,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
BackpropToS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
}
// should be:
/*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
/*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
// but is:
/*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
/*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
{
gradient.AssignSigmoidDerivativeOf(functionValues);
inputGradientValues.AddElementProductOf(gradientValues, gradient);
}
/*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignSigmoidOf(inputFunctionValues);
#if NANCHECK
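The sigmoid derivative can be expressed through the node's own output v = σ(x), which is why BackpropToS needs only FunctionValues() and not the input:

\sigma(x) = \frac{1}{1 + e^{-x}}, \qquad \sigma'(x) = \sigma(x) \, (1 - \sigma(x)) = v \, (1 - v)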
@ -210,15 +210,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// TODO: unify signature & get rid of code dup
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
BackpropToS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
assert(inputIndex == 0); inputIndex;
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -226,13 +226,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
BackpropToS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
}
// should be:
/*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
/*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
// but is:
/*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
/*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
{
gradient.AssignElementProductOf(functionValues, functionValues); // v .* v
gradient.AssignDifferenceOf(1, gradient); // 1-v^2
@ -240,7 +240,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
inputGradientValues.AddElementProductOf(gradientValues, gradient); // += d .* (1 - v .* v)
}
/*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignTanhOf(inputFunctionValues);
#if NANCHECK
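Like sigmoid, tanh's derivative needs only the stored output v:

\tanh'(x) = 1 - \tanh^{2}(x) = 1 - v^{2}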
@ -268,15 +268,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// TODO: get rid of code dup
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
BackpropToS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
assert(inputIndex == 0); inputIndex;
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -284,20 +284,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
BackpropToS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
}
// should be:
/*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
/*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
// but is:
/*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
/*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
{
gradient.AssignElementInverseOf(inputFunctionValues); // 1/x (x is input to log(x))
inputGradientValues.AddElementProductOf(gradientValues, gradient);
}
/*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignLogOf(inputFunctionValues);
#if NANCHECK
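The Log node's backward pass is the elementwise chain rule

y = \log x \;\Rightarrow\; \frac{\partial E}{\partial x} = \frac{\partial E}{\partial y} \cdot \frac{1}{x}

with AssignElementInverseOf supplying the 1/x factor.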
@ -324,7 +324,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
NonlinearityNodeBase<ElemType>(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
assert(inputIndex == 0); inputIndex;
@ -335,9 +335,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_gradient->AssignExpOf(sliceInputValue); // Exp(x) is its own partial
sliceInputGrad.AddElementProductOf(sliceOutputGrad, *m_gradient);
}
virtual void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { NOT_IMPLEMENTED; } // not needed
virtual void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { NOT_IMPLEMENTED; } // not needed
void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
{
functionValues.AssignExpOf(inputFunctionValues);
#if NANCHECK
@ -365,15 +365,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// TODO: code dup
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
BackpropToS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
assert(inputIndex == 0); inputIndex;
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -381,19 +381,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
BackpropToS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
}
// should be:
/*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
/*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
// but is:
/*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
/*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
{
gradient.AssignNegativeSineOf(inputFunctionValues); // -sin(x) (x is input to Cosine(x))
inputGradientValues.AddElementProductOf(gradientValues, gradient);
}
/*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignCosineOf(inputFunctionValues);
#if NANCHECK
@ -423,15 +423,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// TODO: code dup
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(*m_gradient, *m_diff, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
BackpropToS(*m_gradient, *m_diff, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
assert(inputIndex == 0); inputIndex;
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -439,13 +439,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialS(*m_gradient, *m_diff, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
BackpropToS(*m_gradient, *m_diff, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
}
// should be:
/*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
/*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
// but is:
/*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& diff, Matrix<ElemType>& inputGradientValues,
/*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& diff, Matrix<ElemType>& inputGradientValues,
const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
{
gradient.AssignInnerProductOf(gradientValues, functionValues, true);
@ -454,7 +454,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
inputGradientValues.AddElementProductOf(diff, functionValues);
}
/*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignLogSoftmaxOf(inputFunctionValues, true);
functionValues.InplaceExp();
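The forward pass computes softmax as exp(logsoftmax(x)) for numerical stability. Assembled from the pieces above (a per-column inner product of d and v, then the difference), the backward pass is the usual softmax Jacobian-vector product, applied column by column:

\frac{\partial E}{\partial x_i} = v_i \Bigl( d_i - \sum_k d_k v_k \Bigr), \qquad v = \operatorname{softmax}(x), \; d = \frac{\partial E}{\partial v}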
@ -513,15 +513,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// TODO: code dup
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(*m_gradient, *m_softmax, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
BackpropToS(*m_gradient, *m_softmax, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
assert(inputIndex == 0); inputIndex;
Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -529,13 +529,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialS(*m_gradient, *m_softmax, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
BackpropToS(*m_gradient, *m_softmax, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
}
// should be:
/*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
/*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues; inputGradientValues; gradientValues; LogicError("wrong signature :( need to unify code more"); }
// but is:
/*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& softmax, Matrix<ElemType>& inputGradientValues,
/*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& softmax, Matrix<ElemType>& inputGradientValues,
const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
{
softmax.AssignExpOf(functionValues);
@ -544,7 +544,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>::AddScaledDifference(1.0, gradientValues, softmax, inputGradientValues);
}
/*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
functionValues.AssignLogSoftmaxOf(inputFunctionValues, true);
#if NANCHECK
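For the log-softmax variant the stored output is y = logsoftmax(x), so exp(y) recovers the softmax needed in the gradient:

y_i = x_i - \log \sum_k e^{x_k}, \qquad \frac{\partial E}{\partial x_i} = d_i - e^{y_i} \sum_k d_k

matching the AssignExpOf / AddScaledDifference sequence above.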
@ -602,30 +602,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNode<ElemType>(deviceId, name)
{ }
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
switch (inputIndex)
{
case 0:
ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), GradientValues(), *m_prior, *m_posterior, *m_temp);
BackpropToUnnormedPrior(Inputs(0)->GradientValues(), GradientValues(), *m_prior, *m_posterior, *m_temp);
break;
case 1:
ComputeInputPartialMean(Inputs(1)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
BackpropToMean(Inputs(1)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
break;
case 2:
ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), GradientValues(), *m_normedDeviation, *m_posterior, *m_temp);
BackpropToLogStddev(Inputs(2)->GradientValues(), GradientValues(), *m_normedDeviation, *m_posterior, *m_temp);
break;
case 3:
ComputeInputPartialFeature(Inputs(3)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
BackpropToFeature(Inputs(3)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
break;
default:
InvalidArgument("GMMLogLikelihoodNode only takes four inputs.");
}
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
//get the right slice
const size_t colsPrior = Inputs(0)->GetNumCols();
@ -637,12 +637,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
case 0:
{
if (colsPrior == 1)
ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, *m_prior, slicePosterior, *m_temp);
BackpropToUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, *m_prior, slicePosterior, *m_temp);
else
{
Matrix<ElemType> sliceUnnormedPriorGradient = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> slicePrior = DataSlice(*m_prior, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, *m_temp);
BackpropToUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, *m_temp);
}
}
break;
@ -650,11 +650,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Matrix<ElemType> sliceNormedDeviationVectors = DataSlice(*m_normedDeviationVectors, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
if (colsPrior == 1)
ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
BackpropToMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
else
{
Matrix<ElemType> sliceMeanGradient = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
BackpropToMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
}
}
break;
@ -662,11 +662,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Matrix<ElemType> sliceNormedDeviation = DataSlice(*m_normedDeviation, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
if (colsPrior == 1)
ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
BackpropToLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
else
{
Matrix<ElemType> sliceLotStddevGradient = Inputs(2)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
BackpropToLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
}
}
break;
@ -674,7 +674,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Matrix<ElemType> sliceNormedDeviationVectors = DataSlice(*m_normedDeviationVectors, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceFeatureGradient = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
BackpropToFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
}
break;
default:
@ -682,7 +682,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
/*TODO: merge with call site*/void ComputeInputPartialUnnormedPrior(Matrix<ElemType>& unnormedPriorGradientValues, const Matrix<ElemType>& gradientValues,
/*TODO: merge with call site*/void BackpropToUnnormedPrior(Matrix<ElemType>& unnormedPriorGradientValues, const Matrix<ElemType>& gradientValues,
const Matrix<ElemType>& prior, const Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
{
temp.AssignDifferenceOf(posterior, prior);
@ -695,7 +695,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
RuntimeError("GMMLogLikelihoodNode: UnnormedPrior should either have same number of columns as the features or have only one column.");
}
/*TODO: merge with call site*/void ComputeInputPartialMean(Matrix<ElemType>& meanGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
/*TODO: merge with call site*/void BackpropToMean(Matrix<ElemType>& meanGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
{
size_t numComponent = posterior.GetNumRows();
@ -721,7 +721,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
RuntimeError("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column.");
}
/*TODO: merge with call site*/void ComputeInputPartialLogStddev(Matrix<ElemType>& logStddevGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviation,
/*TODO: merge with call site*/void BackpropToLogStddev(Matrix<ElemType>& logStddevGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviation,
const Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
{
size_t numComponent = posterior.GetNumRows();
@ -738,7 +738,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
RuntimeError("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column.");
}
/*TODO: merge with call site*/void ComputeInputPartialFeature(Matrix<ElemType>& featureGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
/*TODO: merge with call site*/void BackpropToFeature(Matrix<ElemType>& featureGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
{
size_t numComponent = posterior.GetNumRows();
@ -776,17 +776,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature
void EvaluateThisNodeMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
void ForwardPropMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
{
// all internal matrices will be automatically resized since all of them are assigned to a value, so no resize is needed here.
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(),
ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(),
*m_prior, *m_stddev, *m_normedDeviationVectors, *m_normedDeviation, *m_posterior, *m_temp);
}
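For reference, the quantity ForwardPropS assembles from these buffers is the diagonal-covariance GMM log-likelihood. In notation matching the input names (the softmax normalization of unnormedPrior is inferred from the variable name, not shown in this excerpt):

\[
\log p(x) \;=\; \log \sum_k \pi_k \,\mathcal{N}\!\big(x;\, \mu_k,\, \operatorname{diag}(\sigma_k^2)\big),
\qquad \pi = \operatorname{softmax}(\text{unnormedPrior}),
\qquad \sigma_k = e^{\text{logstddev}_k},
\]

with the responsibilities \(\gamma_k = \pi_k \mathcal{N}_k / \sum_j \pi_j \mathcal{N}_j\) kept in m_posterior and consumed by the BackpropTo* helpers above.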
//input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
//if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
size_t colsPrior = Inputs(0)->GetNumCols();
size_t numSamples = Inputs(3)->GetNumCols();
@ -799,7 +799,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (colsPrior == 1)
{
EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), sliceFeature,
ForwardPropS(sliceOutputValue, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), sliceFeature,
*m_prior, *m_stddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, *m_temp);
}
else if (colsPrior == numSamples)
@ -811,7 +811,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> slicePrior = DataSlice(*m_prior, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceStddev = DataSlice(*m_stddev, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
EvaluateThisNodeS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature,
ForwardPropS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature,
slicePrior, sliceStddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, *m_temp);
}
else //should not reach the code since validation should fail already
@ -820,7 +820,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature
// If we want to speed this up, we need to replace the following code with several specialized GPU functions
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& unnormedPrior, const Matrix<ElemType>& mean, Matrix<ElemType>& logstddev,
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& unnormedPrior, const Matrix<ElemType>& mean, Matrix<ElemType>& logstddev,
const Matrix<ElemType>& feature, Matrix<ElemType>& prior, Matrix<ElemType>& stddev, Matrix<ElemType>& normedDeviationVectors,
Matrix<ElemType>& normedDeviation, Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
{
@ -933,7 +933,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(3, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1004,16 +1004,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_randomSeed = (unsigned long)CreateUniqId();
}
void ComputeInputPartialMap(const size_t inputIndex)
void BackpropToMap(const size_t inputIndex)
{
if (inputIndex > 0)
InvalidArgument("Dropout operation only takes one input.");
ComputeInputPartialS(m_dropoutRate, Inputs(0)->GradientValues(), *m_maskOfDropout, GradientValues());
BackpropToS(m_dropoutRate, Inputs(0)->GradientValues(), *m_maskOfDropout, GradientValues());
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -1023,10 +1023,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
sliceMask = DataSlice(*m_maskOfDropout, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
}
ComputeInputPartialS(m_dropoutRate, sliceInput0Grad, sliceMask, sliceOutputGrad);
BackpropToS(m_dropoutRate, sliceInput0Grad, sliceMask, sliceOutputGrad);
}
/*TODO: merge with call site*/void ComputeInputPartialS(const double dropoutRate, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& gradientValues)
/*TODO: merge with call site*/void BackpropToS(const double dropoutRate, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& gradientValues)
{
if (dropoutRate > 0)
{
@ -1038,13 +1038,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
void EvaluateThisNodeMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
void ForwardPropMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
{
EvaluateThisNodeS(m_dropoutRate, m_randomSeed, FunctionValues(), *m_maskOfDropout, Inputs(0)->FunctionValues());
ForwardPropS(m_dropoutRate, m_randomSeed, FunctionValues(), *m_maskOfDropout, Inputs(0)->FunctionValues());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
//if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputValue = Matrix <ElemType>();
@ -1058,10 +1058,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
EvaluateThisNodeS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value);
ForwardPropS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value);
}
/*TODO: merge with call site*/void EvaluateThisNodeS(const double dropoutRate, unsigned long& randomSeed, Matrix<ElemType>& functionValues, Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& inputFunctionValues)
/*TODO: merge with call site*/void ForwardPropS(const double dropoutRate, unsigned long& randomSeed, Matrix<ElemType>& functionValues, Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& inputFunctionValues)
{
if (dropoutRate > 0)
{
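The ForwardPropS/BackpropToS pair above implements standard inverted dropout. A self-contained sketch of the same computation (illustrative names; plain vectors stand in for Matrix&lt;ElemType&gt;, and the exact masking/scaling calls in CNTK's Matrix class may differ):

#include &lt;cstddef&gt;
#include &lt;random&gt;
#include &lt;vector&gt;

// Forward: sample the mask once and scale kept units by 1/(1-rate).
void DropoutForwardSketch(double rate, unsigned long&amp; seed, std::vector&lt;float&gt;&amp; mask,
                          const std::vector&lt;float&gt;&amp; in, std::vector&lt;float&gt;&amp; out)
{
    std::mt19937 rng(seed++);                      // advance the seed per call, as the node does
    std::bernoulli_distribution keep(1.0 - rate);
    mask.resize(in.size());
    out.resize(in.size());
    for (std::size_t i = 0; i &lt; in.size(); i++)
    {
        mask[i] = keep(rng) ? float(1.0 / (1.0 - rate)) : 0.0f;
        out[i] = in[i] * mask[i];
    }
}

// Backward: reuse the forward-pass mask; gradient flows only through kept units.
void DropoutBackwardSketch(const std::vector&lt;float&gt;&amp; mask,
                           const std::vector&lt;float&gt;&amp; outGrad, std::vector&lt;float&gt;&amp; inGrad)
{
    for (std::size_t i = 0; i &lt; mask.size(); i++)
        inGrad[i] += outGrad[i] * mask[i];
}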
@ -1168,23 +1168,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartial(const size_t /*inputIndex*/) //TODO: this is still needed?
virtual void BackpropTo(const size_t /*inputIndex*/) //TODO: this is still needed?
{
LogicError("Hardmax is not differentiable and is used for evaluation only.");
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & /*frameRange*/) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & /*frameRange*/) override
{
LogicError("Hardmax is not differentiable and is used for evaluation only.");
}
/*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
/*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
gradient; inputFunctionValues; inputGradientValues; gradientValues;
LogicError("wrong signature :( need to unify code more");
}
/*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
//TODO: temp solution, we need to write a math function specifically for this
functionValues.AssignHardmaxOf(inputFunctionValues, true);
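A sketch of what AssignHardmaxOf computes per column, and why both BackpropTo overloads above simply throw: the output is a one-hot indicator of the argmax, a step function with no useful gradient. (Illustrative code, not the CNTK kernel.)

#include &lt;cstddef&gt;

// One column: 1 at the argmax, 0 everywhere else.
void HardmaxColumnSketch(const float* in, float* out, std::size_t rows)
{
    std::size_t best = 0;
    for (std::size_t i = 1; i &lt; rows; i++)
        if (in[i] &gt; in[best])
            best = i;
    for (std::size_t i = 0; i &lt; rows; i++)
        out[i] = (i == best) ? 1.0f : 0.0f;
}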


@ -129,9 +129,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_attachInputsFn = [](){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };
}
public:
void SaveToFile(File& fstream) const
void Save(File& fstream) const
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_timeStep;
fstream << GetNumRows() << GetNumCols();
@ -139,10 +139,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream << m_initialActivationValue;
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
// the node has already been initialized e.g. w.r.t. direction and sequence flags
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_timeStep;
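The rename leaves the serialization contract untouched: Load must read fields back in exactly the order Save wrote them, base class first. A minimal compilable sketch of the pattern using plain iostreams (the struct, stream types, and field values are illustrative stand-ins for the real node and File class):

#include &lt;iostream&gt;

struct DelayNodeSketch
{
    int m_timeStep = 1;
    double m_initialActivationValue = 0.1;

    void Save(std::ostream&amp; fstream) const
    {
        // Base::Save(fstream) would come first in the real node
        fstream &lt;&lt; m_timeStep &lt;&lt; ' ' &lt;&lt; m_initialActivationValue;
    }
    void Load(std::istream&amp; fstream)
    {
        // Base::Load(fstream, modelVersion) would come first in the real node
        fstream &gt;&gt; m_timeStep &gt;&gt; m_initialActivationValue;
    }
};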
@ -208,7 +208,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
public:
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
assert(inputIndex == 0); inputIndex;
@ -220,7 +220,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// recursive call to ourselves
FrameRangeIteration range(m_pMBLayout, -dir);
for (auto t = range.rbegin(); t != range.rend(); t++) // note: reverse iterator
ComputeInputPartial(inputIndex, t);
BackpropTo(inputIndex, t);
return;
}
@ -253,13 +253,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void OnEvaluateBeginIteration() override // called before first iteration step of EvaluateThisNode()
virtual void OnEvaluateBeginIteration() override // called before first iteration step of ForwardProp()
{
Base::OnEvaluateBeginIteration();
CacheMBLayout();
}
virtual void OnEvaluateEndIteration() override // called after last iteration step of EvaluateThisNode()
virtual void OnEvaluateEndIteration() override // called after last iteration step of ForwardProp()
{
// In BPTT, we carry over left-to-right state across minibatches.
// It is kept in m_delayedActivation, m_delayedActivationMBLayout.
@ -282,7 +282,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// This function assumes OnEvaluateBegin/EndIteration() to be called before/after the iteration loop.
// TODO: In the future, there may be value for one more way of handling the boundary condition: Fill as 'NoInput'. Then we can use this to implement rolling windows (albeit inefficiently). Would require to unshare the layout.
virtual void EvaluateThisNode(const FrameRange & frameRange) override
virtual void ForwardProp(const FrameRange & frameRange) override
{
assert(m_pMBLayout);
@ -294,7 +294,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// recursive call to ourselves
FrameRangeIteration range(m_pMBLayout, -dir);
for (auto t = range.begin(); t != range.end(); t++)
EvaluateThisNode(t);
ForwardProp(t);
return;
}
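The dispatch pattern above: a ForwardProp call over the whole minibatch unrolls itself into one recursive single-frame call per time step, iterating in the direction the delay requires. A compilable sketch (the direction handling is illustrative; the real code derives it from FrameRangeIteration and -dir):

#include &lt;cstddef&gt;
#include &lt;functional&gt;

void ForwardPropAllFrames(std::size_t numTimeSteps, bool iterateForward,
                          const std::function&lt;void(std::size_t)&gt;&amp; forwardPropOneFrame)
{
    if (iterateForward)
        for (std::size_t t = 0; t &lt; numTimeSteps; t++)
            forwardPropOneFrame(t);            // recursive single-frame call in the real node
    else
        for (std::size_t t = numTimeSteps; t-- &gt; 0; )
            forwardPropOneFrame(t);
}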
@ -381,7 +381,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
hist.TransferFromDeviceToDevice(m_deviceId, device, true);
// need a layout as well
// EvaluateThisNode() expects it to have the same number of parallel sequences.
// ForwardProp() expects it to have the same number of parallel sequences.
if (!m_delayedActivationMBLayout) m_delayedActivationMBLayout = make_shared<MBLayout>();
m_delayedActivationMBLayout->Init(GetNumParallelSequences(), hist.GetNumCols() / GetNumParallelSequences());
}
@ -631,16 +631,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
}
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_inputDim << m_outputDim;
fstream << m_DefaultState;
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
if (modelVersion == 2)
fstream >> m_inputDim >> m_outputDim;
fstream >> m_DefaultState;
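Note the version gate in the Load above: m_inputDim/m_outputDim are read only when the stream was written by model version 2, while everything else is read unconditionally. Schematically (field names from the code above; the struct and stream types are illustrative):

#include &lt;cstddef&gt;
#include &lt;iostream&gt;

struct LstmLoadSketch
{
    std::size_t m_inputDim = 0, m_outputDim = 0;
    double m_DefaultState = 0;

    void Load(std::istream&amp; fstream, std::size_t modelVersion)
    {
        if (modelVersion == 2)                  // dims were serialized only by version 2
            fstream &gt;&gt; m_inputDim &gt;&gt; m_outputDim;
        fstream &gt;&gt; m_DefaultState;              // present in every version
    }
};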
@ -672,7 +672,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
if (inputIndex > 4)
InvalidArgument("LSTM operation only takes five inputs.");
@ -1063,7 +1063,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
size_t nT = Inputs(0)->GetNumCols();
size_t outputDim = Inputs(1)->GetNumRows();
@ -1117,7 +1117,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, GetNumParallelSequences(), m_DefaultState, &m_pMBLayout->GetM());
EvaluateThisNodeS(Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(4)->FunctionValues(),
ForwardPropS(Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(4)->FunctionValues(),
sliceObs, mSlicePrevOutput, mSlicePrevState, sliceOutput, sliceState, sliceGi, sliceGf, sliceGo, sliceTanhState, sliceTanhInput, m_tempMatrix);
}
@ -1313,7 +1313,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
/*TODO: merge with call site*/void EvaluateThisNodeS(
/*TODO: merge with call site*/void ForwardPropS(
const Matrix<ElemType>& mInputGate,
const Matrix<ElemType> &mForgetGate, const Matrix<ElemType> &mOutputGate,
const Matrix<ElemType> &mCellWgt,
@ -1490,7 +1490,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SetDims(nOutput, nT);
m_DefaultState = 0.0;
EvaluateThisNode(FrameRange(m_pMBLayout));
ForwardProp(FrameRange(m_pMBLayout));
// check with expected values
if (!ISCLOSE(FunctionValues()(0, 0), 0.0335975, EPSILON) ||
@ -1510,7 +1510,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Inputs(i)->GradientValues().SetValue(0);
}
for (size_t i = 0; i < 5; i++)
ComputeInputPartial(i, FrameRange(m_pMBLayout));
BackpropTo(i, FrameRange(m_pMBLayout));
// check with expected values
if (!ISCLOSE(Inputs(1)->GradientValues()(0, 0), 0.07843818, EPSILON) // bi


@ -129,7 +129,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// ReshapeNode (input) -- reinterpret input matrix as having different dimensions
// where the new row dimension is given, and the column dimension is inferred.
// Also optionally associate a different ImageLayout with the data.
// Also optionally associate a different TensorShape with the data.
//
// If input has no layout, then this reshapes the input matrix
// from (rows x cols) to (newRows x (cols / newRows * rows)).
@ -149,13 +149,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// E.g. ReinterpretRowStackAsSequence and ReinterpretSequenceAsRowStack.
// BUGBUG: This is not actually implemented yet. Instead, it goes from 1 to K steps or from K to 1 step. This is temporary/experimental, until the plumbing for nesting is there.
//
// Thirdly, ReshapeNode can also be used to update only the ImageLayout. In that case, the MBLayout is kept as is.
// Thirdly, ReshapeNode can also be used to update only the TensorShape. In that case, the MBLayout is kept as is.
//
// Note: The new row dimension must be a straight multiple or divisor of the current row dimension.
// To reshape to a non-multiple go to row dim 1 first.
//
// Unlike most other nodes, this node has intimate inside knowledge of MBLayouts and frameRanges.
// TODO: Changing the ImageLayout does not seem to belong here.
// TODO: Changing the TensorShape does not seem to belong here.
// -----------------------------------------------------------------------
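A quick numeric check of the reshape arithmetic described above, with illustrative numbers (the expressions mirror factor()/weStack() as defined further down):

#include &lt;cstddef&gt;

void ReshapeArithmeticExample()
{
    std::size_t rows = 4, cols = 6, numTargetRows = 8;
    std::size_t newCols = cols * rows / numTargetRows;   // 24 / 8 = 3; element count preserved
    bool weStack = numTargetRows &gt; rows;                 // true: frames are stacked
    std::size_t factor = weStack ? numTargetRows / rows  // 2 input frames per output frame
                                 : rows / numTargetRows;
    (void)newCols; (void)factor;                         // example only
}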
template<class ElemType>
@ -164,7 +164,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
typedef ReinterpretNodeBase<ElemType> Base; UsingReinterpretNodeBaseMembers;
static const std::wstring TypeName() { return L"Reshape"; }
public:
ReshapeNode(DEVICEID_TYPE deviceId, const wstring & name, size_t numRows = 0, const ImageLayout & imageLayout = ImageLayoutWHC(0,0,0)) :
ReshapeNode(DEVICEID_TYPE deviceId, const wstring & name, size_t numRows = 0, const TensorShape & imageLayout = ImageLayoutWHC(0,0,0)) :
Base(deviceId, name),
m_numTargetRows(numRows),
m_targetImageLayout(imageLayout)
@ -186,18 +186,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_numTargetRows;
m_targetImageLayout.SaveToFile(fstream);
m_targetImageLayout.Save(fstream);
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_numTargetRows;
m_targetImageLayout.LoadFromFile(fstream);
m_targetImageLayout.Load(fstream);
}
virtual void InferImageDimsFromInputs()
@ -207,13 +207,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_targetImageLayout.GetWidth() == 0 || m_targetImageLayout.GetHeight() == 0 || m_targetImageLayout.GetNumChannels() == 0)
{
m_imageLayout = ImageLayoutWHC(1, 1, m_numTargetRows);
m_sampleLayout = ImageLayoutWHC(1, 1, m_numTargetRows);
if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
fprintf(stderr, "WARNING: Reshape operation cannot inherit image size information from its child. Image size info is lost.\n");
}
else
{
m_imageLayout = m_targetImageLayout;
m_sampleLayout = m_targetImageLayout;
}
}
@ -251,7 +251,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
SetDims(m_numTargetRows, newCols);
if (factor() == 1) // canonical case: no reshaping actually (e.g. only changing the ImageLayout)
if (factor() == 1) // canonical case: no reshaping actually (e.g. only changing the TensorShape)
m_pMBLayout = Inputs(0)->GetMBLayout();
else if (Inputs(0)->HasMBLayout())
{
@ -308,7 +308,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// notes:
// - input and output have different time base and different layouts (unless the canonical case of factor() == 1)
// - frameRange refers to *functionValues*, not the inputs
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
size_t rows = Inputs(0)->GetNumRows(), cols = Inputs(0)->GetNumCols();
size_t newCols = cols * rows / m_numTargetRows;
@ -335,7 +335,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
size_t rows = Inputs(0)->GetNumRows(), cols = Inputs(0)->GetNumCols();
size_t newCols = cols * rows / m_numTargetRows;
@ -359,7 +359,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t m_numTargetRows;
bool weStack() const { return m_numTargetRows > Inputs(0)->GetNumRows(); } // do we stack (multiple frames into one)
size_t factor() const { return m_numTargetRows > Inputs(0)->GetNumRows() ? m_numTargetRows / Inputs(0)->GetNumRows() : Inputs(0)->GetNumRows() / m_numTargetRows; } // factor by which we stack or unstack
ImageLayout m_targetImageLayout;
TensorShape m_targetImageLayout;
void InferImageDimensions()
{
@ -437,13 +437,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
Inputs(0)->GradientSlice(frameRange.WithLayout(Inputs(0)->GetMBLayout())) += GradientSlice(frameRange);
// TODO: Once we do in-place, the above must include a copy-to-self check (pay special attention to adding vs. copying).
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
// enforce compatibility of 'dataInput' with 'layoutInput'
// TODO: how to deal with boundary flags?
@ -507,24 +507,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
node->m_sliceHeight = m_sliceHeight;
}
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_startIndex << m_sliceHeight;
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_startIndex >> m_sliceHeight;
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
Inputs(0)->GradientSlice(frameRange).AddToRowSliceValuesOf(GradientSlice(frameRange), m_startIndex, m_sliceHeight);
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
ValueSlice(frameRange).AssignRowSliceValuesOf(Inputs(0)->ValueSlice(frameRange), m_startIndex, m_sliceHeight);
}
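The forward/backward pair above is a plain row-range copy and its transpose. Illustrative per-column semantics (not the CNTK kernels):

#include &lt;cstddef&gt;

// Forward: copy rows [start, start+height) of the input column.
void RowSliceForwardSketch(const float* in, float* out, std::size_t start, std::size_t height)
{
    for (std::size_t i = 0; i &lt; height; i++)
        out[i] = in[start + i];
}

// Backward: accumulate the output gradient back into exactly that row range.
void RowSliceBackwardSketch(const float* outGrad, float* inGrad, std::size_t start, std::size_t height)
{
    for (std::size_t i = 0; i &lt; height; i++)
        inGrad[start + i] += outGrad[i];                 // add, don't overwrite
}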
@ -544,7 +544,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void InferImageDimsFromInputs()
{
InferImageDimsFromInput(0, true);
m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), m_sliceHeight, m_imageLayout.GetNumChannels());
m_sampleLayout = ImageLayoutWHC(m_sampleLayout.GetWidth(), m_sliceHeight, m_sampleLayout.GetNumChannels());
// warn that this node will destroy the image size information from the child
if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
@ -584,12 +584,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
{
Inputs(inputIndex)->GradientSlice(frameRange).AddWithRowSliceValuesOf(GradientSlice(frameRange), m_startRowIndices[inputIndex], Inputs(inputIndex)->GetNumRows());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
for (size_t inputIndex = 0; inputIndex < ChildrenSize(); inputIndex++)
ValueSlice(frameRange).AssignToRowSliceValuesOf(Inputs(inputIndex)->ValueSlice(frameRange), m_startRowIndices[inputIndex], Inputs(inputIndex)->GetNumRows());
@ -622,7 +622,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void InferImageDimsFromInputs()
{
InferImageDimsFromInput(0, true);
m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), GetNumRows(), m_imageLayout.GetNumChannels());
m_sampleLayout = ImageLayoutWHC(m_sampleLayout.GetWidth(), GetNumRows(), m_sampleLayout.GetNumChannels());
// warn that this node will destroy the image size information from the child
if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
@ -666,22 +666,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_numRepeat;
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_numRepeat;
}
virtual void InferImageDimsFromInputs()
{
InferImageDimsFromInput(0, true);
m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), m_inputImageLayout.GetHeight() * m_numRepeat, m_imageLayout.GetNumChannels());
m_sampleLayout = ImageLayoutWHC(m_sampleLayout.GetWidth(), m_inputImageLayout.GetHeight() * m_numRepeat, m_sampleLayout.GetNumChannels());
// warn that this node will destroy the image size information from the child
if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
@ -727,13 +727,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InferImageDimsFromInputs();
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
{
//if (!isNoop()) // if m_numRepeat == 1 then virtual FunctionValues() will return the child --TODO: do this as an in-place optimization instead
ValueSlice(frameRange).AssignRepeatOf(Inputs(0)->ValueSlice(frameRange), m_numRepeat, 1);
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
{
Inputs(0)->GradientSlice(frameRange).AddToRowRepeatValuesOf(GradientSlice(frameRange), m_numRepeat);
}
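Since the forward pass tiles the input's rows m_numRepeat times, the backward pass must sum the gradients of all copies back into each source row, which is what AddToRowRepeatValuesOf does. Illustrative per-column semantics (block tiling is assumed here; not the CNTK kernels):

#include &lt;cstddef&gt;

void RowRepeatForwardSketch(const float* in, float* out, std::size_t rows, std::size_t numRepeat)
{
    for (std::size_t r = 0; r &lt; numRepeat; r++)
        for (std::size_t i = 0; i &lt; rows; i++)
            out[r * rows + i] = in[i];
}

void RowRepeatBackwardSketch(const float* outGrad, float* inGrad, std::size_t rows, std::size_t numRepeat)
{
    for (std::size_t r = 0; r &lt; numRepeat; r++)
        for (std::size_t i = 0; i &lt; rows; i++)
            inGrad[i] += outGrad[r * rows + i];          // each source row collects all its copies
}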


@ -34,7 +34,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
#if 1
@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_leftMinusRight->Resize(Inputs(0)->GetNumRows(), Inputs(0)->GetNumCols());
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
m_leftMinusRight->AssignDifferenceOf(Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange));
@ -75,7 +75,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -125,7 +125,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
// left input is scalar
@ -172,7 +172,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_softmaxOfRight->Resize(m_logSoftmaxOfRight->GetNumRows(), m_logSoftmaxOfRight->GetNumCols());
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override //-sum(left_i * log(softmax_i(right)))
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override //-sum(left_i * log(softmax_i(right)))
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
// first compute the softmax (column-wise)
@ -202,7 +202,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -250,28 +250,28 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
//left Node must be a scalar
if (inputIndex == 0) //left derivative
{
ComputeInputPartialLeft(*m_logOfRight, Inputs(0)->GradientSlice(frameRange), GradientValues());
BackpropToLeft(*m_logOfRight, Inputs(0)->GradientSlice(frameRange), GradientValues());
}
else
{
ComputeInputPartialRight(*m_leftDivRight, Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange), Inputs(1)->GradientSlice(frameRange), GradientValues());
BackpropToRight(*m_leftDivRight, Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange), Inputs(1)->GradientSlice(frameRange), GradientValues());
}
}
/*TODO: merge with call site*/void ComputeInputPartialLeft(const Matrix<ElemType>& logOfRight, Matrix<ElemType> inputGradientValues,
/*TODO: merge with call site*/void BackpropToLeft(const Matrix<ElemType>& logOfRight, Matrix<ElemType> inputGradientValues,
const Matrix<ElemType>& gradientValues)
{
//Matrix<ElemType>::ScaleAndAdd(-gradientValues.Get00Element(), logOfRight, inputGradientValues);
Matrix<ElemType>::Multiply1x1AndWeightedAdd(-1.0f, gradientValues/*1x1*/, logOfRight, 1.0f, inputGradientValues);
}
/*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& leftDivRight,
/*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& leftDivRight,
const Matrix<ElemType> inputFunctionValues0, const Matrix<ElemType> inputFunctionValues1,
Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues)
{
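The two cached matrices are exactly the partial derivatives of the criterion computed below; with the comment's notation E = -sum(left_i * log(right_i)):

\[
\frac{\partial E}{\partial l_i} = -\log r_i
\quad\text{(m\_logOfRight, used by BackpropToLeft)},
\qquad
\frac{\partial E}{\partial r_i} = -\frac{l_i}{r_i}
\quad\text{(m\_leftDivRight, used by BackpropToRight)},
\]

each scaled by the scalar gradient flowing into the node, which is what Multiply1x1AndWeightedAdd applies above.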
@ -289,7 +289,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//-sum(left_i * log(right_i))
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
m_logOfRight->SetValue(Inputs(1)->ValueSlice(frameRange));
@ -313,7 +313,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -375,14 +375,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
virtual void BackpropToNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(*m_gradientOfL1Norm, Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange));
BackpropToS(*m_gradientOfL1Norm, Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange));
}
/*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType>& gradientOfL1Norm,
/*TODO: merge with call site*/void BackpropToS(Matrix<ElemType>& gradientOfL1Norm,
Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& inputFunctionValues)
{
gradientOfL1Norm.AssignSignOf(inputFunctionValues);
@ -395,7 +395,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_gradientOfL1Norm->Resize(Inputs(0)->GetNumRows(), Inputs(0)->GetNumCols());
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
VerifyDims(1, 1);
@ -414,7 +414,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -464,20 +464,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
virtual void BackpropToNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
assert(inputIndex == 0); inputIndex;
ComputeInputPartialS(Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange), FunctionValues());
BackpropToS(Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange), FunctionValues());
}
/*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& functionValues)
/*TODO: merge with call site*/void BackpropToS(Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& functionValues)
{
ElemType v = gradientValues.Get00Element() / (functionValues.Get00Element() + EPS_IN_INVERSE); // TODO: GPU inefficiency
inputGradientValues.AddWithScaleOf(v, inputFunctionValues);
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
VerifyDims(1,1);
@ -496,7 +496,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
};
@ -535,15 +535,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
// ^^ TODO: we can merge these two
virtual void SaveToFile(File& fstream) const override
virtual void Save(File& fstream) const override
{
Base::SaveToFile(fstream);
Base::Save(fstream);
fstream << m_evalMode;
}
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
Base::Load(fstream, modelVersion);
fstream >> m_evalMode;
if (m_evalMode > NCEEvalMode::None)
{
@ -558,14 +558,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/**
compute gradients to input observations, the weights to the observations, and the class log posterior probabilities
*/
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
m_needRecomputeGradientToSoftmaxInput = false;
//gradient computation@yinggongzhao
//inputIndex should be 2 this time
if (m_evalMode != NCEEvalMode::None)
LogicError("ComputeInputPartial should only be called in training mode");
LogicError("BackpropTo should only be called in training mode");
if (inputIndex == 0)
InvalidArgument("ComputeInput partial should not be called for label");
// samples+probs hidden embedding
@ -573,12 +573,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
#if 0 // TODO: delete this. Seems copy-paste leftover?
/*TODO: merge with call site*/void ComputeInputPartialRight(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
/*TODO: merge with call site*/void BackpropToRight(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
Matrix<ElemType>::MultiplyAndAdd(inputFunctionValues, false, gradientValues, true, inputGradientValues);
}
/*TODO: merge with call site*/void ComputeInputPartialLeft(const Matrix<ElemType>& obs, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
/*TODO: merge with call site*/void BackpropToLeft(const Matrix<ElemType>& obs, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
Matrix<ElemType>::MultiplyAndAdd(obs, false, gradientValues, false, inputGradientValues);
}
@ -595,7 +595,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO (this does not really break it since for full matrices, class Matrix will resize by itself)
}
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override //-sum(left_i * log(softmax_i(right)))
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override //-sum(left_i * log(softmax_i(right)))
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
if (Inputs(0)->HasMBLayout() && Inputs(0)->GetMBLayout()->HasGaps())
@ -668,7 +668,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void InferImageDimsFromInputs()
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
protected:
@ -720,7 +720,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/**
compute gradients to input observations, the weights to the observations, and the class log posterior probabilities
*/
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
// this should never be called for input[0], which is controlled through the needGradient flag
if (inputIndex != 1 && inputIndex != 2 && inputIndex != 3)
@ -824,10 +824,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// -sum(left_i * log(softmax_i(right)))
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
if (Inputs(0)->FunctionValues().GetDeviceId() != CPUDEVICE)
LogicError("ClassBasedCrossEntropyWithSoftmax (EvaluateThisNodeNonLooping()): The label matrix is not using CPU device. This will make computation slow, even though the label data is probably saved on GPU. Because of the external loop over time with explicit class id retrieved from the label matrix, the computation will be very slow if the label matrix is saved on GPU. However, this is only a constraint for label matrix and other matrices such as data are suggested to reside on GPU. ");
LogicError("ClassBasedCrossEntropyWithSoftmax (ForwardPropNonLooping()): The label matrix is not using CPU device. This will make computation slow, even though the label data is probably saved on GPU. Because of the external loop over time with explicit class id retrieved from the label matrix, the computation will be very slow if the label matrix is saved on GPU. However, this is only a constraint for label matrix and other matrices such as data are suggested to reside on GPU. ");
// (the below is left-over from refactoring)
Matrix<ElemType>& functionValues = FunctionValues();
@ -857,7 +857,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t rgt_bnd = (size_t)lbl_t(3, 0);
size_t nbr_wrd = (rgt_bnd - lft_bnd); // number of words in the class
if (nbr_wrd == 0)
LogicError("ClassBasedCrossEntropyWithSoftmax (EvaluateThisNodeNonLooping()): Encountered a class of size 0. This sample seems to lack an NoInput flag.");
LogicError("ClassBasedCrossEntropyWithSoftmax (ForwardPropNonLooping()): Encountered a class of size 0. This sample seems to lack an NoInput flag.");
sz += nbr_wrd;
}
@ -909,7 +909,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// add the word's class-conditional log posterior
if (y_t < lft_bnd || y_t >= rgt_bnd)
LogicError("ClassBasedCrossEntropyWithSoftmax (EvaluateThisNodeNonLooping()): Word index out of bounds of class-member index range (word not a class member).");
LogicError("ClassBasedCrossEntropyWithSoftmax (ForwardPropNonLooping()): Word index out of bounds of class-member index range (word not a class member).");
size_t idx_in_class = y_t - lft_bnd;
Matrix<ElemType>::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0); // (1x1)
@ -955,7 +955,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
protected:
@ -1016,7 +1016,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{ }
/// compute posterior probability of label y at position t
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
size_t nrow = Inputs(0)->GetNumRows();
@ -1036,7 +1036,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
FrameRange sequenceRange = frameRange.Sequence(i); // FrameRange to select one sequence
// BUGBUG: This ^^ is neither supported nor correct, since this code does not handle gaps or start/end flags
EvaluateThisNodeS(
ForwardPropS(
DataSliceWithMBLayout(mPostProb, sequenceRange, Inputs(0)->GetMBLayout()),
DataSliceWithMBLayout(mAlpha, sequenceRange, Inputs(0)->GetMBLayout()),
DataSliceWithMBLayout(mBeta, sequenceRange, Inputs(0)->GetMBLayout()),
@ -1050,7 +1050,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override //scaled by 2*number of columns (samples) in the Matrix<ElemType>
virtual void BackpropToNonLooping(size_t inputIndex) override //scaled by 2*number of columns (samples) in the Matrix<ElemType>
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
// inputIndex 0 should not get us here, it should be prevented by the needGradient flag of input[0]
@ -1083,7 +1083,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// compute forward backward algorithm
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType> & functionValues, const Matrix<ElemType> & lbls, const Matrix<ElemType> & pos_scores, const Matrix<ElemType> & pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType> & functionValues, const Matrix<ElemType> & lbls, const Matrix<ElemType> & pos_scores, const Matrix<ElemType> & pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
{
/// to-do, each slice is for one sentence
/// to-do, the number of slices corresponds to the number of frames
@ -1236,7 +1236,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1283,17 +1283,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//compute gradients to input observations, the weights to the observations, and the class log posterior probabilities
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
//auto t_start_time = Timer::MilliSecondElapsed();
//left Node must be a scalar
if (inputIndex == 0) //left derivative
{
ComputeInputPartialLeft(*m_logSoftmaxOfRight, Inputs(inputIndex)->GradientValues(), GradientValues());
BackpropToLeft(*m_logSoftmaxOfRight, Inputs(inputIndex)->GradientValues(), GradientValues());
}
else if (inputIndex == 1)
{
ComputeInputPartialRight(*m_softmaxOfRight, Inputs(0)->FunctionValues(), Inputs(inputIndex)->GradientValues(),
BackpropToRight(*m_softmaxOfRight, Inputs(0)->FunctionValues(), Inputs(inputIndex)->GradientValues(),
GradientValues(), *m_gammaFromLattice, m_fsSmoothingWeight, m_frameDropThreshold);
#ifdef _DEBUG
Inputs(inputIndex)->InvalidateMissingGradientColumns(FrameRange(Inputs(inputIndex)->GetMBLayout()));
@ -1312,7 +1312,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
RuntimeError("SequenceWithSoftmaxNode criterion only takes with respect to label, DNN output and log likelihood.");
}
static void WINAPI ComputeInputPartialLeft(const Matrix<ElemType>& logSoftmaxOfRight, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
static void WINAPI BackpropToLeft(const Matrix<ElemType>& logSoftmaxOfRight, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
#if DUMPOUTPUT
logSoftmaxOfRight.Print("SequenceWithSoftmaxNode Partial-logSoftmaxOfRight");
@ -1327,7 +1327,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
static void WINAPI ComputeInputPartialRight(const Matrix<ElemType>& softmaxOfRight, const Matrix<ElemType>& inputFunctionValues,
static void WINAPI BackpropToRight(const Matrix<ElemType>& softmaxOfRight, const Matrix<ElemType>& inputFunctionValues,
Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues,
const Matrix<ElemType> & gammaFromLattice, double hsmoothingWeight, double frameDropThresh)
{
@ -1346,7 +1346,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// -sum(left_i * log(softmax_i(right)))
virtual void EvaluateThisNodeNonLooping()
virtual void ForwardPropNonLooping()
{
// Initialize m_gammaCalculator
// TODO: Would this lend itself to a unique_ptr instead of the init flag?
@ -1407,7 +1407,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1517,13 +1517,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(deviceId, name)
{ }
virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
virtual void BackpropToNonLooping(size_t inputIndex) override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
if (inputIndex != 1)
InvalidArgument("%ls %ls operation cannot compute the gradient for its first inpute.", NodeName().c_str(), OperationName().c_str());
//ComputeInputPartialRight(m_temp, Inputs(0)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues(), m_classZeroLabels, m_result);
//BackpropToRight(m_temp, Inputs(0)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues(), m_classZeroLabels, m_result);
// Create vector with 1 for class 1, and -1 for class 0
m_temp->AssignDifferenceOf(Inputs(0)->ValueSlice(frameRange), *m_classZeroLabels); // TODO: need a slice for m_classZeroLabels?
@ -1547,7 +1547,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//-sum(left * log(right) + (1-left)*log(1-right)) (optionally * weight)
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange frameRange(Inputs(0)->GetMBLayout());
@ -1634,7 +1634,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void InferImageDimsFromInputs()
{
InferImageDimsFromInput(0, false);
m_imageLayout = ImageLayout();
m_sampleLayout = TensorShape();
}
virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const


@ -572,7 +572,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto nodeIter = batchComputeNodes.begin(); nodeIter != batchComputeNodes.end(); nodeIter++)
{
ComputationNodeBasePtr node = *nodeIter;
node->EvaluateThisNode(FrameRange(node->GetMBLayout(), atTime));
node->ForwardProp(FrameRange(node->GetMBLayout(), atTime));
if (node->GetNumCols() != node->GetNumParallelSequences())
RuntimeError("preComputeActivityAtTime: the function values has to be a single column matrix ");
}


@ -286,8 +286,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/// PreCompute(net, encoderTrainSetDataReader, encoderFeatureNodes, encoderlabelNodes, encoderInputMatrices) ||
startEpoch == 0)
{
encoderNet->SaveToFile(GetEncoderModelNameForEpoch(int(startEpoch) - 1));
decoderNet->SaveToFile(GetDecoderModelNameForEpoch(int(startEpoch) - 1));
encoderNet->Save(GetEncoderModelNameForEpoch(int(startEpoch) - 1));
decoderNet->Save(GetDecoderModelNameForEpoch(int(startEpoch) - 1));
}
bool learnRateInitialized = false;
@ -421,8 +421,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
else
{
decoderNet->SaveToFile(GetDecoderModelNameForEpoch(i, true));
encoderNet->SaveToFile(GetEncoderModelNameForEpoch(i, true));
decoderNet->Save(GetDecoderModelNameForEpoch(i, true));
encoderNet->Save(GetEncoderModelNameForEpoch(i, true));
fprintf(stderr, "Finished training and saved final model\n\n");
break;
}
@ -456,8 +456,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//persist model and check-point info
decoderNet->SaveToFile(GetDecoderModelNameForEpoch(i));
encoderNet->SaveToFile(GetEncoderModelNameForEpoch(i));
decoderNet->Save(GetDecoderModelNameForEpoch(i));
encoderNet->Save(GetEncoderModelNameForEpoch(i));
size_t dummyMinibatchSize = 0;
this->LoadCheckPointInfo(i,
@ -599,7 +599,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (size_t k = 0; k < iNumNetworks; k++)
{
wstring tmpstr = msra::strfun::wstrprintf(L".%d", k);
nets[k]->SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1, false, tmpstr));
nets[k]->Save(GetModelNameForEpoch(int(startEpoch) - 1, false, tmpstr));
}
}
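The `.%d` suffix is what keeps the k models from overwriting each other on disk. A sketch of the resulting naming scheme follows; the helper is hypothetical and only mimics the suffix handling, since GetModelNameForEpoch's real format string is not shown in this diff:

#include <cstdio>
#include <string>

// Hypothetical helper: compose "<modelPath><suffix>.<epoch+1>" in the way the
// calls above suggest; the real function's exact format may differ.
std::wstring ModelNameForEpochSketch(const std::wstring& modelPath,
                                     int epoch, const std::wstring& suffix)
{
    return modelPath + suffix + L"." + std::to_wstring(epoch + 1);
}

int main()
{
    for (int k = 0; k < 3; k++)
    {
        std::wstring name = ModelNameForEpochSketch(L"models/net", 4,
                                                    L"." + std::to_wstring(k));
        wprintf(L"%ls\n", name.c_str());   // models/net.0.5, models/net.1.5, ...
    }
    return 0;
}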
@ -749,7 +749,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//persist model and check-point info
for (size_t k = 0; k < iNumNetworks; k++)
{
nets[k]->SaveToFile(GetModelNameForEpoch(i, true, msra::strfun::wstrprintf(L".%d", k)));
nets[k]->Save(GetModelNameForEpoch(i, true, msra::strfun::wstrprintf(L".%d", k)));
}
fprintf(stderr, "Finished training and saved final model\n\n");
break;
@ -786,7 +786,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//persist model and check-point info
for (size_t k = 0; k < iNumNetworks; k++)
{
nets[k]->SaveToFile(GetModelNameForEpoch(i, false, msra::strfun::wstrprintf(L".%d", k)));
nets[k]->Save(GetModelNameForEpoch(i, false, msra::strfun::wstrprintf(L".%d", k)));
}
this->SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, 0);

View file

@ -444,7 +444,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (startEpoch < 0)
{
// Loads models.
origNet->LoadFromFile<ElemType>(origModelFileName);
origNet->Load<ElemType>(origModelFileName);
// Processes feature nodes.
std::vector<ComputationNodeBasePtr> & sequenceFeatureNodes = sequenceNet->FeatureNodes();
@ -706,7 +706,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
g_mpi->WaitAll();
}
net->SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
net->Save(GetModelNameForEpoch(int(startEpoch) - 1));
}
// BUGBUG: This is where the trainSetDataReader->GetNumParallelSequences() is used to further normalize
@ -814,7 +814,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
i + 1, learnRatePerSample, m_minLearnRate);
if (m_autoLearnRateSearchType != LearningRateSearchAlgorithm::None)
{
net->SaveToFile(m_modelPath);
net->Save(m_modelPath);
}
break;
}
@ -1026,7 +1026,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
else
{
net->SaveToFile(GetModelNameForEpoch(i, true));
net->Save(GetModelNameForEpoch(i, true));
fprintf(stderr, "Finished training and saved final model\n\n");
break;
@ -1081,7 +1081,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// persist model and check-point info
if ((g_mpi == nullptr) || g_mpi->IsMainNode())
{
net->SaveToFile(GetModelNameForEpoch(i));
net->Save(GetModelNameForEpoch(i));
SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, chosenMinibatchSize);
if (!m_keepCheckPointFiles)
{
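The body of that last if block is cut off by the diff context. For orientation only, a clearly hypothetical sketch of per-epoch checkpoint rotation, with made-up file naming:

#include <cstdio>   // std::remove
#include <string>

// Hypothetical sketch: after saving epoch i, drop the files from epoch i-1
// unless checkpoints are being kept. Names and extensions are invented here.
void RotateCheckpointsSketch(const std::string& modelPath, int i,
                             bool keepCheckPointFiles)
{
    if (keepCheckPointFiles || i == 0)
        return;
    std::string prevModel = modelPath + "." + std::to_string(i - 1);
    std::string prevCkpt  = prevModel + ".ckp";
    std::remove(prevModel.c_str());   // ignore errors: file may not exist
    std::remove(prevCkpt.c_str());
}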

View file

@ -43,7 +43,7 @@ void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,
}
template<class ElemType>
void rnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
void rnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
{
size_t ncol = functionValues.GetNumCols();
size_t ntime = ncol / mNbr;
@ -74,7 +74,7 @@ void rnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matri
}
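The `ntime = ncol / mNbr` computation encodes the minibatch layout the whole test relies on: columns interleave mNbr parallel sequences per time step, so (assuming the usual CNTK interleaving) sequence s at time t lives in column t * mNbr + s. A standalone sketch of that indexing:

#include <cstddef>
#include <cstdio>

int main()
{
    const size_t mNbr  = 3;                 // parallel sequences in the minibatch
    const size_t ncol  = 12;                // total minibatch columns
    const size_t ntime = ncol / mNbr;       // time steps per sequence

    for (size_t t = 0; t < ntime; t++)
        for (size_t s = 0; s < mNbr; s++)
            printf("time %zu, sequence %zu -> column %zu\n", t, s, t * mNbr + s);
    return 0;
}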
template<class ElemType>
void oldRnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
void oldRnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
{
size_t ncol = functionValues.GetNumCols();
size_t ntime = ncol / mNbr;
@ -88,13 +88,13 @@ void oldRnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Ma
{
reset = true;
}
oldRNNEvaluateThisNodeSRP<ElemType>(timeIdxInSeq, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, i, mNbr);
oldRNNForwardPropSRP<ElemType>(timeIdxInSeq, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, i, mNbr);
}
}
}
template<class ElemType>
void oldRNNEvaluateThisNodeSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
{
assert(delay > 0);
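The parameter list tells most of the story: a positive delay reads the input from delay steps back, with pastActivity supplying history across the minibatch boundary and default_activity used at a reset. A hedged single-sequence sketch of that rule follows; it shows the general delay-node idea, not the truncated function body, and ignores the pastActivity path:

#include <vector>

// Sketch of the delay rule for one sequence: out[t] = in[t - delay],
// except where there is no history yet (a reset), where the default
// activity is substituted instead.
std::vector<float> DelayForwardSketch(const std::vector<float>& in,
                                      int delay, float defaultActivity)
{
    std::vector<float> out(in.size());
    for (int t = 0; t < (int)in.size(); t++)
        out[t] = (t - delay >= 0) ? in[t - delay]    // normal case
                                  : defaultActivity; // reset: no history yet
    return out;
}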
@ -128,7 +128,7 @@ void oldRNNEvaluateThisNodeSRP(const size_t timeIdxInSeq, const int delay, const
The new way of resetting RNN state.
*/
template<class ElemType>
void TestRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
void TestRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
{
Matrix<ElemType> functionValues(deviceID);
Matrix<ElemType> colBegin(deviceID);
@ -144,16 +144,16 @@ void TestRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t mN
needToCompute.SetValue(0);
needToCompute.ColumnSlice(0, 1).SetValue(1);
auto t_start = clock();
rnnEvaluateThisNodeSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
rnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
auto t_end = clock();
std::cout << "testRnnEvaluateThisNodeSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
std::cout << "testRnnForwardPropSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
}
/**
The old way of resetting RNN state, which used an if statement. It also supports only up to two sentences within a minibatch
*/
template<class ElemType>
void TestOldRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
void TestOldRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
{
Matrix<ElemType> functionValues(deviceID);
Matrix<ElemType> colBegin(deviceID);
@ -165,9 +165,9 @@ void TestOldRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t
pastActivity.Resize(nRow, nCol);
inputFunctionValues.Resize(nRow, nCol);
auto t_start = clock();
oldRnnEvaluateThisNodeSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
oldRnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
auto t_end = clock();
std::cout << "TestOldRnnEvaluateThisNodeSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
std::cout << "TestOldRnnForwardPropSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
}
template<class ElemType>
@ -441,9 +441,9 @@ int wmain()
{
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
TestRnnEvaluateThisNodeSRP<float>();
TestRnnForwardPropSRP<float>();
TestOldRnnEvaluateThisNodeSRP<float>();
TestOldRnnForwardPropSRP<float>();
//MandSTest<float>(100, 2);