Integrate clemensm/s2sfix into master

Project Philly committed on 2016-05-19 12:32:28 -07:00
Parents: 2f2ca021c9 832655a154
Commit: b9f3c541b5
11 changed files: 569 additions and 194 deletions

.gitignore (vendored)
View file

@@ -152,7 +152,9 @@ ModelManifest.xml
# Python
*.pyc
__pychache__/
__pycache__/
contrib/Python/doc/_build/*
contrib/Python/_cntk_default/*
# =========================
# Windows detritus

View file

@@ -28,6 +28,32 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template <typename ElemType>
class IEvaluateModelBase
{
public:
//
// Load a model based on configuration. The syntax is the same as when calling the cntk executable.
// e.g. "modelFile=model.dat deviceId=0".
// numCPUThreads can be used to set the thread count of BLAS.
//
virtual void Init(const std::string& config) = 0;
//
// Create a network based on an (NDL) network description.
//
virtual void CreateNetwork(const std::string& networkDescription) = 0;
//
// Free resources
//
virtual void Destroy() = 0;
};
// ------------------------------------------------------------------------
// Basic (legacy) interface
// ------------------------------------------------------------------------
enum NodeGroup
{
nodeInput, // an input node
@@ -39,33 +65,54 @@ enum NodeGroup
// NOTICE: This interface is a public interface for evaluating models in CNTK.
// Changes to this interface may affect other projects, such as Argon and LatGen,
// and therefore need to be communicated with such groups.
template <class ElemType>
class IEvaluateModel // Evaluate Model Interface
template <typename ElemType>
class IEvaluateModel : public IEvaluateModelBase<ElemType> // Evaluate Model Interface
{
public:
virtual void Init(const std::string& config) = 0;
virtual void Destroy() = 0;
virtual void CreateNetwork(const std::string& networkDescription) = 0;
//
// Retrieves the (flattened) dimensions
//
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup) = 0;
//
// Allocate resources for a particular output.
//
virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName) = 0;
//
// Evaluate a model in frame mode. This does not support dynamic axes or sparse input data.
// Given a feature vector of dimension d, the inputs may contain n * d elements. The output will then be computed
// for n samples.
// inputs - map from node name to array of input tensors, flattened to vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
//
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
//
// Evaluate - Evaluate using the network without input and provide the outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
//
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
virtual void ResetState() = 0;
};
// GetEval - get an evaluator type from the DLL
// since we have 2 evaluator types based on template parameters, this exposes 2 exports
// could be done directly with the templated name, but that requires mangled C++ names
template <class ElemType>
template <typename ElemType>
void EVAL_API GetEval(IEvaluateModel<ElemType>** peval);
extern "C" EVAL_API void GetEvalF(IEvaluateModel<float>** peval);
extern "C" EVAL_API void GetEvalD(IEvaluateModel<double>** peval);
// Data Reader class
// interface for clients of the Data Reader
// mirrors the IEvaluateModel interface, except the Init method is private (use the constructor)
template <class ElemType>
template <typename ElemType>
class Eval : public IEvaluateModel<ElemType>, protected Plugin
{
private:
@@ -84,6 +131,7 @@ public:
// modelPath=c:\models\model.dnn (model path; if not specified, the LoadModel() method must be called before Evaluate())
// minibatchSize=1024 (minibatch size used during evaluation if < passed data size)
Eval(const std::string& config);
virtual ~Eval();
// CreateNetwork - create a network based on the network description
@@ -101,14 +149,146 @@ public:
// Evaluate - Evaluate using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
// Evaluate - Evaluate using the network without input, and provide the outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);
virtual void Init(const std::string& config);
virtual void ResetState();
};
// ------------------------------------------------------------------------
// Extended interface
// ------------------------------------------------------------------------
//
// A buffer to keep data for all samples in a (variable length) sequence
// from a single input or output.
// This is used for both dense and sparse data.
//
template<typename ElemType>
struct VariableBuffer
{
size_t m_numberOfSamples = 0;
//
// All elements of a sequence, concatenated.
//
std::vector<ElemType> m_buffer;
// In case of sparse data, the following is also used. Otherwise, the
// contents are ignored.
// E.g. a sequence of three sparse vectors with 2 / 4 / 2 non-zero values
// could be represented as the following:
// colIdx: 0 2 6 8
// v v v v
// indices 1 3 2 3 5 6 2 7
// buffer 0 1 2 3 4 5 6 7
//
// For every element in buffer, an entry in this array gives its position.
// For every vector the entries must be ascending.
//
std::vector<int> m_indices;
//
// Contains m_numberOfSamples + 1 indices into the buffer. The first entry
// is always 0. The last entry points after the last element.
// See http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc
//
std::vector<int> m_colIndices;
};
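To make the sparse layout concrete, here is a short sketch (not from the commit) that fills a VariableBuffer with exactly the 2 / 4 / 2 example from the comment above; the non-zero values themselves are arbitrary:
VariableBuffer<float> buf;
buf.m_numberOfSamples = 3;                              // three sparse column vectors
buf.m_buffer     = { 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f }; // 8 non-zero values, concatenated
buf.m_indices    = { 1, 3, 2, 3, 5, 6, 2, 7 };          // position of each value within its vector
buf.m_colIndices = { 0, 2, 6, 8 };                      // m_numberOfSamples + 1 entries, starting at 0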
//
// Meta data
//
struct VariableLayout
{
enum DataType
{
Float32,
Float64
};
enum StorageType
{
Undetermined,
Dense,
Sparse,
};
// Name of the input
std::wstring m_name;
DataType m_dataType;
StorageType m_storageType;
// Dimension of the tensor, flattened to 1 dimension, for one entry on the dynamic axis.
// E.g. for a tensor [2,3,*] this would be 6.
int m_numElements;
// Name of the axis, potentially shared between inputs. For any two inputs sharing the same
// dynamic axis, the sequence cardinality must be the same.
std::wstring m_dynamicAxisName;
};
template <typename ElemType>
using Variables = std::vector<VariableBuffer<ElemType>>;
using VariableSchema = std::vector<VariableLayout>;
//
// Extended interface, allowing for sparse input.
//
template <typename ElemType>
class IEvaluateModelExtended : public IEvaluateModelBase<ElemType>
{
public:
//
// GetOutputSchema - retrieve information about tensor shapes and memory layout of the outputs for this
// model.
//
virtual VariableSchema GetOutputSchema() const = 0;
//
// Allocate internal state for calling ForwardPass(). The call restricts the network (inputs and outputs)
// to the functions represented by the output name.
//
virtual void StartForwardEvaluation(std::vector<std::wstring> outputs) = 0;
//
// GetInputSchema - retrieve information about tensor shapes and memory layout of inputs necessary for a
// particular output. By default this returns all available inputs. After StartForwardEvaluation(), this
// returns all the inputs necessary to compute the outputs.
//
virtual VariableSchema GetInputSchema() const = 0;
//
// Evaluate - Evaluate (perform a forward pass for) a single unit using the model with the given inputs and
// outputs.
// The layout and shape of the data in the inputs vector must match the schema returned by GetInputSchema().
// This method is not reentrant, as the forward pass keeps internal state.
// outputId - output to compute values for. See GetOutputSchema()
// inputs - vector of input buffers, one for every input as given by GetInputSchema()
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing
// will happen during evaluation.
// Called after StartForwardEvaluation()
//
virtual void ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output) = 0;
};
template <typename ElemType>
void EVAL_API GetEvalExtended(IEvaluateModelExtended<ElemType>** peval);
extern "C" EVAL_API void GetEvalExtendedF(IEvaluateModelExtended<float>** peval);
extern "C" EVAL_API void GetEvalExtendedD(IEvaluateModelExtended<double>** peval);
} } }
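Putting the extended interface together, a hedged end-to-end sketch; the model file and the output node name "out.z" are placeholder assumptions, and the inputs here are dense with a single sample each:
// Sketch only: one forward pass through the extended interface declared above.
void ForwardOnce()
{
    IEvaluateModelExtended<float>* eval = nullptr;
    GetEvalExtendedF(&eval);
    eval->Init("modelFile=model.dat deviceId=0");
    eval->CreateNetwork("modelPath=model.dat"); // assumption: same syntax as the cntk executable
    eval->StartForwardEvaluation({ L"out.z" }); // restricts the network to this output
    VariableSchema inputSchema = eval->GetInputSchema(); // now only the inputs needed for "out.z"
    Variables<float> inputs(inputSchema.size());
    for (size_t i = 0; i < inputSchema.size(); ++i)
    {
        inputs[i].m_numberOfSamples = 1; // one dense sample per input
        inputs[i].m_buffer.assign(inputSchema[i].m_numElements, 0.0f);
    }
    Variables<float> outputs(eval->GetOutputSchema().size());
    eval->ForwardPass(inputs, outputs); // output buffers are sized during the pass
    eval->Destroy();
}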

View file

@@ -478,6 +478,47 @@ public:
return std::vector<ComputationNodeBasePtr>{node};
}
std::vector<ComputationNodeBasePtr> OutputNodesByName(const std::vector<std::wstring>& outputNodeNames)
{
std::vector<ComputationNodeBasePtr> outputNodes;
if (outputNodeNames.size() == 0)
{
if (OutputNodes().size() == 0)
RuntimeError("There is no default output node specified in the network.");
outputNodes = OutputNodes();
}
else
{
for (int i = 0; i < outputNodeNames.size(); i++)
outputNodes.push_back(GetNodeFromName(outputNodeNames[i]));
}
return outputNodes;
}
// Collect all input nodes that outputNodes depend on.
std::vector<ComputationNodeBasePtr> InputNodesForOutputs(const std::vector<std::wstring>& outputNodeNames)
{
// use map to remove duplicated items
auto outputNodes = OutputNodesByName(outputNodeNames);
std::set<ComputationNodeBasePtr> inputNodesMap;
for (auto& onode : outputNodes)
{
for (auto& inode : InputNodes(onode))
inputNodesMap.insert(inode);
}
std::vector<ComputationNodeBasePtr> inputNodes;
for (auto& inode : inputNodesMap)
inputNodes.push_back(inode);
return inputNodes;
}
// these are specified as such by the user
const std::vector<ComputationNodeBasePtr>& FeatureNodes() const { return m_featureNodes ; }
const std::vector<ComputationNodeBasePtr>& LabelNodes() const { return m_labelNodes ; }
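The two helpers added here are exactly the pattern the extended evaluator and SimpleOutputWriter use later in this commit; in isolation it is simply (node name hypothetical):
std::vector<std::wstring> names = { L"out.z" };      // empty vector = default output nodes
auto outputNodes = net->OutputNodesByName(names);    // resolve outputs by name
auto inputNodes  = net->InputNodesForOutputs(names); // de-duplicated input dependencies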

View file

@@ -18,6 +18,11 @@
#endif
#include "BestGpu.h"
#include "MPIWrapper.h"
#include "DataDeserializer.h"
#include "SequencePacker.h"
#include "NoRandomizer.h"
#include "HeapMemoryProvider.h"
#include "InputAndParamNodes.h"
// TODO: Temporary mechanism to enable memory sharing for
// node output value matrices. This will go away when the
@@ -26,7 +31,50 @@ bool g_shareNodeValueMatrices = false;
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
template <typename ElemType>
void CNTKEvalBase<ElemType>::Init(const std::string& config)
{
m_config.Parse(config);
size_t nThreads = m_config("numCPUThreads", "1");
CPUMatrix<ElemType>::SetNumThreads(nThreads);
g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false);
}
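As a sketch, the keys parsed here combine into a single space-separated Init string on the client side (the model file name is hypothetical, and `eval` is an evaluator obtained via the exports below):
eval->Init("modelFile=model.dat numCPUThreads=4 shareNodeValueMatrices=true");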
// CreateNetwork - create a network based on the network description
// networkDescription - network description
template <typename ElemType>
void CNTKEvalBase<ElemType>::CreateNetwork(const std::string& networkDescription)
{
ConfigParameters config;
config.Parse(networkDescription);
std::vector<wstring> outputNodeNames;
m_net = GetModelFromConfig<ConfigParameters, ElemType>(config, outputNodeNames);
if (m_net == nullptr)
{
LogicError("Unable to construct network from description");
}
}
// Destroy - cleanup and remove this class
// NOTE: this destroys the object, and it can't be used past this point
template <typename ElemType>
void CNTKEvalBase<ElemType>::Destroy()
{
// cleanup everything
m_net.reset();
}
// ----------------------------------------------------------------------------
// Basic interface
// ----------------------------------------------------------------------------
template <typename ElemType>
void EVAL_API GetEval(IEvaluateModel<ElemType>** peval)
{
*peval = new CNTKEval<ElemType>();
@@ -41,51 +89,11 @@ extern "C" EVAL_API void GetEvalD(IEvaluateModel<double>** peval)
GetEval(peval);
}
template <class ElemType>
void CNTKEval<ElemType>::Init(const std::string& config)
{
m_start = 0;
m_config.Parse(config);
size_t nThreads = m_config("numCPUThreads", "1");
CPUMatrix<ElemType>::SetNumThreads(nThreads);
g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false);
}
// Destroy - cleanup and remove this class
// NOTE: this destroys the object, and it can't be used past this point
template <class ElemType>
void CNTKEval<ElemType>::Destroy()
{
// cleanup everything
m_net.reset();
delete m_reader;
delete m_writer;
delete this;
}
// CreateNetwork - create a network based on the network description
// networkDescription - network description
template <class ElemType>
void CNTKEval<ElemType>::CreateNetwork(const std::string& networkDescription)
{
ConfigParameters config;
config.Parse(networkDescription);
std::vector<wstring> outputNodeNames;
m_net = GetModelFromConfig<ConfigParameters, ElemType>(config, outputNodeNames);
if (m_net == nullptr)
{
LogicError("Unable to construct network from description");
}
}
// GetNodeDimensions - Get the node dimensions of the specified nodes
// dimensions - map from name of node to dimension of the node, will be appended to for Input/Output scenarios
// nodeGroup - type of node we are requesting (input/output/specified)
// NOTE: when nodeGroup==specified the dimensions map is expected to be populated with the string names of the nodes requested; dimensions will be modified to return the current values.
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup)
{
if (m_net == NULL)
@@ -137,7 +145,7 @@ void CNTKEval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimen
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
// outputNodeName - name of node that will be evaluated
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring& outputNodeName)
{
m_net->StartEvaluateMinibatchLoop(m_net->GetNodeFromName(outputNodeName));
@@ -146,7 +154,7 @@ void CNTKEval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring& outputNo
// Evaluate - Evaluate using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs)
{
size_t minibatchSize = m_config(L"minibatchSize", (size_t) 10240);
@@ -183,7 +191,7 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
// Evaluate - Evaluate using the model with the given inputs and outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs)
{
// get the evaluation names from the output string
@@ -206,14 +214,168 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
eval.WriteOutput(*m_writer, outNodeNames);
}
// ResetState - Reset the cell state when we get start of an utterance
template <class ElemType>
void CNTKEval<ElemType>::ResetState()
template <typename ElemType>
void CNTKEval<ElemType>::Destroy()
{
m_start = 1 - m_start;
CNTKEvalBase<ElemType>::Destroy();
delete m_reader;
delete m_writer;
delete this;
}
// instantiate all the combinations we expect to be used
template class CNTKEval<double>;
template class CNTKEval<float>;
// ----------------------------------------------------------------------------
// Extended interface
// ----------------------------------------------------------------------------
template<typename ElemType>
VariableLayout CNTKEvalExtended<ElemType>::ToVariableLayout(const ComputationNodeBasePtr n)
{
auto matrix = dynamic_pointer_cast<Matrix<ElemType>>(n->ValuePtr());
return VariableLayout
{
/* name */ n->GetName(),
/* type */ sizeof(ElemType) == sizeof(float) ? VariableLayout::Float32 : VariableLayout::Float64,
/* storage */ matrix ? matrix->GetMatrixType() == MatrixType::DENSE ? VariableLayout::Dense :
matrix->GetMatrixType() == MatrixType::SPARSE ? VariableLayout::Sparse :
VariableLayout::Undetermined :
VariableLayout::Undetermined,
/* dimension */ n->GetSampleLayout().GetNumElements(),
/* dynamic axis */ wstring(n->GetMBLayout()->GetAxisName())
};
}
template<typename ElemType>
void CNTKEvalExtended<ElemType>::StartForwardEvaluation(std::vector<wstring> outputNodeNames)
{
m_scopedNetworkOperationMode = make_shared<ScopedNetworkOperationMode>(m_net, NetworkOperationMode::inferring);
m_outputNodes = m_net->OutputNodesByName(outputNodeNames);
m_inputNodes = m_net->InputNodesForOutputs(outputNodeNames);
// allocate memory for forward computation
m_net->AllocateAllMatrices({}, m_outputNodes, nullptr);
m_net->StartEvaluateMinibatchLoop(m_outputNodes);
m_inputMatrices = DataReaderHelpers::RetrieveInputMatrices(m_inputNodes);
}
template<typename ElemType>
VariableSchema CNTKEvalExtended<ElemType>::GetOutputSchema() const
{
VariableSchema schema;
for (const auto& n : m_net->OutputNodes())
{
schema.push_back(ToVariableLayout(n));
}
return schema;
}
template<typename ElemType>
VariableSchema CNTKEvalExtended<ElemType>::GetInputSchema() const
{
VariableSchema inputLayouts;
auto nodes = m_inputNodes;
if (nodes.size() == 0)
{
// Default to all nodes
nodes = m_net->InputNodesForOutputs({});
}
for (const auto& n : nodes)
{
inputLayouts.push_back(ToVariableLayout(n));
}
return inputLayouts;
}
template<typename ElemType>
void CNTKEvalExtended<ElemType>::ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output)
{
if (inputs.size() != (size_t)std::distance(m_inputMatrices.begin(), m_inputMatrices.end()))
{
RuntimeError("Expected %d inputs, but got %d", (int)std::distance(m_inputMatrices.begin(), m_inputMatrices.end()), (int)inputs.size());
}
int i = 0;
for (auto& input : m_inputMatrices)
{
VariableBuffer<ElemType> buffer = inputs[i];
int numRows = input.second.sampleLayout.GetNumElements();
int numCols = buffer.m_numberOfSamples;
shared_ptr<Matrix<ElemType>> matrix = dynamic_pointer_cast<Matrix<ElemType>>(input.second.matrix);
auto type = matrix->GetMatrixType();
input.second.pMBLayout->Init(1, numCols);
input.second.pMBLayout->AddSequence(0, 0, 0, numCols);
if (type == MatrixType::DENSE)
{
matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);
}
else if (type == MatrixType::SPARSE)
{
// In the sparse case the m_buffer layout is identical to CUDA's CSC layout
// (see http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc).
matrix->SetMatrixFromCSCFormat(buffer.m_colIndices.data(), buffer.m_indices.data(), buffer.m_buffer.data(), buffer.m_buffer.size(), numRows, numCols);
}
++i;
}
ComputationNetwork::BumpEvalTimeStamp(m_inputNodes);
for (int i = 0; i < m_outputNodes.size(); ++i)
{
auto node = m_outputNodes[i];
m_net->ForwardProp(node);
shared_ptr<Matrix<ElemType>> outputMatrix = dynamic_pointer_cast<Matrix<ElemType>>(node->ValuePtr());
auto pMBLayout = node->GetMBLayout();
if (!pMBLayout)
{
pMBLayout = make_shared<MBLayout>();
pMBLayout->InitAsFrameMode(1); // treat this as if we have one single sample
}
const auto& seq = pMBLayout->GetAllSequences();
if (seq.size() != 1)
{
RuntimeError("Only 1 sequence supported by this API"); // TODO
}
std::vector<ElemType>& vec = output[i].m_buffer;
vec.resize(outputMatrix->GetNumElements());
ElemType* data = const_cast<ElemType*>(vec.data());
size_t numElements = outputMatrix->GetNumElements();
outputMatrix->CopyToArray(data, numElements);
}
}
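To exercise the sparse branch above, a caller would hand ForwardPass a CSC-formatted buffer like the VariableBuffer sketched earlier; the values, and a network with a single sparse input, are assumptions:
Variables<float> inputs(1), outputs(1);
inputs[0].m_numberOfSamples = 3;
inputs[0].m_indices    = { 1, 3, 2, 3, 5, 6, 2, 7 };
inputs[0].m_colIndices = { 0, 2, 6, 8 };
inputs[0].m_buffer.assign(8, 0.5f);  // the 8 non-zero values
eval->ForwardPass(inputs, outputs);  // takes the MatrixType::SPARSE path above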
template <typename ElemType>
void CNTKEvalExtended<ElemType>::Destroy()
{
CNTKEvalBase<ElemType>::Destroy();
delete this;
}
template <typename ElemType>
void EVAL_API GetEvalExtended(IEvaluateModelExtended<ElemType>** peval)
{
*peval = new CNTKEvalExtended<ElemType>();
}
extern "C" EVAL_API void GetEvalExtendedF(IEvaluateModelExtended<float>** peval)
{
GetEvalExtended(peval);
}
extern "C" EVAL_API void GetEvalExtendedD(IEvaluateModelExtended<double>** peval)
{
GetEvalExtended(peval);
}
template class CNTKEvalExtended<double>;
template class CNTKEvalExtended<float>;
} } }

View file

@@ -22,48 +22,97 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class CNTKEval : public IEvaluateModel<ElemType>
template <typename ElemType>
class CNTKEvalBase : public IEvaluateModelBase<ElemType>
{
protected:
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
EvalReader<ElemType>* m_reader;
EvalWriter<ElemType>* m_writer;
ConfigParameters m_config;
ComputationNetworkPtr m_net;
std::map<std::wstring, size_t> m_dimensions;
size_t m_start;
public:
// constructor
CNTKEval()
: m_reader(nullptr), m_net(nullptr)
{
}
CNTKEvalBase() : m_net(nullptr) { }
public:
// CreateNetwork - create a network based on the network description
// networkDescription - network description
virtual void CreateNetwork(const std::string& networkDescription);
// GetNodeDimensions - Get the node dimensions of the specified nodes
// dimensions - map from name of node to dimension of the node
// nodeGroup - type of node we are requesting (input/output/specified)
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
// outputNodeName - name of node that will be evaluated
virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName);
// Evaluate - Evaluate using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
// Evaluate - Evaluate using the model with the given inputs and outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);
virtual void Init(const std::string& config);
virtual void Destroy();
virtual void ResetState();
};
// ------------------------------------------------------------------------
// Basic interface
// ------------------------------------------------------------------------
template <typename ElemType>
class CNTKEval : public CNTKEvalBase<ElemType>, public IEvaluateModel<ElemType>
{
EvalReader<ElemType>* m_reader;
EvalWriter<ElemType>* m_writer;
std::map<std::wstring, size_t> m_dimensions;
size_t m_start;
public:
CNTKEval() : CNTKEvalBase<ElemType>(), m_reader(nullptr), m_writer(nullptr) {}
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);
virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName);
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);
virtual void Destroy() override;
virtual void CreateNetwork(const std::string& networkDescription) override
{
CNTKEvalBase<ElemType>::CreateNetwork(networkDescription);
}
virtual void Init(const std::string& config) override
{
CNTKEvalBase<ElemType>::Init(config);
m_start = 0;
}
virtual void ResetState() override
{
m_start = 1 - m_start;
}
};
// ------------------------------------------------------------------------
// Extended interface
// ------------------------------------------------------------------------
template <typename ElemType>
class CNTKEvalExtended : public CNTKEvalBase<ElemType>, public IEvaluateModelExtended<ElemType>
{
virtual VariableSchema GetOutputSchema() const override;
virtual void StartForwardEvaluation(std::vector<wstring> outputs) override;
virtual VariableSchema GetInputSchema() const override;
virtual void ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output) override;
virtual void Destroy() override;
virtual void CreateNetwork(const std::string& networkDescription) override
{
CNTKEvalBase<ElemType>::CreateNetwork(networkDescription);
}
virtual void Init(const std::string& config) override
{
CNTKEvalBase<ElemType>::Init(config);
}
private:
static VariableLayout ToVariableLayout(const ComputationNodeBasePtr n);
std::vector<ComputationNodeBasePtr> m_outputNodes;
std::shared_ptr<ScopedNetworkOperationMode> m_scopedNetworkOperationMode;
std::vector<ComputationNodeBasePtr> m_inputNodes;
StreamMinibatchInputs m_inputMatrices;
};
} } }

View file

@@ -55,8 +55,8 @@
<TargetName>EvalDll</TargetName>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(MSMPI_LIB64);$(SolutionDir)$(Platform)\$(Configuration);$(NvmlLibPath)</AdditionalLibraryDirectories>
@@ -99,7 +99,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib;ReaderLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
@@ -153,4 +153,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@@ -2,39 +2,18 @@
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="CNTKEval.cpp" />
<ClCompile Include="..\Common\fileutil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\File.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\TimerUtility.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="dllmain.cpp">
<Filter>Misc</Filter>
</ClCompile>
<ClCompile Include="stdafx.cpp">
<Filter>Misc</Filter>
</ClCompile>
<ClCompile Include="..\Common\Config.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\Eval.cpp">
<Filter>For External Use</Filter>
</ClCompile>
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\CNTK\BrainScript\BrainScriptEvaluator.cpp">
<Filter>BrainScript</Filter>
</ClCompile>
<ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp">
<Filter>BrainScript</Filter>
</ClCompile>
<ClCompile Include="..\Common\DataReader.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="EvalReader.h" />

View file

@@ -715,11 +715,12 @@ void CPUMatrix<ElemType>::SetValue(const ElemType v)
}
else
{
ElemType* bufPtr = Data();
ElemType* bufPtr = Data();
long m = (long) GetNumElements();
// 2-way thread parallelism is sufficient for the memory bound
// operation of just setting the values of an array.
const unsigned SETVALUE_NUM_THREADS = 2;
UNUSED(SETVALUE_NUM_THREADS); // in case OMP is turned off.
#pragma omp parallel for num_threads(SETVALUE_NUM_THREADS)
// four-way unrolling
for (long i = 0; i < (m & ~3); i += 4)

View file

@@ -2,12 +2,6 @@
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="Matrix.cpp" />
<ClCompile Include="..\Common\File.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\fileutil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="CPUMatrix.cpp">
<Filter>CPU</Filter>
</ClCompile>

View file

@@ -15,12 +15,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/*static*/ struct DataReaderHelpers
{
template <class ElemType>
static void NotifyChangedNodes(ComputationNetworkPtr net, StreamMinibatchInputs& inputMatrices)
{
// reader will have resized input node's m_value directly. Nodes must be notified to do necessary internal state updates from that.
// TODO: This is a stopgap. SGD will at some point change from sets of matrices to sets of nodes. Then this will become much simpler.
std::set<MatrixBasePtr> matrices;
for (const auto& iter : inputMatrices)
matrices.insert(iter.second.matrix);
for (auto& node : net->FeatureNodes())
if (matrices.find(node->As<ComputationNode<ElemType>>()->ValuePtr()) != matrices.end())
node->NotifyFunctionValuesMBSizeModified();
for (auto& node : net->LabelNodes())
if (matrices.find(node->As<ComputationNode<ElemType>>()->ValuePtr()) != matrices.end())
node->NotifyFunctionValuesMBSizeModified();
}
// -------------------------------------------------------------------
// GetMinibatchIntoNetwork() -- get one minibatch from Reader (this->trainSetDataReader) into Network (this->net)
// Returns false if no data is read. In that case, no other return value can be expected to contain meaningful values (e.g. actualMBSize will be unchanged).
// Sets actualMBSize to the number of matrix columns. Note that 0 is a valid value to be returned for actualMBSize, caller must handle that correctly.
// -------------------------------------------------------------------
// Note: This will go away with the redesigned reader interface.
// TODO: callers of this often do ComputationNetwork::BumpEvalTimeStamp(featureNodes) and also for labels; we should eliminate the need for this.
template <class ElemType>
@@ -78,17 +93,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
DecimateMinibatchInPlace<ElemType>(inputMatrices, mpi->NumNodesInUse(), mpi->CurrentNodeRank(), pMBLayout);
}
// reader will have resized input node's m_value directly. Nodes must be notified to do necessary internal state updates from that.
// TODO: This is a stopgap. SGD will at some point change from sets of matrices to sets of nodes. Then this will become much simpler.
std::set<MatrixBasePtr> matrices;
for (const auto& iter : inputMatrices)
matrices.insert(iter.second.matrix);
for (auto& node : net->FeatureNodes())
if (matrices.find(node->As<ComputationNode<ElemType>>()->ValuePtr()) != matrices.end())
node->NotifyFunctionValuesMBSizeModified();
for (auto& node : net->LabelNodes())
if (matrices.find(node->As<ComputationNode<ElemType>>()->ValuePtr()) != matrices.end())
node->NotifyFunctionValuesMBSizeModified();
NotifyChangedNodes<ElemType>(net, inputMatrices);
// get MB size and tell Network to update its nodes' buffers based on what's in the input matrices
// Note: Decimation may have reduced this to 0 frames. We still must return 'true'.
@@ -99,6 +104,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return true;
}
// get StreamMinibatchInputs for a given set of input nodes
static StreamMinibatchInputs RetrieveInputMatrices(const std::vector<ComputationNodeBasePtr>& inputNodes)
{
StreamMinibatchInputs inputMatrices;
for (auto& node : inputNodes)
inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout());
return inputMatrices;
}
// -------------------------------------------------------------------
// DecimateMinibatch - decimate minibatch for parallelization
// -------------------------------------------------------------------

View file

@@ -23,63 +23,12 @@ using namespace std;
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class SimpleOutputWriter
{
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
private:
std::vector<ComputationNodeBasePtr> DetermineOutputNodes(const std::vector<std::wstring>& outputNodeNames)
{
std::vector<ComputationNodeBasePtr> outputNodes;
if (outputNodeNames.size() == 0)
{
if (m_verbosity > 0)
fprintf(stderr, "OutputNodeNames are not specified, using the default output nodes.\n");
if (m_net->OutputNodes().size() == 0)
LogicError("There is no default output node specified in the network.");
outputNodes = m_net->OutputNodes();
}
else
{
for (int i = 0; i < outputNodeNames.size(); i++)
outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
}
return outputNodes;
}
// collect all input nodes that outputNodes depend on
// TODO: This is rather generic, we should move this to a shared place. DataReaderHelpers.h?
std::vector<ComputationNodeBasePtr> DetermineInputNodes(const std::vector<ComputationNodeBasePtr>& outputNodes)
{
// use map to remove duplicated items
std::set<ComputationNodeBasePtr> inputNodesMap;
for (auto& onode : outputNodes)
{
for (auto& inode : m_net->InputNodes(onode))
inputNodesMap.insert(inode);
}
std::vector<ComputationNodeBasePtr> inputNodes;
for (auto& inode : inputNodesMap)
inputNodes.push_back(inode);
return inputNodes;
}
// get StreamMinibatchInputs for a given set of input nodes
// TODO: This seems generic, we should have that in a shared place.
StreamMinibatchInputs RetrieveInputMatrices(const std::vector<ComputationNodeBasePtr>& inputNodes)
{
StreamMinibatchInputs inputMatrices;
for (auto& node : inputNodes)
inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout());
return inputMatrices;
}
public:
SimpleOutputWriter(ComputationNetworkPtr net, int verbosity = 0)
: m_net(net), m_verbosity(verbosity)
@@ -90,13 +39,16 @@ public:
{
ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring);
std::vector<ComputationNodeBasePtr> outputNodes = DetermineOutputNodes(outputNodeNames);
std::vector<ComputationNodeBasePtr> inputNodes = DetermineInputNodes(outputNodes);
if (outputNodeNames.size() == 0 && m_verbosity > 0)
fprintf(stderr, "OutputNodeNames are not specified, using the default output nodes.\n");
std::vector<ComputationNodeBasePtr> outputNodes = m_net->OutputNodesByName(outputNodeNames);
std::vector<ComputationNodeBasePtr> inputNodes = m_net->InputNodesForOutputs(outputNodeNames);
// allocate memory for forward computation
m_net->AllocateAllMatrices({}, outputNodes, nullptr);
StreamMinibatchInputs inputMatrices = RetrieveInputMatrices(inputNodes);
StreamMinibatchInputs inputMatrices = DataReaderHelpers::RetrieveInputMatrices(inputNodes);
// evaluate with minibatches
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
@@ -148,7 +100,7 @@ public:
// Perform a single forward pass to obtain the output values from a network
void WriteOutput(IDataWriter& dataWriter, const std::vector<std::wstring>& outputNodeNames, size_t numOutputSamples = requestDataSize, bool doUnitTest = false)
{
std::vector<ComputationNodeBasePtr> outputNodes = DetermineOutputNodes(outputNodeNames);
std::vector<ComputationNodeBasePtr> outputNodes = m_net->OutputNodesByName(outputNodeNames);
// allocate memory for forward computation
m_net->AllocateAllMatrices({}, outputNodes, nullptr);
@@ -203,8 +155,8 @@ public:
// In case of unit test, make sure backprop works
ScopedNetworkOperationMode modeGuard(m_net, nodeUnitTest ? NetworkOperationMode::training : NetworkOperationMode::inferring);
std::vector<ComputationNodeBasePtr> outputNodes = DetermineOutputNodes(outputNodeNames);
std::vector<ComputationNodeBasePtr> inputNodes = DetermineInputNodes(outputNodes);
std::vector<ComputationNodeBasePtr> outputNodes = m_net->OutputNodesByName(outputNodeNames);
std::vector<ComputationNodeBasePtr> inputNodes = m_net->InputNodesForOutputs(outputNodeNames);
std::vector<ComputationNodePtr> gradientNodes;
std::vector<ComputationNodeBasePtr> allOutputNodes = outputNodes;
@@ -244,7 +196,7 @@ public:
m_net->AllocateAllMatrices({}, outputNodes, outputNodes[0]);
}
StreamMinibatchInputs inputMatrices = RetrieveInputMatrices(inputNodes);
StreamMinibatchInputs inputMatrices = DataReaderHelpers::RetrieveInputMatrices(inputNodes);
// load a label mapping if requested
std::vector<std::string> labelMapping;