// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // // CNTKEval.cpp : Defines the exported functions for the CNTK DLL. // #define __STDC_FORMAT_MACROS #include #include #include #define EVAL_EXPORTS // creating the exports here #include "Eval.h" #include "Actions.h" #include "CNTKEval.h" #include "CPUMatrix.h" // for SetNumThreads() #include "SimpleOutputWriter.h" #include "NDLNetworkBuilder.h" #ifdef LEAKDETECT #include // leak detection #endif #include "BestGpu.h" #include "MPIWrapper.h" #include "DataDeserializer.h" #include "SequencePacker.h" #include "NoRandomizer.h" #include "HeapMemoryProvider.h" #include "InputAndParamNodes.h" #include "latticearchive.h" #include // TODO: Temporary mechanism to enable memory sharing for // node output value matrices. This will go away when the // sharing is ready to be enabled by default bool g_shareNodeValueMatrices = false; namespace Microsoft { namespace MSR { namespace CNTK { template void CNTKEvalBase::Init(const std::string& config) { m_config.Parse(config); size_t nThreads = m_config("numCPUThreads", "1"); CPUMatrix::SetNumThreads(nThreads); g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false); } // CreateNetwork - create a network based on the network description // networkDescription - network description template void CNTKEvalBase::CreateNetwork(const std::string& networkDescription) { ConfigParameters config; config.Parse(networkDescription); std::vector outputNodeNames; this->m_net = GetModelFromConfig(config, L"outputNodeNames", outputNodeNames); if (this->m_net == nullptr) { LogicError("Unable to construct network from description"); } } // Destroy - cleanup and remove this class // NOTE: this destroys the object, and it can't be used past this point template void CNTKEvalBase::Destroy() { // cleanup everything this->m_net.reset(); } // ---------------------------------------------------------------------------- // Basic interface // ---------------------------------------------------------------------------- template void EVAL_API GetEval(IEvaluateModel** peval) { *peval = new CNTKEval(); } extern "C" EVAL_API void GetEvalF(IEvaluateModel** peval) { GetEval(peval); } extern "C" EVAL_API void GetEvalD(IEvaluateModel** peval) { GetEval(peval); } // GetNodeDimensions - Get the node dimensions of the specified nodes // dimensions - map from name of node to dimension of the node, will be appended to for Input/Output scenarios // nodeGroup - type of node we are requesting (input/output/specified) // NOTE: when nodeGroup==specified the dimensions map is expected to be populated with the string names of the nodes requested, dimensions will be modified return the current value. template void CNTKEval::GetNodeDimensions(std::map& dimensions, NodeGroup nodeGroup) { // On Linux with gcc 4.8.4, it is required to add "this->" when referencing m_net, which is the protected member of the base class with templates, // in order to make the name correctly resolved by the compiler. if (this->m_net == NULL) { for (auto iter = dimensions.begin(); iter != dimensions.end(); iter++) iter->second = 0; return; } const auto& outputNodes = this->m_net->OutputNodes(); switch (nodeGroup) { case nodeInput: { if (outputNodes.size() == 0) { LogicError("No Output nodes found: Cannot determine Input node dimensions due to lack of Output nodes.\n(are 'outputNodeNames' and/or 'OutputNodes' properly defined in the configuration file?)"); } auto& nodes = this->m_net->InputNodes(outputNodes[0]); for (auto& node : nodes) { std::wstring name = node->NodeName(); size_t size = node->GetSampleMatrixNumRows(); dimensions[name] = size; } break; } case nodeOutput: { const auto& nodes = outputNodes; for (auto& node : nodes) { std::wstring name = node->NodeName(); size_t size = node->GetSampleMatrixNumRows(); dimensions[name] = size; } break; } case nodeSpecified: for (auto iter = dimensions.begin(); iter != dimensions.end(); iter++) { auto node = this->m_net->GetNodeFromName(iter->first); iter->second = node->GetSampleMatrixNumRows(); } break; } } // StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls. // ouputNodeName - name of node that will be evaluated template void CNTKEval::StartEvaluateMinibatchLoop(const std::wstring& outputNodeName) { this->m_net->StartEvaluateMinibatchLoop(this->m_net->GetNodeFromName(outputNodeName)); } // Evaluate - Evalute using the model with the given inputs and outputs // inputs - map from node name to input vector // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation template void CNTKEval::Evaluate(std::map*>& inputs, std::map*>& outputs) { size_t minibatchSize = this->m_config(L"minibatchSize", (size_t) 10240); // get the evaluation names from the output string vector outNodeNames; ConfigParameters config; // config["deviceId"] = to_string(this->m_net->GetDeviceId()); // create the reader if necessary if (m_reader == nullptr) { m_reader = new EvalReader(config); } // now set the data in the reader GetNodeDimensions(m_dimensions, nodeInput); m_reader->SetData(&inputs, &m_dimensions); m_reader->SetBoundary(m_start); // create the writer if necessary if (m_writer == nullptr) { m_writer = new EvalWriter(config); } // now set the data in the writer GetNodeDimensions(m_dimensions, nodeOutput); m_writer->SetData(&outputs, &m_dimensions); // call the evaluator SimpleOutputWriter eval(this->m_net); eval.WriteOutput(*m_reader, minibatchSize, *m_writer, outNodeNames); } // Evaluate - Evalute using the model with the given inputs and outputs // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation template void CNTKEval::Evaluate(std::map*>& outputs) { // get the evaluation names from the output string vector outNodeNames; ConfigParameters config; // create the writer if necessary if (m_writer == nullptr) { m_writer = new EvalWriter(config); } // now set the data in the writer GetNodeDimensions(m_dimensions, nodeOutput); m_writer->SetData(&outputs, &m_dimensions); // call the evaluator SimpleOutputWriter eval(this->m_net); eval.WriteOutput(*m_writer, outNodeNames); } template void CNTKEval::Destroy() { CNTKEvalBase::Destroy(); delete m_reader; delete m_writer; delete this; } // instantiate all the combinations we expect to be used template class CNTKEval; template class CNTKEval; // ---------------------------------------------------------------------------- // Extended interface // ---------------------------------------------------------------------------- template VariableLayout CNTKEvalExtended::ToVariableLayout(const ComputationNodeBasePtr n) { auto matrix = dynamic_pointer_cast>(n->ValuePtr()); return VariableLayout { /* name */ n->GetName(), /* type */ sizeof(ElemType) == sizeof(float) ? VariableLayout::Float32 : VariableLayout::Float64, /* storage */ matrix ? matrix->GetMatrixType() == MatrixType::DENSE ? VariableLayout::Dense : matrix->GetMatrixType() == MatrixType::SPARSE ? VariableLayout::Sparse : VariableLayout::Undetermined : VariableLayout::Undetermined, /* dimension */ n->GetSampleLayout().GetNumElements() }; } template void CNTKEvalExtended::StartForwardEvaluation(const std::vector& outputNodeNames) { m_scopedNetworkOperationMode = make_shared(this->m_net, NetworkOperationMode::inferring); m_outputNodes = this->m_net->OutputNodesByName(outputNodeNames); m_inputNodes = this->m_net->InputNodesForOutputs(outputNodeNames); // allocate memory for forward computation this->m_net->AllocateAllMatrices({}, m_outputNodes, nullptr); this->m_net->StartEvaluateMinibatchLoop(m_outputNodes); m_inputMatrices = DataReaderHelpers::RetrieveInputMatrices(m_inputNodes); for (const auto& node : m_outputNodes) { shared_ptr> outputMatrix = dynamic_pointer_cast>(node->ValuePtr()); if (outputMatrix->GetMatrixType() != MatrixType::DENSE) RuntimeError("Sparse outputs are not supported by this API."); } m_started = true; } template VariableSchema CNTKEvalExtended::GetOutputSchema() const { VariableSchema schema; auto& nodes = m_started ? m_outputNodes : this->m_net->OutputNodes(); for (const auto& n : nodes) { schema.push_back(ToVariableLayout(n)); } return schema; } template VariableSchema CNTKEvalExtended::GetInputSchema() const { VariableSchema inputLayouts; auto nodes = m_inputNodes; if (nodes.size() == 0) { // Default to all nodes nodes = this->m_net->InputNodesForOutputs({}); } for (const auto& n : nodes) { inputLayouts.push_back(ToVariableLayout(n)); } return inputLayouts; } template template class ValueContainer> void CNTKEvalExtended::ForwardPassT(const std::vector >& inputs, std::vector >& outputs, bool resetRNN) { if (!m_started) RuntimeError("ForwardPass() called before StartForwardEvaluation()"); if (inputs.size() != (size_t)std::distance(m_inputMatrices.begin(), m_inputMatrices.end())) RuntimeError("Expected %d inputs, but got %d.", (int)std::distance(m_inputMatrices.begin(), m_inputMatrices.end()), (int)inputs.size()); if (outputs.size() != m_outputNodes.size()) RuntimeError("Expected %d outputs, but got %d.", (int)m_outputNodes.size(), (int)outputs.size()); size_t i = 0; for (auto& inputNode : m_inputNodes) { // const cast: The matrix class takes this over without copying and could theoretically change the contents, // though it doesn't in this case. auto& buffer = const_cast&>(inputs[i]); auto matrix = dynamic_pointer_cast>(inputNode->ValuePtr()); auto type = matrix->GetMatrixType(); size_t numRows = inputNode->GetSampleLayout().GetNumElements(); if (buffer.m_buffer.data() == nullptr) RuntimeError("Input %ls: Buffer is not allocated.", m_inputNodes[i]->GetName().c_str()); if (type == MatrixType::DENSE) { if (buffer.m_buffer.size() % numRows != 0) RuntimeError("Input %ls: Expected input data to be a multiple of %" PRIu64 ", but it is %" PRIu64 ".", m_inputNodes[i]->GetName().c_str(), numRows, buffer.m_buffer.size()); if (buffer.m_buffer.size() == 0) RuntimeError("Input %ls: Expected at least one element.", m_inputNodes[i]->GetName().c_str()); } else if (type == MatrixType::SPARSE) { if (buffer.m_colIndices.data() == nullptr) RuntimeError("Input %ls: Due to sparse input format, expected colIndices array, but was nullptr.", m_inputNodes[i]->GetName().c_str()); if (buffer.m_indices.data() == nullptr) RuntimeError("Input %ls: Due to sparse input format, expected Indices array, but was nullptr.", m_inputNodes[i]->GetName().c_str()); if (buffer.m_colIndices.size() < 2) RuntimeError("Input %ls: Expected at least one element (2 entries in colIndices array).", m_inputNodes[i]->GetName().c_str()); if (buffer.m_colIndices[0] != 0) RuntimeError("Input %ls: First element of column indices must be 0", m_inputNodes[i]->GetName().c_str()); if (buffer.m_colIndices[buffer.m_colIndices.size() - 1] != buffer.m_indices.size()) RuntimeError("Input %ls: Last element of column indices must be equal to the size of indices (%ld), but was %d", m_inputNodes[i]->GetName().c_str(), buffer.m_indices.size(), buffer.m_colIndices[buffer.m_colIndices.size() - 1]); } int numCols = type == MatrixType::DENSE ? buffer.m_buffer.size() / numRows : buffer.m_colIndices.size() - 1; assert(numCols >= 1); inputNode->GetMBLayout()->Init(1, numCols); // INT_MIN is used to specify the lower bound of look-back step of recurrent nodes inputNode->GetMBLayout()->AddSequence(0, 0, resetRNN ? 0 : INT_MIN, numCols); if (type == MatrixType::DENSE) matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal); else if (type == MatrixType::SPARSE) { // In the sparse case the m_data layout is identical to CUDA's CSC layout // (see http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc). matrix->SetMatrixFromCSCFormat(buffer.m_colIndices.data(), buffer.m_indices.data(), buffer.m_buffer.data(), buffer.m_buffer.size(), numRows, numCols); } ++i; } ComputationNetwork::BumpEvalTimeStamp(m_inputNodes); for (size_t i = 0; i < m_outputNodes.size(); ++i) { auto node = m_outputNodes[i]; this->m_net->ForwardProp(node); shared_ptr> outputMatrix = dynamic_pointer_cast>(node->ValuePtr()); auto pMBLayout = node->GetMBLayout(); if (!pMBLayout) { pMBLayout = make_shared(); pMBLayout->InitAsFrameMode(1); // treat this as if we have one single sample } const auto& seq = pMBLayout->GetAllSequences(); if (seq.size() != 1) RuntimeError("Only 1 output sequence supported by this API"); ValueContainer& vec = outputs[i].m_buffer; size_t numElements = outputMatrix->GetNumElements(); if (vec.capacity() < numElements) { // Bad luck - we can't reallocate memory of an external object at this point. RuntimeError("Not enough space in output buffer for output '%ls'.", node->GetName().c_str()); } vec.resize(numElements); ElemType* data = const_cast(vec.data()); outputMatrix->CopyToArray(data, numElements); } } template void CNTKEvalExtended::ForwardPass(const Values& inputs, Values& outputs) { ForwardPassT(inputs, outputs, false); } template void CNTKEvalExtended::ForwardPass(const Values& inputs, Values& outputs, bool resetRNN) { ForwardPassT(inputs, outputs, resetRNN); } template void CNTKEvalExtended::ForwardPass(const ValueRefs& inputs, ValueRefs& outputs) { ForwardPassT(inputs, outputs, false); } template void CNTKEvalExtended::ForwardPass(const ValueRefs& inputs, ValueRefs& outputs, bool resetRNN) { ForwardPassT(inputs, outputs, resetRNN); } template void CNTKEvalExtended::Destroy() { // Since m_scopeNetworkOperationMode has a reference to m_net, it has to be released first. m_scopedNetworkOperationMode.reset(); CNTKEvalBase::Destroy(); delete this; } template void EVAL_API GetEvalExtended(IEvaluateModelExtended** peval) { *peval = new CNTKEvalExtended(); } extern "C" EVAL_API void GetEvalExtendedF(IEvaluateModelExtended** peval) { GetEvalExtended(peval); } extern "C" EVAL_API void GetEvalExtendedD(IEvaluateModelExtended** peval) { GetEvalExtended(peval); } template class CNTKEvalExtended; template class CNTKEvalExtended; } } }