Add an overload that takes a reference to a preallocated array instead of std::vector

Clemens Marschner 2016-06-09 10:14:07 +02:00
Parent 6888af15a6
Commit 40a16a45ec
4 changed files: 92 additions and 23 deletions
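For context, a minimal usage sketch of the new overload, assuming the extended EvalDll interface shown in the diffs below (the eval handle, outputLayouts, and the buffer sizes are illustrative, taken from the unit tests rather than prescribed by the API):

// Existing overload: Values<ElemType> owns std::vector storage, which the library may resize.
Values<float> inputs(1);
inputs[0].m_buffer = { 2 };
Values<float> outputs = outputLayouts.CreateBuffers<float>({ 1 });
eval->ForwardPass(inputs, outputs);

// New overload: ValueRefs<ElemType> wraps caller-preallocated arrays via VectorRef::InitFrom,
// so no allocation or deallocation happens across the DLL boundary.
std::vector<float> preallocatedOutput(1);
ValueRefs<float> inputRefs(1), outputRefs(1);
inputRefs[0].m_buffer.InitFrom(inputs[0].m_buffer);
outputRefs[0].m_buffer.InitFrom(preallocatedOutput);
eval->ForwardPass(inputRefs, outputRefs);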

View file

@@ -171,7 +171,7 @@ public:
// from a single input or output.
// This is used for both dense and sparse data.
//
template<typename ElemType>
template<template<typename> class Container, typename ElemType>
struct ValueBuffer
{
//
@@ -181,7 +181,7 @@ struct ValueBuffer
// [2,2] and 12 elements in the buffer, the number of samples is 3.
// For sparse inputs, the number of samples is indicated by the m_colIndices field.
//
std::vector<ElemType> m_buffer;
Container<ElemType> m_buffer;
// In case of sparse data, the following is also used. Otherwise, the
// contents are ignored.
@@ -197,19 +197,44 @@ struct ValueBuffer
// For every element in buffer, an entry in this array gives its position.
// For every vector the entries must be ascending.
//
std::vector<int> m_indices;
Container<int> m_indices;
//
// Contains numberOfSamples + 1 indices into the buffer. The first entry
// is always 0. The last entry points after the last element.
// See http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc
//
std::vector<int> m_colIndices;
Container<int> m_colIndices;
};
template <typename ElemType>
using Vector = std::vector<ElemType, std::allocator<ElemType>>;
template <typename ElemType>
using Values = std::vector<ValueBuffer<ElemType>>;
using Values = std::vector<ValueBuffer<Vector, ElemType>>;
template <typename ElemType>
struct VectorRef
{
ElemType* m_vector;
size_t m_capacity;
size_t m_size;
VectorRef() : m_vector(nullptr), m_capacity(0), m_size(0) {}
void InitFrom(std::vector<ElemType>& src) { m_vector = src.data(); m_capacity = src.capacity(); m_size = src.size(); }
size_t size() const { return m_size; }
size_t capacity() const { return m_capacity; }
ElemType* data() { return m_vector; }
// const ElemType* data() const { return m_vector; }
ElemType* begin() { return m_vector; }
ElemType* end() { return m_vector + m_size; }
void resize(size_t size) { m_size = size; }
ElemType& operator[](size_t idx) { return m_vector[idx]; }
const ElemType& operator[](size_t idx) const { return m_vector[idx]; }
};
template <typename ElemType>
using ValueRefs = std::vector<ValueBuffer<VectorRef, ElemType>>;
//
// Meta data
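To make the sparse layout concrete, a small worked example of the CSC fields described above (the values are illustrative only):

// Three samples of dimension 4:
//   sample 0 has value 1 at row 0 and value 2 at row 2,
//   sample 1 is empty,
//   sample 2 has value 3 at row 1.
ValueBuffer<Vector, float> sparse;
sparse.m_buffer     = { 1, 2, 3 };    // non-zero values, sample by sample
sparse.m_indices    = { 0, 2, 1 };    // row of each value, ascending within a sample
sparse.m_colIndices = { 0, 2, 2, 3 }; // numberOfSamples + 1 entries: first is 0, last equals m_indices.size()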
@@ -290,7 +315,7 @@ public:
virtual VariableSchema GetInputSchema() const = 0;
//
// Evaluate - Evaluate (perform a forward pass for) a single unit using the model with the given inputs and
// ForwardPass - Evaluate (perform a forward pass for) a single unit using the model with the given inputs and
// outputs.
// The layout and shape of the data in inputs vector must match the schema returned by GetInputLayouts.
// Output must be preallocated and sized to avoid memory allocation / deallocation across DLL
@@ -300,6 +325,8 @@ public:
// outputs - vector of output buffers. Must be sized to fit output schema.
//
virtual void ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& output) = 0;
virtual void ForwardPass(const ValueRefs<ElemType>& inputs, ValueRefs<ElemType>& output) = 0;
};
template <typename ElemType>
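Note that VectorRef mimics only the slice of the std::vector interface that ForwardPass actually touches, and it never owns or grows its storage: InitFrom records the caller's pointer, size, and capacity, and resize merely updates the recorded size. A minimal sketch of what that means for a caller (the names here are illustrative):

std::vector<float> storage(10);  // caller-owned, preallocated once
VectorRef<float> ref;
ref.InitFrom(storage);           // borrows storage.data(); no copy, no ownership transfer
ref.resize(4);                   // only the recorded size changes; nothing is (re)allocated
ref[0] = 42.0f;                  // writes through to storage[0]
// storage must outlive ref and must have enough capacity for whatever is written through it.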

View file

@@ -301,7 +301,8 @@ VariableSchema CNTKEvalExtended<ElemType>::GetInputSchema() const
}
template<typename ElemType>
void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& outputs)
template<template<typename> class ValueContainer>
void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<ValueContainer, ElemType> >& inputs, std::vector<ValueBuffer<ValueContainer, ElemType> >& outputs)
{
if (!m_started)
RuntimeError("ForwardPass() called before StartForwardEvaluation()");
@@ -315,7 +316,9 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
size_t i = 0;
for (auto& input : m_inputMatrices)
{
ValueBuffer<ElemType> buffer = inputs[i];
// const cast: The matrix class takes this over without copying and could theoretically change the contents,
// though it doesn't in this case.
ValueBuffer<ValueContainer, ElemType>& buffer = const_cast<ValueBuffer<ValueContainer, ElemType>&>(inputs[i]);
shared_ptr<Matrix<ElemType>> matrix = dynamic_pointer_cast<Matrix<ElemType>>(input.second.matrix);
auto type = matrix->GetMatrixType();
int numRows = input.second.sampleLayout.GetNumElements();
@@ -333,7 +336,7 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
RuntimeError("Input %ls: Expected at least one element.", m_inputNodes[i]->GetName().c_str());
if (buffer.m_colIndices[0] != 0)
RuntimeError("Input %ls: First element of column indices must be 0", m_inputNodes[i]->GetName().c_str());
if (buffer.m_colIndices[buffer.m_colIndices.size()-1] != buffer.m_indices.size())
if (buffer.m_colIndices[buffer.m_colIndices.size() - 1] != buffer.m_indices.size())
RuntimeError("Input %ls: Last element of column indices must be equal to the size of indices (%ld), but was %d", m_inputNodes[i]->GetName().c_str(), buffer.m_indices.size(), buffer.m_colIndices[buffer.m_colIndices.size() - 1]);
}
@@ -341,7 +344,7 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
assert(numCols >= 1);
input.second.pMBLayout->Init(1, numCols);
input.second.pMBLayout->AddSequence(0, 0, 0, numCols);
if (type == MatrixType::DENSE)
{
matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);
@@ -357,14 +360,14 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
}
ComputationNetwork::BumpEvalTimeStamp(m_inputNodes);
for (size_t i = 0; i < m_outputNodes.size(); ++i)
{
auto node = m_outputNodes[i];
m_net->ForwardProp(node);
shared_ptr<Matrix<ElemType>> outputMatrix = dynamic_pointer_cast<Matrix<ElemType>>(node->ValuePtr());
auto pMBLayout = node->GetMBLayout();
if (!pMBLayout)
if (!pMBLayout)
{
pMBLayout = make_shared<MBLayout>();
pMBLayout->InitAsFrameMode(1); // treat this as if we have one single sample
@@ -376,8 +379,8 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
RuntimeError("Only 1 output sequence supported by this API");
}
std::vector<ElemType>& vec = outputs[i].m_buffer;
ValueContainer<ElemType>& vec = outputs[i].m_buffer;
size_t numElements = outputMatrix->GetNumElements();
if (vec.capacity() < numElements)
@@ -392,6 +395,18 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
}
}
template<typename ElemType>
void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& outputs)
{
ForwardPassT(inputs, outputs);
}
template<typename ElemType>
void CNTKEvalExtended<ElemType>::ForwardPass(const ValueRefs<ElemType>& inputs, ValueRefs<ElemType>& outputs)
{
ForwardPassT(inputs, outputs);
}
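The two public overloads share one implementation: because a virtual member function cannot itself be a template in C++, each ForwardPass simply forwards to the private member template ForwardPassT, which is instantiated once for Vector and once for VectorRef. A stripped-down sketch of the idiom (the real class is CNTKEvalExtended<ElemType>; this is illustration only):

template <typename ElemType>
class EvaluatorSketch
{
public:
    virtual void ForwardPass(const Values<ElemType>& in, Values<ElemType>& out)       { ForwardPassT(in, out); }
    virtual void ForwardPass(const ValueRefs<ElemType>& in, ValueRefs<ElemType>& out) { ForwardPassT(in, out); }
private:
    template <template <typename> class Container>  // Container is Vector or VectorRef
    void ForwardPassT(const std::vector<ValueBuffer<Container, ElemType>>& in,
                      std::vector<ValueBuffer<Container, ElemType>>& out);
};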
template <typename ElemType>
void CNTKEvalExtended<ElemType>::Destroy()
{

View file

@@ -100,6 +100,8 @@ public:
virtual void ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& output) override;
virtual void ForwardPass(const ValueRefs<ElemType>& inputs, ValueRefs<ElemType>& output) override;
virtual void Destroy() override;
virtual void CreateNetwork(const std::string& networkDescription) override
@@ -118,5 +120,9 @@ private:
std::vector<ComputationNodeBasePtr> m_inputNodes;
StreamMinibatchInputs m_inputMatrices;
bool m_started;
template<template<typename> class ValueContainer>
void ForwardPassT(const std::vector < ValueBuffer<ValueContainer, ElemType> >& inputs,
std::vector < ValueBuffer<ValueContainer, ElemType> >& outputs);
};
} } }

View file

@@ -89,8 +89,7 @@ BOOST_AUTO_TEST_CASE(EvalConstantPlusTest)
Values<float> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
// Allocate the input values layer (empty)
Values<float> inputBuffer;
Values<float> inputBuffer(0);
// We can call the evaluate method and get back the results...
eval->ForwardPass(inputBuffer, outputBuffer);
@@ -161,10 +160,10 @@ BOOST_AUTO_TEST_CASE(EvalScalarTimesDualOutputTest)
eval = SetupNetworkAndGetLayouts(modelDefinition, inputLayouts, outputLayouts);
// Allocate the output values layer
std::vector<ValueBuffer<float>> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
auto outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
// Allocate the input values layer
std::vector<ValueBuffer<float>> inputBuffer(1);
Values<float> inputBuffer(1);
inputBuffer[0].m_buffer = { 2 };
// We can call the evaluate method and get back the results...
@@ -197,14 +196,14 @@ BOOST_AUTO_TEST_CASE(EvalDenseTimesTest)
eval = SetupNetworkAndGetLayouts(modelDefinition, inputLayouts, outputLayouts);
// Allocate the output values layer
std::vector<ValueBuffer<float>> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
Values<float> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
// Number of inputs must adhere to the schema
std::vector<ValueBuffer<float>> inputBuffer1(0);
Values<float> inputBuffer1(0);
BOOST_REQUIRE_THROW(eval->ForwardPass(inputBuffer1, outputBuffer), std::exception); // Not enough inputs
// Number of elements in the input must adhere to the schema
std::vector<ValueBuffer<float>> inputBuffer(1);
Values<float> inputBuffer(1);
inputBuffer[0].m_buffer = { 1, 2, 3 };
BOOST_REQUIRE_THROW(eval->ForwardPass(inputBuffer, outputBuffer), std::exception); // Not enough elements in the sample
@@ -216,6 +215,17 @@ BOOST_AUTO_TEST_CASE(EvalDenseTimesTest)
auto buf = outputBuffer[0].m_buffer;
BOOST_CHECK_EQUAL_COLLECTIONS(buf.begin(), buf.end(), expected.begin(), expected.end());
// Do the same via ValueRefs
ValueRefs<float> inputRefs(1);
inputRefs[0].m_buffer.InitFrom(inputBuffer[0].m_buffer);
inputRefs[0].m_colIndices.InitFrom(inputBuffer[0].m_colIndices);
inputRefs[0].m_indices.InitFrom(inputBuffer[0].m_indices);
ValueRefs<float> outputRefs(1);
std::vector<float> output(1);
outputRefs[0].m_buffer.InitFrom(output);
eval->ForwardPass(inputRefs, outputRefs);
BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), expected.begin(), expected.end());
eval->Destroy();
}
@@ -238,10 +248,10 @@ BOOST_AUTO_TEST_CASE(EvalSparseTimesTest)
eval = SetupNetworkAndGetLayouts(modelDefinition, inputLayouts, outputLayouts);
// Allocate the output values layer
std::vector<ValueBuffer<float>> outputBuffer = outputLayouts.CreateBuffers<float>({ 3 });
Values<float> outputBuffer = outputLayouts.CreateBuffers<float>({ 3 });
// Allocate the input values layer
std::vector<ValueBuffer<float>> inputBuffer(1);
Values<float> inputBuffer(1);
inputBuffer[0].m_buffer = {1, 2, 3, 5, 6};
inputBuffer[0].m_indices = {0, 2, 2, 1, 2};
@@ -267,6 +277,17 @@ BOOST_AUTO_TEST_CASE(EvalSparseTimesTest)
auto buf = outputBuffer[0].m_buffer;
BOOST_CHECK_EQUAL_COLLECTIONS(buf.begin(), buf.end(), expected.begin(), expected.end());
// Do the same via ValueRefs
ValueRefs<float> inputRefs(1);
inputRefs[0].m_buffer.InitFrom(inputBuffer[0].m_buffer);
inputRefs[0].m_colIndices.InitFrom(inputBuffer[0].m_colIndices);
inputRefs[0].m_indices.InitFrom(inputBuffer[0].m_indices);
ValueRefs<float> outputRefs(1);
std::vector<float> output(3);
outputRefs[0].m_buffer.InitFrom(output);
eval->ForwardPass(inputRefs, outputRefs);
BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), expected.begin(), expected.end());
outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
BOOST_REQUIRE_THROW(eval->ForwardPass(inputBuffer, outputBuffer), std::exception); // Not enough capacity in output.