Add an overload that takes reference to a preallocated array instead of std::vector
This commit is contained in:
Родитель
6888af15a6
Коммит
40a16a45ec
|
@ -171,7 +171,7 @@ public:
|
|||
// from a single input or output.
|
||||
// This is used for both dense and sparse data.
|
||||
//
|
||||
template<typename ElemType>
|
||||
template<template<typename> class Container, typename ElemType>
|
||||
struct ValueBuffer
|
||||
{
|
||||
//
|
||||
|
@ -181,7 +181,7 @@ struct ValueBuffer
|
|||
// [2,2] and 12 elements in the buffer, the number of samples is 3.
|
||||
// For sparse inputs, the number of samples is indicated by the m_colIndices field.
|
||||
//
|
||||
std::vector<ElemType> m_buffer;
|
||||
Container<ElemType> m_buffer;
|
||||
|
||||
// In case of sparse data, the following is also used. Otherwise, the
|
||||
// contents are ignored.
|
||||
|
@ -197,19 +197,44 @@ struct ValueBuffer
|
|||
// For every element in buffer, an entry in this array gives its position.
|
||||
// For every vector the entries must be ascending.
|
||||
//
|
||||
std::vector<int> m_indices;
|
||||
Container<int> m_indices;
|
||||
|
||||
//
|
||||
// Contains numberOfsamples + 1 indices into the buffer. The first entry
|
||||
// is always 0. The last entry points after the last element.
|
||||
// See http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc
|
||||
//
|
||||
std::vector<int> m_colIndices;
|
||||
Container<int> m_colIndices;
|
||||
};
|
||||
|
||||
//
// Single-parameter alias for std::vector with the default allocator.
// std::vector itself takes two template parameters (element type and allocator), whereas
// ValueBuffer expects a container template that takes only the element type; this alias
// provides a template of that shape.
//
template <typename ElemType>
|
||||
using Vector = std::vector<ElemType, std::allocator<ElemType>>;
|
||||
|
||||
template <typename ElemType>
|
||||
using Values = std::vector<ValueBuffer<ElemType>>;
|
||||
using Values = std::vector<ValueBuffer<Vector, ElemType>>;
|
||||
|
||||
//
// VectorRef - a non-owning view over a contiguous, preallocated array of ElemType.
// It exposes the subset of the std::vector interface (size, capacity, data, begin, end,
// resize, operator[]) that the evaluation code relies on, so that it can be used as the
// container type of a ValueBuffer in place of std::vector.
//
// The view never allocates, frees or copies the storage it points to. The caller owns that
// storage and must keep it alive, and at the same address, for as long as the view is used.
//
template <typename ElemType>
struct VectorRef
{
    ElemType* m_vector; // first element of the referenced storage (not owned)
    size_t m_capacity;  // number of elements the referenced storage can hold
    size_t m_size;      // number of elements currently considered valid

    // Creates an empty view that refers to no storage.
    VectorRef() : m_vector(nullptr), m_capacity(0), m_size(0) {}

    // Points this view at the storage of src, taking over its current size and capacity.
    // Any later operation that makes src reallocate leaves this view dangling.
    void InitFrom(std::vector<ElemType>& src)
    {
        m_vector = src.data();
        m_capacity = src.capacity();
        m_size = src.size();
    }

    size_t size() const { return m_size; }
    size_t capacity() const { return m_capacity; }

    // Const and non-const accessors are provided in pairs so that a const VectorRef
    // can be read and iterated just like a const std::vector.
    ElemType* data() { return m_vector; }
    const ElemType* data() const { return m_vector; }
    ElemType* begin() { return m_vector; }
    const ElemType* begin() const { return m_vector; }
    ElemType* end() { return m_vector + m_size; }
    const ElemType* end() const { return m_vector + m_size; }

    // Only records the new element count; no memory is allocated and no element is
    // initialized. The value is not checked here, so the caller must make sure that
    // size does not exceed capacity().
    void resize(size_t size) { m_size = size; }

    // Unchecked element access, as with std::vector::operator[].
    ElemType& operator[](size_t idx) { return m_vector[idx]; }
    const ElemType& operator[](size_t idx) const { return m_vector[idx]; }
};
|
||||
|
||||
//
// A list of value buffers whose m_buffer, m_indices and m_colIndices members are VectorRef
// views rather than std::vector instances.
//
template <typename ElemType>
|
||||
using ValueRefs = std::vector<ValueBuffer<VectorRef, ElemType>>;
|
||||
|
||||
//
|
||||
// Meta data
|
||||
|
@ -290,7 +315,7 @@ public:
|
|||
virtual VariableSchema GetInputSchema() const = 0;
|
||||
|
||||
//
|
||||
// Evaluate - Evaluate (perform a forward pass for) a single unit using the model with the given inputs and
|
||||
// ForwardPass - Evaluate (perform a forward pass for) a single unit using the model with the given inputs and
|
||||
// outputs.
|
||||
// The layout and shape of the data in inputs vector must match the schema returned by GetInputSchema.
|
||||
// Output must be preallocated and sized to avoid memory allocation / deallocation across DLL
|
||||
|
@ -300,6 +325,8 @@ public:
|
|||
// outputs - vector of output buffers. Must be sized to fit output schema.
|
||||
//
|
||||
virtual void ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& output) = 0;
|
||||
|
||||
virtual void ForwardPass(const ValueRefs<ElemType>& inputs, ValueRefs<ElemType>& output) = 0;
|
||||
};
|
||||
|
||||
template <typename ElemType>
|
||||
|
|
|
@ -301,7 +301,8 @@ VariableSchema CNTKEvalExtended<ElemType>::GetInputSchema() const
|
|||
}
|
||||
|
||||
template<typename ElemType>
|
||||
void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& outputs)
|
||||
template<template<typename> class ValueContainer>
|
||||
void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<ValueContainer, ElemType> >& inputs, std::vector<ValueBuffer<ValueContainer, ElemType> >& outputs)
|
||||
{
|
||||
if (!m_started)
|
||||
RuntimeError("ForwardPass() called before StartForwardEvaluation()");
|
||||
|
@ -315,7 +316,9 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
|
|||
size_t i = 0;
|
||||
for (auto& input : m_inputMatrices)
|
||||
{
|
||||
ValueBuffer<ElemType> buffer = inputs[i];
|
||||
// const cast: The matrix class takes this over without copying and could theoretically change the contents,
|
||||
// though it doesn't in this case.
|
||||
ValueBuffer<ValueContainer, ElemType>& buffer = const_cast<ValueBuffer<ValueContainer, ElemType>&>(inputs[i]);
|
||||
shared_ptr<Matrix<ElemType>> matrix = dynamic_pointer_cast<Matrix<ElemType>>(input.second.matrix);
|
||||
auto type = matrix->GetMatrixType();
|
||||
int numRows = input.second.sampleLayout.GetNumElements();
|
||||
|
@ -333,7 +336,7 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
|
|||
RuntimeError("Input %ls: Expected at least one element.", m_inputNodes[i]->GetName().c_str());
|
||||
if (buffer.m_colIndices[0] != 0)
|
||||
RuntimeError("Input %ls: First element of column indices must be 0", m_inputNodes[i]->GetName().c_str());
|
||||
if (buffer.m_colIndices[buffer.m_colIndices.size()-1] != buffer.m_indices.size())
|
||||
if (buffer.m_colIndices[buffer.m_colIndices.size() - 1] != buffer.m_indices.size())
|
||||
RuntimeError("Input %ls: Last element of column indices must be equal to the size of indices (%ld), but was %d", m_inputNodes[i]->GetName().c_str(), buffer.m_indices.size(), buffer.m_colIndices[buffer.m_colIndices.size() - 1]);
|
||||
}
|
||||
|
||||
|
@ -341,7 +344,7 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
|
|||
assert(numCols >= 1);
|
||||
input.second.pMBLayout->Init(1, numCols);
|
||||
input.second.pMBLayout->AddSequence(0, 0, 0, numCols);
|
||||
|
||||
|
||||
if (type == MatrixType::DENSE)
|
||||
{
|
||||
matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);
|
||||
|
@ -357,14 +360,14 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
|
|||
}
|
||||
|
||||
ComputationNetwork::BumpEvalTimeStamp(m_inputNodes);
|
||||
|
||||
|
||||
for (size_t i = 0; i < m_outputNodes.size(); ++i)
|
||||
{
|
||||
auto node = m_outputNodes[i];
|
||||
m_net->ForwardProp(node);
|
||||
shared_ptr<Matrix<ElemType>> outputMatrix = dynamic_pointer_cast<Matrix<ElemType>>(node->ValuePtr());
|
||||
auto pMBLayout = node->GetMBLayout();
|
||||
if (!pMBLayout)
|
||||
if (!pMBLayout)
|
||||
{
|
||||
pMBLayout = make_shared<MBLayout>();
|
||||
pMBLayout->InitAsFrameMode(1); // treat this as if we have one single sample
|
||||
|
@ -376,8 +379,8 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
|
|||
RuntimeError("Only 1 output sequence supported by this API");
|
||||
}
|
||||
|
||||
std::vector<ElemType>& vec = outputs[i].m_buffer;
|
||||
|
||||
ValueContainer<ElemType>& vec = outputs[i].m_buffer;
|
||||
|
||||
size_t numElements = outputMatrix->GetNumElements();
|
||||
|
||||
if (vec.capacity() < numElements)
|
||||
|
@ -392,6 +395,18 @@ void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Val
|
|||
}
|
||||
}
|
||||
|
||||
// ForwardPass overload for std::vector-backed buffers (Values).
// Forwards both arguments unchanged to ForwardPassT, which holds the implementation.
template<typename ElemType>
|
||||
void CNTKEvalExtended<ElemType>::ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& outputs)
|
||||
{
|
||||
ForwardPassT(inputs, outputs);
|
||||
}
|
||||
|
||||
// ForwardPass overload for buffers that reference preallocated arrays (ValueRefs).
// Forwards both arguments unchanged to ForwardPassT, which holds the implementation.
template<typename ElemType>
|
||||
void CNTKEvalExtended<ElemType>::ForwardPass(const ValueRefs<ElemType>& inputs, ValueRefs<ElemType>& outputs)
|
||||
{
|
||||
ForwardPassT(inputs, outputs);
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
void CNTKEvalExtended<ElemType>::Destroy()
|
||||
{
|
||||
|
|
|
@ -100,6 +100,8 @@ public:
|
|||
|
||||
virtual void ForwardPass(const Values<ElemType>& inputs, Values<ElemType>& output) override;
|
||||
|
||||
virtual void ForwardPass(const ValueRefs<ElemType>& inputs, ValueRefs<ElemType>& output) override;
|
||||
|
||||
virtual void Destroy() override;
|
||||
|
||||
virtual void CreateNetwork(const std::string& networkDescription) override
|
||||
|
@ -118,5 +120,9 @@ private:
|
|||
std::vector<ComputationNodeBasePtr> m_inputNodes;
|
||||
StreamMinibatchInputs m_inputMatrices;
|
||||
bool m_started;
|
||||
|
||||
// Implementation behind the public ForwardPass overloads, templated on the
// container type (ValueContainer) used by the value buffers.
template<template<typename> class ValueContainer>
|
||||
void ForwardPassT(const std::vector < ValueBuffer<ValueContainer, ElemType> >& inputs,
|
||||
std::vector < ValueBuffer<ValueContainer, ElemType> >& outputs);
|
||||
};
|
||||
} } }
|
||||
|
|
|
@ -89,8 +89,7 @@ BOOST_AUTO_TEST_CASE(EvalConstantPlusTest)
|
|||
Values<float> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
|
||||
|
||||
// Allocate the input values layer (empty)
|
||||
|
||||
Values<float> inputBuffer;
|
||||
Values<float> inputBuffer(0);
|
||||
|
||||
// We can call the evaluate method and get back the results...
|
||||
eval->ForwardPass(inputBuffer, outputBuffer);
|
||||
|
@ -161,10 +160,10 @@ BOOST_AUTO_TEST_CASE(EvalScalarTimesDualOutputTest)
|
|||
eval = SetupNetworkAndGetLayouts(modelDefinition, inputLayouts, outputLayouts);
|
||||
|
||||
// Allocate the output values layer
|
||||
std::vector<ValueBuffer<float>> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
|
||||
auto outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
|
||||
|
||||
// Allocate the input values layer
|
||||
std::vector<ValueBuffer<float>> inputBuffer(1);
|
||||
Values<float> inputBuffer(1);
|
||||
inputBuffer[0].m_buffer = { 2 };
|
||||
|
||||
// We can call the evaluate method and get back the results...
|
||||
|
@ -197,14 +196,14 @@ BOOST_AUTO_TEST_CASE(EvalDenseTimesTest)
|
|||
eval = SetupNetworkAndGetLayouts(modelDefinition, inputLayouts, outputLayouts);
|
||||
|
||||
// Allocate the output values layer
|
||||
std::vector<ValueBuffer<float>> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
|
||||
Values<float> outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
|
||||
|
||||
// Number of inputs must adhere to the schema
|
||||
std::vector<ValueBuffer<float>> inputBuffer1(0);
|
||||
Values<float> inputBuffer1(0);
|
||||
BOOST_REQUIRE_THROW(eval->ForwardPass(inputBuffer1, outputBuffer), std::exception); // Not enough inputs
|
||||
|
||||
// Number of elements in the input must adhere to the schema
|
||||
std::vector<ValueBuffer<float>> inputBuffer(1);
|
||||
Values<float> inputBuffer(1);
|
||||
inputBuffer[0].m_buffer = { 1, 2, 3 };
|
||||
BOOST_REQUIRE_THROW(eval->ForwardPass(inputBuffer, outputBuffer), std::exception); // Not enough elements in the sample
|
||||
|
||||
|
@ -216,6 +215,17 @@ BOOST_AUTO_TEST_CASE(EvalDenseTimesTest)
|
|||
auto buf = outputBuffer[0].m_buffer;
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(buf.begin(), buf.end(), expected.begin(), expected.end());
|
||||
|
||||
// Do the same via ValueRefs
|
||||
ValueRefs<float> inputRefs(1);
|
||||
inputRefs[0].m_buffer.InitFrom(inputBuffer[0].m_buffer);
|
||||
inputRefs[0].m_colIndices.InitFrom(inputBuffer[0].m_colIndices);
|
||||
inputRefs[0].m_indices.InitFrom(inputBuffer[0].m_indices);
|
||||
ValueRefs<float> outputRefs(1);
|
||||
std::vector<float> output(1);
|
||||
outputRefs[0].m_buffer.InitFrom(output);
|
||||
eval->ForwardPass(inputRefs, outputRefs);
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), expected.begin(), expected.end());
|
||||
|
||||
eval->Destroy();
|
||||
}
|
||||
|
||||
|
@ -238,10 +248,10 @@ BOOST_AUTO_TEST_CASE(EvalSparseTimesTest)
|
|||
eval = SetupNetworkAndGetLayouts(modelDefinition, inputLayouts, outputLayouts);
|
||||
|
||||
// Allocate the output values layer
|
||||
std::vector<ValueBuffer<float>> outputBuffer = outputLayouts.CreateBuffers<float>({ 3 });
|
||||
Values<float> outputBuffer = outputLayouts.CreateBuffers<float>({ 3 });
|
||||
|
||||
// Allocate the input values layer
|
||||
std::vector<ValueBuffer<float>> inputBuffer(1);
|
||||
Values<float> inputBuffer(1);
|
||||
inputBuffer[0].m_buffer = {1, 2, 3, 5, 6};
|
||||
inputBuffer[0].m_indices = {0, 2, 2, 1, 2};
|
||||
|
||||
|
@ -267,6 +277,17 @@ BOOST_AUTO_TEST_CASE(EvalSparseTimesTest)
|
|||
auto buf = outputBuffer[0].m_buffer;
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(buf.begin(), buf.end(), expected.begin(), expected.end());
|
||||
|
||||
// Do the same via ValueRefs
|
||||
ValueRefs<float> inputRefs(1);
|
||||
inputRefs[0].m_buffer.InitFrom(inputBuffer[0].m_buffer);
|
||||
inputRefs[0].m_colIndices.InitFrom(inputBuffer[0].m_colIndices);
|
||||
inputRefs[0].m_indices.InitFrom(inputBuffer[0].m_indices);
|
||||
ValueRefs<float> outputRefs(1);
|
||||
std::vector<float> output(3);
|
||||
outputRefs[0].m_buffer.InitFrom(output);
|
||||
eval->ForwardPass(inputRefs, outputRefs);
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), expected.begin(), expected.end());
|
||||
|
||||
outputBuffer = outputLayouts.CreateBuffers<float>({ 1 });
|
||||
BOOST_REQUIRE_THROW(eval->ForwardPass(inputBuffer, outputBuffer), std::exception); // Not enough capacity in output.
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче