This commit is contained in:
Eldar Akchurin 2016-02-05 15:08:17 +01:00
Родитель 55da016d5a
Коммит 1edf828f54
15 изменённых файлов: 209 добавлений и 166 удалений

Просмотреть файл

@ -11,6 +11,7 @@
#ifndef UNREFERENCED_PARAMETER
#define UNREFERENCED_PARAMETER(P) (P)
#endif
#include "ImageSequence.h"
namespace Microsoft { namespace MSR { namespace CNTK {
@ -79,6 +80,68 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
CreateSequenceDescriptions(configHelper.GetMapPath(), labelDimension);
}
// A chunk that lazily loads a single image from disk on demand.
// In this deserializer each sequence forms its own chunk, so the chunk holds
// exactly one ImageSequenceDescription and can only serve that sequence id.
class ImageDataDeserializer::ImageChunk : public Chunk, public std::enable_shared_from_this<ImageChunk>
{
// Description of the single sequence (image path, class id, etc.) this chunk serves.
ImageSequenceDescription m_description;
// Owning deserializer; used for the element type and the label generator.
ImageDataDeserializer& m_parent;
public:
// Copies the description into the chunk so the chunk is self-contained.
ImageChunk(ImageSequenceDescription& description, ImageDataDeserializer& parent)
: m_description(description), m_parent(parent)
{
}
// Loads the image, converts it to the deserializer's element type
// (float/double), and returns { dense image data, sparse class label }.
// sequenceId must be the id of the single sequence owned by this chunk.
virtual std::vector<SequenceDataPtr> GetSequence(const size_t& sequenceId) override
{
assert(sequenceId == m_description.m_id);
UNREFERENCED_PARAMETER(sequenceId);
const auto& imageSequence = m_description;
auto image = std::make_shared<ImageSequenceData>();
// NOTE(review): cv::imread already returns an rvalue, so std::move is
// redundant here (harmless but unnecessary).
image->m_image = std::move(cv::imread(imageSequence.m_path, cv::IMREAD_COLOR));
auto& cvImage = image->m_image;
if (!cvImage.data)
{
RuntimeError("Cannot open file '%s'", imageSequence.m_path.c_str());
}
// Convert element type.
int dataType = m_parent.m_featureElementType == ElementType::tfloat ? CV_32F : CV_64F;
if (cvImage.type() != CV_MAKETYPE(dataType, cvImage.channels()))
{
cvImage.convertTo(cvImage, dataType);
}
// Downstream consumers read m_data as one contiguous buffer, so force a
// contiguous copy if OpenCV produced a strided matrix.
if (!cvImage.isContinuous())
{
cvImage = cvImage.clone();
}
assert(cvImage.isContinuous());
// m_data is a non-owning pointer into m_image; the cv::Mat member keeps
// the pixel buffer alive for the lifetime of the sequence data.
image->m_data = image->m_image.data;
ImageDimensions dimensions(cvImage.cols, cvImage.rows, cvImage.channels());
image->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
image->m_numberOfSamples = 1;
// Pin the chunk via shared_from_this so it outlives the returned sequence
// data (which points into buffers the chunk/description own).
image->m_chunk = shared_from_this();
SparseSequenceDataPtr label = std::make_shared<SparseSequenceData>();
label->m_chunk = shared_from_this();
m_parent.m_labelGenerator->CreateLabelFor(imageSequence.m_classId, *label);
return std::vector<SequenceDataPtr> { image, label };
}
~ImageChunk()
{
}
};
// Creates the chunk for the given id. Chunk ids map one-to-one onto image
// sequences, so chunkId indexes directly into m_imageSequences.
// NOTE(review): no bounds check here — callers are expected to pass an id
// obtained from the sequence descriptions; confirm out-of-range ids cannot
// reach this point.
ChunkPtr ImageDataDeserializer::GetChunk(size_t chunkId)
{
// Taken by value: the ImageChunk constructor copies the description, so the
// chunk does not depend on this local.
auto sequenceDescription = m_imageSequences[chunkId];
return std::make_shared<ImageChunk>(sequenceDescription, *this);
}
void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size_t labelDimension)
{
UNREFERENCED_PARAMETER(labelDimension);
@ -127,72 +190,6 @@ std::vector<StreamDescriptionPtr> ImageDataDeserializer::GetStreamDescriptions()
return m_streams;
}
// Loads the requested images (and their labels) by sequence id, in the order
// the ids were provided. The returned non-owned pointers reference the
// m_currentImages/m_labels member buffers, so the result is only valid until
// the next call to this method, and the method is not reentrant.
std::vector<std::vector<SequenceDataPtr>> ImageDataDeserializer::GetSequencesById(const std::vector<size_t>& ids)
{
if (ids.empty())
{
RuntimeError("Number of requested sequences cannot be zero.");
}
// Member buffers keep the pixel/label data alive past this function's scope.
m_currentImages.resize(ids.size());
m_labels.resize(ids.size());
std::vector<std::vector<SequenceDataPtr>> result;
result.resize(ids.size());
// Images are decoded in parallel; each iteration writes only its own slot i.
// NOTE(review): RuntimeError presumably throws — an exception escaping an
// OpenMP parallel region is undefined behavior; confirm error handling here.
#pragma omp parallel for ordered schedule(dynamic)
for (int i = 0; i < ids.size(); ++i)
{
if (ids[i] >= m_imageSequences.size())
{
RuntimeError("Invalid sequence id is provided '%d', expected range [0..%d].",
static_cast<int>(ids[i]),
static_cast<int>(m_imageSequences.size()) - 1);
}
const auto& imageSequence = m_imageSequences[ids[i]];
// Construct image
m_currentImages[i] = std::move(cv::imread(imageSequence.m_path, cv::IMREAD_COLOR));
cv::Mat& cvImage = m_currentImages[i];
if (!cvImage.data)
{
RuntimeError("Cannot open file '%s'", imageSequence.m_path.c_str());
}
// Convert element type.
// TODO: We should allow all native CV element types to be able to match the behavior of the old reader.
int dataType = m_featureElementType == ElementType::tfloat ? CV_32F : CV_64F;
if (cvImage.type() != CV_MAKETYPE(dataType, cvImage.channels()))
{
cvImage.convertTo(cvImage, dataType);
}
// Consumers read m_data as a single contiguous buffer.
if (!cvImage.isContinuous())
{
cvImage = cvImage.clone();
}
assert(cvImage.isContinuous());
ImageDimensions dimensions(cvImage.cols, cvImage.rows, cvImage.channels());
auto image = std::make_shared<DenseSequenceData>();
// Non-owning pointer into m_currentImages[i]; valid until the next call.
image->m_data = cvImage.data;
image->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
image->m_numberOfSamples = 1;
// Label buffers are reused across calls; allocate lazily on first use.
if (m_labels[i] == nullptr)
{
m_labels[i] = std::make_shared<SparseSequenceData>();
}
m_labelGenerator->CreateLabelFor(imageSequence.m_classId, *m_labels[i]);
result[i] = std::move(std::vector<SequenceDataPtr>{image, m_labels[i]});
}
return result;
}
void ImageDataDeserializer::FillSequenceDescriptions(SequenceDescriptions& timeline) const
{
timeline.resize(m_imageSequences.size());

Просмотреть файл

@ -24,7 +24,7 @@ public:
std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override;
// Get sequences by specified ids. Order of returned sequences corresponds to the order of provided ids.
std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& ids) override;
virtual ChunkPtr GetChunk(size_t chunkId) override;
protected:
void FillSequenceDescriptions(SequenceDescriptions& timeline) const override;
@ -40,6 +40,8 @@ private:
size_t m_classId;
};
class ImageChunk;
// A helper class for generation of type specific labels (currently float/double only).
class LabelGenerator;
typedef std::shared_ptr<LabelGenerator> LabelGeneratorPtr;
@ -48,12 +50,6 @@ private:
// Sequence descriptions for all input data.
std::vector<ImageSequenceDescription> m_imageSequences;
// Buffer to store label data.
std::vector<SparseSequenceDataPtr> m_labels;
// Buffer to store feature data.
std::vector<cv::Mat> m_currentImages;
// Element type of the feature/label stream (currently float/double only).
ElementType m_featureElementType;
};

Просмотреть файл

@ -113,6 +113,7 @@
<ClInclude Include="ImageConfigHelper.h" />
<ClInclude Include="ImageDataDeserializer.h" />
<ClInclude Include="ImageReader.h" />
<ClInclude Include="ImageSequence.h" />
<ClInclude Include="ImageTransformers.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
@ -150,4 +151,4 @@
<Target Name="CheckDependencies">
<Warning Condition="!$(HasOpenCV)" Text="ImageReader requires the OpenCV library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#opencv for installation instructions." />
</Target>
</Project>
</Project>

Просмотреть файл

@ -43,6 +43,7 @@
<ClInclude Include="ImageDataDeserializer.h" />
<ClInclude Include="ImageReader.h" />
<ClInclude Include="ImageConfigHelper.h" />
<ClInclude Include="ImageSequence.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="Common">

Просмотреть файл

@ -0,0 +1,17 @@
#pragma once
#include <opencv2/core/mat.hpp>
#include "DataDeserializer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Dense sequence data backed by a cv::Mat. Used by the OpenCV-based
// transformers and the image deserializer; consumers access it only through
// the DenseSequenceData interface (m_data points into m_image's pixel buffer).
struct ImageSequenceData : DenseSequenceData
{
// Owns the pixel buffer that the inherited m_data pointer references.
cv::Mat m_image;
// When the transformer did not copy the data, this keeps the original
// (upstream) sequence alive so borrowed buffers remain valid.
SequenceDataPtr m_original;
};
}}}

Просмотреть файл

@ -14,6 +14,7 @@
#include "ImageConfigHelper.h"
#include "StringUtil.h"
#include "ElementTypeUtils.h"
#include "ImageSequence.h"
namespace Microsoft { namespace MSR { namespace CNTK
{
@ -34,10 +35,11 @@ void ImageTransformerBase::Initialize(TransformerPtr next,
}
SequenceDataPtr
ImageTransformerBase::Apply(const DenseSequenceData &inputSequence,
const StreamDescription &inputStream, cv::Mat &buffer,
ImageTransformerBase::Apply(SequenceDataPtr sequence,
const StreamDescription &inputStream,
const StreamDescription & /*outputStream*/)
{
auto inputSequence = reinterpret_cast<const DenseSequenceData&>(*sequence.get());
ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC);
int columns = static_cast<int>(dimensions.m_width);
int rows = static_cast<int>(dimensions.m_height);
@ -57,20 +59,25 @@ ImageTransformerBase::Apply(const DenseSequenceData &inputSequence,
RuntimeError("Unsupported type");
}
auto result = std::make_shared<ImageSequenceData>();
int type = CV_MAKETYPE(typeId, channels);
buffer = cv::Mat(rows, columns, type, inputSequence.m_data);
cv::Mat buffer = cv::Mat(rows, columns, type, inputSequence.m_data);
this->Apply(buffer);
if (!buffer.isContinuous())
{
buffer = buffer.clone();
}
else
{
result->m_original = sequence;
}
assert(buffer.isContinuous());
result->m_image = buffer;
result->m_data = buffer.ptr();
result->m_numberOfSamples = inputSequence.m_numberOfSamples;
auto result = std::make_shared<DenseSequenceData>();
ImageDimensions outputDimensions(buffer.cols, buffer.rows, buffer.channels());
result->m_sampleLayout = std::make_shared<TensorShape>(outputDimensions.AsTensorShape(HWC));
result->m_numberOfSamples = inputSequence.m_numberOfSamples;
result->m_data = buffer.ptr();
return result;
}
@ -367,7 +374,7 @@ void MeanTransformer::Apply(cv::Mat &mat)
void TransposeTransformer::Initialize(TransformerPtr next,
const ConfigParameters &readerConfig)
{
Base::Initialize(next, readerConfig);
TransformerBase::Initialize(next, readerConfig);
// Currently we only support a single stream.
ImageConfigHelper config(readerConfig);
@ -392,39 +399,46 @@ void TransposeTransformer::Initialize(TransformerPtr next,
}
SequenceDataPtr
TransposeTransformer::Apply(const DenseSequenceData &inputSequence,
TransposeTransformer::Apply(SequenceDataPtr inputSequence,
const StreamDescription &inputStream,
vector<char> &buffer,
const StreamDescription &outputStream)
{
if (inputStream.m_elementType == ElementType::tdouble)
{
return TypedApply<double>(inputSequence, inputStream, buffer, outputStream);
return TypedApply<double>(inputSequence, inputStream, outputStream);
}
if (inputStream.m_elementType == ElementType::tfloat)
{
return TypedApply<float>(inputSequence, inputStream, buffer, outputStream);
return TypedApply<float>(inputSequence, inputStream, outputStream);
}
RuntimeError("Unsupported type");
}
// Dense sequence that owns its storage: the inherited m_data pointer is set
// to point into m_buffer, so the sequence stays valid on its own instead of
// borrowing a buffer from the transformer.
struct OwnedDenseSequence : DenseSequenceData
{
std::vector<char> m_buffer;
};
template <class TElement>
SequenceDataPtr
TransposeTransformer::TypedApply(const DenseSequenceData &inputSequence,
TransposeTransformer::TypedApply(SequenceDataPtr sequence,
const StreamDescription &inputStream,
vector<char> &buffer,
const StreamDescription &outputStream)
{
assert(inputStream.m_storageType == StorageType::dense);
auto inputSequence = reinterpret_cast<DenseSequenceData&>(*sequence.get());
assert(inputSequence.m_numberOfSamples == 1);
assert(inputStream.m_sampleLayout->GetNumElements() ==
outputStream.m_sampleLayout->GetNumElements());
size_t count = inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(inputStream.m_elementType);
buffer.resize(count);
TElement* typedBuffer = reinterpret_cast<TElement*>(&buffer[0]);
auto result = std::make_shared<OwnedDenseSequence>();
result->m_buffer.resize(count);
TElement* typedBuffer = reinterpret_cast<TElement*>(&result->m_buffer[0]);
ImageDimensions dimensions(*inputStream.m_sampleLayout, ImageLayoutKind::HWC);
size_t rowCount = dimensions.m_height * dimensions.m_width;
@ -441,9 +455,8 @@ TransposeTransformer::TypedApply(const DenseSequenceData &inputSequence,
}
}
auto result = std::make_shared<DenseSequenceData>();
result->m_sampleLayout = outputStream.m_sampleLayout;
result->m_data = &buffer[0];
result->m_data = &result->m_buffer[0];
result->m_numberOfSamples = inputSequence.m_numberOfSamples;
return result;
}

Просмотреть файл

@ -20,7 +20,7 @@ class ConfigParameters;
// Base class for image transformations based on OpenCV
// that helps to wrap the sequences into OpenCV::Mat class.
class ImageTransformerBase : public TransformerBase<cv::Mat>
class ImageTransformerBase : public TransformerBase
{
public:
// Initializes the transformer.
@ -44,13 +44,13 @@ protected:
return m_seed;
}
using Base = TransformerBase<cv::Mat>;
using Base = TransformerBase;
using UniRealT = std::uniform_real_distribution<double>;
using UniIntT = std::uniform_int_distribution<int>;
// Applies transformation to the sequence.
SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
const StreamDescription &inputStream, cv::Mat &buffer,
SequenceDataPtr Apply(SequenceDataPtr inputSequence,
const StreamDescription &inputStream,
const StreamDescription &outputStream) override;
// The only function that should be redefined by the inherited classes.
@ -139,7 +139,7 @@ private:
};
// Transpose transformation from HWC to CHW.
class TransposeTransformer : public TransformerBase<vector<char>>
class TransposeTransformer : public TransformerBase
{
public:
virtual void Initialize(TransformerPtr next,
@ -156,18 +156,14 @@ protected:
return m_outputStreams;
}
SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
SequenceDataPtr Apply(SequenceDataPtr inputSequence,
const StreamDescription &inputStream,
vector<char> &buffer,
const StreamDescription &outputStream) override;
private:
using Base = TransformerBase<vector<char>>;
template <class TElement>
SequenceDataPtr TypedApply(const DenseSequenceData &inputSequence,
SequenceDataPtr TypedApply(SequenceDataPtr inputSequence,
const StreamDescription &inputStream,
vector<char> &buffer,
const StreamDescription &outputStream);
std::vector<StreamDescriptionPtr> m_outputStreams;

Просмотреть файл

@ -296,8 +296,6 @@ void BlockRandomizer::Initialize(TransformerPtr next, const ConfigParameters& re
void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
{
m_deserializer->StartEpoch(config);
m_workerRank = config.m_workerRank;
m_numberOfWorkers = config.m_numberOfWorkers;
@ -319,10 +317,10 @@ void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
RandomizeForGlobalSamplePosition(timeframe);
};
bool BlockRandomizer::GetNextSequenceIds(size_t sampleCount, std::vector<size_t>& originalIds)
bool BlockRandomizer::GetNextSequenceIds(size_t sampleCount, SequenceDescriptions& sequenceDescriptions)
{
assert(m_frameMode); // TODO !m_frameMode not implemented yet
assert(originalIds.size() == 0);
assert(sequenceDescriptions.size() == 0);
assert(sampleCount < m_numSamples);
if (m_samplePositionInEpoch < m_epochSize)
@ -332,7 +330,7 @@ bool BlockRandomizer::GetNextSequenceIds(size_t sampleCount, std::vector<size_t>
assert(m_numberOfWorkers == 1); // TODO needs implementation
while ((m_samplePositionInEpoch < m_epochSize) &&
(originalIds.size() < sampleCount))
(sequenceDescriptions.size() < sampleCount))
{
RandomizeIfNewSweepIsEntered();
@ -340,7 +338,7 @@ bool BlockRandomizer::GetNextSequenceIds(size_t sampleCount, std::vector<size_t>
if ((seqDesc.m_chunkId % m_numberOfWorkers) == m_workerRank)
{
// Got one, collect it
originalIds.push_back(seqDesc.m_id);
sequenceDescriptions.push_back(m_deserializer->GetSequenceDescriptions()[seqDesc.m_id]);
}
m_samplePositionInEpoch += seqDesc.m_numberOfSamples;
@ -362,7 +360,7 @@ bool BlockRandomizer::GetNextSequenceIds(size_t sampleCount, std::vector<size_t>
if (strideBegin <= i && i < strideEnd)
{
const auto& seqDesc = m_randomTimeline[m_sequencePositionInSweep];
originalIds.push_back(seqDesc.m_id);
sequenceDescriptions.push_back(m_deserializer->GetSequenceDescriptions()[seqDesc.m_id]);
}
}
assert(m_samplePositionInEpoch == nextSamplePositionInEpoch);
@ -376,14 +374,13 @@ Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
{
assert(m_samplePositionInEpoch != SIZE_MAX); // SetEpochConfiguration() must be called first
std::vector<size_t> originalIds;
Sequences result;
assert(m_frameMode); // TODO sequence mode not implemented yet
result.m_endOfEpoch = GetNextSequenceIds(sampleCount, originalIds);
SequenceDescriptions sequenceDescriptions;
result.m_endOfEpoch = GetNextSequenceIds(sampleCount, sequenceDescriptions);
if (originalIds.size() == 0)
if (sequenceDescriptions.size() == 0)
{
return result;
}
@ -391,7 +388,15 @@ Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
// TODO implement require and release chunks from the data deserializer, but only for this worker
// (probably in GetNextSequenceIds())
result.m_data = m_deserializer->GetSequencesById(originalIds);
result.m_data.resize(sequenceDescriptions.size());
#pragma omp parallel for ordered schedule(dynamic)
for (int i = 0; i < sequenceDescriptions.size(); ++i)
{
ChunkPtr chunk = m_deserializer->GetChunk(sequenceDescriptions[i]->m_chunkId);
result.m_data[i] = chunk->GetSequence(sequenceDescriptions[i]->m_id);
}
return result;
};

Просмотреть файл

@ -99,6 +99,6 @@ private:
void RandomizeIfNewSweepIsEntered();
bool GetNextSequenceIds(size_t sampleCount, std::vector<size_t>& ids);
bool GetNextSequenceIds(size_t sampleCount, SequenceDescriptions& originalIds);
};
} } }

Просмотреть файл

@ -10,6 +10,25 @@
namespace Microsoft { namespace MSR { namespace CNTK {
struct SequenceDataBase;
typedef std::shared_ptr<SequenceDataBase> SequenceDataPtr;
// Interface for a unit of I/O: a chunk groups sequences that a deserializer
// loads together. Non-copyable; always held via ChunkPtr so returned sequence
// data can pin the chunk alive (see SequenceDataBase::m_chunk).
class Chunk
{
public:
// Gets the data of the sequence with the given id from this chunk.
virtual std::vector<SequenceDataPtr> GetSequence(const size_t& sequenceId) = 0;
virtual ~Chunk() {};
protected:
// Only derived classes may construct a Chunk.
Chunk() {}
private:
// Non-copyable: chunks are shared by pointer, never by value.
Chunk(const Chunk&) = delete;
Chunk& operator=(const Chunk&) = delete;
};
typedef std::shared_ptr<Chunk> ChunkPtr;
// Defines main properties of a sequence.
// Sequence descriptions are used by the randomizer to establish a global timeline for complete input.
// A sequence is defined as an ordered set of samples (size == 1 is used for sample training).
@ -31,7 +50,12 @@ typedef std::vector<const SequenceDescription*> SequenceDescriptions;
struct SequenceDataBase
{
SequenceDataBase() : m_data(nullptr) { }
virtual ~SequenceDataBase()
{
}
ChunkPtr m_chunk;
// A non-owned pointer. The actual size is provided for particular sequences,
// i.e. see DenseSequenceData, or SparseSequenceData.
void* m_data;
@ -79,25 +103,8 @@ public:
// Retrieves description of all sequences this data deserializer can produce.
virtual const SequenceDescriptions& GetSequenceDescriptions() const = 0;
// Sets epoch configuration.
virtual void StartEpoch(const EpochConfiguration& config) = 0;
// Gets sequences by id.
// The return value can be used until the next call to GetSequencesById.
// All non-owned pointers returned are valid till the next call to this method.
virtual std::vector<std::vector<SequenceDataPtr>> GetSequencesById(const std::vector<size_t>& ids) = 0;
// Requires the chunk. Each sequence is assigned to the IO chunk by the data deserializer.
// This information is communicated thru GetSequenceDescriptions method.
// The randomizer guarantees that it accesses sequences only from a limited number of chunks.
// When randomizer requires a sequence from a particular chunk it notifies about this the data deserializer,
// so that the data deserializer can load/cache sequences more efficiently (loading complete chunks in memory).
virtual void RequireChunk(size_t chunkIndex) = 0;
// Releases the chunk.
// When randomizer read all sequences from a particular chunk it notifies the data deserializer
// that the chunk can be freed.
virtual void ReleaseChunk(size_t chunkIndex) = 0;
// Gets a chunk.
virtual ChunkPtr GetChunk(size_t chunkId) = 0;
virtual ~DataDeserializer() {};
};

Просмотреть файл

@ -17,9 +17,6 @@ public:
DataDeserializerBase() : m_sequencesInitialized(false)
{}
// Sets configuration for the current epoch.
void StartEpoch(const EpochConfiguration& /*config*/) override {};
// Provides description of all sequences the deserializer can produce.
const SequenceDescriptions& GetSequenceDescriptions() const override
{
@ -31,14 +28,6 @@ public:
return m_sequences;
}
// To be called by the randomizer for prefetching the next chunk.
// By default IO read-ahead is not implemented.
void RequireChunk(size_t /*chunkIndex*/) override{};
// To be called by the randomizer for releasing a prefetched chunk.
// By default IO read-ahead is not implemented.
void ReleaseChunk(size_t /*chunkIndex*/) override{};
protected:
// Fills the timeline with sequence descriptions.
// Inherited classes should provide the complete Sequence descriptions for all input data.

Просмотреть файл

@ -35,7 +35,6 @@ void NoRandomizer::Initialize(TransformerPtr, const ConfigParameters&)
void NoRandomizer::StartEpoch(const EpochConfiguration& config)
{
m_deserializer->StartEpoch(config);
m_config = config;
if (m_config.m_totalEpochSizeInSamples == requestDataSize)
@ -62,24 +61,54 @@ Sequences NoRandomizer::GetNextSequences(size_t sampleCount)
size_t end = maxSampleCount * (m_config.m_workerRank + 1) / m_config.m_numberOfWorkers;
size_t subsetSize = end - start;
std::vector<size_t> originalIds;
originalIds.reserve(subsetSize);
std::vector<size_t> chunkIds;
SequenceDescriptions sequences;
sequences.reserve(subsetSize);
size_t previousChunk = std::numeric_limits<size_t>::max();
for (size_t i = start; i < end; ++i)
{
const auto& sequence = m_timeline[(m_sequencePosition + i) % m_timeline.size()];
assert(sequence->m_numberOfSamples == 1);
originalIds.push_back(sequence->m_id);
sequences.push_back(sequence);
if (previousChunk != sequence->m_chunkId)
{
chunkIds.push_back(sequence->m_chunkId);
previousChunk = sequence->m_chunkId;
}
}
m_samplePositionInEpoch += maxSampleCount;
m_sequencePosition = (m_sequencePosition + maxSampleCount) % m_timeline.size();
if (originalIds.size() == 0)
if (sequences.size() == 0)
{
return result;
}
result.m_data = m_deserializer->GetSequencesById(originalIds);
std::map<size_t, ChunkPtr> chunks;
for (size_t id : chunkIds)
{
auto chunk = m_chunks.find(id);
if (chunk == m_chunks.end())
{
chunks[id] = m_deserializer->GetChunk(id);
}
else
{
chunks[id] = chunk->second;
}
}
m_chunks.swap(chunks);
result.m_data.resize(sequences.size());
#pragma omp parallel for ordered schedule(dynamic)
for (int i = 0; i < sequences.size(); ++i)
{
result.m_data[i] = m_chunks[sequences[i]->m_chunkId]->GetSequence(sequences[i]->m_id);
}
return result;
}

Просмотреть файл

@ -6,7 +6,7 @@
#pragma once
#include <vector>
#include <set>
#include "Transformer.h"
#include "DataDeserializer.h"
@ -41,6 +41,8 @@ private:
EpochConfiguration m_config;
size_t m_samplePositionInEpoch;
size_t m_sequencePosition;
std::map<size_t, ChunkPtr> m_chunks;
};
}}}

Просмотреть файл

@ -12,8 +12,6 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// Currently supports only dense data format.
template <class TBufferElement>
class TransformerBase : public Transformer
{
public:
@ -48,7 +46,6 @@ public:
const auto &appliedStreamIds = GetAppliedStreamIds();
const auto &outputStreams = GetOutputStreams();
assert(m_inputStreams.size() == outputStreams.size());
m_buffer.resize(samples.m_data.size());
#pragma omp parallel for ordered schedule(dynamic)
for (int i = 0; i < samples.m_data.size(); ++i)
@ -56,15 +53,10 @@ public:
auto &sample = samples.m_data[i];
assert(sample.size() == m_inputStreams.size());
m_buffer[i].resize(appliedStreamIds.size());
for (int j = 0; j < appliedStreamIds.size(); ++j)
{
size_t id = appliedStreamIds[j];
assert(m_inputStreams[id]->m_storageType == StorageType::dense);
const DenseSequenceData &sequence =
reinterpret_cast<DenseSequenceData &>(*sample[id]);
sample[id] = Apply(sequence, *m_inputStreams[id], m_buffer[i][j],
*outputStreams[id]);
sample[id] = Apply(sample[id], *m_inputStreams[id], *outputStreams[id]);
}
}
@ -85,14 +77,12 @@ protected:
private:
// Applies transformation to the sequence.
virtual SequenceDataPtr Apply(const DenseSequenceData &inputSequence,
virtual SequenceDataPtr Apply(SequenceDataPtr inputSequence,
const StreamDescription &inputStream,
TBufferElement &buffer,
const StreamDescription &outputStream) = 0;
TransformerPtr m_next;
std::vector<StreamId> m_featureStreamIds;
std::vector<std::vector<TBufferElement>> m_buffer;
std::vector<StreamDescriptionPtr> m_inputStreams;
};

Просмотреть файл

@ -1,9 +1,9 @@
dataDir: ../Data
tags:
# running on every BVT job in 'I' (Image) leg:
- bvt-i (build_sku == 'gpu') and (device=='gpu') and (flavor=='release') and (os=='linux')
- bvt-i (build_sku == 'gpu') and (device=='gpu') and (flavor=='release')
# running every Nightly job in 'I' leg
- nightly-i (build_sku == 'gpu') and (device=='gpu') and (flavor=='release') and (os=='linux')
- nightly-i (build_sku == 'gpu') and (device=='gpu') and (flavor=='release')
testCases:
CNTK Run must be completed: